VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 19285

Last change on this file since 19285 was 18579, checked in by vboxsync, 16 years ago

IPRT: Solaris 10 compilation

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using the intrinsics of _MSC_VER 1400 or later.
39 * Otherwise defined as 0.
40 */
41
42/* Solaris 10 header ugliness */
43#ifdef u
44#undef u
45#endif
46
47#ifdef _MSC_VER
48# if _MSC_VER >= 1400
49# define RT_INLINE_ASM_USES_INTRIN 1
50# include <intrin.h>
51 /* Emit the intrinsics at all optimization levels. */
52# pragma intrinsic(_ReadWriteBarrier)
53# pragma intrinsic(__cpuid)
54# pragma intrinsic(_enable)
55# pragma intrinsic(_disable)
56# pragma intrinsic(__rdtsc)
57# pragma intrinsic(__readmsr)
58# pragma intrinsic(__writemsr)
59# pragma intrinsic(__outbyte)
60# pragma intrinsic(__outbytestring)
61# pragma intrinsic(__outword)
62# pragma intrinsic(__outwordstring)
63# pragma intrinsic(__outdword)
64# pragma intrinsic(__outdwordstring)
65# pragma intrinsic(__inbyte)
66# pragma intrinsic(__inbytestring)
67# pragma intrinsic(__inword)
68# pragma intrinsic(__inwordstring)
69# pragma intrinsic(__indword)
70# pragma intrinsic(__indwordstring)
71# pragma intrinsic(__invlpg)
72# pragma intrinsic(__stosd)
73# pragma intrinsic(__stosw)
74# pragma intrinsic(__stosb)
75# pragma intrinsic(__readcr0)
76# pragma intrinsic(__readcr2)
77# pragma intrinsic(__readcr3)
78# pragma intrinsic(__readcr4)
79# pragma intrinsic(__writecr0)
80# pragma intrinsic(__writecr3)
81# pragma intrinsic(__writecr4)
82# pragma intrinsic(__readdr)
83# pragma intrinsic(__writedr)
84# pragma intrinsic(_BitScanForward)
85# pragma intrinsic(_BitScanReverse)
86# pragma intrinsic(_bittest)
87# pragma intrinsic(_bittestandset)
88# pragma intrinsic(_bittestandreset)
89# pragma intrinsic(_bittestandcomplement)
90# pragma intrinsic(_byteswap_ushort)
91# pragma intrinsic(_byteswap_ulong)
92# pragma intrinsic(_interlockedbittestandset)
93# pragma intrinsic(_interlockedbittestandreset)
94# pragma intrinsic(_InterlockedAnd)
95# pragma intrinsic(_InterlockedOr)
96# pragma intrinsic(_InterlockedIncrement)
97# pragma intrinsic(_InterlockedDecrement)
98# pragma intrinsic(_InterlockedExchange)
99# pragma intrinsic(_InterlockedExchangeAdd)
100# pragma intrinsic(_InterlockedCompareExchange)
101# pragma intrinsic(_InterlockedCompareExchange64)
102# ifdef RT_ARCH_AMD64
103# pragma intrinsic(__stosq)
104# pragma intrinsic(__readcr8)
105# pragma intrinsic(__writecr8)
106# pragma intrinsic(_byteswap_uint64)
107# pragma intrinsic(_InterlockedExchange64)
108# endif
109# endif
110#endif
111#ifndef RT_INLINE_ASM_USES_INTRIN
112# define RT_INLINE_ASM_USES_INTRIN 0
113#endif
114
115
116
117/** @defgroup grp_asm ASM - Assembly Routines
118 * @ingroup grp_rt
119 *
 120 * @remarks The difference between ordered and unordered atomic operations is that
 121 * the former will complete outstanding reads and writes before continuing
 122 * while the latter makes no promises about the order. Ordered
 123 * operations don't, it seems, make any 100% promise with regard to whether
 124 * the operation will complete before any subsequent memory access.
 125 * (Please correct this if it is wrong.)
126 *
127 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
128 * are unordered (note the Uo).
129 *
130 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
131 * or even optimize assembler instructions away. For instance, in the following code
132 * the second rdmsr instruction is optimized away because gcc treats that instruction
133 * as deterministic:
134 *
135 * @code
136 * static inline uint64_t rdmsr_low(int idx)
137 * {
 138 * uint32_t low;
 139 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
 140 * }
141 * ...
142 * uint32_t msr1 = rdmsr_low(1);
143 * foo(msr1);
144 * msr1 = rdmsr_low(1);
145 * bar(msr1);
146 * @endcode
147 *
148 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
149 * use the result of the first call as input parameter for bar() as well. For rdmsr this
150 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
151 * machine status information in general.
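 *
 * As an illustrative sketch (the helper name below is made up for this example),
 * adding __volatile__ and the return makes gcc re-execute the instruction for
 * every call:
 *
 * @code
 * static inline uint64_t rdmsr_low_volatile(int idx)
 * {
 *     uint32_t low;
 *     __asm__ __volatile__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *     return low;
 * }
 * @endcode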
152 *
153 * @{
154 */
155
156/** @def RT_INLINE_ASM_EXTERNAL
157 * Defined as 1 if the compiler does not support inline assembly.
158 * The ASM* functions will then be implemented in an external .asm file.
159 *
160 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
161 * inline assembly in their AMD64 compiler.
162 */
163#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
164# define RT_INLINE_ASM_EXTERNAL 1
165#else
166# define RT_INLINE_ASM_EXTERNAL 0
167#endif
168
169/** @def RT_INLINE_ASM_GNU_STYLE
170 * Defined as 1 if the compiler understands GNU style inline assembly.
171 */
172#if defined(_MSC_VER)
173# define RT_INLINE_ASM_GNU_STYLE 0
174#else
175# define RT_INLINE_ASM_GNU_STYLE 1
176#endif
177
178
179/** @todo find a more proper place for this structure? */
180#pragma pack(1)
181/** IDTR */
182typedef struct RTIDTR
183{
184 /** Size of the IDT. */
185 uint16_t cbIdt;
186 /** Address of the IDT. */
187 uintptr_t pIdt;
188} RTIDTR, *PRTIDTR;
189#pragma pack()
190
191#pragma pack(1)
192/** GDTR */
193typedef struct RTGDTR
194{
195 /** Size of the GDT. */
196 uint16_t cbGdt;
197 /** Address of the GDT. */
198 uintptr_t pGdt;
199} RTGDTR, *PRTGDTR;
200#pragma pack()
201
202
203/** @def ASMReturnAddress
204 * Gets the return address of the current (or calling if you like) function or method.
205 */
206#ifdef _MSC_VER
207# ifdef __cplusplus
208extern "C"
209# endif
210void * _ReturnAddress(void);
211# pragma intrinsic(_ReturnAddress)
212# define ASMReturnAddress() _ReturnAddress()
213#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
214# define ASMReturnAddress() __builtin_return_address(0)
215#else
216# error "Unsupported compiler."
217#endif
218
219
220/**
221 * Gets the content of the IDTR CPU register.
222 * @param pIdtr Where to store the IDTR contents.
223 */
224#if RT_INLINE_ASM_EXTERNAL
225DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
226#else
227DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
228{
229# if RT_INLINE_ASM_GNU_STYLE
230 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
231# else
232 __asm
233 {
234# ifdef RT_ARCH_AMD64
235 mov rax, [pIdtr]
236 sidt [rax]
237# else
238 mov eax, [pIdtr]
239 sidt [eax]
240# endif
241 }
242# endif
243}
244#endif
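
/* Illustrative usage sketch: capturing the IDTR for inspection.  The RTIDTR
 * layout above puts the limit in cbIdt and the linear base address in pIdt.
 * @code
 *      RTIDTR Idtr;
 *      ASMGetIDTR(&Idtr);
 *      // Idtr.cbIdt holds the IDT limit, Idtr.pIdt the linear base address.
 * @endcode
 */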
245
246
247/**
248 * Sets the content of the IDTR CPU register.
249 * @param pIdtr Where to load the IDTR contents from
250 */
251#if RT_INLINE_ASM_EXTERNAL
252DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
253#else
254DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
255{
256# if RT_INLINE_ASM_GNU_STYLE
257 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
258# else
259 __asm
260 {
261# ifdef RT_ARCH_AMD64
262 mov rax, [pIdtr]
263 lidt [rax]
264# else
265 mov eax, [pIdtr]
266 lidt [eax]
267# endif
268 }
269# endif
270}
271#endif
272
273
274/**
275 * Gets the content of the GDTR CPU register.
276 * @param pGdtr Where to store the GDTR contents.
277 */
278#if RT_INLINE_ASM_EXTERNAL
279DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
280#else
281DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
282{
283# if RT_INLINE_ASM_GNU_STYLE
284 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
285# else
286 __asm
287 {
288# ifdef RT_ARCH_AMD64
289 mov rax, [pGdtr]
290 sgdt [rax]
291# else
292 mov eax, [pGdtr]
293 sgdt [eax]
294# endif
295 }
296# endif
297}
298#endif
299
300/**
301 * Get the cs register.
302 * @returns cs.
303 */
304#if RT_INLINE_ASM_EXTERNAL
305DECLASM(RTSEL) ASMGetCS(void);
306#else
307DECLINLINE(RTSEL) ASMGetCS(void)
308{
309 RTSEL SelCS;
310# if RT_INLINE_ASM_GNU_STYLE
311 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
312# else
313 __asm
314 {
315 mov ax, cs
316 mov [SelCS], ax
317 }
318# endif
319 return SelCS;
320}
321#endif
322
323
324/**
325 * Get the DS register.
326 * @returns DS.
327 */
328#if RT_INLINE_ASM_EXTERNAL
329DECLASM(RTSEL) ASMGetDS(void);
330#else
331DECLINLINE(RTSEL) ASMGetDS(void)
332{
333 RTSEL SelDS;
334# if RT_INLINE_ASM_GNU_STYLE
335 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
336# else
337 __asm
338 {
339 mov ax, ds
340 mov [SelDS], ax
341 }
342# endif
343 return SelDS;
344}
345#endif
346
347
348/**
349 * Get the ES register.
350 * @returns ES.
351 */
352#if RT_INLINE_ASM_EXTERNAL
353DECLASM(RTSEL) ASMGetES(void);
354#else
355DECLINLINE(RTSEL) ASMGetES(void)
356{
357 RTSEL SelES;
358# if RT_INLINE_ASM_GNU_STYLE
359 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
360# else
361 __asm
362 {
363 mov ax, es
364 mov [SelES], ax
365 }
366# endif
367 return SelES;
368}
369#endif
370
371
372/**
373 * Get the FS register.
374 * @returns FS.
375 */
376#if RT_INLINE_ASM_EXTERNAL
377DECLASM(RTSEL) ASMGetFS(void);
378#else
379DECLINLINE(RTSEL) ASMGetFS(void)
380{
381 RTSEL SelFS;
382# if RT_INLINE_ASM_GNU_STYLE
383 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
384# else
385 __asm
386 {
387 mov ax, fs
388 mov [SelFS], ax
389 }
390# endif
391 return SelFS;
392}
 393#endif
394
395
396/**
397 * Get the GS register.
398 * @returns GS.
399 */
400#if RT_INLINE_ASM_EXTERNAL
401DECLASM(RTSEL) ASMGetGS(void);
402#else
403DECLINLINE(RTSEL) ASMGetGS(void)
404{
405 RTSEL SelGS;
406# if RT_INLINE_ASM_GNU_STYLE
407 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
408# else
409 __asm
410 {
411 mov ax, gs
412 mov [SelGS], ax
413 }
414# endif
415 return SelGS;
416}
417#endif
418
419
420/**
421 * Get the SS register.
422 * @returns SS.
423 */
424#if RT_INLINE_ASM_EXTERNAL
425DECLASM(RTSEL) ASMGetSS(void);
426#else
427DECLINLINE(RTSEL) ASMGetSS(void)
428{
429 RTSEL SelSS;
430# if RT_INLINE_ASM_GNU_STYLE
431 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
432# else
433 __asm
434 {
435 mov ax, ss
436 mov [SelSS], ax
437 }
438# endif
439 return SelSS;
440}
441#endif
442
443
444/**
445 * Get the TR register.
446 * @returns TR.
447 */
448#if RT_INLINE_ASM_EXTERNAL
449DECLASM(RTSEL) ASMGetTR(void);
450#else
451DECLINLINE(RTSEL) ASMGetTR(void)
452{
453 RTSEL SelTR;
454# if RT_INLINE_ASM_GNU_STYLE
455 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
456# else
457 __asm
458 {
459 str ax
460 mov [SelTR], ax
461 }
462# endif
463 return SelTR;
464}
465#endif
466
467
468/**
469 * Get the [RE]FLAGS register.
470 * @returns [RE]FLAGS.
471 */
472#if RT_INLINE_ASM_EXTERNAL
473DECLASM(RTCCUINTREG) ASMGetFlags(void);
474#else
475DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
476{
477 RTCCUINTREG uFlags;
478# if RT_INLINE_ASM_GNU_STYLE
479# ifdef RT_ARCH_AMD64
480 __asm__ __volatile__("pushfq\n\t"
481 "popq %0\n\t"
482 : "=g" (uFlags));
483# else
484 __asm__ __volatile__("pushfl\n\t"
485 "popl %0\n\t"
486 : "=g" (uFlags));
487# endif
488# else
489 __asm
490 {
491# ifdef RT_ARCH_AMD64
492 pushfq
493 pop [uFlags]
494# else
495 pushfd
496 pop [uFlags]
497# endif
498 }
499# endif
500 return uFlags;
501}
502#endif
503
504
505/**
506 * Set the [RE]FLAGS register.
507 * @param uFlags The new [RE]FLAGS value.
508 */
509#if RT_INLINE_ASM_EXTERNAL
510DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
511#else
512DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
513{
514# if RT_INLINE_ASM_GNU_STYLE
515# ifdef RT_ARCH_AMD64
516 __asm__ __volatile__("pushq %0\n\t"
517 "popfq\n\t"
518 : : "g" (uFlags));
519# else
520 __asm__ __volatile__("pushl %0\n\t"
521 "popfl\n\t"
522 : : "g" (uFlags));
523# endif
524# else
525 __asm
526 {
527# ifdef RT_ARCH_AMD64
528 push [uFlags]
529 popfq
530# else
531 push [uFlags]
532 popfd
533# endif
534 }
535# endif
536}
537#endif
538
539
540/**
541 * Gets the content of the CPU timestamp counter register.
542 *
543 * @returns TSC.
544 */
545#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
546DECLASM(uint64_t) ASMReadTSC(void);
547#else
548DECLINLINE(uint64_t) ASMReadTSC(void)
549{
550 RTUINT64U u;
551# if RT_INLINE_ASM_GNU_STYLE
552 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
553# else
554# if RT_INLINE_ASM_USES_INTRIN
555 u.u = __rdtsc();
556# else
557 __asm
558 {
559 rdtsc
560 mov [u.s.Lo], eax
561 mov [u.s.Hi], edx
562 }
563# endif
564# endif
565 return u.u;
566}
567#endif
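
/* Illustrative usage sketch: measuring an elapsed interval in TSC ticks.  Note
 * that the TSC is per-CPU and not necessarily an invariant time source, so
 * treat the delta as a rough cycle count only.
 * @code
 *      uint64_t const uTscStart = ASMReadTSC();
 *      // ... the work being measured ...
 *      uint64_t const cTicks = ASMReadTSC() - uTscStart;
 * @endcode
 */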
568
569
570/**
571 * Performs the cpuid instruction returning all registers.
572 *
573 * @param uOperator CPUID operation (eax).
574 * @param pvEAX Where to store eax.
575 * @param pvEBX Where to store ebx.
576 * @param pvECX Where to store ecx.
577 * @param pvEDX Where to store edx.
578 * @remark We're using void pointers to ease the use of special bitfield structures and such.
579 */
580#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
581DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
582#else
583DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
584{
585# if RT_INLINE_ASM_GNU_STYLE
586# ifdef RT_ARCH_AMD64
587 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
588 __asm__ ("cpuid\n\t"
589 : "=a" (uRAX),
590 "=b" (uRBX),
591 "=c" (uRCX),
592 "=d" (uRDX)
593 : "0" (uOperator));
594 *(uint32_t *)pvEAX = (uint32_t)uRAX;
595 *(uint32_t *)pvEBX = (uint32_t)uRBX;
596 *(uint32_t *)pvECX = (uint32_t)uRCX;
597 *(uint32_t *)pvEDX = (uint32_t)uRDX;
598# else
599 __asm__ ("xchgl %%ebx, %1\n\t"
600 "cpuid\n\t"
601 "xchgl %%ebx, %1\n\t"
602 : "=a" (*(uint32_t *)pvEAX),
603 "=r" (*(uint32_t *)pvEBX),
604 "=c" (*(uint32_t *)pvECX),
605 "=d" (*(uint32_t *)pvEDX)
606 : "0" (uOperator));
607# endif
608
609# elif RT_INLINE_ASM_USES_INTRIN
610 int aInfo[4];
611 __cpuid(aInfo, uOperator);
612 *(uint32_t *)pvEAX = aInfo[0];
613 *(uint32_t *)pvEBX = aInfo[1];
614 *(uint32_t *)pvECX = aInfo[2];
615 *(uint32_t *)pvEDX = aInfo[3];
616
617# else
618 uint32_t uEAX;
619 uint32_t uEBX;
620 uint32_t uECX;
621 uint32_t uEDX;
622 __asm
623 {
624 push ebx
625 mov eax, [uOperator]
626 cpuid
627 mov [uEAX], eax
628 mov [uEBX], ebx
629 mov [uECX], ecx
630 mov [uEDX], edx
631 pop ebx
632 }
633 *(uint32_t *)pvEAX = uEAX;
634 *(uint32_t *)pvEBX = uEBX;
635 *(uint32_t *)pvECX = uECX;
636 *(uint32_t *)pvEDX = uEDX;
637# endif
638}
639#endif
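
/* Illustrative usage sketch: querying the vendor leaf (EAX=0) and feeding the
 * result to ASMIsIntelCpuEx(), which is declared further down in this header.
 * @code
 *      uint32_t uEAX, uEBX, uECX, uEDX;
 *      ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *      bool const fIntel = ASMIsIntelCpuEx(uEBX, uECX, uEDX);
 * @endcode
 */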
640
641
642/**
643 * Performs the cpuid instruction returning all registers.
 644 * Some sub-functions of CPUID take ECX as an additional input parameter (currently known for EAX=4).
645 *
646 * @param uOperator CPUID operation (eax).
647 * @param uIdxECX ecx index
648 * @param pvEAX Where to store eax.
649 * @param pvEBX Where to store ebx.
650 * @param pvECX Where to store ecx.
651 * @param pvEDX Where to store edx.
652 * @remark We're using void pointers to ease the use of special bitfield structures and such.
653 */
654#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
655DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
656#else
657DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
658{
659# if RT_INLINE_ASM_GNU_STYLE
660# ifdef RT_ARCH_AMD64
661 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
662 __asm__ ("cpuid\n\t"
663 : "=a" (uRAX),
664 "=b" (uRBX),
665 "=c" (uRCX),
666 "=d" (uRDX)
667 : "0" (uOperator),
668 "2" (uIdxECX));
669 *(uint32_t *)pvEAX = (uint32_t)uRAX;
670 *(uint32_t *)pvEBX = (uint32_t)uRBX;
671 *(uint32_t *)pvECX = (uint32_t)uRCX;
672 *(uint32_t *)pvEDX = (uint32_t)uRDX;
673# else
674 __asm__ ("xchgl %%ebx, %1\n\t"
675 "cpuid\n\t"
676 "xchgl %%ebx, %1\n\t"
677 : "=a" (*(uint32_t *)pvEAX),
678 "=r" (*(uint32_t *)pvEBX),
679 "=c" (*(uint32_t *)pvECX),
680 "=d" (*(uint32_t *)pvEDX)
681 : "0" (uOperator),
682 "2" (uIdxECX));
683# endif
684
685# elif RT_INLINE_ASM_USES_INTRIN
686 int aInfo[4];
 687 /** @todo need an intrinsic that takes the ECX index; as it stands uIdxECX is not passed on here. */
688 __cpuid(aInfo, uOperator);
689 *(uint32_t *)pvEAX = aInfo[0];
690 *(uint32_t *)pvEBX = aInfo[1];
691 *(uint32_t *)pvECX = aInfo[2];
692 *(uint32_t *)pvEDX = aInfo[3];
693
694# else
695 uint32_t uEAX;
696 uint32_t uEBX;
697 uint32_t uECX;
698 uint32_t uEDX;
699 __asm
700 {
701 push ebx
702 mov eax, [uOperator]
703 mov ecx, [uIdxECX]
704 cpuid
705 mov [uEAX], eax
706 mov [uEBX], ebx
707 mov [uECX], ecx
708 mov [uEDX], edx
709 pop ebx
710 }
711 *(uint32_t *)pvEAX = uEAX;
712 *(uint32_t *)pvEBX = uEBX;
713 *(uint32_t *)pvECX = uECX;
714 *(uint32_t *)pvEDX = uEDX;
715# endif
716}
717#endif
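
/* Illustrative usage sketch: walking the deterministic cache parameter leaf
 * (EAX=4), where ECX selects which cache to describe.  Per the CPUID
 * documentation a sub-leaf reporting cache type 0 in EAX[4:0] ends the list.
 * Mind the intrinsic path above, which cannot pass the ECX index along.
 * @code
 *      uint32_t uEAX, uEBX, uECX, uEDX;
 *      for (uint32_t iSubLeaf = 0; ; iSubLeaf++)
 *      {
 *          ASMCpuId_Idx_ECX(4, iSubLeaf, &uEAX, &uEBX, &uECX, &uEDX);
 *          if ((uEAX & 0x1f) == 0)
 *              break;
 *          // decode the cache level/size fields of uEAX, uEBX and uECX here...
 *      }
 * @endcode
 */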
718
719
720/**
721 * Performs the cpuid instruction returning ecx and edx.
722 *
723 * @param uOperator CPUID operation (eax).
724 * @param pvECX Where to store ecx.
725 * @param pvEDX Where to store edx.
726 * @remark We're using void pointers to ease the use of special bitfield structures and such.
727 */
728#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
729DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
730#else
731DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
732{
733 uint32_t uEBX;
734 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
735}
736#endif
737
738
739/**
740 * Performs the cpuid instruction returning edx.
741 *
742 * @param uOperator CPUID operation (eax).
743 * @returns EDX after cpuid operation.
744 */
745#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
746DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
747#else
748DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
749{
750 RTCCUINTREG xDX;
751# if RT_INLINE_ASM_GNU_STYLE
752# ifdef RT_ARCH_AMD64
753 RTCCUINTREG uSpill;
754 __asm__ ("cpuid"
755 : "=a" (uSpill),
756 "=d" (xDX)
757 : "0" (uOperator)
758 : "rbx", "rcx");
759# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
760 __asm__ ("push %%ebx\n\t"
761 "cpuid\n\t"
762 "pop %%ebx\n\t"
763 : "=a" (uOperator),
764 "=d" (xDX)
765 : "0" (uOperator)
766 : "ecx");
767# else
768 __asm__ ("cpuid"
769 : "=a" (uOperator),
770 "=d" (xDX)
771 : "0" (uOperator)
772 : "ebx", "ecx");
773# endif
774
775# elif RT_INLINE_ASM_USES_INTRIN
776 int aInfo[4];
777 __cpuid(aInfo, uOperator);
778 xDX = aInfo[3];
779
780# else
781 __asm
782 {
783 push ebx
784 mov eax, [uOperator]
785 cpuid
786 mov [xDX], edx
787 pop ebx
788 }
789# endif
790 return (uint32_t)xDX;
791}
792#endif
793
794
795/**
796 * Performs the cpuid instruction returning ecx.
797 *
798 * @param uOperator CPUID operation (eax).
799 * @returns ECX after cpuid operation.
800 */
801#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
802DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
803#else
804DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
805{
806 RTCCUINTREG xCX;
807# if RT_INLINE_ASM_GNU_STYLE
808# ifdef RT_ARCH_AMD64
809 RTCCUINTREG uSpill;
810 __asm__ ("cpuid"
811 : "=a" (uSpill),
812 "=c" (xCX)
813 : "0" (uOperator)
814 : "rbx", "rdx");
815# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
816 __asm__ ("push %%ebx\n\t"
817 "cpuid\n\t"
818 "pop %%ebx\n\t"
819 : "=a" (uOperator),
820 "=c" (xCX)
821 : "0" (uOperator)
822 : "edx");
823# else
824 __asm__ ("cpuid"
825 : "=a" (uOperator),
826 "=c" (xCX)
827 : "0" (uOperator)
828 : "ebx", "edx");
829
830# endif
831
832# elif RT_INLINE_ASM_USES_INTRIN
833 int aInfo[4];
834 __cpuid(aInfo, uOperator);
835 xCX = aInfo[2];
836
837# else
838 __asm
839 {
840 push ebx
841 mov eax, [uOperator]
842 cpuid
843 mov [xCX], ecx
844 pop ebx
845 }
846# endif
847 return (uint32_t)xCX;
848}
849#endif
850
851
852/**
853 * Checks if the current CPU supports CPUID.
854 *
855 * @returns true if CPUID is supported.
856 */
857DECLINLINE(bool) ASMHasCpuId(void)
858{
859#ifdef RT_ARCH_AMD64
860 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
861#else /* !RT_ARCH_AMD64 */
862 bool fRet = false;
863# if RT_INLINE_ASM_GNU_STYLE
864 uint32_t u1;
865 uint32_t u2;
866 __asm__ ("pushf\n\t"
867 "pop %1\n\t"
868 "mov %1, %2\n\t"
869 "xorl $0x200000, %1\n\t"
870 "push %1\n\t"
871 "popf\n\t"
872 "pushf\n\t"
873 "pop %1\n\t"
874 "cmpl %1, %2\n\t"
875 "setne %0\n\t"
876 "push %2\n\t"
877 "popf\n\t"
878 : "=m" (fRet), "=r" (u1), "=r" (u2));
879# else
880 __asm
881 {
882 pushfd
883 pop eax
884 mov ebx, eax
885 xor eax, 0200000h
886 push eax
887 popfd
888 pushfd
889 pop eax
890 cmp eax, ebx
891 setne fRet
892 push ebx
893 popfd
894 }
895# endif
896 return fRet;
897#endif /* !RT_ARCH_AMD64 */
898}
899
900
901/**
902 * Gets the APIC ID of the current CPU.
903 *
904 * @returns the APIC ID.
905 */
906#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
907DECLASM(uint8_t) ASMGetApicId(void);
908#else
909DECLINLINE(uint8_t) ASMGetApicId(void)
910{
911 RTCCUINTREG xBX;
912# if RT_INLINE_ASM_GNU_STYLE
913# ifdef RT_ARCH_AMD64
914 RTCCUINTREG uSpill;
915 __asm__ ("cpuid"
916 : "=a" (uSpill),
917 "=b" (xBX)
918 : "0" (1)
919 : "rcx", "rdx");
920# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
921 RTCCUINTREG uSpill;
922 __asm__ ("mov %%ebx,%1\n\t"
923 "cpuid\n\t"
924 "xchgl %%ebx,%1\n\t"
925 : "=a" (uSpill),
926 "=r" (xBX)
927 : "0" (1)
928 : "ecx", "edx");
929# else
930 RTCCUINTREG uSpill;
931 __asm__ ("cpuid"
932 : "=a" (uSpill),
933 "=b" (xBX)
934 : "0" (1)
935 : "ecx", "edx");
936# endif
937
938# elif RT_INLINE_ASM_USES_INTRIN
939 int aInfo[4];
940 __cpuid(aInfo, 1);
941 xBX = aInfo[1];
942
943# else
944 __asm
945 {
946 push ebx
947 mov eax, 1
948 cpuid
949 mov [xBX], ebx
950 pop ebx
951 }
952# endif
953 return (uint8_t)(xBX >> 24);
954}
955#endif
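
/* Illustrative usage sketch: only issue CPUID after checking that the CPU
 * supports it, which matters on old 32-bit processors.
 * @code
 *      if (ASMHasCpuId())
 *      {
 *          uint8_t const idApic = ASMGetApicId();
 *          // idApic is the initial APIC ID from CPUID(1), EBX bits 31:24.
 *      }
 * @endcode
 */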
956
957
958/**
 959 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
960 *
961 * @returns true/false.
962 * @param uEBX EBX return from ASMCpuId(0)
963 * @param uECX ECX return from ASMCpuId(0)
964 * @param uEDX EDX return from ASMCpuId(0)
965 */
966DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
967{
968 return uEBX == 0x756e6547
969 && uECX == 0x6c65746e
970 && uEDX == 0x49656e69;
971}
972
973
974/**
 975 * Tests if this is a genuine Intel CPU.
976 *
977 * @returns true/false.
978 */
979DECLINLINE(bool) ASMIsIntelCpu(void)
980{
981 uint32_t uEAX, uEBX, uECX, uEDX;
982 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
983 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
984}
985
986
987/**
988 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
989 *
990 * @returns Family.
991 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
992 */
993DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
994{
995 return ((uEAX >> 8) & 0xf) == 0xf
996 ? ((uEAX >> 20) & 0x7f) + 0xf
997 : ((uEAX >> 8) & 0xf);
998}
999
1000
1001/**
1002 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
1003 *
1004 * @returns Model.
1005 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1007 */
1008DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
1009{
1010 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1011 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1012 : ((uEAX >> 4) & 0xf);
1013}
1014
1015
1016/**
1017 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1018 *
1019 * @returns Model.
1020 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1022 */
1023DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1024{
1025 return ((uEAX >> 8) & 0xf) == 0xf
1026 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1027 : ((uEAX >> 4) & 0xf);
1028}
1029
1030
1031/**
1032 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1033 *
1034 * @returns Model.
1035 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1036 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1037 */
1038DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1039{
1040 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1041 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1042 : ((uEAX >> 4) & 0xf);
1043}
1044
1045
1046/**
1047 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1048 *
 1049 * @returns Stepping.
1050 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1051 */
1052DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1053{
1054 return uEAX & 0xf;
1055}
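
/* Illustrative usage sketch: decoding family, model and stepping from leaf 1.
 * For instance an EAX value of 0x000206a7 decodes to family 6, model 0x2a,
 * stepping 7.
 * @code
 *      uint32_t uEAX, uEBX, uECX, uEDX;
 *      ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *      uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 *      uint32_t const uModel    = ASMGetCpuModel(uEAX, ASMIsIntelCpu());
 *      uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 * @endcode
 */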
1056
1057
1058/**
1059 * Get cr0.
1060 * @returns cr0.
1061 */
1062#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1063DECLASM(RTCCUINTREG) ASMGetCR0(void);
1064#else
1065DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1066{
1067 RTCCUINTREG uCR0;
1068# if RT_INLINE_ASM_USES_INTRIN
1069 uCR0 = __readcr0();
1070
1071# elif RT_INLINE_ASM_GNU_STYLE
1072# ifdef RT_ARCH_AMD64
1073 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1074# else
1075 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1076# endif
1077# else
1078 __asm
1079 {
1080# ifdef RT_ARCH_AMD64
1081 mov rax, cr0
1082 mov [uCR0], rax
1083# else
1084 mov eax, cr0
1085 mov [uCR0], eax
1086# endif
1087 }
1088# endif
1089 return uCR0;
1090}
1091#endif
1092
1093
1094/**
1095 * Sets the CR0 register.
1096 * @param uCR0 The new CR0 value.
1097 */
1098#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1099DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1100#else
1101DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1102{
1103# if RT_INLINE_ASM_USES_INTRIN
1104 __writecr0(uCR0);
1105
1106# elif RT_INLINE_ASM_GNU_STYLE
1107# ifdef RT_ARCH_AMD64
1108 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1109# else
1110 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1111# endif
1112# else
1113 __asm
1114 {
1115# ifdef RT_ARCH_AMD64
1116 mov rax, [uCR0]
1117 mov cr0, rax
1118# else
1119 mov eax, [uCR0]
1120 mov cr0, eax
1121# endif
1122 }
1123# endif
1124}
1125#endif
1126
1127
1128/**
1129 * Get cr2.
1130 * @returns cr2.
1131 */
1132#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1133DECLASM(RTCCUINTREG) ASMGetCR2(void);
1134#else
1135DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1136{
1137 RTCCUINTREG uCR2;
1138# if RT_INLINE_ASM_USES_INTRIN
1139 uCR2 = __readcr2();
1140
1141# elif RT_INLINE_ASM_GNU_STYLE
1142# ifdef RT_ARCH_AMD64
1143 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1144# else
1145 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1146# endif
1147# else
1148 __asm
1149 {
1150# ifdef RT_ARCH_AMD64
1151 mov rax, cr2
1152 mov [uCR2], rax
1153# else
1154 mov eax, cr2
1155 mov [uCR2], eax
1156# endif
1157 }
1158# endif
1159 return uCR2;
1160}
1161#endif
1162
1163
1164/**
1165 * Sets the CR2 register.
 1166 * @param uCR2 The new CR2 value.
1167 */
1168#if RT_INLINE_ASM_EXTERNAL
1169DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1170#else
1171DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1172{
1173# if RT_INLINE_ASM_GNU_STYLE
1174# ifdef RT_ARCH_AMD64
1175 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1176# else
1177 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1178# endif
1179# else
1180 __asm
1181 {
1182# ifdef RT_ARCH_AMD64
1183 mov rax, [uCR2]
1184 mov cr2, rax
1185# else
1186 mov eax, [uCR2]
1187 mov cr2, eax
1188# endif
1189 }
1190# endif
1191}
1192#endif
1193
1194
1195/**
1196 * Get cr3.
1197 * @returns cr3.
1198 */
1199#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1200DECLASM(RTCCUINTREG) ASMGetCR3(void);
1201#else
1202DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1203{
1204 RTCCUINTREG uCR3;
1205# if RT_INLINE_ASM_USES_INTRIN
1206 uCR3 = __readcr3();
1207
1208# elif RT_INLINE_ASM_GNU_STYLE
1209# ifdef RT_ARCH_AMD64
1210 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1211# else
1212 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1213# endif
1214# else
1215 __asm
1216 {
1217# ifdef RT_ARCH_AMD64
1218 mov rax, cr3
1219 mov [uCR3], rax
1220# else
1221 mov eax, cr3
1222 mov [uCR3], eax
1223# endif
1224 }
1225# endif
1226 return uCR3;
1227}
1228#endif
1229
1230
1231/**
1232 * Sets the CR3 register.
1233 *
1234 * @param uCR3 New CR3 value.
1235 */
1236#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1237DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1238#else
1239DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1240{
1241# if RT_INLINE_ASM_USES_INTRIN
1242 __writecr3(uCR3);
1243
1244# elif RT_INLINE_ASM_GNU_STYLE
1245# ifdef RT_ARCH_AMD64
1246 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1247# else
1248 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1249# endif
1250# else
1251 __asm
1252 {
1253# ifdef RT_ARCH_AMD64
1254 mov rax, [uCR3]
1255 mov cr3, rax
1256# else
1257 mov eax, [uCR3]
1258 mov cr3, eax
1259# endif
1260 }
1261# endif
1262}
1263#endif
1264
1265
1266/**
1267 * Reloads the CR3 register.
1268 */
1269#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1270DECLASM(void) ASMReloadCR3(void);
1271#else
1272DECLINLINE(void) ASMReloadCR3(void)
1273{
1274# if RT_INLINE_ASM_USES_INTRIN
1275 __writecr3(__readcr3());
1276
1277# elif RT_INLINE_ASM_GNU_STYLE
1278 RTCCUINTREG u;
1279# ifdef RT_ARCH_AMD64
1280 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1281 "movq %0, %%cr3\n\t"
1282 : "=r" (u));
1283# else
1284 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1285 "movl %0, %%cr3\n\t"
1286 : "=r" (u));
1287# endif
1288# else
1289 __asm
1290 {
1291# ifdef RT_ARCH_AMD64
1292 mov rax, cr3
1293 mov cr3, rax
1294# else
1295 mov eax, cr3
1296 mov cr3, eax
1297# endif
1298 }
1299# endif
1300}
1301#endif
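
/* Illustrative usage sketch: reloading CR3 with its current value flushes the
 * non-global TLB entries, which is the usual follow-up to a page table change.
 * @code
 *      // ... modify the page tables ...
 *      ASMReloadCR3();     // throw away stale translations
 * @endcode
 */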
1302
1303
1304/**
1305 * Get cr4.
1306 * @returns cr4.
1307 */
1308#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1309DECLASM(RTCCUINTREG) ASMGetCR4(void);
1310#else
1311DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1312{
1313 RTCCUINTREG uCR4;
1314# if RT_INLINE_ASM_USES_INTRIN
1315 uCR4 = __readcr4();
1316
1317# elif RT_INLINE_ASM_GNU_STYLE
1318# ifdef RT_ARCH_AMD64
1319 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1320# else
1321 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1322# endif
1323# else
1324 __asm
1325 {
1326# ifdef RT_ARCH_AMD64
1327 mov rax, cr4
1328 mov [uCR4], rax
1329# else
1330 push eax /* just in case */
1331 /*mov eax, cr4*/
1332 _emit 0x0f
1333 _emit 0x20
1334 _emit 0xe0
1335 mov [uCR4], eax
1336 pop eax
1337# endif
1338 }
1339# endif
1340 return uCR4;
1341}
1342#endif
1343
1344
1345/**
1346 * Sets the CR4 register.
1347 *
1348 * @param uCR4 New CR4 value.
1349 */
1350#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1351DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1352#else
1353DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1354{
1355# if RT_INLINE_ASM_USES_INTRIN
1356 __writecr4(uCR4);
1357
1358# elif RT_INLINE_ASM_GNU_STYLE
1359# ifdef RT_ARCH_AMD64
1360 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1361# else
1362 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1363# endif
1364# else
1365 __asm
1366 {
1367# ifdef RT_ARCH_AMD64
1368 mov rax, [uCR4]
1369 mov cr4, rax
1370# else
1371 mov eax, [uCR4]
1372 _emit 0x0F
1373 _emit 0x22
1374 _emit 0xE0 /* mov cr4, eax */
1375# endif
1376 }
1377# endif
1378}
1379#endif
1380
1381
1382/**
1383 * Get cr8.
1384 * @returns cr8.
1385 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1386 */
1387#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1388DECLASM(RTCCUINTREG) ASMGetCR8(void);
1389#else
1390DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1391{
1392# ifdef RT_ARCH_AMD64
1393 RTCCUINTREG uCR8;
1394# if RT_INLINE_ASM_USES_INTRIN
1395 uCR8 = __readcr8();
1396
1397# elif RT_INLINE_ASM_GNU_STYLE
1398 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1399# else
1400 __asm
1401 {
1402 mov rax, cr8
1403 mov [uCR8], rax
1404 }
1405# endif
1406 return uCR8;
1407# else /* !RT_ARCH_AMD64 */
1408 return 0;
1409# endif /* !RT_ARCH_AMD64 */
1410}
1411#endif
1412
1413
1414/**
1415 * Enables interrupts (EFLAGS.IF).
1416 */
1417#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1418DECLASM(void) ASMIntEnable(void);
1419#else
1420DECLINLINE(void) ASMIntEnable(void)
1421{
1422# if RT_INLINE_ASM_GNU_STYLE
1423 __asm("sti\n");
1424# elif RT_INLINE_ASM_USES_INTRIN
1425 _enable();
1426# else
1427 __asm sti
1428# endif
1429}
1430#endif
1431
1432
1433/**
1434 * Disables interrupts (!EFLAGS.IF).
1435 */
1436#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1437DECLASM(void) ASMIntDisable(void);
1438#else
1439DECLINLINE(void) ASMIntDisable(void)
1440{
1441# if RT_INLINE_ASM_GNU_STYLE
1442 __asm("cli\n");
1443# elif RT_INLINE_ASM_USES_INTRIN
1444 _disable();
1445# else
1446 __asm cli
1447# endif
1448}
1449#endif
1450
1451
1452/**
1453 * Disables interrupts and returns previous xFLAGS.
1454 */
1455#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1456DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1457#else
1458DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1459{
1460 RTCCUINTREG xFlags;
1461# if RT_INLINE_ASM_GNU_STYLE
1462# ifdef RT_ARCH_AMD64
1463 __asm__ __volatile__("pushfq\n\t"
1464 "cli\n\t"
1465 "popq %0\n\t"
1466 : "=rm" (xFlags));
1467# else
1468 __asm__ __volatile__("pushfl\n\t"
1469 "cli\n\t"
1470 "popl %0\n\t"
1471 : "=rm" (xFlags));
1472# endif
1473# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1474 xFlags = ASMGetFlags();
1475 _disable();
1476# else
1477 __asm {
1478 pushfd
1479 cli
1480 pop [xFlags]
1481 }
1482# endif
1483 return xFlags;
1484}
1485#endif
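
/* Illustrative usage sketch: the usual save/disable/restore pattern around a
 * short section that must not be interrupted.
 * @code
 *      RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *      // ... short critical section with interrupts disabled ...
 *      ASMSetFlags(fSavedFlags);   // restores the previous EFLAGS.IF state
 * @endcode
 */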
1486
1487
1488/**
1489 * Reads a machine specific register.
1490 *
1491 * @returns Register content.
1492 * @param uRegister Register to read.
1493 */
1494#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1495DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1496#else
1497DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1498{
1499 RTUINT64U u;
1500# if RT_INLINE_ASM_GNU_STYLE
1501 __asm__ __volatile__("rdmsr\n\t"
1502 : "=a" (u.s.Lo),
1503 "=d" (u.s.Hi)
1504 : "c" (uRegister));
1505
1506# elif RT_INLINE_ASM_USES_INTRIN
1507 u.u = __readmsr(uRegister);
1508
1509# else
1510 __asm
1511 {
1512 mov ecx, [uRegister]
1513 rdmsr
1514 mov [u.s.Lo], eax
1515 mov [u.s.Hi], edx
1516 }
1517# endif
1518
1519 return u.u;
1520}
1521#endif
1522
1523
1524/**
1525 * Writes a machine specific register.
1526 *
1528 * @param uRegister Register to write to.
1529 * @param u64Val Value to write.
1530 */
1531#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1532DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1533#else
1534DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1535{
1536 RTUINT64U u;
1537
1538 u.u = u64Val;
1539# if RT_INLINE_ASM_GNU_STYLE
1540 __asm__ __volatile__("wrmsr\n\t"
1541 ::"a" (u.s.Lo),
1542 "d" (u.s.Hi),
1543 "c" (uRegister));
1544
1545# elif RT_INLINE_ASM_USES_INTRIN
1546 __writemsr(uRegister, u.u);
1547
1548# else
1549 __asm
1550 {
1551 mov ecx, [uRegister]
1552 mov edx, [u.s.Hi]
1553 mov eax, [u.s.Lo]
1554 wrmsr
1555 }
1556# endif
1557}
1558#endif
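
/* Illustrative usage sketch: read-modify-write of an MSR.  The register number
 * and bit position are placeholders, not values defined by this header.
 * @code
 *      uint64_t uValue = ASMRdMsr(uSomeMsr);
 *      uValue |= UINT64_C(1) << iSomeBit;  // assumed bit of interest
 *      ASMWrMsr(uSomeMsr, uValue);
 * @endcode
 */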
1559
1560
1561/**
1562 * Reads low part of a machine specific register.
1563 *
1564 * @returns Register content.
1565 * @param uRegister Register to read.
1566 */
1567#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1568DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1569#else
1570DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1571{
1572 uint32_t u32;
1573# if RT_INLINE_ASM_GNU_STYLE
1574 __asm__ __volatile__("rdmsr\n\t"
1575 : "=a" (u32)
1576 : "c" (uRegister)
1577 : "edx");
1578
1579# elif RT_INLINE_ASM_USES_INTRIN
1580 u32 = (uint32_t)__readmsr(uRegister);
1581
1582#else
1583 __asm
1584 {
1585 mov ecx, [uRegister]
1586 rdmsr
1587 mov [u32], eax
1588 }
1589# endif
1590
1591 return u32;
1592}
1593#endif
1594
1595
1596/**
1597 * Reads high part of a machine specific register.
1598 *
1599 * @returns Register content.
1600 * @param uRegister Register to read.
1601 */
1602#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1603DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1604#else
1605DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1606{
1607 uint32_t u32;
1608# if RT_INLINE_ASM_GNU_STYLE
1609 __asm__ __volatile__("rdmsr\n\t"
1610 : "=d" (u32)
1611 : "c" (uRegister)
1612 : "eax");
1613
1614# elif RT_INLINE_ASM_USES_INTRIN
1615 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1616
1617# else
1618 __asm
1619 {
1620 mov ecx, [uRegister]
1621 rdmsr
1622 mov [u32], edx
1623 }
1624# endif
1625
1626 return u32;
1627}
1628#endif
1629
1630
1631/**
1632 * Gets dr0.
1633 *
1634 * @returns dr0.
1635 */
1636#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1637DECLASM(RTCCUINTREG) ASMGetDR0(void);
1638#else
1639DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1640{
1641 RTCCUINTREG uDR0;
1642# if RT_INLINE_ASM_USES_INTRIN
1643 uDR0 = __readdr(0);
1644# elif RT_INLINE_ASM_GNU_STYLE
1645# ifdef RT_ARCH_AMD64
1646 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1647# else
1648 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1649# endif
1650# else
1651 __asm
1652 {
1653# ifdef RT_ARCH_AMD64
1654 mov rax, dr0
1655 mov [uDR0], rax
1656# else
1657 mov eax, dr0
1658 mov [uDR0], eax
1659# endif
1660 }
1661# endif
1662 return uDR0;
1663}
1664#endif
1665
1666
1667/**
1668 * Gets dr1.
1669 *
1670 * @returns dr1.
1671 */
1672#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1673DECLASM(RTCCUINTREG) ASMGetDR1(void);
1674#else
1675DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1676{
1677 RTCCUINTREG uDR1;
1678# if RT_INLINE_ASM_USES_INTRIN
1679 uDR1 = __readdr(1);
1680# elif RT_INLINE_ASM_GNU_STYLE
1681# ifdef RT_ARCH_AMD64
1682 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1683# else
1684 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1685# endif
1686# else
1687 __asm
1688 {
1689# ifdef RT_ARCH_AMD64
1690 mov rax, dr1
1691 mov [uDR1], rax
1692# else
1693 mov eax, dr1
1694 mov [uDR1], eax
1695# endif
1696 }
1697# endif
1698 return uDR1;
1699}
1700#endif
1701
1702
1703/**
1704 * Gets dr2.
1705 *
1706 * @returns dr2.
1707 */
1708#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1709DECLASM(RTCCUINTREG) ASMGetDR2(void);
1710#else
1711DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1712{
1713 RTCCUINTREG uDR2;
1714# if RT_INLINE_ASM_USES_INTRIN
1715 uDR2 = __readdr(2);
1716# elif RT_INLINE_ASM_GNU_STYLE
1717# ifdef RT_ARCH_AMD64
1718 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1719# else
1720 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1721# endif
1722# else
1723 __asm
1724 {
1725# ifdef RT_ARCH_AMD64
1726 mov rax, dr2
1727 mov [uDR2], rax
1728# else
1729 mov eax, dr2
1730 mov [uDR2], eax
1731# endif
1732 }
1733# endif
1734 return uDR2;
1735}
1736#endif
1737
1738
1739/**
1740 * Gets dr3.
1741 *
1742 * @returns dr3.
1743 */
1744#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1745DECLASM(RTCCUINTREG) ASMGetDR3(void);
1746#else
1747DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1748{
1749 RTCCUINTREG uDR3;
1750# if RT_INLINE_ASM_USES_INTRIN
1751 uDR3 = __readdr(3);
1752# elif RT_INLINE_ASM_GNU_STYLE
1753# ifdef RT_ARCH_AMD64
1754 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1755# else
1756 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1757# endif
1758# else
1759 __asm
1760 {
1761# ifdef RT_ARCH_AMD64
1762 mov rax, dr3
1763 mov [uDR3], rax
1764# else
1765 mov eax, dr3
1766 mov [uDR3], eax
1767# endif
1768 }
1769# endif
1770 return uDR3;
1771}
1772#endif
1773
1774
1775/**
1776 * Gets dr6.
1777 *
1778 * @returns dr6.
1779 */
1780#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1781DECLASM(RTCCUINTREG) ASMGetDR6(void);
1782#else
1783DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1784{
1785 RTCCUINTREG uDR6;
1786# if RT_INLINE_ASM_USES_INTRIN
1787 uDR6 = __readdr(6);
1788# elif RT_INLINE_ASM_GNU_STYLE
1789# ifdef RT_ARCH_AMD64
1790 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1791# else
1792 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1793# endif
1794# else
1795 __asm
1796 {
1797# ifdef RT_ARCH_AMD64
1798 mov rax, dr6
1799 mov [uDR6], rax
1800# else
1801 mov eax, dr6
1802 mov [uDR6], eax
1803# endif
1804 }
1805# endif
1806 return uDR6;
1807}
1808#endif
1809
1810
1811/**
1812 * Reads and clears DR6.
1813 *
1814 * @returns DR6.
1815 */
1816#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1817DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1818#else
1819DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1820{
1821 RTCCUINTREG uDR6;
1822# if RT_INLINE_ASM_USES_INTRIN
1823 uDR6 = __readdr(6);
 1824 __writedr(6, 0xffff0ff0U); /* Bits 31-16 and 11-4 are set, everything else is zero. */
1825# elif RT_INLINE_ASM_GNU_STYLE
 1826 RTCCUINTREG uNewValue = 0xffff0ff0U;/* Bits 31-16 and 11-4 are set, everything else is zero. */
1827# ifdef RT_ARCH_AMD64
1828 __asm__ __volatile__("movq %%dr6, %0\n\t"
1829 "movq %1, %%dr6\n\t"
1830 : "=r" (uDR6)
1831 : "r" (uNewValue));
1832# else
1833 __asm__ __volatile__("movl %%dr6, %0\n\t"
1834 "movl %1, %%dr6\n\t"
1835 : "=r" (uDR6)
1836 : "r" (uNewValue));
1837# endif
1838# else
1839 __asm
1840 {
1841# ifdef RT_ARCH_AMD64
1842 mov rax, dr6
1843 mov [uDR6], rax
1844 mov rcx, rax
 1845 mov ecx, 0ffff0ff0h; /* Bits 31-16 and 11-4 are set, everything else is zero. */
1846 mov dr6, rcx
1847# else
1848 mov eax, dr6
1849 mov [uDR6], eax
 1850 mov ecx, 0ffff0ff0h; /* Bits 31-16 and 11-4 are set, everything else is zero. */
1851 mov dr6, ecx
1852# endif
1853 }
1854# endif
1855 return uDR6;
1856}
1857#endif
1858
1859
1860/**
1861 * Gets dr7.
1862 *
1863 * @returns dr7.
1864 */
1865#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1866DECLASM(RTCCUINTREG) ASMGetDR7(void);
1867#else
1868DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1869{
1870 RTCCUINTREG uDR7;
1871# if RT_INLINE_ASM_USES_INTRIN
1872 uDR7 = __readdr(7);
1873# elif RT_INLINE_ASM_GNU_STYLE
1874# ifdef RT_ARCH_AMD64
1875 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1876# else
1877 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1878# endif
1879# else
1880 __asm
1881 {
1882# ifdef RT_ARCH_AMD64
1883 mov rax, dr7
1884 mov [uDR7], rax
1885# else
1886 mov eax, dr7
1887 mov [uDR7], eax
1888# endif
1889 }
1890# endif
1891 return uDR7;
1892}
1893#endif
1894
1895
1896/**
1897 * Sets dr0.
1898 *
1899 * @param uDRVal Debug register value to write
1900 */
1901#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1902DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1903#else
1904DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1905{
1906# if RT_INLINE_ASM_USES_INTRIN
1907 __writedr(0, uDRVal);
1908# elif RT_INLINE_ASM_GNU_STYLE
1909# ifdef RT_ARCH_AMD64
1910 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1911# else
1912 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1913# endif
1914# else
1915 __asm
1916 {
1917# ifdef RT_ARCH_AMD64
1918 mov rax, [uDRVal]
1919 mov dr0, rax
1920# else
1921 mov eax, [uDRVal]
1922 mov dr0, eax
1923# endif
1924 }
1925# endif
1926}
1927#endif
1928
1929
1930/**
1931 * Sets dr1.
1932 *
1933 * @param uDRVal Debug register value to write
1934 */
1935#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1936DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1937#else
1938DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1939{
1940# if RT_INLINE_ASM_USES_INTRIN
1941 __writedr(1, uDRVal);
1942# elif RT_INLINE_ASM_GNU_STYLE
1943# ifdef RT_ARCH_AMD64
1944 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
1945# else
1946 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
1947# endif
1948# else
1949 __asm
1950 {
1951# ifdef RT_ARCH_AMD64
1952 mov rax, [uDRVal]
1953 mov dr1, rax
1954# else
1955 mov eax, [uDRVal]
1956 mov dr1, eax
1957# endif
1958 }
1959# endif
1960}
1961#endif
1962
1963
1964/**
1965 * Sets dr2.
1966 *
1967 * @param uDRVal Debug register value to write
1968 */
1969#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1970DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
1971#else
1972DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
1973{
1974# if RT_INLINE_ASM_USES_INTRIN
1975 __writedr(2, uDRVal);
1976# elif RT_INLINE_ASM_GNU_STYLE
1977# ifdef RT_ARCH_AMD64
1978 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
1979# else
1980 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
1981# endif
1982# else
1983 __asm
1984 {
1985# ifdef RT_ARCH_AMD64
1986 mov rax, [uDRVal]
1987 mov dr2, rax
1988# else
1989 mov eax, [uDRVal]
1990 mov dr2, eax
1991# endif
1992 }
1993# endif
1994}
1995#endif
1996
1997
1998/**
1999 * Sets dr3.
2000 *
2001 * @param uDRVal Debug register value to write
2002 */
2003#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2004DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2005#else
2006DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2007{
2008# if RT_INLINE_ASM_USES_INTRIN
2009 __writedr(3, uDRVal);
2010# elif RT_INLINE_ASM_GNU_STYLE
2011# ifdef RT_ARCH_AMD64
2012 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2013# else
2014 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2015# endif
2016# else
2017 __asm
2018 {
2019# ifdef RT_ARCH_AMD64
2020 mov rax, [uDRVal]
2021 mov dr3, rax
2022# else
2023 mov eax, [uDRVal]
2024 mov dr3, eax
2025# endif
2026 }
2027# endif
2028}
2029#endif
2030
2031
2032/**
2033 * Sets dr6.
2034 *
2035 * @param uDRVal Debug register value to write
2036 */
2037#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2038DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2039#else
2040DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2041{
2042# if RT_INLINE_ASM_USES_INTRIN
2043 __writedr(6, uDRVal);
2044# elif RT_INLINE_ASM_GNU_STYLE
2045# ifdef RT_ARCH_AMD64
2046 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2047# else
2048 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2049# endif
2050# else
2051 __asm
2052 {
2053# ifdef RT_ARCH_AMD64
2054 mov rax, [uDRVal]
2055 mov dr6, rax
2056# else
2057 mov eax, [uDRVal]
2058 mov dr6, eax
2059# endif
2060 }
2061# endif
2062}
2063#endif
2064
2065
2066/**
2067 * Sets dr7.
2068 *
2069 * @param uDRVal Debug register value to write
2070 */
2071#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2072DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2073#else
2074DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2075{
2076# if RT_INLINE_ASM_USES_INTRIN
2077 __writedr(7, uDRVal);
2078# elif RT_INLINE_ASM_GNU_STYLE
2079# ifdef RT_ARCH_AMD64
2080 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2081# else
2082 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2083# endif
2084# else
2085 __asm
2086 {
2087# ifdef RT_ARCH_AMD64
2088 mov rax, [uDRVal]
2089 mov dr7, rax
2090# else
2091 mov eax, [uDRVal]
2092 mov dr7, eax
2093# endif
2094 }
2095# endif
2096}
2097#endif
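
/* Illustrative, simplified sketch: arming hardware breakpoint slot 0 as an
 * execution breakpoint.  DR7 bit 1 is the global enable for slot 0; the R/W
 * and LEN fields stay zero for an execution breakpoint.  pvCodeAddr is a
 * placeholder name.
 * @code
 *      ASMSetDR0((RTCCUINTREG)(uintptr_t)pvCodeAddr);  // linear address to trap on
 *      ASMSetDR7(ASMGetDR7() | RT_BIT(1));             // G0: globally enable slot 0
 * @endcode
 */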
2098
2099
2100/**
2101 * Compiler memory barrier.
2102 *
2103 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2104 * values or any outstanding writes when returning from this function.
2105 *
2106 * This function must be used if non-volatile data is modified by a
2107 * device or the VMM. Typical cases are port access, MMIO access,
2108 * trapping instruction, etc.
2109 */
2110#if RT_INLINE_ASM_GNU_STYLE
2111# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
2112#elif RT_INLINE_ASM_USES_INTRIN
2113# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2114#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2115DECLINLINE(void) ASMCompilerBarrier(void)
2116{
2117 __asm
2118 {
2119 }
2120}
2121#endif
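
/* Illustrative usage sketch: forcing the compiler to redo loads and stores
 * around data that a device or another context may change behind its back.
 * The structure and field names are placeholders.
 * @code
 *      pSharedState->fRequestPending = true;
 *      ASMCompilerBarrier();           // don't cache or reorder the store
 *      while (pSharedState->fRequestPending)
 *          ASMCompilerBarrier();       // force a fresh load on every iteration
 * @endcode
 */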
2122
2123
2124/**
 2125 * Writes an 8-bit unsigned integer to an I/O port, ordered.
2126 *
2127 * @param Port I/O port to write to.
2128 * @param u8 8-bit integer to write.
2129 */
2130#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2131DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2132#else
2133DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2134{
2135# if RT_INLINE_ASM_GNU_STYLE
2136 __asm__ __volatile__("outb %b1, %w0\n\t"
2137 :: "Nd" (Port),
2138 "a" (u8));
2139
2140# elif RT_INLINE_ASM_USES_INTRIN
2141 __outbyte(Port, u8);
2142
2143# else
2144 __asm
2145 {
2146 mov dx, [Port]
2147 mov al, [u8]
2148 out dx, al
2149 }
2150# endif
2151}
2152#endif
2153
2154
2155/**
 2156 * Reads an 8-bit unsigned integer from an I/O port, ordered.
2157 *
2158 * @returns 8-bit integer.
2159 * @param Port I/O port to read from.
2160 */
2161#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2162DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2163#else
2164DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2165{
2166 uint8_t u8;
2167# if RT_INLINE_ASM_GNU_STYLE
2168 __asm__ __volatile__("inb %w1, %b0\n\t"
2169 : "=a" (u8)
2170 : "Nd" (Port));
2171
2172# elif RT_INLINE_ASM_USES_INTRIN
2173 u8 = __inbyte(Port);
2174
2175# else
2176 __asm
2177 {
2178 mov dx, [Port]
2179 in al, dx
2180 mov [u8], al
2181 }
2182# endif
2183 return u8;
2184}
2185#endif
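
/* Illustrative usage sketch using classic PC port numbers (not defined here):
 * port 0x80 is the traditional POST/diagnostic port and port 0x64 the keyboard
 * controller status port.
 * @code
 *      ASMOutU8(0x80, 0x55);                       // emit a diagnostic code
 *      uint8_t const bKbdStatus = ASMInU8(0x64);   // read the KBC status byte
 * @endcode
 */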
2186
2187
2188/**
2189 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2190 *
2191 * @param Port I/O port to write to.
2192 * @param u16 16-bit integer to write.
2193 */
2194#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2195DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2196#else
2197DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2198{
2199# if RT_INLINE_ASM_GNU_STYLE
2200 __asm__ __volatile__("outw %w1, %w0\n\t"
2201 :: "Nd" (Port),
2202 "a" (u16));
2203
2204# elif RT_INLINE_ASM_USES_INTRIN
2205 __outword(Port, u16);
2206
2207# else
2208 __asm
2209 {
2210 mov dx, [Port]
2211 mov ax, [u16]
2212 out dx, ax
2213 }
2214# endif
2215}
2216#endif
2217
2218
2219/**
2220 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2221 *
2222 * @returns 16-bit integer.
2223 * @param Port I/O port to read from.
2224 */
2225#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2226DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2227#else
2228DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2229{
2230 uint16_t u16;
2231# if RT_INLINE_ASM_GNU_STYLE
2232 __asm__ __volatile__("inw %w1, %w0\n\t"
2233 : "=a" (u16)
2234 : "Nd" (Port));
2235
2236# elif RT_INLINE_ASM_USES_INTRIN
2237 u16 = __inword(Port);
2238
2239# else
2240 __asm
2241 {
2242 mov dx, [Port]
2243 in ax, dx
2244 mov [u16], ax
2245 }
2246# endif
2247 return u16;
2248}
2249#endif
2250
2251
2252/**
2253 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2254 *
2255 * @param Port I/O port to write to.
2256 * @param u32 32-bit integer to write.
2257 */
2258#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2259DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2260#else
2261DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2262{
2263# if RT_INLINE_ASM_GNU_STYLE
2264 __asm__ __volatile__("outl %1, %w0\n\t"
2265 :: "Nd" (Port),
2266 "a" (u32));
2267
2268# elif RT_INLINE_ASM_USES_INTRIN
2269 __outdword(Port, u32);
2270
2271# else
2272 __asm
2273 {
2274 mov dx, [Port]
2275 mov eax, [u32]
2276 out dx, eax
2277 }
2278# endif
2279}
2280#endif
2281
2282
2283/**
2284 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2285 *
2286 * @returns 32-bit integer.
2287 * @param Port I/O port to read from.
2288 */
2289#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2290DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2291#else
2292DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2293{
2294 uint32_t u32;
2295# if RT_INLINE_ASM_GNU_STYLE
2296 __asm__ __volatile__("inl %w1, %0\n\t"
2297 : "=a" (u32)
2298 : "Nd" (Port));
2299
2300# elif RT_INLINE_ASM_USES_INTRIN
2301 u32 = __indword(Port);
2302
2303# else
2304 __asm
2305 {
2306 mov dx, [Port]
2307 in eax, dx
2308 mov [u32], eax
2309 }
2310# endif
2311 return u32;
2312}
2313#endif
2314
2315
2316/**
2317 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2318 *
2319 * @param Port I/O port to write to.
2320 * @param pau8 Pointer to the string buffer.
2321 * @param c The number of items to write.
2322 */
2323#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2324DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2325#else
2326DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2327{
2328# if RT_INLINE_ASM_GNU_STYLE
2329 __asm__ __volatile__("rep; outsb\n\t"
2330 : "+S" (pau8),
2331 "+c" (c)
2332 : "d" (Port));
2333
2334# elif RT_INLINE_ASM_USES_INTRIN
2335 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2336
2337# else
2338 __asm
2339 {
2340 mov dx, [Port]
2341 mov ecx, [c]
2342 mov eax, [pau8]
2343 xchg esi, eax
2344 rep outsb
2345 xchg esi, eax
2346 }
2347# endif
2348}
2349#endif
2350
2351
2352/**
2353 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2354 *
2355 * @param Port I/O port to read from.
2356 * @param pau8 Pointer to the string buffer (output).
2357 * @param c The number of items to read.
2358 */
2359#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2360DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2361#else
2362DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2363{
2364# if RT_INLINE_ASM_GNU_STYLE
2365 __asm__ __volatile__("rep; insb\n\t"
2366 : "+D" (pau8),
2367 "+c" (c)
2368 : "d" (Port));
2369
2370# elif RT_INLINE_ASM_USES_INTRIN
2371 __inbytestring(Port, pau8, (unsigned long)c);
2372
2373# else
2374 __asm
2375 {
2376 mov dx, [Port]
2377 mov ecx, [c]
2378 mov eax, [pau8]
2379 xchg edi, eax
2380 rep insb
2381 xchg edi, eax
2382 }
2383# endif
2384}
2385#endif
2386
2387
2388/**
2389 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2390 *
2391 * @param Port I/O port to write to.
2392 * @param pau16 Pointer to the string buffer.
2393 * @param c The number of items to write.
2394 */
2395#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2396DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2397#else
2398DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2399{
2400# if RT_INLINE_ASM_GNU_STYLE
2401 __asm__ __volatile__("rep; outsw\n\t"
2402 : "+S" (pau16),
2403 "+c" (c)
2404 : "d" (Port));
2405
2406# elif RT_INLINE_ASM_USES_INTRIN
2407 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2408
2409# else
2410 __asm
2411 {
2412 mov dx, [Port]
2413 mov ecx, [c]
2414 mov eax, [pau16]
2415 xchg esi, eax
2416 rep outsw
2417 xchg esi, eax
2418 }
2419# endif
2420}
2421#endif
2422
2423
2424/**
2425 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2426 *
2427 * @param Port I/O port to read from.
2428 * @param pau16 Pointer to the string buffer (output).
2429 * @param c The number of items to read.
2430 */
2431#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2432DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2433#else
2434DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2435{
2436# if RT_INLINE_ASM_GNU_STYLE
2437 __asm__ __volatile__("rep; insw\n\t"
2438 : "+D" (pau16),
2439 "+c" (c)
2440 : "d" (Port));
2441
2442# elif RT_INLINE_ASM_USES_INTRIN
2443 __inwordstring(Port, pau16, (unsigned long)c);
2444
2445# else
2446 __asm
2447 {
2448 mov dx, [Port]
2449 mov ecx, [c]
2450 mov eax, [pau16]
2451 xchg edi, eax
2452 rep insw
2453 xchg edi, eax
2454 }
2455# endif
2456}
2457#endif
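
/* Illustrative usage sketch: reading one 512-byte sector from the legacy IDE
 * data port with the 16-bit string variant.  Port and size are classic PC
 * values, not definitions from this header.
 * @code
 *      uint16_t au16Sector[256];
 *      ASMInStrU16(0x1f0, au16Sector, RT_ELEMENTS(au16Sector));
 * @endcode
 */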
2458
2459
2460/**
2461 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2462 *
2463 * @param Port I/O port to write to.
2464 * @param pau32 Pointer to the string buffer.
2465 * @param c The number of items to write.
2466 */
2467#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2468DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2469#else
2470DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2471{
2472# if RT_INLINE_ASM_GNU_STYLE
2473 __asm__ __volatile__("rep; outsl\n\t"
2474 : "+S" (pau32),
2475 "+c" (c)
2476 : "d" (Port));
2477
2478# elif RT_INLINE_ASM_USES_INTRIN
2479 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2480
2481# else
2482 __asm
2483 {
2484 mov dx, [Port]
2485 mov ecx, [c]
2486 mov eax, [pau32]
2487 xchg esi, eax
2488 rep outsd
2489 xchg esi, eax
2490 }
2491# endif
2492}
2493#endif
2494
2495
2496/**
2497 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2498 *
2499 * @param Port I/O port to read from.
2500 * @param pau32 Pointer to the string buffer (output).
2501 * @param c The number of items to read.
2502 */
2503#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2504DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2505#else
2506DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2507{
2508# if RT_INLINE_ASM_GNU_STYLE
2509 __asm__ __volatile__("rep; insl\n\t"
2510 : "+D" (pau32),
2511 "+c" (c)
2512 : "d" (Port));
2513
2514# elif RT_INLINE_ASM_USES_INTRIN
2515 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2516
2517# else
2518 __asm
2519 {
2520 mov dx, [Port]
2521 mov ecx, [c]
2522 mov eax, [pau32]
2523 xchg edi, eax
2524 rep insd
2525 xchg edi, eax
2526 }
2527# endif
2528}
2529#endif
2530
2531
2532/**
2533 * Atomically Exchange an unsigned 8-bit value, ordered.
2534 *
2535 * @returns Current *pu8 value
2536 * @param pu8 Pointer to the 8-bit variable to update.
2537 * @param u8 The 8-bit value to assign to *pu8.
2538 */
2539#if RT_INLINE_ASM_EXTERNAL
2540DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2541#else
2542DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2543{
2544# if RT_INLINE_ASM_GNU_STYLE
2545 __asm__ __volatile__("xchgb %0, %1\n\t"
2546 : "=m" (*pu8),
2547 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2548 : "1" (u8),
2549 "m" (*pu8));
2550# else
2551 __asm
2552 {
2553# ifdef RT_ARCH_AMD64
2554 mov rdx, [pu8]
2555 mov al, [u8]
2556 xchg [rdx], al
2557 mov [u8], al
2558# else
2559 mov edx, [pu8]
2560 mov al, [u8]
2561 xchg [edx], al
2562 mov [u8], al
2563# endif
2564 }
2565# endif
2566 return u8;
2567}
2568#endif
2569
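/* Usage sketch: ASMAtomicXchgU8 is enough to build a simple test-and-set byte
 * lock. The helper names below are illustrative only and not part of IPRT:
 *
 *      DECLINLINE(bool) ExampleTryAcquire(volatile uint8_t *pfLock)
 *      {
 *          return ASMAtomicXchgU8(pfLock, 1) == 0;    we own the lock iff it was 0
 *      }
 *
 *      DECLINLINE(void) ExampleRelease(volatile uint8_t *pfLock)
 *      {
 *          ASMAtomicXchgU8(pfLock, 0);
 *      }
 */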
2570
2571/**
2572 * Atomically Exchange a signed 8-bit value, ordered.
2573 *
2574 * @returns Current *pi8 value
2575 * @param pi8 Pointer to the 8-bit variable to update.
2576 * @param i8 The 8-bit value to assign to *pi8.
2577 */
2578DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2579{
2580 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2581}
2582
2583
2584/**
2585 * Atomically Exchange a bool value, ordered.
2586 *
2587 * @returns Current *pf value
2588 * @param pf Pointer to the boolean variable to update.
2589 * @param f The boolean value to assign to *pf.
2590 */
2591DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2592{
2593#ifdef _MSC_VER
2594 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2595#else
2596 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2597#endif
2598}
2599
2600
2601/**
2602 * Atomically Exchange an unsigned 16-bit value, ordered.
2603 *
2604 * @returns Current *pu16 value
2605 * @param pu16 Pointer to the 16-bit variable to update.
2606 * @param u16 The 16-bit value to assign to *pu16.
2607 */
2608#if RT_INLINE_ASM_EXTERNAL
2609DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2610#else
2611DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2612{
2613# if RT_INLINE_ASM_GNU_STYLE
2614 __asm__ __volatile__("xchgw %0, %1\n\t"
2615 : "=m" (*pu16),
2616 "=r" (u16)
2617 : "1" (u16),
2618 "m" (*pu16));
2619# else
2620 __asm
2621 {
2622# ifdef RT_ARCH_AMD64
2623 mov rdx, [pu16]
2624 mov ax, [u16]
2625 xchg [rdx], ax
2626 mov [u16], ax
2627# else
2628 mov edx, [pu16]
2629 mov ax, [u16]
2630 xchg [edx], ax
2631 mov [u16], ax
2632# endif
2633 }
2634# endif
2635 return u16;
2636}
2637#endif
2638
2639
2640/**
2641 * Atomically Exchange a signed 16-bit value, ordered.
2642 *
2643 * @returns Current *pi16 value
2644 * @param pi16 Pointer to the 16-bit variable to update.
2645 * @param i16 The 16-bit value to assign to *pi16.
2646 */
2647DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2648{
2649 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2650}
2651
2652
2653/**
2654 * Atomically Exchange an unsigned 32-bit value, ordered.
2655 *
2656 * @returns Current *pu32 value
2657 * @param pu32 Pointer to the 32-bit variable to update.
2658 * @param u32 The 32-bit value to assign to *pu32.
2659 */
2660#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2661DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2662#else
2663DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2664{
2665# if RT_INLINE_ASM_GNU_STYLE
2666 __asm__ __volatile__("xchgl %0, %1\n\t"
2667 : "=m" (*pu32),
2668 "=r" (u32)
2669 : "1" (u32),
2670 "m" (*pu32));
2671
2672# elif RT_INLINE_ASM_USES_INTRIN
2673 u32 = _InterlockedExchange((long *)pu32, u32);
2674
2675# else
2676 __asm
2677 {
2678# ifdef RT_ARCH_AMD64
2679 mov rdx, [pu32]
2680 mov eax, u32
2681 xchg [rdx], eax
2682 mov [u32], eax
2683# else
2684 mov edx, [pu32]
2685 mov eax, u32
2686 xchg [edx], eax
2687 mov [u32], eax
2688# endif
2689 }
2690# endif
2691 return u32;
2692}
2693#endif
2694
2695
2696/**
2697 * Atomically Exchange a signed 32-bit value, ordered.
2698 *
2699 * @returns Current *pi32 value
2700 * @param pi32 Pointer to the 32-bit variable to update.
2701 * @param i32 The 32-bit value to assign to *pi32.
2702 */
2703DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2704{
2705 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2706}
2707
2708
2709/**
2710 * Atomically Exchange an unsigned 64-bit value, ordered.
2711 *
2712 * @returns Current *pu64 value
2713 * @param pu64 Pointer to the 64-bit variable to update.
2714 * @param u64 The 64-bit value to assign to *pu64.
2715 */
2716#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2717DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2718#else
2719DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2720{
2721# if defined(RT_ARCH_AMD64)
2722# if RT_INLINE_ASM_USES_INTRIN
2723 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2724
2725# elif RT_INLINE_ASM_GNU_STYLE
2726 __asm__ __volatile__("xchgq %0, %1\n\t"
2727 : "=m" (*pu64),
2728 "=r" (u64)
2729 : "1" (u64),
2730 "m" (*pu64));
2731# else
2732 __asm
2733 {
2734 mov rdx, [pu64]
2735 mov rax, [u64]
2736 xchg [rdx], rax
2737 mov [u64], rax
2738 }
2739# endif
2740# else /* !RT_ARCH_AMD64 */
2741# if RT_INLINE_ASM_GNU_STYLE
2742# if defined(PIC) || defined(__PIC__)
2743 uint32_t u32EBX = (uint32_t)u64;
2744 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2745 "xchgl %%ebx, %3\n\t"
2746 "1:\n\t"
2747 "lock; cmpxchg8b (%5)\n\t"
2748 "jnz 1b\n\t"
2749 "movl %3, %%ebx\n\t"
2750 /*"xchgl %%esi, %5\n\t"*/
2751 : "=A" (u64),
2752 "=m" (*pu64)
2753 : "0" (*pu64),
2754 "m" ( u32EBX ),
2755 "c" ( (uint32_t)(u64 >> 32) ),
2756 "S" (pu64));
2757# else /* !PIC */
2758 __asm__ __volatile__("1:\n\t"
2759 "lock; cmpxchg8b %1\n\t"
2760 "jnz 1b\n\t"
2761 : "=A" (u64),
2762 "=m" (*pu64)
2763 : "0" (*pu64),
2764 "b" ( (uint32_t)u64 ),
2765 "c" ( (uint32_t)(u64 >> 32) ));
2766# endif
2767# else
2768 __asm
2769 {
2770 mov ebx, dword ptr [u64]
2771 mov ecx, dword ptr [u64 + 4]
2772 mov edi, pu64
2773 mov eax, dword ptr [edi]
2774 mov edx, dword ptr [edi + 4]
2775 retry:
2776 lock cmpxchg8b [edi]
2777 jnz retry
2778 mov dword ptr [u64], eax
2779 mov dword ptr [u64 + 4], edx
2780 }
2781# endif
2782# endif /* !RT_ARCH_AMD64 */
2783 return u64;
2784}
2785#endif
2786
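/* Note: x86-32 has no 64-bit xchg instruction, so the 32-bit path above emulates
 * the exchange with a lock cmpxchg8b retry loop. A C-level sketch of the same
 * pattern, expressed with ASMAtomicCmpXchgU64 (declared further down); this only
 * illustrates the idea and is not a replacement for the assembly:
 *
 *      uint64_t u64Old;
 *      do
 *          u64Old = *pu64;
 *      while (!ASMAtomicCmpXchgU64(pu64, u64, u64Old));
 *      return u64Old;
 */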
2787
2788/**
2789 * Atomically Exchange a signed 64-bit value, ordered.
2790 *
2791 * @returns Current *pi64 value
2792 * @param pi64 Pointer to the 64-bit variable to update.
2793 * @param i64 The 64-bit value to assign to *pi64.
2794 */
2795DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2796{
2797 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2798}
2799
2800
2801#ifdef RT_ARCH_AMD64
2802/**
2803 * Atomically Exchange an unsigned 128-bit value, ordered.
2804 *
2805 * @returns Current *pu128.
2806 * @param pu128 Pointer to the 128-bit variable to update.
2807 * @param u128 The 128-bit value to assign to *pu128.
2808 *
2809 * @remark We cannot really assume that any hardware supports this. Nor do I have
2810 * GAS support for it. So, for the time being we'll BREAK the atomic
2811 * bit of this function and use two 64-bit exchanges instead.
2812 */
2813# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2814DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2815# else
2816DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2817{
2818 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2819 {
2820 /** @todo this is clumsy code */
2821 RTUINT128U u128Ret;
2822 u128Ret.u = u128;
2823 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2824 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2825 return u128Ret.u;
2826 }
2827#if 0 /* later? */
2828 else
2829 {
2830# if RT_INLINE_ASM_GNU_STYLE
2831 __asm__ __volatile__("1:\n\t"
2832 "lock; cmpxchg8b %1\n\t"
2833 "jnz 1b\n\t"
2834 : "=A" (u128),
2835 "=m" (*pu128)
2836 : "0" (*pu128),
2837 "b" ( (uint64_t)u128 ),
2838 "c" ( (uint64_t)(u128 >> 64) ));
2839# else
2840 __asm
2841 {
2842 mov rbx, dword ptr [u128]
2843 mov rcx, dword ptr [u128 + 8]
2844 mov rdi, pu128
2845 mov rax, dword ptr [rdi]
2846 mov rdx, dword ptr [rdi + 8]
2847 retry:
2848 lock cmpxchg16b [rdi]
2849 jnz retry
2850 mov dword ptr [u128], rax
2851 mov dword ptr [u128 + 8], rdx
2852 }
2853# endif
2854 }
2855 return u128;
2856#endif
2857}
2858# endif
2859#endif /* RT_ARCH_AMD64 */
2860
2861
2862/**
2863 * Atomically Exchange a pointer value, ordered.
2864 *
2865 * @returns Current *ppv value
2866 * @param ppv Pointer to the pointer variable to update.
2867 * @param pv The pointer value to assign to *ppv.
2868 */
2869DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2870{
2871#if ARCH_BITS == 32
2872 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2873#elif ARCH_BITS == 64
2874 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2875#else
2876# error "ARCH_BITS is bogus"
2877#endif
2878}
2879
2880
2881/**
2882 * Atomically Exchange a raw-mode context pointer value, ordered.
2883 *
2884 * @returns Current *ppv value
2885 * @param ppvRC Pointer to the pointer variable to update.
2886 * @param pvRC The pointer value to assign to *ppvRC.
2887 */
2888DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2889{
2890 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2891}
2892
2893
2894/**
2895 * Atomically Exchange a ring-0 pointer value, ordered.
2896 *
2897 * @returns Current *ppv value
2898 * @param ppvR0 Pointer to the pointer variable to update.
2899 * @param pvR0 The pointer value to assign to *ppvR0.
2900 */
2901DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2902{
2903#if R0_ARCH_BITS == 32
2904 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2905#elif R0_ARCH_BITS == 64
2906 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2907#else
2908# error "R0_ARCH_BITS is bogus"
2909#endif
2910}
2911
2912
2913/**
2914 * Atomically Exchange a ring-3 pointer value, ordered.
2915 *
2916 * @returns Current *ppv value
2917 * @param ppvR3 Pointer to the pointer variable to update.
2918 * @param pvR3 The pointer value to assign to *ppvR3.
2919 */
2920DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2921{
2922#if R3_ARCH_BITS == 32
2923 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2924#elif R3_ARCH_BITS == 64
2925 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2926#else
2927# error "R3_ARCH_BITS is bogus"
2928#endif
2929}
2930
2931
2932/** @def ASMAtomicXchgHandle
2933 * Atomically Exchange a typical IPRT handle value, ordered.
2934 *
2935 * @param ph Pointer to the value to update.
2936 * @param hNew The new value to assign to *ph.
2937 * @param phRes Where to store the current *ph value.
2938 *
2939 * @remarks This doesn't currently work for all handles (like RTFILE).
2940 */
2941#define ASMAtomicXchgHandle(ph, hNew, phRes) \
2942 do { \
2943 *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (const void *)(hNew)); \
2944 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2945 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
2946 } while (0)
2947
2948
2949/**
2950 * Atomically Exchange a value whose size might differ
2951 * between platforms or compilers, ordered.
2952 *
2953 * @param pu Pointer to the variable to update.
2954 * @param uNew The value to assign to *pu.
2955 * @todo This is busted as it's missing the result argument.
2956 */
2957#define ASMAtomicXchgSize(pu, uNew) \
2958 do { \
2959 switch (sizeof(*(pu))) { \
2960 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2961 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2962 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2963 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2964 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2965 } \
2966 } while (0)
2967
2968/**
2969 * Atomically Exchange a value whose size might differ
2970 * between platforms or compilers, ordered.
2971 *
2972 * @param pu Pointer to the variable to update.
2973 * @param uNew The value to assign to *pu.
2974 * @param puRes Where to store the current *pu value.
2975 */
2976#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
2977 do { \
2978 switch (sizeof(*(pu))) { \
2979 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2980 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2981 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2982 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2983 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
2984 } \
2985 } while (0)
2986
2987
2988/**
2989 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2990 *
2991 * @returns true if xchg was done.
2992 * @returns false if xchg wasn't done.
2993 *
2994 * @param pu32 Pointer to the value to update.
2995 * @param u32New The new value to assign to *pu32.
2996 * @param u32Old The old value to compare *pu32 with.
2997 */
2998#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2999DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3000#else
3001DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3002{
3003# if RT_INLINE_ASM_GNU_STYLE
3004 uint8_t u8Ret;
3005 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3006 "setz %1\n\t"
3007 : "=m" (*pu32),
3008 "=qm" (u8Ret),
3009 "=a" (u32Old)
3010 : "r" (u32New),
3011 "2" (u32Old),
3012 "m" (*pu32));
3013 return (bool)u8Ret;
3014
3015# elif RT_INLINE_ASM_USES_INTRIN
3016 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3017
3018# else
3019 uint32_t u32Ret;
3020 __asm
3021 {
3022# ifdef RT_ARCH_AMD64
3023 mov rdx, [pu32]
3024# else
3025 mov edx, [pu32]
3026# endif
3027 mov eax, [u32Old]
3028 mov ecx, [u32New]
3029# ifdef RT_ARCH_AMD64
3030 lock cmpxchg [rdx], ecx
3031# else
3032 lock cmpxchg [edx], ecx
3033# endif
3034 setz al
3035 movzx eax, al
3036 mov [u32Ret], eax
3037 }
3038 return !!u32Ret;
3039# endif
3040}
3041#endif
3042
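/* Usage sketch: the typical compare-and-swap retry loop built on
 * ASMAtomicCmpXchgU32. The helper name and the flag value are illustrative only:
 *
 *      DECLINLINE(void) ExampleSetFlag(uint32_t volatile *pu32, uint32_t fFlag)
 *      {
 *          uint32_t u32Old;
 *          do
 *              u32Old = ASMAtomicUoReadU32(pu32);
 *          while (!ASMAtomicCmpXchgU32(pu32, u32Old | fFlag, u32Old));
 *      }
 */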
3043
3044/**
3045 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3046 *
3047 * @returns true if xchg was done.
3048 * @returns false if xchg wasn't done.
3049 *
3050 * @param pi32 Pointer to the value to update.
3051 * @param i32New The new value to assign to *pi32.
3052 * @param i32Old The old value to compare *pi32 with.
3053 */
3054DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3055{
3056 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3057}
3058
3059
3060/**
3061 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3062 *
3063 * @returns true if xchg was done.
3064 * @returns false if xchg wasn't done.
3065 *
3066 * @param pu64 Pointer to the 64-bit variable to update.
3067 * @param u64New The 64-bit value to assign to *pu64.
3068 * @param u64Old The value to compare with.
3069 */
3070#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3071DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3072#else
3073DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3074{
3075# if RT_INLINE_ASM_USES_INTRIN
3076 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3077
3078# elif defined(RT_ARCH_AMD64)
3079# if RT_INLINE_ASM_GNU_STYLE
3080 uint8_t u8Ret;
3081 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3082 "setz %1\n\t"
3083 : "=m" (*pu64),
3084 "=qm" (u8Ret),
3085 "=a" (u64Old)
3086 : "r" (u64New),
3087 "2" (u64Old),
3088 "m" (*pu64));
3089 return (bool)u8Ret;
3090# else
3091 bool fRet;
3092 __asm
3093 {
3094 mov rdx, [pu64]
3095 mov rax, [u64Old]
3096 mov rcx, [u64New]
3097 lock cmpxchg [rdx], rcx
3098 setz al
3099 mov [fRet], al
3100 }
3101 return fRet;
3102# endif
3103# else /* !RT_ARCH_AMD64 */
3104 uint32_t u32Ret;
3105# if RT_INLINE_ASM_GNU_STYLE
3106# if defined(PIC) || defined(__PIC__)
3107 uint32_t u32EBX = (uint32_t)u64New;
3108 uint32_t u32Spill;
3109 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3110 "lock; cmpxchg8b (%6)\n\t"
3111 "setz %%al\n\t"
3112 "movl %4, %%ebx\n\t"
3113 "movzbl %%al, %%eax\n\t"
3114 : "=a" (u32Ret),
3115 "=d" (u32Spill),
3116# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3117 "+m" (*pu64)
3118# else
3119 "=m" (*pu64)
3120# endif
3121 : "A" (u64Old),
3122 "m" ( u32EBX ),
3123 "c" ( (uint32_t)(u64New >> 32) ),
3124 "S" (pu64));
3125# else /* !PIC */
3126 uint32_t u32Spill;
3127 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3128 "setz %%al\n\t"
3129 "movzbl %%al, %%eax\n\t"
3130 : "=a" (u32Ret),
3131 "=d" (u32Spill),
3132 "+m" (*pu64)
3133 : "A" (u64Old),
3134 "b" ( (uint32_t)u64New ),
3135 "c" ( (uint32_t)(u64New >> 32) ));
3136# endif
3137 return (bool)u32Ret;
3138# else
3139 __asm
3140 {
3141 mov ebx, dword ptr [u64New]
3142 mov ecx, dword ptr [u64New + 4]
3143 mov edi, [pu64]
3144 mov eax, dword ptr [u64Old]
3145 mov edx, dword ptr [u64Old + 4]
3146 lock cmpxchg8b [edi]
3147 setz al
3148 movzx eax, al
3149 mov dword ptr [u32Ret], eax
3150 }
3151 return !!u32Ret;
3152# endif
3153# endif /* !RT_ARCH_AMD64 */
3154}
3155#endif
3156
3157
3158/**
3159 * Atomically Compare and exchange a signed 64-bit value, ordered.
3160 *
3161 * @returns true if xchg was done.
3162 * @returns false if xchg wasn't done.
3163 *
3164 * @param pi64 Pointer to the 64-bit variable to update.
3165 * @param i64 The 64-bit value to assign to *pi64.
3166 * @param i64Old The value to compare with.
3167 */
3168DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3169{
3170 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3171}
3172
3173
3174/**
3175 * Atomically Compare and Exchange a pointer value, ordered.
3176 *
3177 * @returns true if xchg was done.
3178 * @returns false if xchg wasn't done.
3179 *
3180 * @param ppv Pointer to the value to update.
3181 * @param pvNew The new value to assign to *ppv.
3182 * @param pvOld The old value to compare *ppv with.
3183 */
3184DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3185{
3186#if ARCH_BITS == 32
3187 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3188#elif ARCH_BITS == 64
3189 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3190#else
3191# error "ARCH_BITS is bogus"
3192#endif
3193}
3194
3195
3196/** @def ASMAtomicCmpXchgHandle
3197 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3198 *
3199 * @param ph Pointer to the value to update.
3200 * @param hNew The new value to assign to *ph.
3201 * @param hOld The old value to compare *ph with.
3202 * @param fRc Where to store the result.
3203 *
3204 * @remarks This doesn't currently work for all handles (like RTFILE).
3205 */
3206#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3207 do { \
3208 (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
3209 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3210 } while (0)
3211
3212
3213/** @def ASMAtomicCmpXchgSize
3214 * Atomically Compare and Exchange a value whose size might differ
3215 * between platforms or compilers, ordered.
3216 *
3217 * @param pu Pointer to the value to update.
3218 * @param uNew The new value to assign to *pu.
3219 * @param uOld The old value to compare *pu with.
3220 * @param fRc Where to store the result.
3221 */
3222#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
3223 do { \
3224 switch (sizeof(*(pu))) { \
3225 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
3226 break; \
3227 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
3228 break; \
3229 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3230 (fRc) = false; \
3231 break; \
3232 } \
3233 } while (0)
3234
3235
3236/**
3237 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3238 * passes back old value, ordered.
3239 *
3240 * @returns true if xchg was done.
3241 * @returns false if xchg wasn't done.
3242 *
3243 * @param pu32 Pointer to the value to update.
3244 * @param u32New The new value to assign to *pu32.
3245 * @param u32Old The old value to compare *pu32 with.
3246 * @param pu32Old Pointer to where to store the old value.
3247 */
3248#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3249DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3250#else
3251DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3252{
3253# if RT_INLINE_ASM_GNU_STYLE
3254 uint8_t u8Ret;
3255 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3256 "setz %1\n\t"
3257 : "=m" (*pu32),
3258 "=qm" (u8Ret),
3259 "=a" (*pu32Old)
3260 : "r" (u32New),
3261 "a" (u32Old),
3262 "m" (*pu32));
3263 return (bool)u8Ret;
3264
3265# elif RT_INLINE_ASM_USES_INTRIN
3266 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3267
3268# else
3269 uint32_t u32Ret;
3270 __asm
3271 {
3272# ifdef RT_ARCH_AMD64
3273 mov rdx, [pu32]
3274# else
3275 mov edx, [pu32]
3276# endif
3277 mov eax, [u32Old]
3278 mov ecx, [u32New]
3279# ifdef RT_ARCH_AMD64
3280 lock cmpxchg [rdx], ecx
3281 mov rdx, [pu32Old]
3282 mov [rdx], eax
3283# else
3284 lock cmpxchg [edx], ecx
3285 mov edx, [pu32Old]
3286 mov [edx], eax
3287# endif
3288 setz al
3289 movzx eax, al
3290 mov [u32Ret], eax
3291 }
3292 return !!u32Ret;
3293# endif
3294}
3295#endif
3296
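/* Usage sketch: because the Ex variant hands back the current value on failure,
 * a retry loop needs no separate re-read (names are illustrative only):
 *
 *      uint32_t u32Old = ASMAtomicUoReadU32(pu32);
 *      while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fFlag, u32Old, &u32Old))
 *          ;   on failure u32Old has already been refreshed with the current value
 */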
3297
3298/**
3299 * Atomically Compare and Exchange a signed 32-bit value, additionally
3300 * passes back old value, ordered.
3301 *
3302 * @returns true if xchg was done.
3303 * @returns false if xchg wasn't done.
3304 *
3305 * @param pi32 Pointer to the value to update.
3306 * @param i32New The new value to assign to *pi32.
3307 * @param i32Old The old value to compare *pi32 with.
3308 * @param pi32Old Pointer to where to store the old value.
3309 */
3310DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3311{
3312 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3313}
3314
3315
3316/**
3317 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3318 * passing back old value, ordered.
3319 *
3320 * @returns true if xchg was done.
3321 * @returns false if xchg wasn't done.
3322 *
3323 * @param pu64 Pointer to the 64-bit variable to update.
3324 * @param u64New The 64-bit value to assign to *pu64.
3325 * @param u64Old The value to compare with.
3326 * @param pu64Old Pointer to where to store the old value.
3327 */
3328#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3329DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3330#else
3331DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3332{
3333# if RT_INLINE_ASM_USES_INTRIN
3334 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3335
3336# elif defined(RT_ARCH_AMD64)
3337# if RT_INLINE_ASM_GNU_STYLE
3338 uint8_t u8Ret;
3339 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3340 "setz %1\n\t"
3341 : "=m" (*pu64),
3342 "=qm" (u8Ret),
3343 "=a" (*pu64Old)
3344 : "r" (u64New),
3345 "a" (u64Old),
3346 "m" (*pu64));
3347 return (bool)u8Ret;
3348# else
3349 bool fRet;
3350 __asm
3351 {
3352 mov rdx, [pu64]
3353 mov rax, [u64Old]
3354 mov rcx, [u64New]
3355 lock cmpxchg [rdx], rcx
3356 mov rdx, [pu64Old]
3357 mov [rdx], rax
3358 setz al
3359 mov [fRet], al
3360 }
3361 return fRet;
3362# endif
3363# else /* !RT_ARCH_AMD64 */
3364# if RT_INLINE_ASM_GNU_STYLE
3365 uint64_t u64Ret;
3366# if defined(PIC) || defined(__PIC__)
3367 /* NB: this code uses a memory clobber description, because the clean
3368 * solution with an output value for *pu64 makes gcc run out of registers.
3369 * This will cause suboptimal code, and anyone with a better solution is
3370 * welcome to improve this. */
3371 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3372 "lock; cmpxchg8b %3\n\t"
3373 "xchgl %%ebx, %1\n\t"
3374 : "=A" (u64Ret)
3375 : "DS" ((uint32_t)u64New),
3376 "c" ((uint32_t)(u64New >> 32)),
3377 "m" (*pu64),
3378 "0" (u64Old)
3379 : "memory" );
3380# else /* !PIC */
3381 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3382 : "=A" (u64Ret),
3383 "=m" (*pu64)
3384 : "b" ((uint32_t)u64New),
3385 "c" ((uint32_t)(u64New >> 32)),
3386 "m" (*pu64),
3387 "0" (u64Old));
3388# endif
3389 *pu64Old = u64Ret;
3390 return u64Ret == u64Old;
3391# else
3392 uint32_t u32Ret;
3393 __asm
3394 {
3395 mov ebx, dword ptr [u64New]
3396 mov ecx, dword ptr [u64New + 4]
3397 mov edi, [pu64]
3398 mov eax, dword ptr [u64Old]
3399 mov edx, dword ptr [u64Old + 4]
3400 lock cmpxchg8b [edi]
3401 mov ebx, [pu64Old]
3402 mov [ebx], eax
3403 setz al
3404 movzx eax, al
3405 add ebx, 4
3406 mov [ebx], edx
3407 mov dword ptr [u32Ret], eax
3408 }
3409 return !!u32Ret;
3410# endif
3411# endif /* !RT_ARCH_AMD64 */
3412}
3413#endif
3414
3415
3416/**
3417 * Atomically Compare and exchange a signed 64-bit value, additionally
3418 * passing back old value, ordered.
3419 *
3420 * @returns true if xchg was done.
3421 * @returns false if xchg wasn't done.
3422 *
3423 * @param pi64 Pointer to the 64-bit variable to update.
3424 * @param i64 The 64-bit value to assign to *pi64.
3425 * @param i64Old The value to compare with.
3426 * @param pi64Old Pointer to where to store the old value.
3427 */
3428DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3429{
3430 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3431}
3432
3433/** @def ASMAtomicCmpXchgExHandle
3434 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3435 *
3436 * @param ph Pointer to the value to update.
3437 * @param hNew The new value to assign to *ph.
3438 * @param hOld The old value to compare *ph with.
3439 * @param fRc Where to store the result.
3440 * @param phOldVal Pointer to where to store the old value.
3441 *
3442 * @remarks This doesn't currently work for all handles (like RTFILE).
3443 */
3444#if ARCH_BITS == 32
3445# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3446 do { \
3447 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3448 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3449 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3450 } while (0)
3451#elif ARCH_BITS == 64
3452# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3453 do { \
3454 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3455 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3456 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3457 } while (0)
3458#endif
3459
3460
3461/** @def ASMAtomicCmpXchgExSize
3462 * Atomically Compare and Exchange a value whose size might differ
3463 * between platforms or compilers. Additionally passes back old value.
3464 *
3465 * @param pu Pointer to the value to update.
3466 * @param uNew The new value to assign to *pu.
3467 * @param uOld The old value to compare *pu with.
3468 * @param fRc Where to store the result.
3469 * @param puOldVal Pointer to where to store the old value.
3470 */
3471#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3472 do { \
3473 switch (sizeof(*(pu))) { \
3474 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3475 break; \
3476 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3477 break; \
3478 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3479 (fRc) = false; \
3480 *(puOldVal) = 0; \
3481 break; \
3482 } \
3483 } while (0)
3484
3485
3486/**
3487 * Atomically Compare and Exchange a pointer value, additionally
3488 * passing back old value, ordered.
3489 *
3490 * @returns true if xchg was done.
3491 * @returns false if xchg wasn't done.
3492 *
3493 * @param ppv Pointer to the value to update.
3494 * @param pvNew The new value to assign to *ppv.
3495 * @param pvOld The old value to compare *ppv with.
3496 * @param ppvOld Pointer to where to store the old value.
3497 */
3498DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3499{
3500#if ARCH_BITS == 32
3501 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3502#elif ARCH_BITS == 64
3503 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3504#else
3505# error "ARCH_BITS is bogus"
3506#endif
3507}
3508
3509
3510/**
3511 * Atomically exchanges and adds to a 32-bit value, ordered.
3512 *
3513 * @returns The old value.
3514 * @param pu32 Pointer to the value.
3515 * @param u32 Number to add.
3516 */
3517#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3518DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3519#else
3520DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3521{
3522# if RT_INLINE_ASM_USES_INTRIN
3523 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3524 return u32;
3525
3526# elif RT_INLINE_ASM_GNU_STYLE
3527 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3528 : "=r" (u32),
3529 "=m" (*pu32)
3530 : "0" (u32),
3531 "m" (*pu32)
3532 : "memory");
3533 return u32;
3534# else
3535 __asm
3536 {
3537 mov eax, [u32]
3538# ifdef RT_ARCH_AMD64
3539 mov rdx, [pu32]
3540 lock xadd [rdx], eax
3541# else
3542 mov edx, [pu32]
3543 lock xadd [edx], eax
3544# endif
3545 mov [u32], eax
3546 }
3547 return u32;
3548# endif
3549}
3550#endif
3551
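/* Usage sketch: since the old value is returned, ASMAtomicAddU32 can hand out
 * unique slots in a shared buffer (g_iNext and g_aSlots are illustrative only):
 *
 *      uint32_t iSlot = ASMAtomicAddU32(&g_iNext, 1) % RT_ELEMENTS(g_aSlots);
 */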
3552
3553/**
3554 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3555 *
3556 * @returns The old value.
3557 * @param pi32 Pointer to the value.
3558 * @param i32 Number to add.
3559 */
3560DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3561{
3562 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3563}
3564
3565
3566/**
3567 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3568 *
3569 * @returns The old value.
3570 * @param pi32 Pointer to the value.
3571 * @param u32 Number to subtract.
3572 */
3573DECLINLINE(uint32_t) ASMAtomicSubU32(int32_t volatile *pi32, uint32_t u32)
3574{
3575 return ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-(int32_t)u32);
3576}
3577
3578
3579/**
3580 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3581 *
3582 * @returns The old value.
3583 * @param pi32 Pointer to the value.
3584 * @param i32 Number to subtract.
3585 */
3586DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3587{
3588 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3589}
3590
3591
3592/**
3593 * Atomically increment a 32-bit value, ordered.
3594 *
3595 * @returns The new value.
3596 * @param pu32 Pointer to the value to increment.
3597 */
3598#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3599DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3600#else
3601DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3602{
3603 uint32_t u32;
3604# if RT_INLINE_ASM_USES_INTRIN
3605 u32 = _InterlockedIncrement((long *)pu32);
3606 return u32;
3607
3608# elif RT_INLINE_ASM_GNU_STYLE
3609 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3610 : "=r" (u32),
3611 "=m" (*pu32)
3612 : "0" (1),
3613 "m" (*pu32)
3614 : "memory");
3615 return u32+1;
3616# else
3617 __asm
3618 {
3619 mov eax, 1
3620# ifdef RT_ARCH_AMD64
3621 mov rdx, [pu32]
3622 lock xadd [rdx], eax
3623# else
3624 mov edx, [pu32]
3625 lock xadd [edx], eax
3626# endif
3627 mov u32, eax
3628 }
3629 return u32+1;
3630# endif
3631}
3632#endif
3633
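/* Usage sketch: a minimal retain/release reference count; the release side uses
 * ASMAtomicDecU32 declared just below. Structure and names are illustrative only:
 *
 *      ASMAtomicIncU32(&pObj->cRefs);
 *      ...
 *      if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *          ExampleDestroy(pObj);
 */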
3634
3635/**
3636 * Atomically increment a signed 32-bit value, ordered.
3637 *
3638 * @returns The new value.
3639 * @param pi32 Pointer to the value to increment.
3640 */
3641DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3642{
3643 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3644}
3645
3646
3647/**
3648 * Atomically decrement an unsigned 32-bit value, ordered.
3649 *
3650 * @returns The new value.
3651 * @param pu32 Pointer to the value to decrement.
3652 */
3653#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3654DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3655#else
3656DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3657{
3658 uint32_t u32;
3659# if RT_INLINE_ASM_USES_INTRIN
3660 u32 = _InterlockedDecrement((long *)pu32);
3661 return u32;
3662
3663# elif RT_INLINE_ASM_GNU_STYLE
3664 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3665 : "=r" (u32),
3666 "=m" (*pu32)
3667 : "0" (-1),
3668 "m" (*pu32)
3669 : "memory");
3670 return u32-1;
3671# else
3672 __asm
3673 {
3674 mov eax, -1
3675# ifdef RT_ARCH_AMD64
3676 mov rdx, [pu32]
3677 lock xadd [rdx], eax
3678# else
3679 mov edx, [pu32]
3680 lock xadd [edx], eax
3681# endif
3682 mov u32, eax
3683 }
3684 return u32-1;
3685# endif
3686}
3687#endif
3688
3689
3690/**
3691 * Atomically decrement a signed 32-bit value, ordered.
3692 *
3693 * @returns The new value.
3694 * @param pi32 Pointer to the value to decrement.
3695 */
3696DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3697{
3698 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3699}
3700
3701
3702/**
3703 * Atomically Or an unsigned 32-bit value, ordered.
3704 *
3705 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3706 * @param u32 The value to OR *pu32 with.
3707 */
3708#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3709DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3710#else
3711DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3712{
3713# if RT_INLINE_ASM_USES_INTRIN
3714 _InterlockedOr((long volatile *)pu32, (long)u32);
3715
3716# elif RT_INLINE_ASM_GNU_STYLE
3717 __asm__ __volatile__("lock; orl %1, %0\n\t"
3718 : "=m" (*pu32)
3719 : "ir" (u32),
3720 "m" (*pu32));
3721# else
3722 __asm
3723 {
3724 mov eax, [u32]
3725# ifdef RT_ARCH_AMD64
3726 mov rdx, [pu32]
3727 lock or [rdx], eax
3728# else
3729 mov edx, [pu32]
3730 lock or [edx], eax
3731# endif
3732 }
3733# endif
3734}
3735#endif
3736
3737
3738/**
3739 * Atomically Or a signed 32-bit value, ordered.
3740 *
3741 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3742 * @param i32 The value to OR *pi32 with.
3743 */
3744DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3745{
3746 ASMAtomicOrU32((uint32_t volatile *)pi32, (uint32_t)i32);
3747}
3748
3749
3750/**
3751 * Atomically And an unsigned 32-bit value, ordered.
3752 *
3753 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3754 * @param u32 The value to AND *pu32 with.
3755 */
3756#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3757DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3758#else
3759DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3760{
3761# if RT_INLINE_ASM_USES_INTRIN
3762 _InterlockedAnd((long volatile *)pu32, u32);
3763
3764# elif RT_INLINE_ASM_GNU_STYLE
3765 __asm__ __volatile__("lock; andl %1, %0\n\t"
3766 : "=m" (*pu32)
3767 : "ir" (u32),
3768 "m" (*pu32));
3769# else
3770 __asm
3771 {
3772 mov eax, [u32]
3773# ifdef RT_ARCH_AMD64
3774 mov rdx, [pu32]
3775 lock and [rdx], eax
3776# else
3777 mov edx, [pu32]
3778 lock and [edx], eax
3779# endif
3780 }
3781# endif
3782}
3783#endif
3784
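/* Usage sketch: ASMAtomicOrU32 / ASMAtomicAndU32 are the usual way to set and
 * clear individual bits in a shared flag word (names are illustrative only):
 *
 *      ASMAtomicOrU32(&pThis->fFlags, RT_BIT(7));          set bit 7
 *      ASMAtomicAndU32(&pThis->fFlags, ~RT_BIT(7));        clear bit 7
 *
 * Neither returns the previous value; when that is needed, use an
 * ASMAtomicCmpXchgU32 retry loop instead.
 */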
3785
3786/**
3787 * Atomically And a signed 32-bit value, ordered.
3788 *
3789 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3790 * @param i32 The value to AND *pi32 with.
3791 */
3792DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3793{
3794 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3795}
3796
3797
3798/**
3799 * Memory fence, waits for any pending writes and reads to complete.
3800 */
3801DECLINLINE(void) ASMMemoryFence(void)
3802{
3803 /** @todo use mfence? check if all cpus we care for support it. */
3804 uint32_t volatile u32;
3805 ASMAtomicXchgU32(&u32, 0);
3806}
3807
3808
3809/**
3810 * Write fence, waits for any pending writes to complete.
3811 */
3812DECLINLINE(void) ASMWriteFence(void)
3813{
3814 /** @todo use sfence? check if all cpus we care for support it. */
3815 ASMMemoryFence();
3816}
3817
3818
3819/**
3820 * Read fence, waits for any pending reads to complete.
3821 */
3822DECLINLINE(void) ASMReadFence(void)
3823{
3824 /** @todo use lfence? check if all cpus we care for support it. */
3825 ASMMemoryFence();
3826}
3827
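/* Usage sketch: the publish/consume pattern these fences are meant for (the
 * structure and field names are illustrative only). Producer side:
 *
 *      pShared->u64Payload = u64Value;
 *      ASMWriteFence();
 *      pShared->fReady = true;
 *
 * Consumer side:
 *
 *      if (pShared->fReady)
 *      {
 *          ASMReadFence();
 *          u64Value = pShared->u64Payload;
 *      }
 *
 * The ordered ASMAtomicReadXxx / ASMAtomicWriteXxx accessors further down are
 * implemented with equivalent fencing, so the explicit fences are mainly needed
 * around plain, non-atomic accesses like the ones above.
 */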
3828
3829/**
3830 * Atomically reads an unsigned 8-bit value, ordered.
3831 *
3832 * @returns Current *pu8 value
3833 * @param pu8 Pointer to the 8-bit variable to read.
3834 */
3835DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3836{
3837 ASMMemoryFence();
3838 return *pu8; /* byte reads are atomic on x86 */
3839}
3840
3841
3842/**
3843 * Atomically reads an unsigned 8-bit value, unordered.
3844 *
3845 * @returns Current *pu8 value
3846 * @param pu8 Pointer to the 8-bit variable to read.
3847 */
3848DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3849{
3850 return *pu8; /* byte reads are atomic on x86 */
3851}
3852
3853
3854/**
3855 * Atomically reads a signed 8-bit value, ordered.
3856 *
3857 * @returns Current *pi8 value
3858 * @param pi8 Pointer to the 8-bit variable to read.
3859 */
3860DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3861{
3862 ASMMemoryFence();
3863 return *pi8; /* byte reads are atomic on x86 */
3864}
3865
3866
3867/**
3868 * Atomically reads a signed 8-bit value, unordered.
3869 *
3870 * @returns Current *pi8 value
3871 * @param pi8 Pointer to the 8-bit variable to read.
3872 */
3873DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3874{
3875 return *pi8; /* byte reads are atomic on x86 */
3876}
3877
3878
3879/**
3880 * Atomically reads an unsigned 16-bit value, ordered.
3881 *
3882 * @returns Current *pu16 value
3883 * @param pu16 Pointer to the 16-bit variable to read.
3884 */
3885DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3886{
3887 ASMMemoryFence();
3888 Assert(!((uintptr_t)pu16 & 1));
3889 return *pu16;
3890}
3891
3892
3893/**
3894 * Atomically reads an unsigned 16-bit value, unordered.
3895 *
3896 * @returns Current *pu16 value
3897 * @param pu16 Pointer to the 16-bit variable to read.
3898 */
3899DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3900{
3901 Assert(!((uintptr_t)pu16 & 1));
3902 return *pu16;
3903}
3904
3905
3906/**
3907 * Atomically reads a signed 16-bit value, ordered.
3908 *
3909 * @returns Current *pi16 value
3910 * @param pi16 Pointer to the 16-bit variable to read.
3911 */
3912DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3913{
3914 ASMMemoryFence();
3915 Assert(!((uintptr_t)pi16 & 1));
3916 return *pi16;
3917}
3918
3919
3920/**
3921 * Atomically reads a signed 16-bit value, unordered.
3922 *
3923 * @returns Current *pi16 value
3924 * @param pi16 Pointer to the 16-bit variable to read.
3925 */
3926DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3927{
3928 Assert(!((uintptr_t)pi16 & 1));
3929 return *pi16;
3930}
3931
3932
3933/**
3934 * Atomically reads an unsigned 32-bit value, ordered.
3935 *
3936 * @returns Current *pu32 value
3937 * @param pu32 Pointer to the 32-bit variable to read.
3938 */
3939DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3940{
3941 ASMMemoryFence();
3942 Assert(!((uintptr_t)pu32 & 3));
3943 return *pu32;
3944}
3945
3946
3947/**
3948 * Atomically reads an unsigned 32-bit value, unordered.
3949 *
3950 * @returns Current *pu32 value
3951 * @param pu32 Pointer to the 32-bit variable to read.
3952 */
3953DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3954{
3955 Assert(!((uintptr_t)pu32 & 3));
3956 return *pu32;
3957}
3958
3959
3960/**
3961 * Atomically reads a signed 32-bit value, ordered.
3962 *
3963 * @returns Current *pi32 value
3964 * @param pi32 Pointer to the 32-bit variable to read.
3965 */
3966DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3967{
3968 ASMMemoryFence();
3969 Assert(!((uintptr_t)pi32 & 3));
3970 return *pi32;
3971}
3972
3973
3974/**
3975 * Atomically reads a signed 32-bit value, unordered.
3976 *
3977 * @returns Current *pi32 value
3978 * @param pi32 Pointer to the 32-bit variable to read.
3979 */
3980DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3981{
3982 Assert(!((uintptr_t)pi32 & 3));
3983 return *pi32;
3984}
3985
3986
3987/**
3988 * Atomically reads an unsigned 64-bit value, ordered.
3989 *
3990 * @returns Current *pu64 value
3991 * @param pu64 Pointer to the 64-bit variable to read.
3992 * The memory pointed to must be writable.
3993 * @remark This will fault if the memory is read-only!
3994 */
3995#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3996DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3997#else
3998DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3999{
4000 uint64_t u64;
4001# ifdef RT_ARCH_AMD64
4002 Assert(!((uintptr_t)pu64 & 7));
4003/*# if RT_INLINE_ASM_GNU_STYLE
4004 __asm__ __volatile__( "mfence\n\t"
4005 "movq %1, %0\n\t"
4006 : "=r" (u64)
4007 : "m" (*pu64));
4008# else
4009 __asm
4010 {
4011 mfence
4012 mov rdx, [pu64]
4013 mov rax, [rdx]
4014 mov [u64], rax
4015 }
4016# endif*/
4017 ASMMemoryFence();
4018 u64 = *pu64;
4019# else /* !RT_ARCH_AMD64 */
4020# if RT_INLINE_ASM_GNU_STYLE
4021# if defined(PIC) || defined(__PIC__)
4022 uint32_t u32EBX = 0;
4023 Assert(!((uintptr_t)pu64 & 7));
4024 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4025 "lock; cmpxchg8b (%5)\n\t"
4026 "movl %3, %%ebx\n\t"
4027 : "=A" (u64),
4028# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4029 "+m" (*pu64)
4030# else
4031 "=m" (*pu64)
4032# endif
4033 : "0" (0),
4034 "m" (u32EBX),
4035 "c" (0),
4036 "S" (pu64));
4037# else /* !PIC */
4038 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4039 : "=A" (u64),
4040 "+m" (*pu64)
4041 : "0" (0),
4042 "b" (0),
4043 "c" (0));
4044# endif
4045# else
4046 Assert(!((uintptr_t)pu64 & 7));
4047 __asm
4048 {
4049 xor eax, eax
4050 xor edx, edx
4051 mov edi, pu64
4052 xor ecx, ecx
4053 xor ebx, ebx
4054 lock cmpxchg8b [edi]
4055 mov dword ptr [u64], eax
4056 mov dword ptr [u64 + 4], edx
4057 }
4058# endif
4059# endif /* !RT_ARCH_AMD64 */
4060 return u64;
4061}
4062#endif
4063
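/* A C-level sketch of the 32-bit trick above: lock cmpxchg8b with both the
 * comparand and the new value set to zero. If *pu64 happens to be zero, the
 * (identical) value is written back; otherwise the compare fails and edx:eax
 * receives the current value. Either way the caller gets the current contents:
 *
 *      uint64_t u64 = 0;
 *      ASMAtomicCmpXchgExU64(pu64, 0, 0, &u64);
 *      return u64;
 *
 * Because cmpxchg8b always drives a write cycle, the memory must be writable,
 * which is what the remark above warns about.
 */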
4064
4065/**
4066 * Atomically reads an unsigned 64-bit value, unordered.
4067 *
4068 * @returns Current *pu64 value
4069 * @param pu64 Pointer to the 64-bit variable to read.
4070 * The memory pointed to must be writable.
4071 * @remark This will fault if the memory is read-only!
4072 */
4073#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4074DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4075#else
4076DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4077{
4078 uint64_t u64;
4079# ifdef RT_ARCH_AMD64
4080 Assert(!((uintptr_t)pu64 & 7));
4081/*# if RT_INLINE_ASM_GNU_STYLE
4082 Assert(!((uintptr_t)pu64 & 7));
4083 __asm__ __volatile__("movq %1, %0\n\t"
4084 : "=r" (u64)
4085 : "m" (*pu64));
4086# else
4087 __asm
4088 {
4089 mov rdx, [pu64]
4090 mov rax, [rdx]
4091 mov [u64], rax
4092 }
4093# endif */
4094 u64 = *pu64;
4095# else /* !RT_ARCH_AMD64 */
4096# if RT_INLINE_ASM_GNU_STYLE
4097# if defined(PIC) || defined(__PIC__)
4098 uint32_t u32EBX = 0;
4099 uint32_t u32Spill;
4100 Assert(!((uintptr_t)pu64 & 7));
4101 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4102 "xor %%ecx,%%ecx\n\t"
4103 "xor %%edx,%%edx\n\t"
4104 "xchgl %%ebx, %3\n\t"
4105 "lock; cmpxchg8b (%4)\n\t"
4106 "movl %3, %%ebx\n\t"
4107 : "=A" (u64),
4108# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4109 "+m" (*pu64),
4110# else
4111 "=m" (*pu64),
4112# endif
4113 "=c" (u32Spill)
4114 : "m" (u32EBX),
4115 "S" (pu64));
4116# else /* !PIC */
4117 __asm__ __volatile__("cmpxchg8b %1\n\t"
4118 : "=A" (u64),
4119 "+m" (*pu64)
4120 : "0" (0),
4121 "b" (0),
4122 "c" (0));
4123# endif
4124# else
4125 Assert(!((uintptr_t)pu64 & 7));
4126 __asm
4127 {
4128 xor eax, eax
4129 xor edx, edx
4130 mov edi, pu64
4131 xor ecx, ecx
4132 xor ebx, ebx
4133 lock cmpxchg8b [edi]
4134 mov dword ptr [u64], eax
4135 mov dword ptr [u64 + 4], edx
4136 }
4137# endif
4138# endif /* !RT_ARCH_AMD64 */
4139 return u64;
4140}
4141#endif
4142
4143
4144/**
4145 * Atomically reads a signed 64-bit value, ordered.
4146 *
4147 * @returns Current *pi64 value
4148 * @param pi64 Pointer to the 64-bit variable to read.
4149 * The memory pointed to must be writable.
4150 * @remark This will fault if the memory is read-only!
4151 */
4152DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4153{
4154 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4155}
4156
4157
4158/**
4159 * Atomically reads a signed 64-bit value, unordered.
4160 *
4161 * @returns Current *pi64 value
4162 * @param pi64 Pointer to the 64-bit variable to read.
4163 * The memory pointed to must be writable.
4164 * @remark This will fault if the memory is read-only!
4165 */
4166DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4167{
4168 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4169}
4170
4171
4172/**
4173 * Atomically reads a pointer value, ordered.
4174 *
4175 * @returns Current *pv value
4176 * @param ppv Pointer to the pointer variable to read.
4177 */
4178DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4179{
4180#if ARCH_BITS == 32
4181 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4182#elif ARCH_BITS == 64
4183 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4184#else
4185# error "ARCH_BITS is bogus"
4186#endif
4187}
4188
4189
4190/**
4191 * Atomically reads a pointer value, unordered.
4192 *
4193 * @returns Current *pv value
4194 * @param ppv Pointer to the pointer variable to read.
4195 */
4196DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4197{
4198#if ARCH_BITS == 32
4199 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4200#elif ARCH_BITS == 64
4201 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4202#else
4203# error "ARCH_BITS is bogus"
4204#endif
4205}
4206
4207
4208/**
4209 * Atomically reads a boolean value, ordered.
4210 *
4211 * @returns Current *pf value
4212 * @param pf Pointer to the boolean variable to read.
4213 */
4214DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4215{
4216 ASMMemoryFence();
4217 return *pf; /* byte reads are atomic on x86 */
4218}
4219
4220
4221/**
4222 * Atomically reads a boolean value, unordered.
4223 *
4224 * @returns Current *pf value
4225 * @param pf Pointer to the boolean variable to read.
4226 */
4227DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4228{
4229 return *pf; /* byte reads are atomic on x86 */
4230}
4231
4232
4233/**
4234 * Atomically read a typical IPRT handle value, ordered.
4235 *
4236 * @param ph Pointer to the handle variable to read.
4237 * @param phRes Where to store the result.
4238 *
4239 * @remarks This doesn't currently work for all handles (like RTFILE).
4240 */
4241#define ASMAtomicReadHandle(ph, phRes) \
4242 do { \
4243 *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
4244 AssertCompile(sizeof(*ph) == sizeof(void *)); \
4245 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
4246 } while (0)
4247
4248
4249/**
4250 * Atomically read a typical IPRT handle value, unordered.
4251 *
4252 * @param ph Pointer to the handle variable to read.
4253 * @param phRes Where to store the result.
4254 *
4255 * @remarks This doesn't currently work for all handles (like RTFILE).
4256 */
4257#define ASMAtomicUoReadHandle(ph, phRes) \
4258 do { \
4259 *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
4260 AssertCompile(sizeof(*ph) == sizeof(void *)); \
4261 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
4262 } while (0)
4263
4264
4265/**
4266 * Atomically read a value whose size might differ
4267 * between platforms or compilers, ordered.
4268 *
4269 * @param pu Pointer to the variable to read.
4270 * @param puRes Where to store the result.
4271 */
4272#define ASMAtomicReadSize(pu, puRes) \
4273 do { \
4274 switch (sizeof(*(pu))) { \
4275 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4276 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
4277 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
4278 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
4279 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4280 } \
4281 } while (0)
4282
4283
4284/**
4285 * Atomically read a value whose size might differ
4286 * between platforms or compilers, unordered.
4287 *
4288 * @param pu Pointer to the variable to read.
4289 * @param puRes Where to store the result.
4290 */
4291#define ASMAtomicUoReadSize(pu, puRes) \
4292 do { \
4293 switch (sizeof(*(pu))) { \
4294 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4295 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
4296 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
4297 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
4298 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4299 } \
4300 } while (0)
4301
4302
4303/**
4304 * Atomically writes an unsigned 8-bit value, ordered.
4305 *
4306 * @param pu8 Pointer to the 8-bit variable.
4307 * @param u8 The 8-bit value to assign to *pu8.
4308 */
4309DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4310{
4311 ASMAtomicXchgU8(pu8, u8);
4312}
4313
4314
4315/**
4316 * Atomically writes an unsigned 8-bit value, unordered.
4317 *
4318 * @param pu8 Pointer to the 8-bit variable.
4319 * @param u8 The 8-bit value to assign to *pu8.
4320 */
4321DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4322{
4323 *pu8 = u8; /* byte writes are atomic on x86 */
4324}
4325
4326
4327/**
4328 * Atomically writes a signed 8-bit value, ordered.
4329 *
4330 * @param pi8 Pointer to the 8-bit variable.
4331 * @param i8 The 8-bit value to assign to *pi8.
4332 */
4333DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4334{
4335 ASMAtomicXchgS8(pi8, i8);
4336}
4337
4338
4339/**
4340 * Atomically writes a signed 8-bit value, unordered.
4341 *
4342 * @param pi8 Pointer to the 8-bit variable.
4343 * @param i8 The 8-bit value to assign to *pi8.
4344 */
4345DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4346{
4347 *pi8 = i8; /* byte writes are atomic on x86 */
4348}
4349
4350
4351/**
4352 * Atomically writes an unsigned 16-bit value, ordered.
4353 *
4354 * @param pu16 Pointer to the 16-bit variable.
4355 * @param u16 The 16-bit value to assign to *pu16.
4356 */
4357DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4358{
4359 ASMAtomicXchgU16(pu16, u16);
4360}
4361
4362
4363/**
4364 * Atomically writes an unsigned 16-bit value, unordered.
4365 *
4366 * @param pu16 Pointer to the 16-bit variable.
4367 * @param u16 The 16-bit value to assign to *pu16.
4368 */
4369DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4370{
4371 Assert(!((uintptr_t)pu16 & 1));
4372 *pu16 = u16;
4373}
4374
4375
4376/**
4377 * Atomically writes a signed 16-bit value, ordered.
4378 *
4379 * @param pi16 Pointer to the 16-bit variable.
4380 * @param i16 The 16-bit value to assign to *pi16.
4381 */
4382DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4383{
4384 ASMAtomicXchgS16(pi16, i16);
4385}
4386
4387
4388/**
4389 * Atomically writes a signed 16-bit value, unordered.
4390 *
4391 * @param pi16 Pointer to the 16-bit variable.
4392 * @param i16 The 16-bit value to assign to *pi16.
4393 */
4394DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4395{
4396 Assert(!((uintptr_t)pi16 & 1));
4397 *pi16 = i16;
4398}
4399
4400
4401/**
4402 * Atomically writes an unsigned 32-bit value, ordered.
4403 *
4404 * @param pu32 Pointer to the 32-bit variable.
4405 * @param u32 The 32-bit value to assign to *pu32.
4406 */
4407DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4408{
4409 ASMAtomicXchgU32(pu32, u32);
4410}
4411
4412
4413/**
4414 * Atomically writes an unsigned 32-bit value, unordered.
4415 *
4416 * @param pu32 Pointer to the 32-bit variable.
4417 * @param u32 The 32-bit value to assign to *pu32.
4418 */
4419DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4420{
4421 Assert(!((uintptr_t)pu32 & 3));
4422 *pu32 = u32;
4423}
4424
4425
4426/**
4427 * Atomically writes a signed 32-bit value, ordered.
4428 *
4429 * @param pi32 Pointer to the 32-bit variable.
4430 * @param i32 The 32-bit value to assign to *pi32.
4431 */
4432DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4433{
4434 ASMAtomicXchgS32(pi32, i32);
4435}
4436
4437
4438/**
4439 * Atomically writes a signed 32-bit value, unordered.
4440 *
4441 * @param pi32 Pointer to the 32-bit variable.
4442 * @param i32 The 32-bit value to assign to *pi32.
4443 */
4444DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4445{
4446 Assert(!((uintptr_t)pi32 & 3));
4447 *pi32 = i32;
4448}
4449
4450
4451/**
4452 * Atomically writes an unsigned 64-bit value, ordered.
4453 *
4454 * @param pu64 Pointer to the 64-bit variable.
4455 * @param u64 The 64-bit value to assign to *pu64.
4456 */
4457DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4458{
4459 ASMAtomicXchgU64(pu64, u64);
4460}
4461
4462
4463/**
4464 * Atomically writes an unsigned 64-bit value, unordered.
4465 *
4466 * @param pu64 Pointer to the 64-bit variable.
4467 * @param u64 The 64-bit value to assign to *pu64.
4468 */
4469DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4470{
4471 Assert(!((uintptr_t)pu64 & 7));
4472#if ARCH_BITS == 64
4473 *pu64 = u64;
4474#else
4475 ASMAtomicXchgU64(pu64, u64);
4476#endif
4477}
4478
4479
4480/**
4481 * Atomically writes a signed 64-bit value, ordered.
4482 *
4483 * @param pi64 Pointer to the 64-bit variable.
4484 * @param i64 The 64-bit value to assign to *pi64.
4485 */
4486DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4487{
4488 ASMAtomicXchgS64(pi64, i64);
4489}
4490
4491
4492/**
4493 * Atomically writes a signed 64-bit value, unordered.
4494 *
4495 * @param pi64 Pointer to the 64-bit variable.
4496 * @param i64 The 64-bit value to assign to *pi64.
4497 */
4498DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4499{
4500 Assert(!((uintptr_t)pi64 & 7));
4501#if ARCH_BITS == 64
4502 *pi64 = i64;
4503#else
4504 ASMAtomicXchgS64(pi64, i64);
4505#endif
4506}
4507
4508
4509/**
4510 * Atomically writes a boolean value, ordered.
4511 *
4512 * @param pf Pointer to the boolean variable.
4513 * @param f The boolean value to assign to *pf.
4514 */
4515DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4516{
4517 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4518}
4519
4520
4521/**
4522 * Atomically writes a boolean value, unordered.
4523 *
4524 * @param pf Pointer to the boolean variable.
4525 * @param f The boolean value to assign to *pf.
4526 */
4527DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4528{
4529 *pf = f; /* byte writes are atomic on x86 */
4530}
4531
4532
4533/**
4534 * Atomically writes a pointer value, ordered.
4535 *
4537 * @param ppv Pointer to the pointer variable.
4538 * @param pv The pointer value to assign to *ppv.
4539 */
4540DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4541{
4542#if ARCH_BITS == 32
4543 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4544#elif ARCH_BITS == 64
4545 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4546#else
4547# error "ARCH_BITS is bogus"
4548#endif
4549}
4550
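/*
 * Example: a minimal usage sketch for ASMAtomicWritePtr; the ExamplePublish
 * function, MYNODE type and g_pExampleHead variable are hypothetical and not
 * part of IPRT. The ordered write ensures the node is fully initialized before
 * other CPUs can observe the new pointer.
 *
 *      typedef struct MYNODE { uint32_t u32Key; } MYNODE;
 *      static MYNODE * volatile g_pExampleHead;
 *
 *      void ExamplePublish(MYNODE *pNode)
 *      {
 *          pNode->u32Key = 42;                                            // initialize first ...
 *          ASMAtomicWritePtr((void * volatile *)&g_pExampleHead, pNode);  // ... then publish, ordered
 *      }
 */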
4551
4552/**
4553 * Atomically writes a pointer value, unordered.
4554 *
4556 * @param ppv Pointer to the pointer variable.
4557 * @param pv The pointer value to assign to *ppv.
4558 */
4559DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4560{
4561#if ARCH_BITS == 32
4562 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4563#elif ARCH_BITS == 64
4564 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4565#else
4566# error "ARCH_BITS is bogus"
4567#endif
4568}
4569
4570
4571/**
4572 * Atomically write a typical IPRT handle value, ordered.
4573 *
4574 * @param ph Pointer to the variable to update.
4575 * @param hNew The value to assign to *ph.
4576 *
4577 * @remarks This doesn't currently work for all handles (like RTFILE).
4578 */
4579#define ASMAtomicWriteHandle(ph, hNew) \
4580 do { \
4581 ASMAtomicWritePtr((void * volatile *)(ph), (const void *)hNew); \
4582 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4583 } while (0)
4584
4585
4586/**
4587 * Atomically write a typical IPRT handle value, unordered.
4588 *
4589 * @param ph Pointer to the variable to update.
4590 * @param hNew The value to assign to *ph.
4591 *
4592 * @remarks This doesn't currently work for all handles (like RTFILE).
4593 */
4594#define ASMAtomicUoWriteHandle(ph, hNew) \
4595 do { \
4596 ASMAtomicUoWritePtr((void * volatile *)(ph), (const void *)hNew); \
4597 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4598 } while (0)
4599
4600
4601/**
4602 * Atomically write a value which size might differ
4603 * between platforms or compilers, ordered.
4604 *
4605 * @param pu Pointer to the variable to update.
4606 * @param uNew The value to assign to *pu.
4607 */
4608#define ASMAtomicWriteSize(pu, uNew) \
4609 do { \
4610 switch (sizeof(*(pu))) { \
4611 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4612 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4613 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4614 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4615 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4616 } \
4617 } while (0)
4618
4619/**
4620 * Atomically write a value which size might differ
4621 * between platforms or compilers, unordered.
4622 *
4623 * @param pu Pointer to the variable to update.
4624 * @param uNew The value to assign to *pu.
4625 */
4626#define ASMAtomicUoWriteSize(pu, uNew) \
4627 do { \
4628 switch (sizeof(*(pu))) { \
4629 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4630 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4631 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4632 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4633 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4634 } \
4635 } while (0)
4636
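/*
 * Example: a minimal usage sketch of the ordered, unordered and size-dispatching
 * writers above; the ExampleWriters function and the s_cUsers / s_fShutdown
 * variables are hypothetical and not part of IPRT.
 *
 *      static volatile uint32_t s_cUsers;
 *      static volatile bool     s_fShutdown;
 *
 *      void ExampleWriters(void)
 *      {
 *          ASMAtomicWriteU32(&s_cUsers, 0);              // ordered 32-bit write (xchg based)
 *          ASMAtomicWriteSize(&s_cUsers, 1);             // dispatches on sizeof(*pu) to the U32 variant
 *          ASMAtomicUoWriteBool(&s_fShutdown, true);     // unordered plain byte store
 *      }
 */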
4637
4638
4639
4640/**
4641 * Invalidate page.
4642 *
4643 * @param pv Address of the page to invalidate.
4644 */
4645#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4646DECLASM(void) ASMInvalidatePage(void *pv);
4647#else
4648DECLINLINE(void) ASMInvalidatePage(void *pv)
4649{
4650# if RT_INLINE_ASM_USES_INTRIN
4651 __invlpg(pv);
4652
4653# elif RT_INLINE_ASM_GNU_STYLE
4654 __asm__ __volatile__("invlpg %0\n\t"
4655 : : "m" (*(uint8_t *)pv));
4656# else
4657 __asm
4658 {
4659# ifdef RT_ARCH_AMD64
4660 mov rax, [pv]
4661 invlpg [rax]
4662# else
4663 mov eax, [pv]
4664 invlpg [eax]
4665# endif
4666 }
4667# endif
4668}
4669#endif
4670
4671
4672#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4673# if PAGE_SIZE != 0x1000
4674# error "PAGE_SIZE is not 0x1000!"
4675# endif
4676#endif
4677
4678/**
4679 * Zeros a 4K memory page.
4680 *
4681 * @param pv Pointer to the memory block. This must be page aligned.
4682 */
4683#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4684DECLASM(void) ASMMemZeroPage(volatile void *pv);
4685# else
4686DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4687{
4688# if RT_INLINE_ASM_USES_INTRIN
4689# ifdef RT_ARCH_AMD64
4690 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4691# else
4692 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4693# endif
4694
4695# elif RT_INLINE_ASM_GNU_STYLE
4696 RTCCUINTREG uDummy;
4697# ifdef RT_ARCH_AMD64
4698 __asm__ __volatile__ ("rep stosq"
4699 : "=D" (pv),
4700 "=c" (uDummy)
4701 : "0" (pv),
4702 "c" (0x1000 >> 3),
4703 "a" (0)
4704 : "memory");
4705# else
4706 __asm__ __volatile__ ("rep stosl"
4707 : "=D" (pv),
4708 "=c" (uDummy)
4709 : "0" (pv),
4710 "c" (0x1000 >> 2),
4711 "a" (0)
4712 : "memory");
4713# endif
4714# else
4715 __asm
4716 {
4717# ifdef RT_ARCH_AMD64
4718 xor rax, rax
4719 mov ecx, 0200h
4720 mov rdi, [pv]
4721 rep stosq
4722# else
4723 xor eax, eax
4724 mov ecx, 0400h
4725 mov edi, [pv]
4726 rep stosd
4727# endif
4728 }
4729# endif
4730}
4731# endif
4732
4733
4734/**
4735 * Zeros a memory block with a 32-bit aligned size.
4736 *
4737 * @param pv Pointer to the memory block.
4738 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4739 */
4740#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4741DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4742#else
4743DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4744{
4745# if RT_INLINE_ASM_USES_INTRIN
4746# ifdef RT_ARCH_AMD64
4747 if (!(cb & 7))
4748 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4749 else
4750# endif
4751 __stosd((unsigned long *)pv, 0, cb / 4);
4752
4753# elif RT_INLINE_ASM_GNU_STYLE
4754 __asm__ __volatile__ ("rep stosl"
4755 : "=D" (pv),
4756 "=c" (cb)
4757 : "0" (pv),
4758 "1" (cb >> 2),
4759 "a" (0)
4760 : "memory");
4761# else
4762 __asm
4763 {
4764 xor eax, eax
4765# ifdef RT_ARCH_AMD64
4766 mov rcx, [cb]
4767 shr rcx, 2
4768 mov rdi, [pv]
4769# else
4770 mov ecx, [cb]
4771 shr ecx, 2
4772 mov edi, [pv]
4773# endif
4774 rep stosd
4775 }
4776# endif
4777}
4778#endif
4779
4780
4781/**
4782 * Fills a memory block with a 32-bit aligned size.
4783 *
4784 * @param pv Pointer to the memory block.
4785 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4786 * @param u32 The value to fill with.
4787 */
4788#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4789DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4790#else
4791DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4792{
4793# if RT_INLINE_ASM_USES_INTRIN
4794# ifdef RT_ARCH_AMD64
4795 if (!(cb & 7))
4796 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4797 else
4798# endif
4799 __stosd((unsigned long *)pv, u32, cb / 4);
4800
4801# elif RT_INLINE_ASM_GNU_STYLE
4802 __asm__ __volatile__ ("rep stosl"
4803 : "=D" (pv),
4804 "=c" (cb)
4805 : "0" (pv),
4806 "1" (cb >> 2),
4807 "a" (u32)
4808 : "memory");
4809# else
4810 __asm
4811 {
4812# ifdef RT_ARCH_AMD64
4813 mov rcx, [cb]
4814 shr rcx, 2
4815 mov rdi, [pv]
4816# else
4817 mov ecx, [cb]
4818 shr ecx, 2
4819 mov edi, [pv]
4820# endif
4821 mov eax, [u32]
4822 rep stosd
4823 }
4824# endif
4825}
4826#endif
4827
4828
4829/**
4830 * Checks if a memory block is filled with the specified byte.
4831 *
4832 * This is a sort of inverted memchr.
4833 *
4834 * @returns Pointer to the byte which doesn't equal u8.
4835 * @returns NULL if all equal to u8.
4836 *
4837 * @param pv Pointer to the memory block.
4838 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4839 * @param u8 The value it's supposed to be filled with.
4840 */
4841#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4842DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4843#else
4844DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4845{
4846/** @todo rewrite this in inline assembly? */
4847 uint8_t const *pb = (uint8_t const *)pv;
4848 for (; cb; cb--, pb++)
4849 if (RT_UNLIKELY(*pb != u8))
4850 return (void *)pb;
4851 return NULL;
4852}
4853#endif
4854
4855
4856/**
4857 * Checks if a memory block is filled with the specified 32-bit value.
4858 *
4859 * This is a sort of inverted memchr.
4860 *
4861 * @returns Pointer to the first value which doesn't equal u32.
4862 * @returns NULL if all equal to u32.
4863 *
4864 * @param pv Pointer to the memory block.
4865 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4866 * @param u32 The value it's supposed to be filled with.
4867 */
4868#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4869DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4870#else
4871DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4872{
4873/** @todo rewrite this in inline assembly? */
4874 uint32_t const *pu32 = (uint32_t const *)pv;
4875 for (; cb; cb -= 4, pu32++)
4876 if (RT_UNLIKELY(*pu32 != u32))
4877 return (uint32_t *)pu32;
4878 return NULL;
4879}
4880#endif
4881
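/*
 * Example: a minimal usage sketch for the fill/check helpers above; the
 * ExampleFillAndCheck function and au32Buf buffer are hypothetical and not
 * part of IPRT. Both helpers require the byte count to be 32-bit aligned.
 *
 *      void ExampleFillAndCheck(void)
 *      {
 *          uint32_t au32Buf[1024];
 *
 *          ASMMemFill32(au32Buf, sizeof(au32Buf), UINT32_C(0xdeadbeef));
 *          Assert(!ASMMemIsAllU32(au32Buf, sizeof(au32Buf), UINT32_C(0xdeadbeef)));  // NULL means all match
 *          ASMMemZero32(au32Buf, sizeof(au32Buf));
 *          Assert(!ASMMemIsAll8(au32Buf, sizeof(au32Buf), 0));
 *      }
 */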
4882
4883/**
4884 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4885 *
4886 * @returns u32F1 * u32F2.
4887 */
4888#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4889DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4890#else
4891DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4892{
4893# ifdef RT_ARCH_AMD64
4894 return (uint64_t)u32F1 * u32F2;
4895# else /* !RT_ARCH_AMD64 */
4896 uint64_t u64;
4897# if RT_INLINE_ASM_GNU_STYLE
4898 __asm__ __volatile__("mull %%edx"
4899 : "=A" (u64)
4900 : "a" (u32F2), "d" (u32F1));
4901# else
4902 __asm
4903 {
4904 mov edx, [u32F1]
4905 mov eax, [u32F2]
4906 mul edx
4907 mov dword ptr [u64], eax
4908 mov dword ptr [u64 + 4], edx
4909 }
4910# endif
4911 return u64;
4912# endif /* !RT_ARCH_AMD64 */
4913}
4914#endif
4915
4916
4917/**
4918 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4919 *
4920 * @returns i32F1 * i32F2.
4921 */
4922#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4923DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4924#else
4925DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
4926{
4927# ifdef RT_ARCH_AMD64
4928 return (int64_t)i32F1 * i32F2;
4929# else /* !RT_ARCH_AMD64 */
4930 int64_t i64;
4931# if RT_INLINE_ASM_GNU_STYLE
4932 __asm__ __volatile__("imull %%edx"
4933 : "=A" (i64)
4934 : "a" (i32F2), "d" (i32F1));
4935# else
4936 __asm
4937 {
4938 mov edx, [i32F1]
4939 mov eax, [i32F2]
4940 imul edx
4941 mov dword ptr [i64], eax
4942 mov dword ptr [i64 + 4], edx
4943 }
4944# endif
4945 return i64;
4946# endif /* !RT_ARCH_AMD64 */
4947}
4948#endif
4949
4950
4951/**
4952 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
4953 *
4954 * @returns u64 / u32.
4955 */
4956#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4957DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
4958#else
4959DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
4960{
4961# ifdef RT_ARCH_AMD64
4962 return (uint32_t)(u64 / u32);
4963# else /* !RT_ARCH_AMD64 */
4964# if RT_INLINE_ASM_GNU_STYLE
4965 RTCCUINTREG uDummy;
4966 __asm__ __volatile__("divl %3"
4967 : "=a" (u32), "=d"(uDummy)
4968 : "A" (u64), "r" (u32));
4969# else
4970 __asm
4971 {
4972 mov eax, dword ptr [u64]
4973 mov edx, dword ptr [u64 + 4]
4974 mov ecx, [u32]
4975 div ecx
4976 mov [u32], eax
4977 }
4978# endif
4979 return u32;
4980# endif /* !RT_ARCH_AMD64 */
4981}
4982#endif
4983
4984
4985/**
4986 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4987 *
4988 * @returns i64 / i32.
4989 */
4990#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4991DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4992#else
4993DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4994{
4995# ifdef RT_ARCH_AMD64
4996 return (int32_t)(i64 / i32);
4997# else /* !RT_ARCH_AMD64 */
4998# if RT_INLINE_ASM_GNU_STYLE
4999 RTCCUINTREG iDummy;
5000 __asm__ __volatile__("idivl %3"
5001 : "=a" (i32), "=d"(iDummy)
5002 : "A" (i64), "r" (i32));
5003# else
5004 __asm
5005 {
5006 mov eax, dword ptr [i64]
5007 mov edx, dword ptr [i64 + 4]
5008 mov ecx, [i32]
5009 idiv ecx
5010 mov [i32], eax
5011 }
5012# endif
5013 return i32;
5014# endif /* !RT_ARCH_AMD64 */
5015}
5016#endif
5017
5018
5019/**
5020 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
5021 * returning the remainder.
5022 *
5023 * @returns u64 % u32.
5024 *
5025 * @remarks It is important that the quotient (u64 / u32) fits in 32 bits or we'll overflow and crash.
5026 */
5027#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5028DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5029#else
5030DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5031{
5032# ifdef RT_ARCH_AMD64
5033 return (uint32_t)(u64 % u32);
5034# else /* !RT_ARCH_AMD64 */
5035# if RT_INLINE_ASM_GNU_STYLE
5036 RTCCUINTREG uDummy;
5037 __asm__ __volatile__("divl %3"
5038 : "=a" (uDummy), "=d"(u32)
5039 : "A" (u64), "r" (u32));
5040# else
5041 __asm
5042 {
5043 mov eax, dword ptr [u64]
5044 mov edx, dword ptr [u64 + 4]
5045 mov ecx, [u32]
5046 div ecx
5047 mov [u32], edx
5048 }
5049# endif
5050 return u32;
5051# endif /* !RT_ARCH_AMD64 */
5052}
5053#endif
5054
5055
5056/**
5057 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
5058 * returning the remainder.
5059 *
5060 * @returns i64 % i32.
5061 *
5062 * @remarks It is important that the quotient (i64 / i32) fits in 32 bits or we'll overflow and crash.
5063 */
5064#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5065DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5066#else
5067DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5068{
5069# ifdef RT_ARCH_AMD64
5070 return (int32_t)(i64 % i32);
5071# else /* !RT_ARCH_AMD64 */
5072# if RT_INLINE_ASM_GNU_STYLE
5073 RTCCUINTREG iDummy;
5074 __asm__ __volatile__("idivl %3"
5075 : "=a" (iDummy), "=d"(i32)
5076 : "A" (i64), "r" (i32));
5077# else
5078 __asm
5079 {
5080 mov eax, dword ptr [i64]
5081 mov edx, dword ptr [i64 + 4]
5082 mov ecx, [i32]
5083 idiv ecx
5084 mov [i32], edx
5085 }
5086# endif
5087 return i32;
5088# endif /* !RT_ARCH_AMD64 */
5089}
5090#endif
5091
5092
5093/**
5094 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer,
5095 * using a 96-bit intermediate result.
5096 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5097 * __udivdi3 and __umoddi3 even if this inline function is not used.
5098 *
5099 * @returns (u64A * u32B) / u32C.
5100 * @param u64A The 64-bit value.
5101 * @param u32B The 32-bit value to multiply A by.
5102 * @param u32C The 32-bit value to divide A*B by.
5103 */
5104#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5105DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5106#else
5107DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5108{
5109# if RT_INLINE_ASM_GNU_STYLE
5110# ifdef RT_ARCH_AMD64
5111 uint64_t u64Result, u64Spill;
5112 __asm__ __volatile__("mulq %2\n\t"
5113 "divq %3\n\t"
5114 : "=a" (u64Result),
5115 "=d" (u64Spill)
5116 : "r" ((uint64_t)u32B),
5117 "r" ((uint64_t)u32C),
5118 "0" (u64A),
5119 "1" (0));
5120 return u64Result;
5121# else
5122 uint32_t u32Dummy;
5123 uint64_t u64Result;
5124 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5125 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5126 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5127 eax = u64A.hi */
5128 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5129 edx = u32C */
5130 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5131 edx = u32B */
5132 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5133 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5134 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5135 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5136 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5137 edx = u64Hi % u32C */
5138 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5139 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5140 "divl %%ecx \n\t" /* u64Result.lo */
5141 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5142 : "=A"(u64Result), "=c"(u32Dummy),
5143 "=S"(u32Dummy), "=D"(u32Dummy)
5144 : "a"((uint32_t)u64A),
5145 "S"((uint32_t)(u64A >> 32)),
5146 "c"(u32B),
5147 "D"(u32C));
5148 return u64Result;
5149# endif
5150# else
5151 RTUINT64U u;
5152 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5153 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5154 u64Hi += (u64Lo >> 32);
5155 u.s.Hi = (uint32_t)(u64Hi / u32C);
5156 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5157 return u.u;
5158# endif
5159}
5160#endif
5161
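/*
 * Example: a minimal usage sketch for ASMMultU64ByU32DivByU32; the
 * ExampleTicksToMicros function and its parameters are hypothetical and not
 * part of IPRT. A plain 64-bit expression such as u64Ticks * 1000000 can
 * overflow the intermediate product, which is exactly what the 96-bit
 * intermediate avoids.
 *
 *      uint64_t ExampleTicksToMicros(uint64_t u64Ticks, uint32_t u32TicksPerSec)
 *      {
 *          return ASMMultU64ByU32DivByU32(u64Ticks, 1000000, u32TicksPerSec);
 *      }
 */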
5162
5163/**
5164 * Probes a byte pointer for read access.
5165 *
5166 * While the function will fault if the byte is not read accessible,
5167 * the idea is to do this in a safe place like before acquiring locks
5168 * and such like.
5169 *
5170 * Also, this function guarantees that an eager compiler is not going
5171 * to optimize the probing away.
5172 *
5173 * @param pvByte Pointer to the byte.
5174 */
5175#if RT_INLINE_ASM_EXTERNAL
5176DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5177#else
5178DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5179{
5180 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5181 uint8_t u8;
5182# if RT_INLINE_ASM_GNU_STYLE
5183 __asm__ __volatile__("movb (%1), %0\n\t"
5184 : "=r" (u8)
5185 : "r" (pvByte));
5186# else
5187 __asm
5188 {
5189# ifdef RT_ARCH_AMD64
5190 mov rax, [pvByte]
5191 mov al, [rax]
5192# else
5193 mov eax, [pvByte]
5194 mov al, [eax]
5195# endif
5196 mov [u8], al
5197 }
5198# endif
5199 return u8;
5200}
5201#endif
5202
5203/**
5204 * Probes a buffer for read access page by page.
5205 *
5206 * While the function will fault if the buffer is not fully read
5207 * accessible, the idea is to do this in a safe place like before
5208 * acquiring locks and such like.
5209 *
5210 * Also, this function guarantees that an eager compiler is not going
5211 * to optimize the probing away.
5212 *
5213 * @param pvBuf Pointer to the buffer.
5214 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5215 */
5216DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5217{
5218 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5219 /* the first byte */
5220 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5221 ASMProbeReadByte(pu8);
5222
5223 /* the pages in between. */
5224 while (cbBuf > /*PAGE_SIZE*/0x1000)
5225 {
5226 ASMProbeReadByte(pu8);
5227 cbBuf -= /*PAGE_SIZE*/0x1000;
5228 pu8 += /*PAGE_SIZE*/0x1000;
5229 }
5230
5231 /* the last byte */
5232 ASMProbeReadByte(pu8 + cbBuf - 1);
5233}
5234
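/*
 * Example: a minimal usage sketch for ASMProbeReadBuffer; the
 * ExampleHandleRequest function and its parameters are hypothetical and not
 * part of IPRT. The probe is done up front so a bad buffer faults here rather
 * than while a spinlock is held.
 *
 *      void ExampleHandleRequest(const void *pvReq, size_t cbReq)
 *      {
 *          ASMProbeReadBuffer(pvReq, cbReq);   // touch every page before taking any locks
 *          // ... acquire spinlock, copy the request, release spinlock ...
 *      }
 */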
5235
5236/** @def ASMBreakpoint
5237 * Debugger Breakpoint.
5238 * @remark In the gnu world we add a nop instruction after the int3 to
5239 * force gdb to remain at the int3 source line.
5240 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
5241 * @internal
5242 */
5243#if RT_INLINE_ASM_GNU_STYLE
5244# ifndef __L4ENV__
5245# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
5246# else
5247# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
5248# endif
5249#else
5250# define ASMBreakpoint() __debugbreak()
5251#endif
5252
5253
5254
5255/** @defgroup grp_inline_bits Bit Operations
5256 * @{
5257 */
5258
5259
5260/**
5261 * Sets a bit in a bitmap.
5262 *
5263 * @param pvBitmap Pointer to the bitmap.
5264 * @param iBit The bit to set.
5265 */
5266#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5267DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5268#else
5269DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5270{
5271# if RT_INLINE_ASM_USES_INTRIN
5272 _bittestandset((long *)pvBitmap, iBit);
5273
5274# elif RT_INLINE_ASM_GNU_STYLE
5275 __asm__ __volatile__ ("btsl %1, %0"
5276 : "=m" (*(volatile long *)pvBitmap)
5277 : "Ir" (iBit),
5278 "m" (*(volatile long *)pvBitmap)
5279 : "memory");
5280# else
5281 __asm
5282 {
5283# ifdef RT_ARCH_AMD64
5284 mov rax, [pvBitmap]
5285 mov edx, [iBit]
5286 bts [rax], edx
5287# else
5288 mov eax, [pvBitmap]
5289 mov edx, [iBit]
5290 bts [eax], edx
5291# endif
5292 }
5293# endif
5294}
5295#endif
5296
5297
5298/**
5299 * Atomically sets a bit in a bitmap, ordered.
5300 *
5301 * @param pvBitmap Pointer to the bitmap.
5302 * @param iBit The bit to set.
5303 */
5304#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5305DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5306#else
5307DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5308{
5309# if RT_INLINE_ASM_USES_INTRIN
5310 _interlockedbittestandset((long *)pvBitmap, iBit);
5311# elif RT_INLINE_ASM_GNU_STYLE
5312 __asm__ __volatile__ ("lock; btsl %1, %0"
5313 : "=m" (*(volatile long *)pvBitmap)
5314 : "Ir" (iBit),
5315 "m" (*(volatile long *)pvBitmap)
5316 : "memory");
5317# else
5318 __asm
5319 {
5320# ifdef RT_ARCH_AMD64
5321 mov rax, [pvBitmap]
5322 mov edx, [iBit]
5323 lock bts [rax], edx
5324# else
5325 mov eax, [pvBitmap]
5326 mov edx, [iBit]
5327 lock bts [eax], edx
5328# endif
5329 }
5330# endif
5331}
5332#endif
5333
5334
5335/**
5336 * Clears a bit in a bitmap.
5337 *
5338 * @param pvBitmap Pointer to the bitmap.
5339 * @param iBit The bit to clear.
5340 */
5341#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5342DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5343#else
5344DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5345{
5346# if RT_INLINE_ASM_USES_INTRIN
5347 _bittestandreset((long *)pvBitmap, iBit);
5348
5349# elif RT_INLINE_ASM_GNU_STYLE
5350 __asm__ __volatile__ ("btrl %1, %0"
5351 : "=m" (*(volatile long *)pvBitmap)
5352 : "Ir" (iBit),
5353 "m" (*(volatile long *)pvBitmap)
5354 : "memory");
5355# else
5356 __asm
5357 {
5358# ifdef RT_ARCH_AMD64
5359 mov rax, [pvBitmap]
5360 mov edx, [iBit]
5361 btr [rax], edx
5362# else
5363 mov eax, [pvBitmap]
5364 mov edx, [iBit]
5365 btr [eax], edx
5366# endif
5367 }
5368# endif
5369}
5370#endif
5371
5372
5373/**
5374 * Atomically clears a bit in a bitmap, ordered.
5375 *
5376 * @param pvBitmap Pointer to the bitmap.
5377 * @param iBit The bit to clear.
5378 * @remark No memory barrier, take care on smp.
5379 */
5380#if RT_INLINE_ASM_EXTERNAL
5381DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5382#else
5383DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5384{
5385# if RT_INLINE_ASM_GNU_STYLE
5386 __asm__ __volatile__ ("lock; btrl %1, %0"
5387 : "=m" (*(volatile long *)pvBitmap)
5388 : "Ir" (iBit),
5389 "m" (*(volatile long *)pvBitmap)
5390 : "memory");
5391# else
5392 __asm
5393 {
5394# ifdef RT_ARCH_AMD64
5395 mov rax, [pvBitmap]
5396 mov edx, [iBit]
5397 lock btr [rax], edx
5398# else
5399 mov eax, [pvBitmap]
5400 mov edx, [iBit]
5401 lock btr [eax], edx
5402# endif
5403 }
5404# endif
5405}
5406#endif
5407
5408
5409/**
5410 * Toggles a bit in a bitmap.
5411 *
5412 * @param pvBitmap Pointer to the bitmap.
5413 * @param iBit The bit to toggle.
5414 */
5415#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5416DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5417#else
5418DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5419{
5420# if RT_INLINE_ASM_USES_INTRIN
5421 _bittestandcomplement((long *)pvBitmap, iBit);
5422# elif RT_INLINE_ASM_GNU_STYLE
5423 __asm__ __volatile__ ("btcl %1, %0"
5424 : "=m" (*(volatile long *)pvBitmap)
5425 : "Ir" (iBit),
5426 "m" (*(volatile long *)pvBitmap)
5427 : "memory");
5428# else
5429 __asm
5430 {
5431# ifdef RT_ARCH_AMD64
5432 mov rax, [pvBitmap]
5433 mov edx, [iBit]
5434 btc [rax], edx
5435# else
5436 mov eax, [pvBitmap]
5437 mov edx, [iBit]
5438 btc [eax], edx
5439# endif
5440 }
5441# endif
5442}
5443#endif
5444
5445
5446/**
5447 * Atomically toggles a bit in a bitmap, ordered.
5448 *
5449 * @param pvBitmap Pointer to the bitmap.
5450 * @param iBit The bit to toggle.
5451 */
5452#if RT_INLINE_ASM_EXTERNAL
5453DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5454#else
5455DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5456{
5457# if RT_INLINE_ASM_GNU_STYLE
5458 __asm__ __volatile__ ("lock; btcl %1, %0"
5459 : "=m" (*(volatile long *)pvBitmap)
5460 : "Ir" (iBit),
5461 "m" (*(volatile long *)pvBitmap)
5462 : "memory");
5463# else
5464 __asm
5465 {
5466# ifdef RT_ARCH_AMD64
5467 mov rax, [pvBitmap]
5468 mov edx, [iBit]
5469 lock btc [rax], edx
5470# else
5471 mov eax, [pvBitmap]
5472 mov edx, [iBit]
5473 lock btc [eax], edx
5474# endif
5475 }
5476# endif
5477}
5478#endif
5479
5480
5481/**
5482 * Tests and sets a bit in a bitmap.
5483 *
5484 * @returns true if the bit was set.
5485 * @returns false if the bit was clear.
5486 * @param pvBitmap Pointer to the bitmap.
5487 * @param iBit The bit to test and set.
5488 */
5489#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5490DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5491#else
5492DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5493{
5494 union { bool f; uint32_t u32; uint8_t u8; } rc;
5495# if RT_INLINE_ASM_USES_INTRIN
5496 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5497
5498# elif RT_INLINE_ASM_GNU_STYLE
5499 __asm__ __volatile__ ("btsl %2, %1\n\t"
5500 "setc %b0\n\t"
5501 "andl $1, %0\n\t"
5502 : "=q" (rc.u32),
5503 "=m" (*(volatile long *)pvBitmap)
5504 : "Ir" (iBit),
5505 "m" (*(volatile long *)pvBitmap)
5506 : "memory");
5507# else
5508 __asm
5509 {
5510 mov edx, [iBit]
5511# ifdef RT_ARCH_AMD64
5512 mov rax, [pvBitmap]
5513 bts [rax], edx
5514# else
5515 mov eax, [pvBitmap]
5516 bts [eax], edx
5517# endif
5518 setc al
5519 and eax, 1
5520 mov [rc.u32], eax
5521 }
5522# endif
5523 return rc.f;
5524}
5525#endif
5526
5527
5528/**
5529 * Atomically tests and sets a bit in a bitmap, ordered.
5530 *
5531 * @returns true if the bit was set.
5532 * @returns false if the bit was clear.
5533 * @param pvBitmap Pointer to the bitmap.
5534 * @param iBit The bit to test and set.
5535 */
5536#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5537DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5538#else
5539DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5540{
5541 union { bool f; uint32_t u32; uint8_t u8; } rc;
5542# if RT_INLINE_ASM_USES_INTRIN
5543 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5544# elif RT_INLINE_ASM_GNU_STYLE
5545 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
5546 "setc %b0\n\t"
5547 "andl $1, %0\n\t"
5548 : "=q" (rc.u32),
5549 "=m" (*(volatile long *)pvBitmap)
5550 : "Ir" (iBit),
5551 "m" (*(volatile long *)pvBitmap)
5552 : "memory");
5553# else
5554 __asm
5555 {
5556 mov edx, [iBit]
5557# ifdef RT_ARCH_AMD64
5558 mov rax, [pvBitmap]
5559 lock bts [rax], edx
5560# else
5561 mov eax, [pvBitmap]
5562 lock bts [eax], edx
5563# endif
5564 setc al
5565 and eax, 1
5566 mov [rc.u32], eax
5567 }
5568# endif
5569 return rc.f;
5570}
5571#endif
5572
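/*
 * Example: a minimal usage sketch for ASMAtomicBitTestAndSet; the
 * ExampleClaimSlot5 function and g_bmExampleSlots bitmap are hypothetical and
 * not part of IPRT. The atomic test-and-set either claims slot 5 or reports
 * that another CPU already owns it.
 *
 *      static uint32_t volatile g_bmExampleSlots[2];   // 64 slots
 *
 *      bool ExampleClaimSlot5(void)
 *      {
 *          return !ASMAtomicBitTestAndSet(&g_bmExampleSlots[0], 5);   // true if this call set the bit
 *      }
 */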
5573
5574/**
5575 * Tests and clears a bit in a bitmap.
5576 *
5577 * @returns true if the bit was set.
5578 * @returns false if the bit was clear.
5579 * @param pvBitmap Pointer to the bitmap.
5580 * @param iBit The bit to test and clear.
5581 */
5582#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5583DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5584#else
5585DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5586{
5587 union { bool f; uint32_t u32; uint8_t u8; } rc;
5588# if RT_INLINE_ASM_USES_INTRIN
5589 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5590
5591# elif RT_INLINE_ASM_GNU_STYLE
5592 __asm__ __volatile__ ("btrl %2, %1\n\t"
5593 "setc %b0\n\t"
5594 "andl $1, %0\n\t"
5595 : "=q" (rc.u32),
5596 "=m" (*(volatile long *)pvBitmap)
5597 : "Ir" (iBit),
5598 "m" (*(volatile long *)pvBitmap)
5599 : "memory");
5600# else
5601 __asm
5602 {
5603 mov edx, [iBit]
5604# ifdef RT_ARCH_AMD64
5605 mov rax, [pvBitmap]
5606 btr [rax], edx
5607# else
5608 mov eax, [pvBitmap]
5609 btr [eax], edx
5610# endif
5611 setc al
5612 and eax, 1
5613 mov [rc.u32], eax
5614 }
5615# endif
5616 return rc.f;
5617}
5618#endif
5619
5620
5621/**
5622 * Atomically tests and clears a bit in a bitmap, ordered.
5623 *
5624 * @returns true if the bit was set.
5625 * @returns false if the bit was clear.
5626 * @param pvBitmap Pointer to the bitmap.
5627 * @param iBit The bit to test and clear.
5628 * @remark No memory barrier, take care on smp.
5629 */
5630#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5631DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5632#else
5633DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5634{
5635 union { bool f; uint32_t u32; uint8_t u8; } rc;
5636# if RT_INLINE_ASM_USES_INTRIN
5637 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5638
5639# elif RT_INLINE_ASM_GNU_STYLE
5640 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
5641 "setc %b0\n\t"
5642 "andl $1, %0\n\t"
5643 : "=q" (rc.u32),
5644 "=m" (*(volatile long *)pvBitmap)
5645 : "Ir" (iBit),
5646 "m" (*(volatile long *)pvBitmap)
5647 : "memory");
5648# else
5649 __asm
5650 {
5651 mov edx, [iBit]
5652# ifdef RT_ARCH_AMD64
5653 mov rax, [pvBitmap]
5654 lock btr [rax], edx
5655# else
5656 mov eax, [pvBitmap]
5657 lock btr [eax], edx
5658# endif
5659 setc al
5660 and eax, 1
5661 mov [rc.u32], eax
5662 }
5663# endif
5664 return rc.f;
5665}
5666#endif
5667
5668
5669/**
5670 * Tests and toggles a bit in a bitmap.
5671 *
5672 * @returns true if the bit was set.
5673 * @returns false if the bit was clear.
5674 * @param pvBitmap Pointer to the bitmap.
5675 * @param iBit The bit to test and toggle.
5676 */
5677#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5678DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5679#else
5680DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5681{
5682 union { bool f; uint32_t u32; uint8_t u8; } rc;
5683# if RT_INLINE_ASM_USES_INTRIN
5684 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5685
5686# elif RT_INLINE_ASM_GNU_STYLE
5687 __asm__ __volatile__ ("btcl %2, %1\n\t"
5688 "setc %b0\n\t"
5689 "andl $1, %0\n\t"
5690 : "=q" (rc.u32),
5691 "=m" (*(volatile long *)pvBitmap)
5692 : "Ir" (iBit),
5693 "m" (*(volatile long *)pvBitmap)
5694 : "memory");
5695# else
5696 __asm
5697 {
5698 mov edx, [iBit]
5699# ifdef RT_ARCH_AMD64
5700 mov rax, [pvBitmap]
5701 btc [rax], edx
5702# else
5703 mov eax, [pvBitmap]
5704 btc [eax], edx
5705# endif
5706 setc al
5707 and eax, 1
5708 mov [rc.u32], eax
5709 }
5710# endif
5711 return rc.f;
5712}
5713#endif
5714
5715
5716/**
5717 * Atomically tests and toggles a bit in a bitmap, ordered.
5718 *
5719 * @returns true if the bit was set.
5720 * @returns false if the bit was clear.
5721 * @param pvBitmap Pointer to the bitmap.
5722 * @param iBit The bit to test and toggle.
5723 */
5724#if RT_INLINE_ASM_EXTERNAL
5725DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5726#else
5727DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5728{
5729 union { bool f; uint32_t u32; uint8_t u8; } rc;
5730# if RT_INLINE_ASM_GNU_STYLE
5731 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
5732 "setc %b0\n\t"
5733 "andl $1, %0\n\t"
5734 : "=q" (rc.u32),
5735 "=m" (*(volatile long *)pvBitmap)
5736 : "Ir" (iBit),
5737 "m" (*(volatile long *)pvBitmap)
5738 : "memory");
5739# else
5740 __asm
5741 {
5742 mov edx, [iBit]
5743# ifdef RT_ARCH_AMD64
5744 mov rax, [pvBitmap]
5745 lock btc [rax], edx
5746# else
5747 mov eax, [pvBitmap]
5748 lock btc [eax], edx
5749# endif
5750 setc al
5751 and eax, 1
5752 mov [rc.u32], eax
5753 }
5754# endif
5755 return rc.f;
5756}
5757#endif
5758
5759
5760/**
5761 * Tests if a bit in a bitmap is set.
5762 *
5763 * @returns true if the bit is set.
5764 * @returns false if the bit is clear.
5765 * @param pvBitmap Pointer to the bitmap.
5766 * @param iBit The bit to test.
5767 */
5768#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5769DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5770#else
5771DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5772{
5773 union { bool f; uint32_t u32; uint8_t u8; } rc;
5774# if RT_INLINE_ASM_USES_INTRIN
5775 rc.u32 = _bittest((long *)pvBitmap, iBit);
5776# elif RT_INLINE_ASM_GNU_STYLE
5777
5778 __asm__ __volatile__ ("btl %2, %1\n\t"
5779 "setc %b0\n\t"
5780 "andl $1, %0\n\t"
5781 : "=q" (rc.u32)
5782 : "m" (*(const volatile long *)pvBitmap),
5783 "Ir" (iBit)
5784 : "memory");
5785# else
5786 __asm
5787 {
5788 mov edx, [iBit]
5789# ifdef RT_ARCH_AMD64
5790 mov rax, [pvBitmap]
5791 bt [rax], edx
5792# else
5793 mov eax, [pvBitmap]
5794 bt [eax], edx
5795# endif
5796 setc al
5797 and eax, 1
5798 mov [rc.u32], eax
5799 }
5800# endif
5801 return rc.f;
5802}
5803#endif
5804
5805
5806/**
5807 * Clears a bit range within a bitmap.
5808 *
5809 * @param pvBitmap Pointer to the bitmap.
5810 * @param iBitStart The first bit to clear.
5811 * @param iBitEnd The first bit not to clear.
5812 */
5813DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5814{
5815 if (iBitStart < iBitEnd)
5816 {
5817 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5818 int iStart = iBitStart & ~31;
5819 int iEnd = iBitEnd & ~31;
5820 if (iStart == iEnd)
5821 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5822 else
5823 {
5824 /* bits in first dword. */
5825 if (iBitStart & 31)
5826 {
5827 *pu32 &= (1 << (iBitStart & 31)) - 1;
5828 pu32++;
5829 iBitStart = iStart + 32;
5830 }
5831
5832 /* whole dword. */
5833 if (iBitStart != iEnd)
5834 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5835
5836 /* bits in last dword. */
5837 if (iBitEnd & 31)
5838 {
5839 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5840 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5841 }
5842 }
5843 }
5844}
5845
5846
5847/**
5848 * Sets a bit range within a bitmap.
5849 *
5850 * @param pvBitmap Pointer to the bitmap.
5851 * @param iBitStart The first bit to set.
5852 * @param iBitEnd The first bit not to set.
5853 */
5854DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5855{
5856 if (iBitStart < iBitEnd)
5857 {
5858 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5859 int iStart = iBitStart & ~31;
5860 int iEnd = iBitEnd & ~31;
5861 if (iStart == iEnd)
5862 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
5863 else
5864 {
5865 /* bits in first dword. */
5866 if (iBitStart & 31)
5867 {
5868 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5869 pu32++;
5870 iBitStart = iStart + 32;
5871 }
5872
5873 /* whole dword. */
5874 if (iBitStart != iEnd)
5875 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5876
5877 /* bits in last dword. */
5878 if (iBitEnd & 31)
5879 {
5880 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5881 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5882 }
5883 }
5884 }
5885}
5886
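/*
 * Example: a minimal usage sketch for the bit range helpers above; the
 * ExampleMarkPages function and g_bmExamplePages bitmap are hypothetical and
 * not part of IPRT.
 *
 *      static uint32_t g_bmExamplePages[256 / 32];    // 256-page bitmap
 *
 *      void ExampleMarkPages(void)
 *      {
 *          ASMBitSetRange(g_bmExamplePages, 8, 16);   // sets bits 8..15
 *          ASMBitClearRange(g_bmExamplePages, 8, 16); // clears them again
 *      }
 */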
5887
5888/**
5889 * Finds the first clear bit in a bitmap.
5890 *
5891 * @returns Index of the first zero bit.
5892 * @returns -1 if no clear bit was found.
5893 * @param pvBitmap Pointer to the bitmap.
5894 * @param cBits The number of bits in the bitmap. Multiple of 32.
5895 */
5896#if RT_INLINE_ASM_EXTERNAL
5897DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5898#else
5899DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5900{
5901 if (cBits)
5902 {
5903 int32_t iBit;
5904# if RT_INLINE_ASM_GNU_STYLE
5905 RTCCUINTREG uEAX, uECX, uEDI;
5906 cBits = RT_ALIGN_32(cBits, 32);
5907 __asm__ __volatile__("repe; scasl\n\t"
5908 "je 1f\n\t"
5909# ifdef RT_ARCH_AMD64
5910 "lea -4(%%rdi), %%rdi\n\t"
5911 "xorl (%%rdi), %%eax\n\t"
5912 "subq %5, %%rdi\n\t"
5913# else
5914 "lea -4(%%edi), %%edi\n\t"
5915 "xorl (%%edi), %%eax\n\t"
5916 "subl %5, %%edi\n\t"
5917# endif
5918 "shll $3, %%edi\n\t"
5919 "bsfl %%eax, %%edx\n\t"
5920 "addl %%edi, %%edx\n\t"
5921 "1:\t\n"
5922 : "=d" (iBit),
5923 "=&c" (uECX),
5924 "=&D" (uEDI),
5925 "=&a" (uEAX)
5926 : "0" (0xffffffff),
5927 "mr" (pvBitmap),
5928 "1" (cBits >> 5),
5929 "2" (pvBitmap),
5930 "3" (0xffffffff));
5931# else
5932 cBits = RT_ALIGN_32(cBits, 32);
5933 __asm
5934 {
5935# ifdef RT_ARCH_AMD64
5936 mov rdi, [pvBitmap]
5937 mov rbx, rdi
5938# else
5939 mov edi, [pvBitmap]
5940 mov ebx, edi
5941# endif
5942 mov edx, 0ffffffffh
5943 mov eax, edx
5944 mov ecx, [cBits]
5945 shr ecx, 5
5946 repe scasd
5947 je done
5948
5949# ifdef RT_ARCH_AMD64
5950 lea rdi, [rdi - 4]
5951 xor eax, [rdi]
5952 sub rdi, rbx
5953# else
5954 lea edi, [edi - 4]
5955 xor eax, [edi]
5956 sub edi, ebx
5957# endif
5958 shl edi, 3
5959 bsf edx, eax
5960 add edx, edi
5961 done:
5962 mov [iBit], edx
5963 }
5964# endif
5965 return iBit;
5966 }
5967 return -1;
5968}
5969#endif
5970
5971
5972/**
5973 * Finds the next clear bit in a bitmap.
5974 *
5975 * @returns Index of the next zero bit.
5976 * @returns -1 if no clear bit was found.
5977 * @param pvBitmap Pointer to the bitmap.
5978 * @param cBits The number of bits in the bitmap. Multiple of 32.
5979 * @param iBitPrev The bit returned from the last search.
5980 * The search will start at iBitPrev + 1.
5981 */
5982#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5983DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5984#else
5985DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5986{
5987 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
5988 int iBit = ++iBitPrev & 31;
5989 if (iBit)
5990 {
5991 /*
5992 * Inspect the 32-bit word containing the unaligned bit.
5993 */
5994 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
5995
5996# if RT_INLINE_ASM_USES_INTRIN
5997 unsigned long ulBit = 0;
5998 if (_BitScanForward(&ulBit, u32))
5999 return ulBit + iBitPrev;
6000# else
6001# if RT_INLINE_ASM_GNU_STYLE
6002 __asm__ __volatile__("bsf %1, %0\n\t"
6003 "jnz 1f\n\t"
6004 "movl $-1, %0\n\t"
6005 "1:\n\t"
6006 : "=r" (iBit)
6007 : "r" (u32));
6008# else
6009 __asm
6010 {
6011 mov edx, [u32]
6012 bsf eax, edx
6013 jnz done
6014 mov eax, 0ffffffffh
6015 done:
6016 mov [iBit], eax
6017 }
6018# endif
6019 if (iBit >= 0)
6020 return iBit + iBitPrev;
6021# endif
6022
6023 /*
6024 * Skip ahead and see if there is anything left to search.
6025 */
6026 iBitPrev |= 31;
6027 iBitPrev++;
6028 if (cBits <= (uint32_t)iBitPrev)
6029 return -1;
6030 }
6031
6032 /*
6033 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6034 */
6035 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6036 if (iBit >= 0)
6037 iBit += iBitPrev;
6038 return iBit;
6039}
6040#endif
6041
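/*
 * Example: a minimal usage sketch walking all clear (free) bits of a bitmap
 * with ASMBitFirstClear / ASMBitNextClear; the ExampleWalkFreeSlots function
 * and g_bmExampleUsed bitmap are hypothetical and not part of IPRT.
 *
 *      static uint32_t g_bmExampleUsed[256 / 32];
 *
 *      void ExampleWalkFreeSlots(void)
 *      {
 *          int iBit = ASMBitFirstClear(g_bmExampleUsed, 256);
 *          while (iBit >= 0)
 *          {
 *              // ... iBit is the index of a free slot ...
 *              iBit = ASMBitNextClear(g_bmExampleUsed, 256, iBit);
 *          }
 *      }
 */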
6042
6043/**
6044 * Finds the first set bit in a bitmap.
6045 *
6046 * @returns Index of the first set bit.
6047 * @returns -1 if no set bit was found.
6048 * @param pvBitmap Pointer to the bitmap.
6049 * @param cBits The number of bits in the bitmap. Multiple of 32.
6050 */
6051#if RT_INLINE_ASM_EXTERNAL
6052DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6053#else
6054DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6055{
6056 if (cBits)
6057 {
6058 int32_t iBit;
6059# if RT_INLINE_ASM_GNU_STYLE
6060 RTCCUINTREG uEAX, uECX, uEDI;
6061 cBits = RT_ALIGN_32(cBits, 32);
6062 __asm__ __volatile__("repe; scasl\n\t"
6063 "je 1f\n\t"
6064# ifdef RT_ARCH_AMD64
6065 "lea -4(%%rdi), %%rdi\n\t"
6066 "movl (%%rdi), %%eax\n\t"
6067 "subq %5, %%rdi\n\t"
6068# else
6069 "lea -4(%%edi), %%edi\n\t"
6070 "movl (%%edi), %%eax\n\t"
6071 "subl %5, %%edi\n\t"
6072# endif
6073 "shll $3, %%edi\n\t"
6074 "bsfl %%eax, %%edx\n\t"
6075 "addl %%edi, %%edx\n\t"
6076 "1:\t\n"
6077 : "=d" (iBit),
6078 "=&c" (uECX),
6079 "=&D" (uEDI),
6080 "=&a" (uEAX)
6081 : "0" (0xffffffff),
6082 "mr" (pvBitmap),
6083 "1" (cBits >> 5),
6084 "2" (pvBitmap),
6085 "3" (0));
6086# else
6087 cBits = RT_ALIGN_32(cBits, 32);
6088 __asm
6089 {
6090# ifdef RT_ARCH_AMD64
6091 mov rdi, [pvBitmap]
6092 mov rbx, rdi
6093# else
6094 mov edi, [pvBitmap]
6095 mov ebx, edi
6096# endif
6097 mov edx, 0ffffffffh
6098 xor eax, eax
6099 mov ecx, [cBits]
6100 shr ecx, 5
6101 repe scasd
6102 je done
6103# ifdef RT_ARCH_AMD64
6104 lea rdi, [rdi - 4]
6105 mov eax, [rdi]
6106 sub rdi, rbx
6107# else
6108 lea edi, [edi - 4]
6109 mov eax, [edi]
6110 sub edi, ebx
6111# endif
6112 shl edi, 3
6113 bsf edx, eax
6114 add edx, edi
6115 done:
6116 mov [iBit], edx
6117 }
6118# endif
6119 return iBit;
6120 }
6121 return -1;
6122}
6123#endif
6124
6125
6126/**
6127 * Finds the next set bit in a bitmap.
6128 *
6129 * @returns Index of the next set bit.
6130 * @returns -1 if no set bit was found.
6131 * @param pvBitmap Pointer to the bitmap.
6132 * @param cBits The number of bits in the bitmap. Multiple of 32.
6133 * @param iBitPrev The bit returned from the last search.
6134 * The search will start at iBitPrev + 1.
6135 */
6136#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6137DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6138#else
6139DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6140{
6141 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6142 int iBit = ++iBitPrev & 31;
6143 if (iBit)
6144 {
6145 /*
6146 * Inspect the 32-bit word containing the unaligned bit.
6147 */
6148 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6149
6150# if RT_INLINE_ASM_USES_INTRIN
6151 unsigned long ulBit = 0;
6152 if (_BitScanForward(&ulBit, u32))
6153 return ulBit + iBitPrev;
6154# else
6155# if RT_INLINE_ASM_GNU_STYLE
6156 __asm__ __volatile__("bsf %1, %0\n\t"
6157 "jnz 1f\n\t"
6158 "movl $-1, %0\n\t"
6159 "1:\n\t"
6160 : "=r" (iBit)
6161 : "r" (u32));
6162# else
6163 __asm
6164 {
6165 mov edx, [u32]
6166 bsf eax, edx
6167 jnz done
6168 mov eax, 0ffffffffh
6169 done:
6170 mov [iBit], eax
6171 }
6172# endif
6173 if (iBit >= 0)
6174 return iBit + iBitPrev;
6175# endif
6176
6177 /*
6178 * Skip ahead and see if there is anything left to search.
6179 */
6180 iBitPrev |= 31;
6181 iBitPrev++;
6182 if (cBits <= (uint32_t)iBitPrev)
6183 return -1;
6184 }
6185
6186 /*
6187 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6188 */
6189 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6190 if (iBit >= 0)
6191 iBit += iBitPrev;
6192 return iBit;
6193}
6194#endif
6195
6196
6197/**
6198 * Finds the first bit which is set in the given 32-bit integer.
6199 * Bits are numbered from 1 (least significant) to 32.
6200 *
6201 * @returns index [1..32] of the first set bit.
6202 * @returns 0 if all bits are cleared.
6203 * @param u32 Integer to search for set bits.
6204 * @remark Similar to ffs() in BSD.
6205 */
6206DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6207{
6208# if RT_INLINE_ASM_USES_INTRIN
6209 unsigned long iBit;
6210 if (_BitScanForward(&iBit, u32))
6211 iBit++;
6212 else
6213 iBit = 0;
6214# elif RT_INLINE_ASM_GNU_STYLE
6215 uint32_t iBit;
6216 __asm__ __volatile__("bsf %1, %0\n\t"
6217 "jnz 1f\n\t"
6218 "xorl %0, %0\n\t"
6219 "jmp 2f\n"
6220 "1:\n\t"
6221 "incl %0\n"
6222 "2:\n\t"
6223 : "=r" (iBit)
6224 : "rm" (u32));
6225# else
6226 uint32_t iBit;
6227 _asm
6228 {
6229 bsf eax, [u32]
6230 jnz found
6231 xor eax, eax
6232 jmp done
6233 found:
6234 inc eax
6235 done:
6236 mov [iBit], eax
6237 }
6238# endif
6239 return iBit;
6240}
6241
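/*
 * Example: a minimal sketch of the 1-based bit numbering used by
 * ASMBitFirstSetU32 (unlike the 0-based bitmap functions above); the
 * ExampleBitNumbering function is hypothetical and not part of IPRT.
 *
 *      void ExampleBitNumbering(void)
 *      {
 *          unsigned i;
 *          i = ASMBitFirstSetU32(0);                       // 0  - no bit set
 *          i = ASMBitFirstSetU32(1);                       // 1  - bit 0 is the least significant bit
 *          i = ASMBitFirstSetU32(UINT32_C(0x80000000));    // 32 - only the most significant bit is set
 *          NOREF(i);
 *      }
 */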
6242
6243/**
6244 * Finds the first bit which is set in the given 32-bit integer.
6245 * Bits are numbered from 1 (least significant) to 32.
6246 *
6247 * @returns index [1..32] of the first set bit.
6248 * @returns 0 if all bits are cleared.
6249 * @param i32 Integer to search for set bits.
6250 * @remark Similar to ffs() in BSD.
6251 */
6252DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6253{
6254 return ASMBitFirstSetU32((uint32_t)i32);
6255}
6256
6257
6258/**
6259 * Finds the last bit which is set in the given 32-bit integer.
6260 * Bits are numbered from 1 (least significant) to 32.
6261 *
6262 * @returns index [1..32] of the last set bit.
6263 * @returns 0 if all bits are cleared.
6264 * @param u32 Integer to search for set bits.
6265 * @remark Similar to fls() in BSD.
6266 */
6267DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6268{
6269# if RT_INLINE_ASM_USES_INTRIN
6270 unsigned long iBit;
6271 if (_BitScanReverse(&iBit, u32))
6272 iBit++;
6273 else
6274 iBit = 0;
6275# elif RT_INLINE_ASM_GNU_STYLE
6276 uint32_t iBit;
6277 __asm__ __volatile__("bsrl %1, %0\n\t"
6278 "jnz 1f\n\t"
6279 "xorl %0, %0\n\t"
6280 "jmp 2f\n"
6281 "1:\n\t"
6282 "incl %0\n"
6283 "2:\n\t"
6284 : "=r" (iBit)
6285 : "rm" (u32));
6286# else
6287 uint32_t iBit;
6288 _asm
6289 {
6290 bsr eax, [u32]
6291 jnz found
6292 xor eax, eax
6293 jmp done
6294 found:
6295 inc eax
6296 done:
6297 mov [iBit], eax
6298 }
6299# endif
6300 return iBit;
6301}
6302
6303
6304/**
6305 * Finds the last bit which is set in the given 32-bit integer.
6306 * Bits are numbered from 1 (least significant) to 32.
6307 *
6308 * @returns index [1..32] of the last set bit.
6309 * @returns 0 if all bits are cleared.
6310 * @param i32 Integer to search for set bits.
6311 * @remark Similar to fls() in BSD.
6312 */
6313DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6314{
6315 return ASMBitLastSetU32((uint32_t)i32);
6316}
6317
6318/**
6319 * Reverse the byte order of the given 16-bit integer.
6320 *
6321 * @returns The 16-bit value with the byte order reversed.
6322 * @param u16 16-bit integer value.
6323 */
6324DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6325{
6326#if RT_INLINE_ASM_USES_INTRIN
6327 u16 = _byteswap_ushort(u16);
6328#elif RT_INLINE_ASM_GNU_STYLE
6329 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6330#else
6331 _asm
6332 {
6333 mov ax, [u16]
6334 ror ax, 8
6335 mov [u16], ax
6336 }
6337#endif
6338 return u16;
6339}
6340
6341/**
6342 * Reverse the byte order of the given 32-bit integer.
6343 *
6344 * @returns The 32-bit value with the byte order reversed.
6345 * @param u32 32-bit integer value.
6346 */
6347DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6348{
6349#if RT_INLINE_ASM_USES_INTRIN
6350 u32 = _byteswap_ulong(u32);
6351#elif RT_INLINE_ASM_GNU_STYLE
6352 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6353#else
6354 _asm
6355 {
6356 mov eax, [u32]
6357 bswap eax
6358 mov [u32], eax
6359 }
6360#endif
6361 return u32;
6362}
6363
6364
6365/**
6366 * Reverse the byte order of the given 64-bit integer.
6367 *
6368 * @returns The 64-bit value with the byte order reversed.
6369 * @param u64 64-bit integer value.
6370 */
6371DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6372{
6373#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6374 u64 = _byteswap_uint64(u64);
6375#else
6376 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6377 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6378#endif
6379 return u64;
6380}
6381
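/*
 * Example: a minimal usage sketch for the byte swappers above, e.g. converting
 * big-endian on-disk fields to host order on a little-endian machine; the
 * ExampleByteSwap function is hypothetical and not part of IPRT.
 *
 *      void ExampleByteSwap(void)
 *      {
 *          uint32_t u32Host = ASMByteSwapU32(UINT32_C(0x12345678));          // 0x78563412
 *          uint64_t u64Host = ASMByteSwapU64(UINT64_C(0x0102030405060708));  // 0x0807060504030201
 *          NOREF(u32Host); NOREF(u64Host);
 *      }
 */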
6382
6383/** @} */
6384
6385
6386/** @} */
6387#endif
6388