VirtualBox

source: vbox/trunk/include/iprt/asm.h@6653

Last change on this file since 6653 was 6653, checked in by vboxsync, 17 years ago

Yet another fix for ASMAtomicCmpXchgExU64, needed by 32bit gcc with
debug build, to prevent gcc from running out of registers.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 111.4 KB
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31/** @todo #include <iprt/param.h> for PAGE_SIZE. */
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using the _MSC_VER >= 1400 intrinsics.
34 * Otherwise defined as 0.
35 */
36
37#ifdef _MSC_VER
38# if _MSC_VER >= 1400
39# define RT_INLINE_ASM_USES_INTRIN 1
40# include <intrin.h>
41 /* Emit the intrinsics at all optimization levels. */
42# pragma intrinsic(_ReadWriteBarrier)
43# pragma intrinsic(__cpuid)
44# pragma intrinsic(_enable)
45# pragma intrinsic(_disable)
46# pragma intrinsic(__rdtsc)
47# pragma intrinsic(__readmsr)
48# pragma intrinsic(__writemsr)
49# pragma intrinsic(__outbyte)
50# pragma intrinsic(__outword)
51# pragma intrinsic(__outdword)
52# pragma intrinsic(__inbyte)
53# pragma intrinsic(__inword)
54# pragma intrinsic(__indword)
55# pragma intrinsic(__invlpg)
56# pragma intrinsic(__stosd)
57# pragma intrinsic(__stosw)
58# pragma intrinsic(__stosb)
59# pragma intrinsic(__readcr0)
60# pragma intrinsic(__readcr2)
61# pragma intrinsic(__readcr3)
62# pragma intrinsic(__readcr4)
63# pragma intrinsic(__writecr0)
64# pragma intrinsic(__writecr3)
65# pragma intrinsic(__writecr4)
66# pragma intrinsic(_BitScanForward)
67# pragma intrinsic(_BitScanReverse)
68# pragma intrinsic(_bittest)
69# pragma intrinsic(_bittestandset)
70# pragma intrinsic(_bittestandreset)
71# pragma intrinsic(_bittestandcomplement)
72# pragma intrinsic(_byteswap_ushort)
73# pragma intrinsic(_byteswap_ulong)
74# pragma intrinsic(_interlockedbittestandset)
75# pragma intrinsic(_interlockedbittestandreset)
76# pragma intrinsic(_InterlockedAnd)
77# pragma intrinsic(_InterlockedOr)
78# pragma intrinsic(_InterlockedIncrement)
79# pragma intrinsic(_InterlockedDecrement)
80# pragma intrinsic(_InterlockedExchange)
81# pragma intrinsic(_InterlockedCompareExchange)
82# pragma intrinsic(_InterlockedCompareExchange64)
83# ifdef RT_ARCH_AMD64
84# pragma intrinsic(__stosq)
85# pragma intrinsic(__readcr8)
86# pragma intrinsic(__writecr8)
87# pragma intrinsic(_byteswap_uint64)
88# pragma intrinsic(_InterlockedExchange64)
89# endif
90# endif
91#endif
92#ifndef RT_INLINE_ASM_USES_INTRIN
93# define RT_INLINE_ASM_USES_INTRIN 0
94#endif
95
96
97
98/** @defgroup grp_asm ASM - Assembly Routines
99 * @ingroup grp_rt
100 * @{
101 */
102
103/** @def RT_INLINE_ASM_EXTERNAL
104 * Defined as 1 if the compiler does not support inline assembly.
105 * The ASM* functions will then be implemented in an external .asm file.
106 *
107 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
108 * inline assembly in their AMD64 compiler.
109 */
110#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
111# define RT_INLINE_ASM_EXTERNAL 1
112#else
113# define RT_INLINE_ASM_EXTERNAL 0
114#endif
115
116/** @def RT_INLINE_ASM_GNU_STYLE
117 * Defined as 1 if the compiler understands GNU-style inline assembly.
118 */
119#if defined(_MSC_VER)
120# define RT_INLINE_ASM_GNU_STYLE 0
121#else
122# define RT_INLINE_ASM_GNU_STYLE 1
123#endif
124
125
126/** @todo find a more proper place for this structure? */
127#pragma pack(1)
128/** IDTR */
129typedef struct RTIDTR
130{
131 /** Size of the IDT. */
132 uint16_t cbIdt;
133 /** Address of the IDT. */
134 uintptr_t pIdt;
135} RTIDTR, *PRTIDTR;
136#pragma pack()
137
138#pragma pack(1)
139/** GDTR */
140typedef struct RTGDTR
141{
142 /** Size of the GDT. */
143 uint16_t cbGdt;
144 /** Address of the GDT. */
145 uintptr_t pGdt;
146} RTGDTR, *PRTGDTR;
147#pragma pack()
148
149
150/** @def ASMReturnAddress
151 * Gets the return address of the current function or method (i.e. an address within the caller).
152 */
153#ifdef _MSC_VER
154# ifdef __cplusplus
155extern "C"
156# endif
157void * _ReturnAddress(void);
158# pragma intrinsic(_ReturnAddress)
159# define ASMReturnAddress() _ReturnAddress()
160#elif defined(__GNUC__) || defined(__DOXYGEN__)
161# define ASMReturnAddress() __builtin_return_address(0)
162#else
163# error "Unsupported compiler."
164#endif
165
166
167/**
168 * Gets the content of the IDTR CPU register.
169 * @param pIdtr Where to store the IDTR contents.
170 */
171#if RT_INLINE_ASM_EXTERNAL
172DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
173#else
174DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
175{
176# if RT_INLINE_ASM_GNU_STYLE
177 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
178# else
179 __asm
180 {
181# ifdef RT_ARCH_AMD64
182 mov rax, [pIdtr]
183 sidt [rax]
184# else
185 mov eax, [pIdtr]
186 sidt [eax]
187# endif
188 }
189# endif
190}
191#endif
192
193
194/**
195 * Sets the content of the IDTR CPU register.
196 * @param pIdtr Where to load the IDTR contents from.
197 */
198#if RT_INLINE_ASM_EXTERNAL
199DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
200#else
201DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
202{
203# if RT_INLINE_ASM_GNU_STYLE
204 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
205# else
206 __asm
207 {
208# ifdef RT_ARCH_AMD64
209 mov rax, [pIdtr]
210 lidt [rax]
211# else
212 mov eax, [pIdtr]
213 lidt [eax]
214# endif
215 }
216# endif
217}
218#endif
219
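/* Illustrative sketch (not part of the original header): saving and restoring
 * the IDTR around an operation that temporarily installs a different IDT.
 * Everything below except RTIDTR, ASMGetIDTR and ASMSetIDTR is hypothetical. */
#if 0 /* example only */
DECLINLINE(void) ExampleSwapIdt(const RTIDTR *pNewIdtr)
{
    RTIDTR OldIdtr;
    ASMGetIDTR(&OldIdtr);       /* sidt - capture the current limit + base */
    ASMSetIDTR(pNewIdtr);       /* lidt - install the caller's table */
    /* ... run code that relies on the temporary IDT ... */
    ASMSetIDTR(&OldIdtr);       /* put the original table back */
}
#endif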
220
221/**
222 * Gets the content of the GDTR CPU register.
223 * @param pGdtr Where to store the GDTR contents.
224 */
225#if RT_INLINE_ASM_EXTERNAL
226DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
227#else
228DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
229{
230# if RT_INLINE_ASM_GNU_STYLE
231 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
232# else
233 __asm
234 {
235# ifdef RT_ARCH_AMD64
236 mov rax, [pGdtr]
237 sgdt [rax]
238# else
239 mov eax, [pGdtr]
240 sgdt [eax]
241# endif
242 }
243# endif
244}
245#endif
246
247/**
248 * Get the CS register.
249 * @returns CS.
250 */
251#if RT_INLINE_ASM_EXTERNAL
252DECLASM(RTSEL) ASMGetCS(void);
253#else
254DECLINLINE(RTSEL) ASMGetCS(void)
255{
256 RTSEL SelCS;
257# if RT_INLINE_ASM_GNU_STYLE
258 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
259# else
260 __asm
261 {
262 mov ax, cs
263 mov [SelCS], ax
264 }
265# endif
266 return SelCS;
267}
268#endif
269
270
271/**
272 * Get the DS register.
273 * @returns DS.
274 */
275#if RT_INLINE_ASM_EXTERNAL
276DECLASM(RTSEL) ASMGetDS(void);
277#else
278DECLINLINE(RTSEL) ASMGetDS(void)
279{
280 RTSEL SelDS;
281# if RT_INLINE_ASM_GNU_STYLE
282 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
283# else
284 __asm
285 {
286 mov ax, ds
287 mov [SelDS], ax
288 }
289# endif
290 return SelDS;
291}
292#endif
293
294
295/**
296 * Get the ES register.
297 * @returns ES.
298 */
299#if RT_INLINE_ASM_EXTERNAL
300DECLASM(RTSEL) ASMGetES(void);
301#else
302DECLINLINE(RTSEL) ASMGetES(void)
303{
304 RTSEL SelES;
305# if RT_INLINE_ASM_GNU_STYLE
306 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
307# else
308 __asm
309 {
310 mov ax, es
311 mov [SelES], ax
312 }
313# endif
314 return SelES;
315}
316#endif
317
318
319/**
320 * Get the FS register.
321 * @returns FS.
322 */
323#if RT_INLINE_ASM_EXTERNAL
324DECLASM(RTSEL) ASMGetFS(void);
325#else
326DECLINLINE(RTSEL) ASMGetFS(void)
327{
328 RTSEL SelFS;
329# if RT_INLINE_ASM_GNU_STYLE
330 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
331# else
332 __asm
333 {
334 mov ax, fs
335 mov [SelFS], ax
336 }
337# endif
338 return SelFS;
339}
340#endif
341
342
343/**
344 * Get the GS register.
345 * @returns GS.
346 */
347#if RT_INLINE_ASM_EXTERNAL
348DECLASM(RTSEL) ASMGetGS(void);
349#else
350DECLINLINE(RTSEL) ASMGetGS(void)
351{
352 RTSEL SelGS;
353# if RT_INLINE_ASM_GNU_STYLE
354 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
355# else
356 __asm
357 {
358 mov ax, gs
359 mov [SelGS], ax
360 }
361# endif
362 return SelGS;
363}
364#endif
365
366
367/**
368 * Get the SS register.
369 * @returns SS.
370 */
371#if RT_INLINE_ASM_EXTERNAL
372DECLASM(RTSEL) ASMGetSS(void);
373#else
374DECLINLINE(RTSEL) ASMGetSS(void)
375{
376 RTSEL SelSS;
377# if RT_INLINE_ASM_GNU_STYLE
378 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
379# else
380 __asm
381 {
382 mov ax, ss
383 mov [SelSS], ax
384 }
385# endif
386 return SelSS;
387}
388#endif
389
390
391/**
392 * Get the TR register.
393 * @returns TR.
394 */
395#if RT_INLINE_ASM_EXTERNAL
396DECLASM(RTSEL) ASMGetTR(void);
397#else
398DECLINLINE(RTSEL) ASMGetTR(void)
399{
400 RTSEL SelTR;
401# if RT_INLINE_ASM_GNU_STYLE
402 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
403# else
404 __asm
405 {
406 str ax
407 mov [SelTR], ax
408 }
409# endif
410 return SelTR;
411}
412#endif
413
414
415/**
416 * Get the [RE]FLAGS register.
417 * @returns [RE]FLAGS.
418 */
419#if RT_INLINE_ASM_EXTERNAL
420DECLASM(RTCCUINTREG) ASMGetFlags(void);
421#else
422DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
423{
424 RTCCUINTREG uFlags;
425# if RT_INLINE_ASM_GNU_STYLE
426# ifdef RT_ARCH_AMD64
427 __asm__ __volatile__("pushfq\n\t"
428 "popq %0\n\t"
429 : "=g" (uFlags));
430# else
431 __asm__ __volatile__("pushfl\n\t"
432 "popl %0\n\t"
433 : "=g" (uFlags));
434# endif
435# else
436 __asm
437 {
438# ifdef RT_ARCH_AMD64
439 pushfq
440 pop [uFlags]
441# else
442 pushfd
443 pop [uFlags]
444# endif
445 }
446# endif
447 return uFlags;
448}
449#endif
450
451
452/**
453 * Set the [RE]FLAGS register.
454 * @param uFlags The new [RE]FLAGS value.
455 */
456#if RT_INLINE_ASM_EXTERNAL
457DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
458#else
459DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
460{
461# if RT_INLINE_ASM_GNU_STYLE
462# ifdef RT_ARCH_AMD64
463 __asm__ __volatile__("pushq %0\n\t"
464 "popfq\n\t"
465 : : "g" (uFlags));
466# else
467 __asm__ __volatile__("pushl %0\n\t"
468 "popfl\n\t"
469 : : "g" (uFlags));
470# endif
471# else
472 __asm
473 {
474# ifdef RT_ARCH_AMD64
475 push [uFlags]
476 popfq
477# else
478 push [uFlags]
479 popfd
480# endif
481 }
482# endif
483}
484#endif
485
486
487/**
488 * Gets the content of the CPU timestamp counter register.
489 *
490 * @returns TSC.
491 */
492#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
493DECLASM(uint64_t) ASMReadTSC(void);
494#else
495DECLINLINE(uint64_t) ASMReadTSC(void)
496{
497 RTUINT64U u;
498# if RT_INLINE_ASM_GNU_STYLE
499 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
500# else
501# if RT_INLINE_ASM_USES_INTRIN
502 u.u = __rdtsc();
503# else
504 __asm
505 {
506 rdtsc
507 mov [u.s.Lo], eax
508 mov [u.s.Hi], edx
509 }
510# endif
511# endif
512 return u.u;
513}
514#endif
515
516
517/**
518 * Performs the cpuid instruction returning all registers.
519 *
520 * @param uOperator CPUID operation (eax).
521 * @param pvEAX Where to store eax.
522 * @param pvEBX Where to store ebx.
523 * @param pvECX Where to store ecx.
524 * @param pvEDX Where to store edx.
525 * @remark We're using void pointers to ease the use of special bitfield structures and such.
526 */
527#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
528DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
529#else
530DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
531{
532# if RT_INLINE_ASM_GNU_STYLE
533# ifdef RT_ARCH_AMD64
534 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
535 __asm__ ("cpuid\n\t"
536 : "=a" (uRAX),
537 "=b" (uRBX),
538 "=c" (uRCX),
539 "=d" (uRDX)
540 : "0" (uOperator));
541 *(uint32_t *)pvEAX = (uint32_t)uRAX;
542 *(uint32_t *)pvEBX = (uint32_t)uRBX;
543 *(uint32_t *)pvECX = (uint32_t)uRCX;
544 *(uint32_t *)pvEDX = (uint32_t)uRDX;
545# else
546 __asm__ ("xchgl %%ebx, %1\n\t"
547 "cpuid\n\t"
548 "xchgl %%ebx, %1\n\t"
549 : "=a" (*(uint32_t *)pvEAX),
550 "=r" (*(uint32_t *)pvEBX),
551 "=c" (*(uint32_t *)pvECX),
552 "=d" (*(uint32_t *)pvEDX)
553 : "0" (uOperator));
554# endif
555
556# elif RT_INLINE_ASM_USES_INTRIN
557 int aInfo[4];
558 __cpuid(aInfo, uOperator);
559 *(uint32_t *)pvEAX = aInfo[0];
560 *(uint32_t *)pvEBX = aInfo[1];
561 *(uint32_t *)pvECX = aInfo[2];
562 *(uint32_t *)pvEDX = aInfo[3];
563
564# else
565 uint32_t uEAX;
566 uint32_t uEBX;
567 uint32_t uECX;
568 uint32_t uEDX;
569 __asm
570 {
571 push ebx
572 mov eax, [uOperator]
573 cpuid
574 mov [uEAX], eax
575 mov [uEBX], ebx
576 mov [uECX], ecx
577 mov [uEDX], edx
578 pop ebx
579 }
580 *(uint32_t *)pvEAX = uEAX;
581 *(uint32_t *)pvEBX = uEBX;
582 *(uint32_t *)pvECX = uECX;
583 *(uint32_t *)pvEDX = uEDX;
584# endif
585}
586#endif
587
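/* Illustrative sketch (not part of the original header): using ASMCpuId to
 * fetch the 12-character CPU vendor string. Leaf 0 returns it in EBX, EDX and
 * ECX, in that order. The function and buffer names are hypothetical. */
#if 0 /* example only */
DECLINLINE(void) ExampleGetVendorString(char szVendor[13])
{
    uint32_t uEAX;
    /* pvEBX -> bytes 0..3, pvEDX -> bytes 4..7, pvECX -> bytes 8..11. */
    ASMCpuId(0, &uEAX, &szVendor[0], &szVendor[8], &szVendor[4]);
    szVendor[12] = '\0';        /* e.g. "GenuineIntel" or "AuthenticAMD" */
}
#endif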
588
589/**
590 * Performs the cpuid instruction returning all registers.
591 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
592 *
593 * @param uOperator CPUID operation (eax).
594 * @param uIdxECX ecx index
595 * @param pvEAX Where to store eax.
596 * @param pvEBX Where to store ebx.
597 * @param pvECX Where to store ecx.
598 * @param pvEDX Where to store edx.
599 * @remark We're using void pointers to ease the use of special bitfield structures and such.
600 */
601#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
602DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
603#else
604DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
605{
606# if RT_INLINE_ASM_GNU_STYLE
607# ifdef RT_ARCH_AMD64
608 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
609 __asm__ ("cpuid\n\t"
610 : "=a" (uRAX),
611 "=b" (uRBX),
612 "=c" (uRCX),
613 "=d" (uRDX)
614 : "0" (uOperator),
615 "2" (uIdxECX));
616 *(uint32_t *)pvEAX = (uint32_t)uRAX;
617 *(uint32_t *)pvEBX = (uint32_t)uRBX;
618 *(uint32_t *)pvECX = (uint32_t)uRCX;
619 *(uint32_t *)pvEDX = (uint32_t)uRDX;
620# else
621 __asm__ ("xchgl %%ebx, %1\n\t"
622 "cpuid\n\t"
623 "xchgl %%ebx, %1\n\t"
624 : "=a" (*(uint32_t *)pvEAX),
625 "=r" (*(uint32_t *)pvEBX),
626 "=c" (*(uint32_t *)pvECX),
627 "=d" (*(uint32_t *)pvEDX)
628 : "0" (uOperator),
629 "2" (uIdxECX));
630# endif
631
632# elif RT_INLINE_ASM_USES_INTRIN
633 int aInfo[4];
634 /** @todo __cpuid ignores the ECX index, so uIdxECX is not honored here; a different intrinsic (or inline asm) is needed. */
635 __cpuid(aInfo, uOperator);
636 *(uint32_t *)pvEAX = aInfo[0];
637 *(uint32_t *)pvEBX = aInfo[1];
638 *(uint32_t *)pvECX = aInfo[2];
639 *(uint32_t *)pvEDX = aInfo[3];
640
641# else
642 uint32_t uEAX;
643 uint32_t uEBX;
644 uint32_t uECX;
645 uint32_t uEDX;
646 __asm
647 {
648 push ebx
649 mov eax, [uOperator]
650 mov ecx, [uIdxECX]
651 cpuid
652 mov [uEAX], eax
653 mov [uEBX], ebx
654 mov [uECX], ecx
655 mov [uEDX], edx
656 pop ebx
657 }
658 *(uint32_t *)pvEAX = uEAX;
659 *(uint32_t *)pvEBX = uEBX;
660 *(uint32_t *)pvECX = uECX;
661 *(uint32_t *)pvEDX = uEDX;
662# endif
663}
664#endif
665
666
667/**
668 * Performs the cpuid instruction returning ecx and edx.
669 *
670 * @param uOperator CPUID operation (eax).
671 * @param pvECX Where to store ecx.
672 * @param pvEDX Where to store edx.
673 * @remark We're using void pointers to ease the use of special bitfield structures and such.
674 */
675#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
676DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
677#else
678DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
679{
680 uint32_t uEBX;
681 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
682}
683#endif
684
685
686/**
687 * Performs the cpuid instruction returning edx.
688 *
689 * @param uOperator CPUID operation (eax).
690 * @returns EDX after cpuid operation.
691 */
692#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
693DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
694#else
695DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
696{
697 RTCCUINTREG xDX;
698# if RT_INLINE_ASM_GNU_STYLE
699# ifdef RT_ARCH_AMD64
700 RTCCUINTREG uSpill;
701 __asm__ ("cpuid"
702 : "=a" (uSpill),
703 "=d" (xDX)
704 : "0" (uOperator)
705 : "rbx", "rcx");
706# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
707 __asm__ ("push %%ebx\n\t"
708 "cpuid\n\t"
709 "pop %%ebx\n\t"
710 : "=a" (uOperator),
711 "=d" (xDX)
712 : "0" (uOperator)
713 : "ecx");
714# else
715 __asm__ ("cpuid"
716 : "=a" (uOperator),
717 "=d" (xDX)
718 : "0" (uOperator)
719 : "ebx", "ecx");
720# endif
721
722# elif RT_INLINE_ASM_USES_INTRIN
723 int aInfo[4];
724 __cpuid(aInfo, uOperator);
725 xDX = aInfo[3];
726
727# else
728 __asm
729 {
730 push ebx
731 mov eax, [uOperator]
732 cpuid
733 mov [xDX], edx
734 pop ebx
735 }
736# endif
737 return (uint32_t)xDX;
738}
739#endif
740
741
742/**
743 * Performs the cpuid instruction returning ecx.
744 *
745 * @param uOperator CPUID operation (eax).
746 * @returns ECX after cpuid operation.
747 */
748#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
749DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
750#else
751DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
752{
753 RTCCUINTREG xCX;
754# if RT_INLINE_ASM_GNU_STYLE
755# ifdef RT_ARCH_AMD64
756 RTCCUINTREG uSpill;
757 __asm__ ("cpuid"
758 : "=a" (uSpill),
759 "=c" (xCX)
760 : "0" (uOperator)
761 : "rbx", "rdx");
762# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
763 __asm__ ("push %%ebx\n\t"
764 "cpuid\n\t"
765 "pop %%ebx\n\t"
766 : "=a" (uOperator),
767 "=c" (xCX)
768 : "0" (uOperator)
769 : "edx");
770# else
771 __asm__ ("cpuid"
772 : "=a" (uOperator),
773 "=c" (xCX)
774 : "0" (uOperator)
775 : "ebx", "edx");
776
777# endif
778
779# elif RT_INLINE_ASM_USES_INTRIN
780 int aInfo[4];
781 __cpuid(aInfo, uOperator);
782 xCX = aInfo[2];
783
784# else
785 __asm
786 {
787 push ebx
788 mov eax, [uOperator]
789 cpuid
790 mov [xCX], ecx
791 pop ebx
792 }
793# endif
794 return (uint32_t)xCX;
795}
796#endif
797
798
799/**
800 * Checks if the current CPU supports CPUID.
801 *
802 * @returns true if CPUID is supported.
803 */
804DECLINLINE(bool) ASMHasCpuId(void)
805{
806#ifdef RT_ARCH_AMD64
807 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
808#else /* !RT_ARCH_AMD64 */
809 bool fRet = false;
810# if RT_INLINE_ASM_GNU_STYLE
811 uint32_t u1;
812 uint32_t u2;
813 __asm__ ("pushf\n\t"
814 "pop %1\n\t"
815 "mov %1, %2\n\t"
816 "xorl $0x200000, %1\n\t"
817 "push %1\n\t"
818 "popf\n\t"
819 "pushf\n\t"
820 "pop %1\n\t"
821 "cmpl %1, %2\n\t"
822 "setne %0\n\t"
823 "push %2\n\t"
824 "popf\n\t"
825 : "=m" (fRet), "=r" (u1), "=r" (u2));
826# else
827 __asm
828 {
829 pushfd
830 pop eax
831 mov ebx, eax
832 xor eax, 0200000h
833 push eax
834 popfd
835 pushfd
836 pop eax
837 cmp eax, ebx
838 setne fRet
839 push ebx
840 popfd
841 }
842# endif
843 return fRet;
844#endif /* !RT_ARCH_AMD64 */
845}
846
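/* Illustrative sketch (not part of the original header): probing for a CPUID
 * feature bit, here SSE2 (CPUID leaf 1, EDX bit 26). The function name is
 * hypothetical; RT_BIT comes from iprt/cdefs.h. */
#if 0 /* example only */
DECLINLINE(bool) ExampleHasSse2(void)
{
    if (!ASMHasCpuId())
        return false;                               /* ancient 32-bit CPU without cpuid */
    return (ASMCpuId_EDX(1) & RT_BIT(26)) != 0;     /* EDX bit 26 = SSE2 */
}
#endif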
847
848/**
849 * Gets the APIC ID of the current CPU.
850 *
851 * @returns the APIC ID.
852 */
853#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
854DECLASM(uint8_t) ASMGetApicId(void);
855#else
856DECLINLINE(uint8_t) ASMGetApicId(void)
857{
858 RTCCUINTREG xBX;
859# if RT_INLINE_ASM_GNU_STYLE
860# ifdef RT_ARCH_AMD64
861 RTCCUINTREG uSpill;
862 __asm__ ("cpuid"
863 : "=a" (uSpill),
864 "=b" (xBX)
865 : "0" (1)
866 : "rcx", "rdx");
867# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
868 RTCCUINTREG uSpill;
869 __asm__ ("mov %%ebx,%1\n\t"
870 "cpuid\n\t"
871 "xchgl %%ebx,%1\n\t"
872 : "=a" (uSpill),
873 "=r" (xBX)
874 : "0" (1)
875 : "ecx", "edx");
876# else
877 RTCCUINTREG uSpill;
878 __asm__ ("cpuid"
879 : "=a" (uSpill),
880 "=b" (xBX)
881 : "0" (1)
882 : "ecx", "edx");
883# endif
884
885# elif RT_INLINE_ASM_USES_INTRIN
886 int aInfo[4];
887 __cpuid(aInfo, 1);
888 xBX = aInfo[1];
889
890# else
891 __asm
892 {
893 push ebx
894 mov eax, 1
895 cpuid
896 mov [xBX], ebx
897 pop ebx
898 }
899# endif
900 return (uint8_t)(xBX >> 24);
901}
902#endif
903
904/**
905 * Get cr0.
906 * @returns cr0.
907 */
908#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
909DECLASM(RTCCUINTREG) ASMGetCR0(void);
910#else
911DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
912{
913 RTCCUINTREG uCR0;
914# if RT_INLINE_ASM_USES_INTRIN
915 uCR0 = __readcr0();
916
917# elif RT_INLINE_ASM_GNU_STYLE
918# ifdef RT_ARCH_AMD64
919 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
920# else
921 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
922# endif
923# else
924 __asm
925 {
926# ifdef RT_ARCH_AMD64
927 mov rax, cr0
928 mov [uCR0], rax
929# else
930 mov eax, cr0
931 mov [uCR0], eax
932# endif
933 }
934# endif
935 return uCR0;
936}
937#endif
938
939
940/**
941 * Sets the CR0 register.
942 * @param uCR0 The new CR0 value.
943 */
944#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
945DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
946#else
947DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
948{
949# if RT_INLINE_ASM_USES_INTRIN
950 __writecr0(uCR0);
951
952# elif RT_INLINE_ASM_GNU_STYLE
953# ifdef RT_ARCH_AMD64
954 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
955# else
956 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
957# endif
958# else
959 __asm
960 {
961# ifdef RT_ARCH_AMD64
962 mov rax, [uCR0]
963 mov cr0, rax
964# else
965 mov eax, [uCR0]
966 mov cr0, eax
967# endif
968 }
969# endif
970}
971#endif
972
973
974/**
975 * Get cr2.
976 * @returns cr2.
977 */
978#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
979DECLASM(RTCCUINTREG) ASMGetCR2(void);
980#else
981DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
982{
983 RTCCUINTREG uCR2;
984# if RT_INLINE_ASM_USES_INTRIN
985 uCR2 = __readcr2();
986
987# elif RT_INLINE_ASM_GNU_STYLE
988# ifdef RT_ARCH_AMD64
989 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
990# else
991 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
992# endif
993# else
994 __asm
995 {
996# ifdef RT_ARCH_AMD64
997 mov rax, cr2
998 mov [uCR2], rax
999# else
1000 mov eax, cr2
1001 mov [uCR2], eax
1002# endif
1003 }
1004# endif
1005 return uCR2;
1006}
1007#endif
1008
1009
1010/**
1011 * Sets the CR2 register.
1012 * @param uCR2 The new CR2 value.
1013 */
1014#if RT_INLINE_ASM_EXTERNAL
1015DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1016#else
1017DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1018{
1019# if RT_INLINE_ASM_GNU_STYLE
1020# ifdef RT_ARCH_AMD64
1021 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1022# else
1023 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1024# endif
1025# else
1026 __asm
1027 {
1028# ifdef RT_ARCH_AMD64
1029 mov rax, [uCR2]
1030 mov cr2, rax
1031# else
1032 mov eax, [uCR2]
1033 mov cr2, eax
1034# endif
1035 }
1036# endif
1037}
1038#endif
1039
1040
1041/**
1042 * Get cr3.
1043 * @returns cr3.
1044 */
1045#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1046DECLASM(RTCCUINTREG) ASMGetCR3(void);
1047#else
1048DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1049{
1050 RTCCUINTREG uCR3;
1051# if RT_INLINE_ASM_USES_INTRIN
1052 uCR3 = __readcr3();
1053
1054# elif RT_INLINE_ASM_GNU_STYLE
1055# ifdef RT_ARCH_AMD64
1056 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
1057# else
1058 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
1059# endif
1060# else
1061 __asm
1062 {
1063# ifdef RT_ARCH_AMD64
1064 mov rax, cr3
1065 mov [uCR3], rax
1066# else
1067 mov eax, cr3
1068 mov [uCR3], eax
1069# endif
1070 }
1071# endif
1072 return uCR3;
1073}
1074#endif
1075
1076
1077/**
1078 * Sets the CR3 register.
1079 *
1080 * @param uCR3 New CR3 value.
1081 */
1082#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1083DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1084#else
1085DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1086{
1087# if RT_INLINE_ASM_USES_INTRIN
1088 __writecr3(uCR3);
1089
1090# elif RT_INLINE_ASM_GNU_STYLE
1091# ifdef RT_ARCH_AMD64
1092 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1093# else
1094 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1095# endif
1096# else
1097 __asm
1098 {
1099# ifdef RT_ARCH_AMD64
1100 mov rax, [uCR3]
1101 mov cr3, rax
1102# else
1103 mov eax, [uCR3]
1104 mov cr3, eax
1105# endif
1106 }
1107# endif
1108}
1109#endif
1110
1111
1112/**
1113 * Reloads the CR3 register.
1114 */
1115#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1116DECLASM(void) ASMReloadCR3(void);
1117#else
1118DECLINLINE(void) ASMReloadCR3(void)
1119{
1120# if RT_INLINE_ASM_USES_INTRIN
1121 __writecr3(__readcr3());
1122
1123# elif RT_INLINE_ASM_GNU_STYLE
1124 RTCCUINTREG u;
1125# ifdef RT_ARCH_AMD64
1126 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1127 "movq %0, %%cr3\n\t"
1128 : "=r" (u));
1129# else
1130 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1131 "movl %0, %%cr3\n\t"
1132 : "=r" (u));
1133# endif
1134# else
1135 __asm
1136 {
1137# ifdef RT_ARCH_AMD64
1138 mov rax, cr3
1139 mov cr3, rax
1140# else
1141 mov eax, cr3
1142 mov cr3, eax
1143# endif
1144 }
1145# endif
1146}
1147#endif
1148
1149
1150/**
1151 * Get cr4.
1152 * @returns cr4.
1153 */
1154#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1155DECLASM(RTCCUINTREG) ASMGetCR4(void);
1156#else
1157DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1158{
1159 RTCCUINTREG uCR4;
1160# if RT_INLINE_ASM_USES_INTRIN
1161 uCR4 = __readcr4();
1162
1163# elif RT_INLINE_ASM_GNU_STYLE
1164# ifdef RT_ARCH_AMD64
1165 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1166# else
1167 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1168# endif
1169# else
1170 __asm
1171 {
1172# ifdef RT_ARCH_AMD64
1173 mov rax, cr4
1174 mov [uCR4], rax
1175# else
1176 push eax /* just in case */
1177 /*mov eax, cr4*/
1178 _emit 0x0f
1179 _emit 0x20
1180 _emit 0xe0
1181 mov [uCR4], eax
1182 pop eax
1183# endif
1184 }
1185# endif
1186 return uCR4;
1187}
1188#endif
1189
1190
1191/**
1192 * Sets the CR4 register.
1193 *
1194 * @param uCR4 New CR4 value.
1195 */
1196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1197DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1198#else
1199DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1200{
1201# if RT_INLINE_ASM_USES_INTRIN
1202 __writecr4(uCR4);
1203
1204# elif RT_INLINE_ASM_GNU_STYLE
1205# ifdef RT_ARCH_AMD64
1206 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1207# else
1208 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1209# endif
1210# else
1211 __asm
1212 {
1213# ifdef RT_ARCH_AMD64
1214 mov rax, [uCR4]
1215 mov cr4, rax
1216# else
1217 mov eax, [uCR4]
1218 _emit 0x0F
1219 _emit 0x22
1220 _emit 0xE0 /* mov cr4, eax */
1221# endif
1222 }
1223# endif
1224}
1225#endif
1226
1227
1228/**
1229 * Get cr8.
1230 * @returns cr8.
1231 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1232 */
1233#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1234DECLASM(RTCCUINTREG) ASMGetCR8(void);
1235#else
1236DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1237{
1238# ifdef RT_ARCH_AMD64
1239 RTCCUINTREG uCR8;
1240# if RT_INLINE_ASM_USES_INTRIN
1241 uCR8 = __readcr8();
1242
1243# elif RT_INLINE_ASM_GNU_STYLE
1244 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1245# else
1246 __asm
1247 {
1248 mov rax, cr8
1249 mov [uCR8], rax
1250 }
1251# endif
1252 return uCR8;
1253# else /* !RT_ARCH_AMD64 */
1254 return 0;
1255# endif /* !RT_ARCH_AMD64 */
1256}
1257#endif
1258
1259
1260/**
1261 * Enables interrupts (EFLAGS.IF).
1262 */
1263#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1264DECLASM(void) ASMIntEnable(void);
1265#else
1266DECLINLINE(void) ASMIntEnable(void)
1267{
1268# if RT_INLINE_ASM_GNU_STYLE
1269 __asm("sti\n");
1270# elif RT_INLINE_ASM_USES_INTRIN
1271 _enable();
1272# else
1273 __asm sti
1274# endif
1275}
1276#endif
1277
1278
1279/**
1280 * Disables interrupts (!EFLAGS.IF).
1281 */
1282#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1283DECLASM(void) ASMIntDisable(void);
1284#else
1285DECLINLINE(void) ASMIntDisable(void)
1286{
1287# if RT_INLINE_ASM_GNU_STYLE
1288 __asm("cli\n");
1289# elif RT_INLINE_ASM_USES_INTRIN
1290 _disable();
1291# else
1292 __asm cli
1293# endif
1294}
1295#endif
1296
1297
1298/**
1299 * Disables interrupts and returns previous xFLAGS.
1300 */
1301#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1302DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1303#else
1304DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1305{
1306 RTCCUINTREG xFlags;
1307# if RT_INLINE_ASM_GNU_STYLE
1308# ifdef RT_ARCH_AMD64
1309 __asm__ __volatile__("pushfq\n\t"
1310 "cli\n\t"
1311 "popq %0\n\t"
1312 : "=rm" (xFlags));
1313# else
1314 __asm__ __volatile__("pushfl\n\t"
1315 "cli\n\t"
1316 "popl %0\n\t"
1317 : "=rm" (xFlags));
1318# endif
1319# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1320 xFlags = ASMGetFlags();
1321 _disable();
1322# else
1323 __asm {
1324 pushfd
1325 cli
1326 pop [xFlags]
1327 }
1328# endif
1329 return xFlags;
1330}
1331#endif
1332
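/* Illustrative sketch (not part of the original header): the usual pattern for
 * a short interrupts-off section. ASMIntDisableFlags returns the previous
 * [R/E]FLAGS so ASMSetFlags can restore the caller's interrupt state, i.e. IF
 * is only re-enabled if it was enabled before. The counter is hypothetical. */
#if 0 /* example only */
DECLINLINE(void) ExampleBumpCounter(volatile uint32_t *pu32Counter)
{
    RTCCUINTREG fSavedFlags = ASMIntDisableFlags(); /* cli + return old flags */
    *pu32Counter += 1;                              /* work done with interrupts off */
    ASMSetFlags(fSavedFlags);                       /* restore the previous state */
}
#endif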
1333
1334/**
1335 * Reads a machine specific register.
1336 *
1337 * @returns Register content.
1338 * @param uRegister Register to read.
1339 */
1340#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1341DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1342#else
1343DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1344{
1345 RTUINT64U u;
1346# if RT_INLINE_ASM_GNU_STYLE
1347 __asm__ ("rdmsr\n\t"
1348 : "=a" (u.s.Lo),
1349 "=d" (u.s.Hi)
1350 : "c" (uRegister));
1351
1352# elif RT_INLINE_ASM_USES_INTRIN
1353 u.u = __readmsr(uRegister);
1354
1355# else
1356 __asm
1357 {
1358 mov ecx, [uRegister]
1359 rdmsr
1360 mov [u.s.Lo], eax
1361 mov [u.s.Hi], edx
1362 }
1363# endif
1364
1365 return u.u;
1366}
1367#endif
1368
1369
1370/**
1371 * Writes a machine specific register.
1372 *
1373 *
1374 * @param uRegister Register to write to.
1375 * @param u64Val Value to write.
1376 */
1377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1378DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1379#else
1380DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1381{
1382 RTUINT64U u;
1383
1384 u.u = u64Val;
1385# if RT_INLINE_ASM_GNU_STYLE
1386 __asm__ __volatile__("wrmsr\n\t"
1387 ::"a" (u.s.Lo),
1388 "d" (u.s.Hi),
1389 "c" (uRegister));
1390
1391# elif RT_INLINE_ASM_USES_INTRIN
1392 __writemsr(uRegister, u.u);
1393
1394# else
1395 __asm
1396 {
1397 mov ecx, [uRegister]
1398 mov edx, [u.s.Hi]
1399 mov eax, [u.s.Lo]
1400 wrmsr
1401 }
1402# endif
1403}
1404#endif
1405
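/* Illustrative sketch (not part of the original header): a read-modify-write
 * of a machine specific register. The MSR number and mask are supplied by the
 * (hypothetical) caller; rdmsr/wrmsr require ring-0 privilege. */
#if 0 /* example only */
DECLINLINE(void) ExampleSetMsrBits(uint32_t uMsr, uint64_t fOrMask)
{
    uint64_t u64 = ASMRdMsr(uMsr);      /* rdmsr */
    ASMWrMsr(uMsr, u64 | fOrMask);      /* wrmsr */
}
#endif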
1406
1407/**
1408 * Reads low part of a machine specific register.
1409 *
1410 * @returns The low 32 bits of the register content.
1411 * @param uRegister Register to read.
1412 */
1413#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1414DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1415#else
1416DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1417{
1418 uint32_t u32;
1419# if RT_INLINE_ASM_GNU_STYLE
1420 __asm__ ("rdmsr\n\t"
1421 : "=a" (u32)
1422 : "c" (uRegister)
1423 : "edx");
1424
1425# elif RT_INLINE_ASM_USES_INTRIN
1426 u32 = (uint32_t)__readmsr(uRegister);
1427
1428#else
1429 __asm
1430 {
1431 mov ecx, [uRegister]
1432 rdmsr
1433 mov [u32], eax
1434 }
1435# endif
1436
1437 return u32;
1438}
1439#endif
1440
1441
1442/**
1443 * Reads high part of a machine specific register.
1444 *
1445 * @returns The high 32 bits of the register content.
1446 * @param uRegister Register to read.
1447 */
1448#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1449DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1450#else
1451DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1452{
1453 uint32_t u32;
1454# if RT_INLINE_ASM_GNU_STYLE
1455 __asm__ ("rdmsr\n\t"
1456 : "=d" (u32)
1457 : "c" (uRegister)
1458 : "eax");
1459
1460# elif RT_INLINE_ASM_USES_INTRIN
1461 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1462
1463# else
1464 __asm
1465 {
1466 mov ecx, [uRegister]
1467 rdmsr
1468 mov [u32], edx
1469 }
1470# endif
1471
1472 return u32;
1473}
1474#endif
1475
1476
1477/**
1478 * Gets dr7.
1479 *
1480 * @returns dr7.
1481 */
1482#if RT_INLINE_ASM_EXTERNAL
1483DECLASM(RTCCUINTREG) ASMGetDR7(void);
1484#else
1485DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1486{
1487 RTCCUINTREG uDR7;
1488# if RT_INLINE_ASM_GNU_STYLE
1489# ifdef RT_ARCH_AMD64
1490 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1491# else
1492 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1493# endif
1494# else
1495 __asm
1496 {
1497# ifdef RT_ARCH_AMD64
1498 mov rax, dr7
1499 mov [uDR7], rax
1500# else
1501 mov eax, dr7
1502 mov [uDR7], eax
1503# endif
1504 }
1505# endif
1506 return uDR7;
1507}
1508#endif
1509
1510
1511/**
1512 * Gets dr6.
1513 *
1514 * @returns dr6.
1515 */
1516#if RT_INLINE_ASM_EXTERNAL
1517DECLASM(RTCCUINTREG) ASMGetDR6(void);
1518#else
1519DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1520{
1521 RTCCUINTREG uDR6;
1522# if RT_INLINE_ASM_GNU_STYLE
1523# ifdef RT_ARCH_AMD64
1524 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1525# else
1526 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1527# endif
1528# else
1529 __asm
1530 {
1531# ifdef RT_ARCH_AMD64
1532 mov rax, dr6
1533 mov [uDR6], rax
1534# else
1535 mov eax, dr6
1536 mov [uDR6], eax
1537# endif
1538 }
1539# endif
1540 return uDR6;
1541}
1542#endif
1543
1544
1545/**
1546 * Reads and clears DR6.
1547 *
1548 * @returns DR6.
1549 */
1550#if RT_INLINE_ASM_EXTERNAL
1551DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1552#else
1553DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1554{
1555 RTCCUINTREG uDR6;
1556# if RT_INLINE_ASM_GNU_STYLE
1557 RTCCUINTREG uNewValue = 0xffff0ff0; /* Bits 31-16 and 11-4 are set; bits 15-12, 3-0 and 63-32 are zero. */
1558# ifdef RT_ARCH_AMD64
1559 __asm__ ("movq %%dr6, %0\n\t"
1560 "movq %1, %%dr6\n\t"
1561 : "=r" (uDR6)
1562 : "r" (uNewValue));
1563# else
1564 __asm__ ("movl %%dr6, %0\n\t"
1565 "movl %1, %%dr6\n\t"
1566 : "=r" (uDR6)
1567 : "r" (uNewValue));
1568# endif
1569# else
1570 __asm
1571 {
1572# ifdef RT_ARCH_AMD64
1573 mov rax, dr6
1574 mov [uDR6], rax
1575 mov rcx, rax
1576 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1577 mov dr6, rcx
1578# else
1579 mov eax, dr6
1580 mov [uDR6], eax
1581 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1582 mov dr6, ecx
1583# endif
1584 }
1585# endif
1586 return uDR6;
1587}
1588#endif
1589
1590
1591/**
1592 * Compiler memory barrier.
1593 *
1594 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1595 * values or any outstanding writes when returning from this function.
1596 *
1597 * This function must be used if non-volatile data is modified by a
1598 * device or the VMM. Typical cases are port access, MMIO access,
1599 * trapping instruction, etc.
1600 */
1601#if RT_INLINE_ASM_GNU_STYLE
1602# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1603#elif RT_INLINE_ASM_USES_INTRIN
1604# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1605#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1606DECLINLINE(void) ASMCompilerBarrier(void)
1607{
1608 __asm
1609 {
1610 }
1611}
1612#endif
1613
1614
1615/**
1616 * Writes an 8-bit unsigned integer to an I/O port.
1617 *
1618 * @param Port I/O port to write to.
1619 * @param u8 8-bit integer to write.
1620 */
1621#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1622DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1623#else
1624DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1625{
1626# if RT_INLINE_ASM_GNU_STYLE
1627 __asm__ __volatile__("outb %b1, %w0\n\t"
1628 :: "Nd" (Port),
1629 "a" (u8));
1630
1631# elif RT_INLINE_ASM_USES_INTRIN
1632 __outbyte(Port, u8);
1633
1634# else
1635 __asm
1636 {
1637 mov dx, [Port]
1638 mov al, [u8]
1639 out dx, al
1640 }
1641# endif
1642}
1643#endif
1644
1645
1646/**
1647 * Gets an 8-bit unsigned integer from an I/O port.
1648 *
1649 * @returns 8-bit integer.
1650 * @param Port I/O port to read from.
1651 */
1652#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1653DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1654#else
1655DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1656{
1657 uint8_t u8;
1658# if RT_INLINE_ASM_GNU_STYLE
1659 __asm__ __volatile__("inb %w1, %b0\n\t"
1660 : "=a" (u8)
1661 : "Nd" (Port));
1662
1663# elif RT_INLINE_ASM_USES_INTRIN
1664 u8 = __inbyte(Port);
1665
1666# else
1667 __asm
1668 {
1669 mov dx, [Port]
1670 in al, dx
1671 mov [u8], al
1672 }
1673# endif
1674 return u8;
1675}
1676#endif
1677
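/* Illustrative sketch (not part of the original header): a port write followed
 * by a port read, using the classic PC CMOS/RTC index (0x70) and data (0x71)
 * ports as the example device. Ring-0 access (or a suitable I/O permission
 * bitmap) is assumed; the function name is hypothetical. */
#if 0 /* example only */
DECLINLINE(uint8_t) ExampleReadCmos(uint8_t bReg)
{
    ASMOutU8(0x70, bReg);       /* select the CMOS register */
    return ASMInU8(0x71);       /* read its contents */
}
#endif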
1678
1679/**
1680 * Writes a 16-bit unsigned integer to an I/O port.
1681 *
1682 * @param Port I/O port to write to.
1683 * @param u16 16-bit integer to write.
1684 */
1685#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1686DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1687#else
1688DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1689{
1690# if RT_INLINE_ASM_GNU_STYLE
1691 __asm__ __volatile__("outw %w1, %w0\n\t"
1692 :: "Nd" (Port),
1693 "a" (u16));
1694
1695# elif RT_INLINE_ASM_USES_INTRIN
1696 __outword(Port, u16);
1697
1698# else
1699 __asm
1700 {
1701 mov dx, [Port]
1702 mov ax, [u16]
1703 out dx, ax
1704 }
1705# endif
1706}
1707#endif
1708
1709
1710/**
1711 * Gets a 16-bit unsigned integer from an I/O port.
1712 *
1713 * @returns 16-bit integer.
1714 * @param Port I/O port to read from.
1715 */
1716#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1717DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1718#else
1719DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1720{
1721 uint16_t u16;
1722# if RT_INLINE_ASM_GNU_STYLE
1723 __asm__ __volatile__("inw %w1, %w0\n\t"
1724 : "=a" (u16)
1725 : "Nd" (Port));
1726
1727# elif RT_INLINE_ASM_USES_INTRIN
1728 u16 = __inword(Port);
1729
1730# else
1731 __asm
1732 {
1733 mov dx, [Port]
1734 in ax, dx
1735 mov [u16], ax
1736 }
1737# endif
1738 return u16;
1739}
1740#endif
1741
1742
1743/**
1744 * Writes a 32-bit unsigned integer to an I/O port.
1745 *
1746 * @param Port I/O port to write to.
1747 * @param u32 32-bit integer to write.
1748 */
1749#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1750DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1751#else
1752DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1753{
1754# if RT_INLINE_ASM_GNU_STYLE
1755 __asm__ __volatile__("outl %1, %w0\n\t"
1756 :: "Nd" (Port),
1757 "a" (u32));
1758
1759# elif RT_INLINE_ASM_USES_INTRIN
1760 __outdword(Port, u32);
1761
1762# else
1763 __asm
1764 {
1765 mov dx, [Port]
1766 mov eax, [u32]
1767 out dx, eax
1768 }
1769# endif
1770}
1771#endif
1772
1773
1774/**
1775 * Gets a 32-bit unsigned integer from an I/O port.
1776 *
1777 * @returns 32-bit integer.
1778 * @param Port I/O port to read from.
1779 */
1780#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1781DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1782#else
1783DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1784{
1785 uint32_t u32;
1786# if RT_INLINE_ASM_GNU_STYLE
1787 __asm__ __volatile__("inl %w1, %0\n\t"
1788 : "=a" (u32)
1789 : "Nd" (Port));
1790
1791# elif RT_INLINE_ASM_USES_INTRIN
1792 u32 = __indword(Port);
1793
1794# else
1795 __asm
1796 {
1797 mov dx, [Port]
1798 in eax, dx
1799 mov [u32], eax
1800 }
1801# endif
1802 return u32;
1803}
1804#endif
1805
1806
1807/**
1808 * Atomically Exchange an unsigned 8-bit value.
1809 *
1810 * @returns Current *pu8 value
1811 * @param pu8 Pointer to the 8-bit variable to update.
1812 * @param u8 The 8-bit value to assign to *pu8.
1813 */
1814#if RT_INLINE_ASM_EXTERNAL
1815DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1816#else
1817DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1818{
1819# if RT_INLINE_ASM_GNU_STYLE
1820 __asm__ __volatile__("xchgb %0, %1\n\t"
1821 : "=m" (*pu8),
1822 "=r" (u8)
1823 : "1" (u8));
1824# else
1825 __asm
1826 {
1827# ifdef RT_ARCH_AMD64
1828 mov rdx, [pu8]
1829 mov al, [u8]
1830 xchg [rdx], al
1831 mov [u8], al
1832# else
1833 mov edx, [pu8]
1834 mov al, [u8]
1835 xchg [edx], al
1836 mov [u8], al
1837# endif
1838 }
1839# endif
1840 return u8;
1841}
1842#endif
1843
1844
1845/**
1846 * Atomically Exchange a signed 8-bit value.
1847 *
1848 * @returns Current *pi8 value
1849 * @param pi8 Pointer to the 8-bit variable to update.
1850 * @param i8 The 8-bit value to assign to *pi8.
1851 */
1852DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1853{
1854 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1855}
1856
1857
1858/**
1859 * Atomically Exchange a bool value.
1860 *
1861 * @returns Current *pf value
1862 * @param pf Pointer to the boolean variable to update.
1863 * @param f The boolean value to assign to *pf.
1864 */
1865DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1866{
1867#ifdef _MSC_VER
1868 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1869#else
1870 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1871#endif
1872}
1873
1874
1875/**
1876 * Atomically Exchange an unsigned 16-bit value.
1877 *
1878 * @returns Current *pu16 value
1879 * @param pu16 Pointer to the 16-bit variable to update.
1880 * @param u16 The 16-bit value to assign to *pu16.
1881 */
1882#if RT_INLINE_ASM_EXTERNAL
1883DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1884#else
1885DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1886{
1887# if RT_INLINE_ASM_GNU_STYLE
1888 __asm__ __volatile__("xchgw %0, %1\n\t"
1889 : "=m" (*pu16),
1890 "=r" (u16)
1891 : "1" (u16));
1892# else
1893 __asm
1894 {
1895# ifdef RT_ARCH_AMD64
1896 mov rdx, [pu16]
1897 mov ax, [u16]
1898 xchg [rdx], ax
1899 mov [u16], ax
1900# else
1901 mov edx, [pu16]
1902 mov ax, [u16]
1903 xchg [edx], ax
1904 mov [u16], ax
1905# endif
1906 }
1907# endif
1908 return u16;
1909}
1910#endif
1911
1912
1913/**
1914 * Atomically Exchange a signed 16-bit value.
1915 *
1916 * @returns Current *pi16 value
1917 * @param pi16 Pointer to the 16-bit variable to update.
1918 * @param i16 The 16-bit value to assign to *pi16.
1919 */
1920DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1921{
1922 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1923}
1924
1925
1926/**
1927 * Atomically Exchange an unsigned 32-bit value.
1928 *
1929 * @returns Current *pu32 value
1930 * @param pu32 Pointer to the 32-bit variable to update.
1931 * @param u32 The 32-bit value to assign to *pu32.
1932 */
1933#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1934DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1935#else
1936DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1937{
1938# if RT_INLINE_ASM_GNU_STYLE
1939 __asm__ __volatile__("xchgl %0, %1\n\t"
1940 : "=m" (*pu32),
1941 "=r" (u32)
1942 : "1" (u32));
1943
1944# elif RT_INLINE_ASM_USES_INTRIN
1945 u32 = _InterlockedExchange((long *)pu32, u32);
1946
1947# else
1948 __asm
1949 {
1950# ifdef RT_ARCH_AMD64
1951 mov rdx, [pu32]
1952 mov eax, u32
1953 xchg [rdx], eax
1954 mov [u32], eax
1955# else
1956 mov edx, [pu32]
1957 mov eax, u32
1958 xchg [edx], eax
1959 mov [u32], eax
1960# endif
1961 }
1962# endif
1963 return u32;
1964}
1965#endif
1966
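/* Illustrative sketch (not part of the original header): a minimal xchg-based
 * spinlock. A real lock would add pause hints and fairness; this only shows
 * how the atomic exchange primitive is typically used. Names are hypothetical. */
#if 0 /* example only */
DECLINLINE(void) ExampleSpinAcquire(volatile uint32_t *pu32Lock)
{
    while (ASMAtomicXchgU32(pu32Lock, 1) != 0)  /* returns the previous value */
        /* spin until the previous value was 0 (unlocked) */;
}

DECLINLINE(void) ExampleSpinRelease(volatile uint32_t *pu32Lock)
{
    ASMAtomicXchgU32(pu32Lock, 0);              /* publish the unlocked state */
}
#endif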
1967
1968/**
1969 * Atomically Exchange a signed 32-bit value.
1970 *
1971 * @returns Current *pi32 value
1972 * @param pi32 Pointer to the 32-bit variable to update.
1973 * @param i32 The 32-bit value to assign to *pi32.
1974 */
1975DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1976{
1977 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1978}
1979
1980
1981/**
1982 * Atomically Exchange an unsigned 64-bit value.
1983 *
1984 * @returns Current *pu64 value
1985 * @param pu64 Pointer to the 64-bit variable to update.
1986 * @param u64 The 64-bit value to assign to *pu64.
1987 */
1988#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1989DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1990#else
1991DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1992{
1993# if defined(RT_ARCH_AMD64)
1994# if RT_INLINE_ASM_USES_INTRIN
1995 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1996
1997# elif RT_INLINE_ASM_GNU_STYLE
1998 __asm__ __volatile__("xchgq %0, %1\n\t"
1999 : "=m" (*pu64),
2000 "=r" (u64)
2001 : "1" (u64));
2002# else
2003 __asm
2004 {
2005 mov rdx, [pu64]
2006 mov rax, [u64]
2007 xchg [rdx], rax
2008 mov [u64], rax
2009 }
2010# endif
2011# else /* !RT_ARCH_AMD64 */
2012# if RT_INLINE_ASM_GNU_STYLE
2013# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2014 uint32_t u32 = (uint32_t)u64;
2015 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2016 "xchgl %%ebx, %3\n\t"
2017 "1:\n\t"
2018 "lock; cmpxchg8b (%5)\n\t"
2019 "jnz 1b\n\t"
2020 "xchgl %%ebx, %3\n\t"
2021 /*"xchgl %%esi, %5\n\t"*/
2022 : "=A" (u64),
2023 "=m" (*pu64)
2024 : "0" (*pu64),
2025 "m" ( u32 ),
2026 "c" ( (uint32_t)(u64 >> 32) ),
2027 "S" (pu64) );
2028# else /* !PIC */
2029 __asm__ __volatile__("1:\n\t"
2030 "lock; cmpxchg8b %1\n\t"
2031 "jnz 1b\n\t"
2032 : "=A" (u64),
2033 "=m" (*pu64)
2034 : "0" (*pu64),
2035 "b" ( (uint32_t)u64 ),
2036 "c" ( (uint32_t)(u64 >> 32) ));
2037# endif
2038# else
2039 __asm
2040 {
2041 mov ebx, dword ptr [u64]
2042 mov ecx, dword ptr [u64 + 4]
2043 mov edi, pu64
2044 mov eax, dword ptr [edi]
2045 mov edx, dword ptr [edi + 4]
2046 retry:
2047 lock cmpxchg8b [edi]
2048 jnz retry
2049 mov dword ptr [u64], eax
2050 mov dword ptr [u64 + 4], edx
2051 }
2052# endif
2053# endif /* !RT_ARCH_AMD64 */
2054 return u64;
2055}
2056#endif
2057
2058
2059/**
2060 * Atomically Exchange a signed 64-bit value.
2061 *
2062 * @returns Current *pi64 value
2063 * @param pi64 Pointer to the 64-bit variable to update.
2064 * @param i64 The 64-bit value to assign to *pi64.
2065 */
2066DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2067{
2068 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2069}
2070
2071
2072#ifdef RT_ARCH_AMD64
2073/**
2074 * Atomically Exchange an unsigned 128-bit value.
2075 *
2076 * @returns Current *pu128.
2077 * @param pu128 Pointer to the 128-bit variable to update.
2078 * @param u128 The 128-bit value to assign to *pu128.
2079 *
2080 * @remark We cannot really assume that any hardware supports this. Nor do I have
2081 * GAS support for it. So, for the time being we'll BREAK the atomic
2082 * bit of this function and use two 64-bit exchanges instead.
2083 */
2084# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2085DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2086# else
2087DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2088{
2089 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2090 {
2091 /** @todo this is clumsy code */
2092 RTUINT128U u128Ret;
2093 u128Ret.u = u128;
2094 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2095 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2096 return u128Ret.u;
2097 }
2098#if 0 /* later? */
2099 else
2100 {
2101# if RT_INLINE_ASM_GNU_STYLE
2102 __asm__ __volatile__("1:\n\t"
2103 "lock; cmpxchg8b %1\n\t"
2104 "jnz 1b\n\t"
2105 : "=A" (u128),
2106 "=m" (*pu128)
2107 : "0" (*pu128),
2108 "b" ( (uint64_t)u128 ),
2109 "c" ( (uint64_t)(u128 >> 64) ));
2110# else
2111 __asm
2112 {
2113 mov rbx, dword ptr [u128]
2114 mov rcx, dword ptr [u128 + 4]
2115 mov rdi, pu128
2116 mov rax, dword ptr [rdi]
2117 mov rdx, dword ptr [rdi + 4]
2118 retry:
2119 lock cmpxchg16b [rdi]
2120 jnz retry
2121 mov dword ptr [u128], rax
2122 mov dword ptr [u128 + 4], rdx
2123 }
2124# endif
2125 }
2126 return u128;
2127#endif
2128}
2129# endif
2130#endif /* RT_ARCH_AMD64 */
2131
2132
2133/**
2134 * Atomically Reads an unsigned 64-bit value.
2135 *
2136 * @returns Current *pu64 value
2137 * @param pu64 Pointer to the 64-bit variable to read.
2138 * The memory pointed to must be writable.
2139 * @remark This will fault if the memory is read-only!
2140 */
2141#if RT_INLINE_ASM_EXTERNAL
2142DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2143#else
2144DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2145{
2146 uint64_t u64;
2147# ifdef RT_ARCH_AMD64
2148# if RT_INLINE_ASM_GNU_STYLE
2149 __asm__ __volatile__("movq %1, %0\n\t"
2150 : "=r" (u64)
2151 : "m" (*pu64));
2152# else
2153 __asm
2154 {
2155 mov rdx, [pu64]
2156 mov rax, [rdx]
2157 mov [u64], rax
2158 }
2159# endif
2160# else /* !RT_ARCH_AMD64 */
2161# if RT_INLINE_ASM_GNU_STYLE
2162# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2163 uint32_t u32EBX = 0;
2164 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2165 "lock; cmpxchg8b (%5)\n\t"
2166 "xchgl %%ebx, %3\n\t"
2167 : "=A" (u64),
2168 "=m" (*pu64)
2169 : "0" (0),
2170 "m" (u32EBX),
2171 "c" (0),
2172 "S" (pu64));
2173# else /* !PIC */
2174 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2175 : "=A" (u64),
2176 "=m" (*pu64)
2177 : "0" (0),
2178 "b" (0),
2179 "c" (0));
2180# endif
2181# else
2182 __asm
2183 {
2184 xor eax, eax
2185 xor edx, edx
2186 mov edi, pu64
2187 xor ecx, ecx
2188 xor ebx, ebx
2189 lock cmpxchg8b [edi]
2190 mov dword ptr [u64], eax
2191 mov dword ptr [u64 + 4], edx
2192 }
2193# endif
2194# endif /* !RT_ARCH_AMD64 */
2195 return u64;
2196}
2197#endif
2198
2199
2200/**
2201 * Atomically Reads a signed 64-bit value.
2202 *
2203 * @returns Current *pi64 value
2204 * @param pi64 Pointer to the 64-bit variable to read.
2205 * The memory pointed to must be writable.
2206 * @remark This will fault if the memory is read-only!
2207 */
2208DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2209{
2210 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2211}
2212
2213
2214/**
2215 * Atomically Exchange a value whose size might differ
2216 * between platforms or compilers.
2217 *
2218 * @param pu Pointer to the variable to update.
2219 * @param uNew The value to assign to *pu.
2220 */
2221#define ASMAtomicXchgSize(pu, uNew) \
2222 do { \
2223 switch (sizeof(*(pu))) { \
2224 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2225 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2226 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2227 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2228 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2229 } \
2230 } while (0)
2231
2232
2233/**
2234 * Atomically Exchange a pointer value.
2235 *
2236 * @returns Current *ppv value
2237 * @param ppv Pointer to the pointer variable to update.
2238 * @param pv The pointer value to assign to *ppv.
2239 */
2240DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2241{
2242#if ARCH_BITS == 32
2243 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2244#elif ARCH_BITS == 64
2245 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2246#else
2247# error "ARCH_BITS is bogus"
2248#endif
2249}
2250
2251
2252/**
2253 * Atomically Compare and Exchange an unsigned 32-bit value.
2254 *
2255 * @returns true if xchg was done.
2256 * @returns false if xchg wasn't done.
2257 *
2258 * @param pu32 Pointer to the value to update.
2259 * @param u32New The new value to assign to *pu32.
2260 * @param u32Old The old value to compare *pu32 with.
2261 */
2262#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2263DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2264#else
2265DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2266{
2267# if RT_INLINE_ASM_GNU_STYLE
2268 uint8_t u8Ret;
2269 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2270 "setz %1\n\t"
2271 : "=m" (*pu32),
2272 "=qm" (u8Ret)
2273 : "r" (u32New),
2274 "a" (u32Old));
2275 return (bool)u8Ret;
2276
2277# elif RT_INLINE_ASM_USES_INTRIN
2278 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2279
2280# else
2281 uint32_t u32Ret;
2282 __asm
2283 {
2284# ifdef RT_ARCH_AMD64
2285 mov rdx, [pu32]
2286# else
2287 mov edx, [pu32]
2288# endif
2289 mov eax, [u32Old]
2290 mov ecx, [u32New]
2291# ifdef RT_ARCH_AMD64
2292 lock cmpxchg [rdx], ecx
2293# else
2294 lock cmpxchg [edx], ecx
2295# endif
2296 setz al
2297 movzx eax, al
2298 mov [u32Ret], eax
2299 }
2300 return !!u32Ret;
2301# endif
2302}
2303#endif
2304
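/* Illustrative sketch (not part of the original header): the canonical
 * compare-and-exchange retry loop, here used to OR bits into a shared 32-bit
 * word and return the previous value. Names are hypothetical. */
#if 0 /* example only */
DECLINLINE(uint32_t) ExampleAtomicOrU32(volatile uint32_t *pu32, uint32_t fOrMask)
{
    uint32_t u32Old;
    do
        u32Old = *pu32;                                             /* snapshot the current value */
    while (!ASMAtomicCmpXchgU32(pu32, u32Old | fOrMask, u32Old));   /* retry if it changed meanwhile */
    return u32Old;
}
#endif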
2305
2306/**
2307 * Atomically Compare and Exchange a signed 32-bit value.
2308 *
2309 * @returns true if xchg was done.
2310 * @returns false if xchg wasn't done.
2311 *
2312 * @param pi32 Pointer to the value to update.
2313 * @param i32New The new value to assign to *pi32.
2314 * @param i32Old The old value to compare *pi32 with.
2315 */
2316DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2317{
2318 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2319}
2320
2321
2322/**
2323 * Atomically Compare and exchange an unsigned 64-bit value.
2324 *
2325 * @returns true if xchg was done.
2326 * @returns false if xchg wasn't done.
2327 *
2328 * @param pu64 Pointer to the 64-bit variable to update.
2329 * @param u64New The 64-bit value to assign to *pu64.
2330 * @param u64Old The value to compare with.
2331 */
2332#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2333DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2334#else
2335DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2336{
2337# if RT_INLINE_ASM_USES_INTRIN
2338 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2339
2340# elif defined(RT_ARCH_AMD64)
2341# if RT_INLINE_ASM_GNU_STYLE
2342 uint8_t u8Ret;
2343 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2344 "setz %1\n\t"
2345 : "=m" (*pu64),
2346 "=qm" (u8Ret)
2347 : "r" (u64New),
2348 "a" (u64Old));
2349 return (bool)u8Ret;
2350# else
2351 bool fRet;
2352 __asm
2353 {
2354 mov rdx, [pu64]
2355 mov rax, [u64Old]
2356 mov rcx, [u64New]
2357 lock cmpxchg [rdx], rcx
2358 setz al
2359 mov [fRet], al
2360 }
2361 return fRet;
2362# endif
2363# else /* !RT_ARCH_AMD64 */
2364 uint32_t u32Ret;
2365# if RT_INLINE_ASM_GNU_STYLE
2366# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2367 uint32_t u32 = (uint32_t)u64New;
2368 uint32_t u32Spill;
2369 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2370 "lock; cmpxchg8b (%6)\n\t"
2371 "setz %%al\n\t"
2372 "xchgl %%ebx, %4\n\t"
2373 "movzbl %%al, %%eax\n\t"
2374 : "=a" (u32Ret),
2375 "=d" (u32Spill),
2376 "=m" (*pu64)
2377 : "A" (u64Old),
2378 "m" ( u32 ),
2379 "c" ( (uint32_t)(u64New >> 32) ),
2380 "S" (pu64) );
2381# else /* !PIC */
2382 uint32_t u32Spill;
2383 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2384 "setz %%al\n\t"
2385 "movzbl %%al, %%eax\n\t"
2386 : "=a" (u32Ret),
2387 "=d" (u32Spill),
2388 "=m" (*pu64)
2389 : "A" (u64Old),
2390 "b" ( (uint32_t)u64New ),
2391 "c" ( (uint32_t)(u64New >> 32) ));
2392# endif
2393 return (bool)u32Ret;
2394# else
2395 __asm
2396 {
2397 mov ebx, dword ptr [u64New]
2398 mov ecx, dword ptr [u64New + 4]
2399 mov edi, [pu64]
2400 mov eax, dword ptr [u64Old]
2401 mov edx, dword ptr [u64Old + 4]
2402 lock cmpxchg8b [edi]
2403 setz al
2404 movzx eax, al
2405 mov dword ptr [u32Ret], eax
2406 }
2407 return !!u32Ret;
2408# endif
2409# endif /* !RT_ARCH_AMD64 */
2410}
2411#endif
2412
2413
2414/**
2415 * Atomically Compare and exchange a signed 64-bit value.
2416 *
2417 * @returns true if xchg was done.
2418 * @returns false if xchg wasn't done.
2419 *
2420 * @param pi64 Pointer to the 64-bit variable to update.
2421 * @param i64 The 64-bit value to assign to *pi64.
2422 * @param i64Old The value to compare with.
2423 */
2424DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2425{
2426 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2427}
2428
2429
2430/** @def ASMAtomicCmpXchgSize
2431 * Atomically Compare and Exchange a value whose size might differ
2432 * between platforms or compilers.
2433 *
2434 * @param pu Pointer to the value to update.
2435 * @param uNew The new value to assign to *pu.
2436 * @param uOld The old value to compare *pu with.
2437 * @param fRc Where to store the result.
2438 */
2439#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2440 do { \
2441 switch (sizeof(*(pu))) { \
2442 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2443 break; \
2444 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2445 break; \
2446 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2447 (fRc) = false; \
2448 break; \
2449 } \
2450 } while (0)
2451
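/* Example (illustrative sketch, not part of the original header, compiled out):
 * ASMAtomicCmpXchgSize picks the 32-bit or 64-bit worker from sizeof(*pu), so
 * it can be used on types whose width differs between targets, e.g. the
 * pointer sized RTHCUINTPTR.  The helper name is hypothetical. */
#if 0
static bool ExampleCmpXchgUIntPtr(volatile RTHCUINTPTR *pu, RTHCUINTPTR uNew, RTHCUINTPTR uOld)
{
    bool fRc;
    ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc);  /* expands to the U32 or U64 variant. */
    return fRc;
}
#endif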
2452
2453/**
2454 * Atomically Compare and Exchange a pointer value.
2455 *
2456 * @returns true if xchg was done.
2457 * @returns false if xchg wasn't done.
2458 *
2459 * @param ppv Pointer to the value to update.
2460 * @param pvNew The new value to assign to *ppv.
2461 * @param pvOld The old value to compare *ppv with.
2462 */
2463DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2464{
2465#if ARCH_BITS == 32
2466 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2467#elif ARCH_BITS == 64
2468 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2469#else
2470# error "ARCH_BITS is bogus"
2471#endif
2472}
2473
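/* Example (illustrative sketch, not part of the original header, compiled out):
 * lock-free insertion at the head of a singly linked list using
 * ASMAtomicCmpXchgPtr.  The EXAMPLENODE structure and helper are hypothetical. */
#if 0
typedef struct EXAMPLENODE
{
    struct EXAMPLENODE *pNext;
    uint32_t            uPayload;
} EXAMPLENODE;

static void ExampleListPush(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode)
{
    void *pvHead;
    do
    {
        pvHead = *(void * volatile *)ppHead;    /* current head. */
        pNode->pNext = (EXAMPLENODE *)pvHead;   /* link the new node in front of it. */
    } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pvHead));
}
#endif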
2474
2475/**
2476 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2477 * passes back old value.
2478 *
2479 * @returns true if xchg was done.
2480 * @returns false if xchg wasn't done.
2481 *
2482 * @param pu32 Pointer to the value to update.
2483 * @param u32New The new value to assign to *pu32.
2484 * @param u32Old The old value to compare *pu32 with.
2485 * @param pu32Old Pointer to store the old value at.
2486 */
2487#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2488DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2489#else
2490DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2491{
2492# if RT_INLINE_ASM_GNU_STYLE
2493 uint8_t u8Ret;
2494 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2495 "setz %1\n\t"
2496 : "=m" (*pu32),
2497 "=qm" (u8Ret),
2498 "=a" (*pu32Old)
2499 : "r" (u32New),
2500 "a" (u32Old));
2501 return (bool)u8Ret;
2502
2503# elif RT_INLINE_ASM_USES_INTRIN
2504 return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2505
2506# else
2507 uint32_t u32Ret;
2508 __asm
2509 {
2510# ifdef RT_ARCH_AMD64
2511 mov rdx, [pu32]
2512# else
2513 mov edx, [pu32]
2514# endif
2515 mov eax, [u32Old]
2516 mov ecx, [u32New]
2517# ifdef RT_ARCH_AMD64
2518 lock cmpxchg [rdx], ecx
2519 mov rdx, [pu32Old]
2520 mov [rdx], eax
2521# else
2522 lock cmpxchg [edx], ecx
2523 mov edx, [pu32Old]
2524 mov [edx], eax
2525# endif
2526 setz al
2527 movzx eax, al
2528 mov [u32Ret], eax
2529 }
2530 return !!u32Ret;
2531# endif
2532}
2533#endif
2534
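/* Example (illustrative sketch, not part of the original header, compiled out):
 * the Ex variant hands back the value actually found, so a retry loop can feed
 * it straight into the next attempt instead of re-reading the variable.  The
 * helper ExampleAtomicMaxU32 (store the maximum of *pu32 and u32) is
 * hypothetical. */
#if 0
static void ExampleAtomicMaxU32(volatile uint32_t *pu32, uint32_t u32)
{
    uint32_t u32Old = *pu32;
    while (u32 > u32Old)                /* only update while we would increase it. */
        if (ASMAtomicCmpXchgExU32(pu32, u32, u32Old, &u32Old))
            break;                      /* on failure u32Old now holds the current value. */
}
#endif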
2535
2536/**
2537 * Atomically Compare and Exchange a signed 32-bit value, additionally
2538 * passes back old value.
2539 *
2540 * @returns true if xchg was done.
2541 * @returns false if xchg wasn't done.
2542 *
2543 * @param pi32 Pointer to the value to update.
2544 * @param i32New The new value to assign to *pi32.
2545 * @param i32Old The old value to compare *pi32 with.
2546 * @param pi32Old Pointer to store the old value at.
2547 */
2548DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2549{
2550 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2551}
2552
2553
2554/**
2555 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2556 * passing back old value.
2557 *
2558 * @returns true if xchg was done.
2559 * @returns false if xchg wasn't done.
2560 *
2561 * @param pu64 Pointer to the 64-bit variable to update.
2562 * @param u64New The 64-bit value to assign to *pu64.
2563 * @param u64Old The value to compare with.
2564 * @param pu64Old Pointer to store the old value at.
2565 */
2566#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2567DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2568#else
2569DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2570{
2571# if RT_INLINE_ASM_USES_INTRIN
2572 return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2573
2574# elif defined(RT_ARCH_AMD64)
2575# if RT_INLINE_ASM_GNU_STYLE
2576 uint8_t u8Ret;
2577 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2578 "setz %1\n\t"
2579 : "=m" (*pu64),
2580 "=qm" (u8Ret),
2581 "=a" (*pu64Old)
2582 : "r" (u64New),
2583 "a" (u64Old));
2584 return (bool)u8Ret;
2585# else
2586 bool fRet;
2587 __asm
2588 {
2589 mov rdx, [pu64]
2590 mov rax, [u64Old]
2591 mov rcx, [u64New]
2592 lock cmpxchg [rdx], rcx
2593 mov rdx, [pu64Old]
2594 mov [rdx], rax
2595 setz al
2596 mov [fRet], al
2597 }
2598 return fRet;
2599# endif
2600# else /* !RT_ARCH_AMD64 */
2601# if RT_INLINE_ASM_GNU_STYLE
2602 uint64_t u64Ret;
2603# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2604 /* NB: this code uses a memory clobber description, because the clean
2605 * solution with an output value for *pu64 makes gcc run out of registers.
2606 * This will cause suboptimal code, and anyone with a better solution is
2607 * welcome to improve this. */
2608 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2609 "lock; cmpxchg8b %3\n\t"
2610 "xchgl %%ebx, %1\n\t"
2611 : "=A" (u64Ret)
2612 : "DS" ((uint32_t)u64New),
2613 "c" ((uint32_t)(u64New >> 32)),
2614 "m" (*pu64),
2615 "0" (u64Old)
2616 : "memory" );
2617# else /* !PIC */
2618 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2619 : "=A" (u64Ret),
2620 "=m" (*pu64)
2621 : "b" ((uint32_t)u64New),
2622 "c" ((uint32_t)(u64New >> 32)),
2623 "m" (*pu64),
2624 "0" (u64Old));
2625# endif
2626 *pu64Old = u64Ret;
2627 return u64Ret == u64Old;
2628# else
2629 uint32_t u32Ret;
2630 __asm
2631 {
2632 mov ebx, dword ptr [u64New]
2633 mov ecx, dword ptr [u64New + 4]
2634 mov edi, [pu64]
2635 mov eax, dword ptr [u64Old]
2636 mov edx, dword ptr [u64Old + 4]
2637 lock cmpxchg8b [edi]
2638 mov ebx, [pu64Old]
2639 mov [ebx], eax
2640 setz al
2641 movzx eax, al
2642 add ebx, 4
2643 mov [ebx], edx
2644 mov dword ptr [u32Ret], eax
2645 }
2646 return !!u32Ret;
2647# endif
2648# endif /* !RT_ARCH_AMD64 */
2649}
2650#endif
2651
2652
2653/**
2654 * Atomically Compare and exchange a signed 64-bit value, additionally
2655 * passing back old value.
2656 *
2657 * @returns true if xchg was done.
2658 * @returns false if xchg wasn't done.
2659 *
2660 * @param pi64 Pointer to the 64-bit variable to update.
2661 * @param i64 The 64-bit value to assign to *pi64.
2662 * @param i64Old The value to compare with.
2663 * @param pi64Old Pointer to store the old value at.
2664 */
2665DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2666{
2667 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2668}
2669
2670
2671/** @def ASMAtomicCmpXchgExSize
2672 * Atomically Compare and Exchange a value whose size might differ
2673 * between platforms or compilers. Additionally passes back old value.
2674 *
2675 * @param pu Pointer to the value to update.
2676 * @param uNew The new value to assign to *pu.
2677 * @param uOld The old value to compare *pu with.
2678 * @param fRc Where to store the result.
2679 * @param uOldVal Where to store the old value.
2680 */
2681#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
2682 do { \
2683 switch (sizeof(*(pu))) { \
2684 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
2685 break; \
2686 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
2687 break; \
2688 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2689 (fRc) = false; \
2690 (uOldVal) = 0; \
2691 break; \
2692 } \
2693 } while (0)
2694
2695
2696/**
2697 * Atomically Compare and Exchange a pointer value, additionally
2698 * passing back old value.
2699 *
2700 * @returns true if xchg was done.
2701 * @returns false if xchg wasn't done.
2702 *
2703 * @param ppv Pointer to the value to update.
2704 * @param pvNew The new value to assign to *ppv.
2705 * @param pvOld The old value to compare *ppv with.
2706 * @param ppvOld Pointer to store the old value at.
2707 */
2708DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2709{
2710#if ARCH_BITS == 32
2711 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2712#elif ARCH_BITS == 64
2713 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2714#else
2715# error "ARCH_BITS is bogus"
2716#endif
2717}
2718
2719
2720/**
2721 * Atomically increment a 32-bit value.
2722 *
2723 * @returns The new value.
2724 * @param pu32 Pointer to the value to increment.
2725 */
2726#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2727DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2728#else
2729DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2730{
2731 uint32_t u32;
2732# if RT_INLINE_ASM_USES_INTRIN
2733 u32 = _InterlockedIncrement((long *)pu32);
2734 return u32;
2735
2736# elif RT_INLINE_ASM_GNU_STYLE
2737 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2738 : "=r" (u32),
2739 "=m" (*pu32)
2740 : "0" (1)
2741 : "memory");
2742 return u32+1;
2743# else
2744 __asm
2745 {
2746 mov eax, 1
2747# ifdef RT_ARCH_AMD64
2748 mov rdx, [pu32]
2749 lock xadd [rdx], eax
2750# else
2751 mov edx, [pu32]
2752 lock xadd [edx], eax
2753# endif
2754 mov u32, eax
2755 }
2756 return u32+1;
2757# endif
2758}
2759#endif
2760
2761
2762/**
2763 * Atomically increment a signed 32-bit value.
2764 *
2765 * @returns The new value.
2766 * @param pi32 Pointer to the value to increment.
2767 */
2768DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2769{
2770 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2771}
2772
2773
2774/**
2775 * Atomically decrement an unsigned 32-bit value.
2776 *
2777 * @returns The new value.
2778 * @param pu32 Pointer to the value to decrement.
2779 */
2780#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2781DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2782#else
2783DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2784{
2785 uint32_t u32;
2786# if RT_INLINE_ASM_USES_INTRIN
2787 u32 = _InterlockedDecrement((long *)pu32);
2788 return u32;
2789
2790# elif RT_INLINE_ASM_GNU_STYLE
2791 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2792 : "=r" (u32),
2793 "=m" (*pu32)
2794 : "0" (-1)
2795 : "memory");
2796 return u32-1;
2797# else
2798 __asm
2799 {
2800 mov eax, -1
2801# ifdef RT_ARCH_AMD64
2802 mov rdx, [pu32]
2803 lock xadd [rdx], eax
2804# else
2805 mov edx, [pu32]
2806 lock xadd [edx], eax
2807# endif
2808 mov u32, eax
2809 }
2810 return u32-1;
2811# endif
2812}
2813#endif
2814
2815
2816/**
2817 * Atomically decrement a signed 32-bit value.
2818 *
2819 * @returns The new value.
2820 * @param pi32 Pointer to the value to decrement.
2821 */
2822DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2823{
2824 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2825}
2826
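/* Example (illustrative sketch, not part of the original header, compiled out):
 * a minimal reference counting pattern; ASMAtomicIncU32 and ASMAtomicDecU32
 * both return the new count, so the release side can spot the final reference.
 * EXAMPLEOBJ and ExampleDestroy are hypothetical. */
#if 0
typedef struct EXAMPLEOBJ
{
    volatile uint32_t cRefs;
    /* ... payload ... */
} EXAMPLEOBJ;

static void ExampleDestroy(EXAMPLEOBJ *pObj);   /* hypothetical destructor. */

static void ExampleRetain(EXAMPLEOBJ *pObj)
{
    ASMAtomicIncU32(&pObj->cRefs);
}

static void ExampleRelease(EXAMPLEOBJ *pObj)
{
    if (ASMAtomicDecU32(&pObj->cRefs) == 0)     /* last reference dropped? */
        ExampleDestroy(pObj);
}
#endif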
2827
2828/**
2829 * Atomically Or an unsigned 32-bit value.
2830 *
2831 * @param pu32 Pointer to the variable to OR u32 with.
2832 * @param u32 The value to OR *pu32 with.
2833 */
2834#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2835DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2836#else
2837DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2838{
2839# if RT_INLINE_ASM_USES_INTRIN
2840 _InterlockedOr((long volatile *)pu32, (long)u32);
2841
2842# elif RT_INLINE_ASM_GNU_STYLE
2843 __asm__ __volatile__("lock; orl %1, %0\n\t"
2844 : "=m" (*pu32)
2845 : "ir" (u32));
2846# else
2847 __asm
2848 {
2849 mov eax, [u32]
2850# ifdef RT_ARCH_AMD64
2851 mov rdx, [pu32]
2852 lock or [rdx], eax
2853# else
2854 mov edx, [pu32]
2855 lock or [edx], eax
2856# endif
2857 }
2858# endif
2859}
2860#endif
2861
2862
2863/**
2864 * Atomically Or a signed 32-bit value.
2865 *
2866 * @param pi32 Pointer to the variable to OR i32 with.
2867 * @param i32 The value to OR *pi32 with.
2868 */
2869DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2870{
2871 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2872}
2873
2874
2875/**
2876 * Atomically And an unsigned 32-bit value.
2877 *
2878 * @param pu32 Pointer to the variable to AND u32 with.
2879 * @param u32 The value to AND *pu32 with.
2880 */
2881#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2882DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2883#else
2884DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2885{
2886# if RT_INLINE_ASM_USES_INTRIN
2887 _InterlockedAnd((long volatile *)pu32, u32);
2888
2889# elif RT_INLINE_ASM_GNU_STYLE
2890 __asm__ __volatile__("lock; andl %1, %0\n\t"
2891 : "=m" (*pu32)
2892 : "ir" (u32));
2893# else
2894 __asm
2895 {
2896 mov eax, [u32]
2897# ifdef RT_ARCH_AMD64
2898 mov rdx, [pu32]
2899 lock and [rdx], eax
2900# else
2901 mov edx, [pu32]
2902 lock and [edx], eax
2903# endif
2904 }
2905# endif
2906}
2907#endif
2908
2909
2910/**
2911 * Atomically And a signed 32-bit value.
2912 *
2913 * @param pi32 Pointer to the variable to AND i32 with.
2914 * @param i32 The value to AND *pi32 with.
2915 */
2916DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2917{
2918 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2919}
2920
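/* Example (illustrative sketch, not part of the original header, compiled out):
 * setting and clearing flag bits in a shared status word with ASMAtomicOrU32
 * and ASMAtomicAndU32.  The EXAMPLE_F_BUSY flag and helper names are
 * hypothetical. */
#if 0
# define EXAMPLE_F_BUSY 0x00000001U

static void ExampleSetBusy(volatile uint32_t *pfStatus)
{
    ASMAtomicOrU32(pfStatus, EXAMPLE_F_BUSY);       /* atomically set the bit. */
}

static void ExampleClearBusy(volatile uint32_t *pfStatus)
{
    ASMAtomicAndU32(pfStatus, ~EXAMPLE_F_BUSY);     /* atomically clear the bit. */
}
#endif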
2921
2922/**
2923 * Invalidate page.
2924 *
2925 * @param pv Address of the page to invalidate.
2926 */
2927#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2928DECLASM(void) ASMInvalidatePage(void *pv);
2929#else
2930DECLINLINE(void) ASMInvalidatePage(void *pv)
2931{
2932# if RT_INLINE_ASM_USES_INTRIN
2933 __invlpg(pv);
2934
2935# elif RT_INLINE_ASM_GNU_STYLE
2936 __asm__ __volatile__("invlpg %0\n\t"
2937 : : "m" (*(uint8_t *)pv));
2938# else
2939 __asm
2940 {
2941# ifdef RT_ARCH_AMD64
2942 mov rax, [pv]
2943 invlpg [rax]
2944# else
2945 mov eax, [pv]
2946 invlpg [eax]
2947# endif
2948 }
2949# endif
2950}
2951#endif
2952
2953
2954#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2955# if PAGE_SIZE != 0x1000
2956# error "PAGE_SIZE is not 0x1000!"
2957# endif
2958#endif
2959
2960/**
2961 * Zeros a 4K memory page.
2962 *
2963 * @param pv Pointer to the memory block. This must be page aligned.
2964 */
2965#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2966DECLASM(void) ASMMemZeroPage(volatile void *pv);
2967# else
2968DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2969{
2970# if RT_INLINE_ASM_USES_INTRIN
2971# ifdef RT_ARCH_AMD64
2972 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2973# else
2974 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2975# endif
2976
2977# elif RT_INLINE_ASM_GNU_STYLE
2978 RTUINTREG uDummy;
2979# ifdef RT_ARCH_AMD64
2980 __asm__ __volatile__ ("rep stosq"
2981 : "=D" (pv),
2982 "=c" (uDummy)
2983 : "0" (pv),
2984 "c" (0x1000 >> 3),
2985 "a" (0)
2986 : "memory");
2987# else
2988 __asm__ __volatile__ ("rep stosl"
2989 : "=D" (pv),
2990 "=c" (uDummy)
2991 : "0" (pv),
2992 "c" (0x1000 >> 2),
2993 "a" (0)
2994 : "memory");
2995# endif
2996# else
2997 __asm
2998 {
2999# ifdef RT_ARCH_AMD64
3000 xor rax, rax
3001 mov ecx, 0200h
3002 mov rdi, [pv]
3003 rep stosq
3004# else
3005 xor eax, eax
3006 mov ecx, 0400h
3007 mov edi, [pv]
3008 rep stosd
3009# endif
3010 }
3011# endif
3012}
3013# endif
3014
3015
3016/**
3017 * Zeros a memory block with a 32-bit aligned size.
3018 *
3019 * @param pv Pointer to the memory block.
3020 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3021 */
3022#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3023DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3024#else
3025DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3026{
3027# if RT_INLINE_ASM_USES_INTRIN
3028 __stosd((unsigned long *)pv, 0, cb >> 2);
3029
3030# elif RT_INLINE_ASM_GNU_STYLE
3031 __asm__ __volatile__ ("rep stosl"
3032 : "=D" (pv),
3033 "=c" (cb)
3034 : "0" (pv),
3035 "1" (cb >> 2),
3036 "a" (0)
3037 : "memory");
3038# else
3039 __asm
3040 {
3041 xor eax, eax
3042# ifdef RT_ARCH_AMD64
3043 mov rcx, [cb]
3044 shr rcx, 2
3045 mov rdi, [pv]
3046# else
3047 mov ecx, [cb]
3048 shr ecx, 2
3049 mov edi, [pv]
3050# endif
3051 rep stosd
3052 }
3053# endif
3054}
3055#endif
3056
3057
3058/**
3059 * Fills a memory block with a 32-bit aligned size.
3060 *
3061 * @param pv Pointer to the memory block.
3062 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3063 * @param u32 The value to fill with.
3064 */
3065#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3066DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3067#else
3068DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3069{
3070# if RT_INLINE_ASM_USES_INTRIN
3071 __stosd((unsigned long *)pv, u32, cb >> 2);
3072
3073# elif RT_INLINE_ASM_GNU_STYLE
3074 __asm__ __volatile__ ("rep stosl"
3075 : "=D" (pv),
3076 "=c" (cb)
3077 : "0" (pv),
3078 "1" (cb >> 2),
3079 "a" (u32)
3080 : "memory");
3081# else
3082 __asm
3083 {
3084# ifdef RT_ARCH_AMD64
3085 mov rcx, [cb]
3086 shr rcx, 2
3087 mov rdi, [pv]
3088# else
3089 mov ecx, [cb]
3090 shr ecx, 2
3091 mov edi, [pv]
3092# endif
3093 mov eax, [u32]
3094 rep stosd
3095 }
3096# endif
3097}
3098#endif
3099
3100
3101/**
3102 * Checks if a memory block is filled with the specified byte.
3103 *
3104 * This is a sort of inverted memchr.
3105 *
3106 * @returns Pointer to the byte which doesn't equal u8.
3107 * @returns NULL if all equal to u8.
3108 *
3109 * @param pv Pointer to the memory block.
3110 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3111 * @param u8 The value it's supposed to be filled with.
3112 */
3113#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3114DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
3115#else
3116DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3117{
3118/** @todo rewrite this in inline assembly. */
3119 uint8_t const *pb = (uint8_t const *)pv;
3120 for (; cb; cb--, pb++)
3121 if (RT_UNLIKELY(*pb != u8))
3122 return (void *)pb;
3123 return NULL;
3124}
3125#endif
3126
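/* Example (illustrative sketch, not part of the original header, compiled out):
 * zeroing a page with ASMMemZeroPage and verifying the result with
 * ASMMemIsAll8, which returns NULL when every byte matches.  The helper name
 * is hypothetical. */
#if 0
static bool ExampleZeroAndCheckPage(void *pvPage)   /* pvPage must be page aligned. */
{
    ASMMemZeroPage(pvPage);
    return ASMMemIsAll8(pvPage, 0x1000 /*PAGE_SIZE*/, 0) == NULL;
}
#endif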
3127
3128
3129/**
3130 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
3131 *
3132 * @returns u32F1 * u32F2.
3133 */
3134#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3135DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
3136#else
3137DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
3138{
3139# ifdef RT_ARCH_AMD64
3140 return (uint64_t)u32F1 * u32F2;
3141# else /* !RT_ARCH_AMD64 */
3142 uint64_t u64;
3143# if RT_INLINE_ASM_GNU_STYLE
3144 __asm__ __volatile__("mull %%edx"
3145 : "=A" (u64)
3146 : "a" (u32F2), "d" (u32F1));
3147# else
3148 __asm
3149 {
3150 mov edx, [u32F1]
3151 mov eax, [u32F2]
3152 mul edx
3153 mov dword ptr [u64], eax
3154 mov dword ptr [u64 + 4], edx
3155 }
3156# endif
3157 return u64;
3158# endif /* !RT_ARCH_AMD64 */
3159}
3160#endif
3161
3162
3163/**
3164 * Multiplies two signed 32-bit values returning a signed 64-bit result.
3165 *
3166 * @returns i32F1 * i32F2.
3167 */
3168#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3169DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
3170#else
3171DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
3172{
3173# ifdef RT_ARCH_AMD64
3174 return (int64_t)i32F1 * i32F2;
3175# else /* !RT_ARCH_AMD64 */
3176 int64_t i64;
3177# if RT_INLINE_ASM_GNU_STYLE
3178 __asm__ __volatile__("imull %%edx"
3179 : "=A" (i64)
3180 : "a" (i32F2), "d" (i32F1));
3181# else
3182 __asm
3183 {
3184 mov edx, [i32F1]
3185 mov eax, [i32F2]
3186 imul edx
3187 mov dword ptr [i64], eax
3188 mov dword ptr [i64 + 4], edx
3189 }
3190# endif
3191 return i64;
3192# endif /* !RT_ARCH_AMD64 */
3193}
3194#endif
3195
3196
3197/**
3198 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
3199 *
3200 * @returns u64 / u32.
3201 */
3202#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3203DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
3204#else
3205DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
3206{
3207# ifdef RT_ARCH_AMD64
3208 return (uint32_t)(u64 / u32);
3209# else /* !RT_ARCH_AMD64 */
3210# if RT_INLINE_ASM_GNU_STYLE
3211 RTUINTREG uDummy;
3212 __asm__ __volatile__("divl %3"
3213 : "=a" (u32), "=d"(uDummy)
3214 : "A" (u64), "r" (u32));
3215# else
3216 __asm
3217 {
3218 mov eax, dword ptr [u64]
3219 mov edx, dword ptr [u64 + 4]
3220 mov ecx, [u32]
3221 div ecx
3222 mov [u32], eax
3223 }
3224# endif
3225 return u32;
3226# endif /* !RT_ARCH_AMD64 */
3227}
3228#endif
3229
3230
3231/**
3232 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
3233 *
3234 * @returns i64 / i32.
3235 */
3236#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3237DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
3238#else
3239DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
3240{
3241# ifdef RT_ARCH_AMD64
3242 return (int32_t)(i64 / i32);
3243# else /* !RT_ARCH_AMD64 */
3244# if RT_INLINE_ASM_GNU_STYLE
3245 RTUINTREG iDummy;
3246 __asm__ __volatile__("idivl %3"
3247 : "=a" (i32), "=d"(iDummy)
3248 : "A" (i64), "r" (i32));
3249# else
3250 __asm
3251 {
3252 mov eax, dword ptr [i64]
3253 mov edx, dword ptr [i64 + 4]
3254 mov ecx, [i32]
3255 idiv ecx
3256 mov [i32], eax
3257 }
3258# endif
3259 return i32;
3260# endif /* !RT_ARCH_AMD64 */
3261}
3262#endif
3263
3264
3265/**
3266 * Multiplies a 64-bit value by a 32-bit integer and divides the result by a 32-bit integer
3267 * using a 96-bit intermediate result.
3268 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
3269 * __udivdi3 and __umoddi3 even if this inline function is not used.
3270 *
3271 * @returns (u64A * u32B) / u32C.
3272 * @param u64A The 64-bit value.
3273 * @param u32B The 32-bit value to multiply A by.
3274 * @param u32C The 32-bit value to divide A*B by.
3275 */
3276#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
3277DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
3278#else
3279DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
3280{
3281# if RT_INLINE_ASM_GNU_STYLE
3282# ifdef RT_ARCH_AMD64
3283 uint64_t u64Result, u64Spill;
3284 __asm__ __volatile__("mulq %2\n\t"
3285 "divq %3\n\t"
3286 : "=a" (u64Result),
3287 "=d" (u64Spill)
3288 : "r" ((uint64_t)u32B),
3289 "r" ((uint64_t)u32C),
3290 "0" (u64A),
3291 "1" (0));
3292 return u64Result;
3293# else
3294 uint32_t u32Dummy;
3295 uint64_t u64Result;
3296 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
3297 edx = u64Lo.hi = (u64A.lo * u32B).hi */
3298 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
3299 eax = u64A.hi */
3300 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
3301 edx = u32C */
3302 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
3303 edx = u32B */
3304 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
3305 edx = u64Hi.hi = (u64A.hi * u32B).hi */
3306 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
3307 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
3308 "divl %%ecx \n\t" /* eax = u64Hi / u32C
3309 edx = u64Hi % u32C */
3310 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
3311 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
3312 "divl %%ecx \n\t" /* u64Result.lo */
3313 "movl %%edi,%%edx \n\t" /* u64Result.hi */
3314 : "=A"(u64Result), "=c"(u32Dummy),
3315 "=S"(u32Dummy), "=D"(u32Dummy)
3316 : "a"((uint32_t)u64A),
3317 "S"((uint32_t)(u64A >> 32)),
3318 "c"(u32B),
3319 "D"(u32C));
3320 return u64Result;
3321# endif
3322# else
3323 RTUINT64U u;
3324 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
3325 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
3326 u64Hi += (u64Lo >> 32);
3327 u.s.Hi = (uint32_t)(u64Hi / u32C);
3328 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
3329 return u.u;
3330# endif
3331}
3332#endif
3333
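/* Example (illustrative sketch, not part of the original header, compiled out):
 * scaling a 64-bit tick count to nanoseconds.  cTicks * 10^9 can overflow
 * 64 bits, which is exactly what the 96-bit intermediate result avoids.  The
 * helper name is hypothetical and uHzFreq must be non-zero. */
#if 0
static uint64_t ExampleTicksToNano(uint64_t cTicks, uint32_t uHzFreq)
{
    return ASMMultU64ByU32DivByU32(cTicks, 1000000000U, uHzFreq);
}
#endif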
3334
3335/**
3336 * Probes a byte pointer for read access.
3337 *
3338 * While the function will fault if the byte is not read accessible,
3339 * the idea is to do this in a safe place like before acquiring locks
3340 * and such like.
3341 *
3342 * Also, this function guarantees that an eager compiler is not going
3343 * to optimize the probing away.
3344 *
3345 * @param pvByte Pointer to the byte.
3346 */
3347#if RT_INLINE_ASM_EXTERNAL
3348DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3349#else
3350DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3351{
3352 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3353 uint8_t u8;
3354# if RT_INLINE_ASM_GNU_STYLE
3355 __asm__ __volatile__("movb (%1), %0\n\t"
3356 : "=r" (u8)
3357 : "r" (pvByte));
3358# else
3359 __asm
3360 {
3361# ifdef RT_ARCH_AMD64
3362 mov rax, [pvByte]
3363 mov al, [rax]
3364# else
3365 mov eax, [pvByte]
3366 mov al, [eax]
3367# endif
3368 mov [u8], al
3369 }
3370# endif
3371 return u8;
3372}
3373#endif
3374
3375/**
3376 * Probes a buffer for read access page by page.
3377 *
3378 * While the function will fault if the buffer is not fully read
3379 * accessible, the idea is to do this in a safe place like before
3380 * acquiring locks and such like.
3381 *
3382 * Also, this function guarantees that an eager compiler is not going
3383 * to optimize the probing away.
3384 *
3385 * @param pvBuf Pointer to the buffer.
3386 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3387 */
3388DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3389{
3390 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3391 /* the first byte */
3392 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3393 ASMProbeReadByte(pu8);
3394
3395 /* the pages in between. */
3396 while (cbBuf > /*PAGE_SIZE*/0x1000)
3397 {
3398 ASMProbeReadByte(pu8);
3399 cbBuf -= /*PAGE_SIZE*/0x1000;
3400 pu8 += /*PAGE_SIZE*/0x1000;
3401 }
3402
3403 /* the last byte */
3404 ASMProbeReadByte(pu8 + cbBuf - 1);
3405}
3406
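/* Example (illustrative sketch, not part of the original header, compiled out):
 * probing a caller supplied buffer up front so that any page fault is taken
 * before the lock is acquired.  ExampleLock, ExampleUnlock and ExampleConsume
 * are hypothetical. */
#if 0
static void ExampleLock(void);
static void ExampleUnlock(void);
static void ExampleConsume(const void *pvBuf, size_t cbBuf);

static void ExampleProcessBuffer(const void *pvBuf, size_t cbBuf)
{
    ASMProbeReadBuffer(pvBuf, cbBuf);   /* fault here, in a safe context... */
    ExampleLock();
    ExampleConsume(pvBuf, cbBuf);       /* ...not here, while holding the lock. */
    ExampleUnlock();
}
#endif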
3407
3408/** @def ASMBreakpoint
3409 * Debugger Breakpoint.
3410 * @remark In the gnu world we add a nop instruction after the int3 to
3411 * force gdb to remain at the int3 source line.
3412 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
3413 * @internal
3414 */
3415#if RT_INLINE_ASM_GNU_STYLE
3416# ifndef __L4ENV__
3417# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
3418# else
3419# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
3420# endif
3421#else
3422# define ASMBreakpoint() __debugbreak()
3423#endif
3424
3425
3426
3427/** @defgroup grp_inline_bits Bit Operations
3428 * @{
3429 */
3430
3431
3432/**
3433 * Sets a bit in a bitmap.
3434 *
3435 * @param pvBitmap Pointer to the bitmap.
3436 * @param iBit The bit to set.
3437 */
3438#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3439DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3440#else
3441DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3442{
3443# if RT_INLINE_ASM_USES_INTRIN
3444 _bittestandset((long *)pvBitmap, iBit);
3445
3446# elif RT_INLINE_ASM_GNU_STYLE
3447 __asm__ __volatile__ ("btsl %1, %0"
3448 : "=m" (*(volatile long *)pvBitmap)
3449 : "Ir" (iBit)
3450 : "memory");
3451# else
3452 __asm
3453 {
3454# ifdef RT_ARCH_AMD64
3455 mov rax, [pvBitmap]
3456 mov edx, [iBit]
3457 bts [rax], edx
3458# else
3459 mov eax, [pvBitmap]
3460 mov edx, [iBit]
3461 bts [eax], edx
3462# endif
3463 }
3464# endif
3465}
3466#endif
3467
3468
3469/**
3470 * Atomically sets a bit in a bitmap.
3471 *
3472 * @param pvBitmap Pointer to the bitmap.
3473 * @param iBit The bit to set.
3474 */
3475#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3476DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3477#else
3478DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3479{
3480# if RT_INLINE_ASM_USES_INTRIN
3481 _interlockedbittestandset((long *)pvBitmap, iBit);
3482# elif RT_INLINE_ASM_GNU_STYLE
3483 __asm__ __volatile__ ("lock; btsl %1, %0"
3484 : "=m" (*(volatile long *)pvBitmap)
3485 : "Ir" (iBit)
3486 : "memory");
3487# else
3488 __asm
3489 {
3490# ifdef RT_ARCH_AMD64
3491 mov rax, [pvBitmap]
3492 mov edx, [iBit]
3493 lock bts [rax], edx
3494# else
3495 mov eax, [pvBitmap]
3496 mov edx, [iBit]
3497 lock bts [eax], edx
3498# endif
3499 }
3500# endif
3501}
3502#endif
3503
3504
3505/**
3506 * Clears a bit in a bitmap.
3507 *
3508 * @param pvBitmap Pointer to the bitmap.
3509 * @param iBit The bit to clear.
3510 */
3511#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3512DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3513#else
3514DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3515{
3516# if RT_INLINE_ASM_USES_INTRIN
3517 _bittestandreset((long *)pvBitmap, iBit);
3518
3519# elif RT_INLINE_ASM_GNU_STYLE
3520 __asm__ __volatile__ ("btrl %1, %0"
3521 : "=m" (*(volatile long *)pvBitmap)
3522 : "Ir" (iBit)
3523 : "memory");
3524# else
3525 __asm
3526 {
3527# ifdef RT_ARCH_AMD64
3528 mov rax, [pvBitmap]
3529 mov edx, [iBit]
3530 btr [rax], edx
3531# else
3532 mov eax, [pvBitmap]
3533 mov edx, [iBit]
3534 btr [eax], edx
3535# endif
3536 }
3537# endif
3538}
3539#endif
3540
3541
3542/**
3543 * Atomically clears a bit in a bitmap.
3544 *
3545 * @param pvBitmap Pointer to the bitmap.
3546 * @param iBit The bit to clear.
3547 * @remark No memory barrier, take care on SMP.
3548 */
3549#if RT_INLINE_ASM_EXTERNAL
3550DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3551#else
3552DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3553{
3554# if RT_INLINE_ASM_GNU_STYLE
3555 __asm__ __volatile__ ("lock; btrl %1, %0"
3556 : "=m" (*(volatile long *)pvBitmap)
3557 : "Ir" (iBit)
3558 : "memory");
3559# else
3560 __asm
3561 {
3562# ifdef RT_ARCH_AMD64
3563 mov rax, [pvBitmap]
3564 mov edx, [iBit]
3565 lock btr [rax], edx
3566# else
3567 mov eax, [pvBitmap]
3568 mov edx, [iBit]
3569 lock btr [eax], edx
3570# endif
3571 }
3572# endif
3573}
3574#endif
3575
3576
3577/**
3578 * Toggles a bit in a bitmap.
3579 *
3580 * @param pvBitmap Pointer to the bitmap.
3581 * @param iBit The bit to toggle.
3582 */
3583#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3584DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3585#else
3586DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3587{
3588# if RT_INLINE_ASM_USES_INTRIN
3589 _bittestandcomplement((long *)pvBitmap, iBit);
3590# elif RT_INLINE_ASM_GNU_STYLE
3591 __asm__ __volatile__ ("btcl %1, %0"
3592 : "=m" (*(volatile long *)pvBitmap)
3593 : "Ir" (iBit)
3594 : "memory");
3595# else
3596 __asm
3597 {
3598# ifdef RT_ARCH_AMD64
3599 mov rax, [pvBitmap]
3600 mov edx, [iBit]
3601 btc [rax], edx
3602# else
3603 mov eax, [pvBitmap]
3604 mov edx, [iBit]
3605 btc [eax], edx
3606# endif
3607 }
3608# endif
3609}
3610#endif
3611
3612
3613/**
3614 * Atomically toggles a bit in a bitmap.
3615 *
3616 * @param pvBitmap Pointer to the bitmap.
3617 * @param iBit The bit to toggle.
3618 */
3619#if RT_INLINE_ASM_EXTERNAL
3620DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3621#else
3622DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3623{
3624# if RT_INLINE_ASM_GNU_STYLE
3625 __asm__ __volatile__ ("lock; btcl %1, %0"
3626 : "=m" (*(volatile long *)pvBitmap)
3627 : "Ir" (iBit)
3628 : "memory");
3629# else
3630 __asm
3631 {
3632# ifdef RT_ARCH_AMD64
3633 mov rax, [pvBitmap]
3634 mov edx, [iBit]
3635 lock btc [rax], edx
3636# else
3637 mov eax, [pvBitmap]
3638 mov edx, [iBit]
3639 lock btc [eax], edx
3640# endif
3641 }
3642# endif
3643}
3644#endif
3645
3646
3647/**
3648 * Tests and sets a bit in a bitmap.
3649 *
3650 * @returns true if the bit was set.
3651 * @returns false if the bit was clear.
3652 * @param pvBitmap Pointer to the bitmap.
3653 * @param iBit The bit to test and set.
3654 */
3655#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3656DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3657#else
3658DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3659{
3660 union { bool f; uint32_t u32; uint8_t u8; } rc;
3661# if RT_INLINE_ASM_USES_INTRIN
3662 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3663
3664# elif RT_INLINE_ASM_GNU_STYLE
3665 __asm__ __volatile__ ("btsl %2, %1\n\t"
3666 "setc %b0\n\t"
3667 "andl $1, %0\n\t"
3668 : "=q" (rc.u32),
3669 "=m" (*(volatile long *)pvBitmap)
3670 : "Ir" (iBit)
3671 : "memory");
3672# else
3673 __asm
3674 {
3675 mov edx, [iBit]
3676# ifdef RT_ARCH_AMD64
3677 mov rax, [pvBitmap]
3678 bts [rax], edx
3679# else
3680 mov eax, [pvBitmap]
3681 bts [eax], edx
3682# endif
3683 setc al
3684 and eax, 1
3685 mov [rc.u32], eax
3686 }
3687# endif
3688 return rc.f;
3689}
3690#endif
3691
3692
3693/**
3694 * Atomically tests and sets a bit in a bitmap.
3695 *
3696 * @returns true if the bit was set.
3697 * @returns false if the bit was clear.
3698 * @param pvBitmap Pointer to the bitmap.
3699 * @param iBit The bit to set.
3700 */
3701#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3702DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3703#else
3704DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3705{
3706 union { bool f; uint32_t u32; uint8_t u8; } rc;
3707# if RT_INLINE_ASM_USES_INTRIN
3708 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3709# elif RT_INLINE_ASM_GNU_STYLE
3710 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3711 "setc %b0\n\t"
3712 "andl $1, %0\n\t"
3713 : "=q" (rc.u32),
3714 "=m" (*(volatile long *)pvBitmap)
3715 : "Ir" (iBit)
3716 : "memory");
3717# else
3718 __asm
3719 {
3720 mov edx, [iBit]
3721# ifdef RT_ARCH_AMD64
3722 mov rax, [pvBitmap]
3723 lock bts [rax], edx
3724# else
3725 mov eax, [pvBitmap]
3726 lock bts [eax], edx
3727# endif
3728 setc al
3729 and eax, 1
3730 mov [rc.u32], eax
3731 }
3732# endif
3733 return rc.f;
3734}
3735#endif
3736
3737
3738/**
3739 * Tests and clears a bit in a bitmap.
3740 *
3741 * @returns true if the bit was set.
3742 * @returns false if the bit was clear.
3743 * @param pvBitmap Pointer to the bitmap.
3744 * @param iBit The bit to test and clear.
3745 */
3746#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3747DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3748#else
3749DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3750{
3751 union { bool f; uint32_t u32; uint8_t u8; } rc;
3752# if RT_INLINE_ASM_USES_INTRIN
3753 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3754
3755# elif RT_INLINE_ASM_GNU_STYLE
3756 __asm__ __volatile__ ("btrl %2, %1\n\t"
3757 "setc %b0\n\t"
3758 "andl $1, %0\n\t"
3759 : "=q" (rc.u32),
3760 "=m" (*(volatile long *)pvBitmap)
3761 : "Ir" (iBit)
3762 : "memory");
3763# else
3764 __asm
3765 {
3766 mov edx, [iBit]
3767# ifdef RT_ARCH_AMD64
3768 mov rax, [pvBitmap]
3769 btr [rax], edx
3770# else
3771 mov eax, [pvBitmap]
3772 btr [eax], edx
3773# endif
3774 setc al
3775 and eax, 1
3776 mov [rc.u32], eax
3777 }
3778# endif
3779 return rc.f;
3780}
3781#endif
3782
3783
3784/**
3785 * Atomically tests and clears a bit in a bitmap.
3786 *
3787 * @returns true if the bit was set.
3788 * @returns false if the bit was clear.
3789 * @param pvBitmap Pointer to the bitmap.
3790 * @param iBit The bit to test and clear.
3791 * @remark No memory barrier, take care on SMP.
3792 */
3793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3794DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3795#else
3796DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3797{
3798 union { bool f; uint32_t u32; uint8_t u8; } rc;
3799# if RT_INLINE_ASM_USES_INTRIN
3800 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3801
3802# elif RT_INLINE_ASM_GNU_STYLE
3803 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3804 "setc %b0\n\t"
3805 "andl $1, %0\n\t"
3806 : "=q" (rc.u32),
3807 "=m" (*(volatile long *)pvBitmap)
3808 : "Ir" (iBit)
3809 : "memory");
3810# else
3811 __asm
3812 {
3813 mov edx, [iBit]
3814# ifdef RT_ARCH_AMD64
3815 mov rax, [pvBitmap]
3816 lock btr [rax], edx
3817# else
3818 mov eax, [pvBitmap]
3819 lock btr [eax], edx
3820# endif
3821 setc al
3822 and eax, 1
3823 mov [rc.u32], eax
3824 }
3825# endif
3826 return rc.f;
3827}
3828#endif
3829
3830
3831/**
3832 * Tests and toggles a bit in a bitmap.
3833 *
3834 * @returns true if the bit was set.
3835 * @returns false if the bit was clear.
3836 * @param pvBitmap Pointer to the bitmap.
3837 * @param iBit The bit to test and toggle.
3838 */
3839#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3840DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3841#else
3842DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3843{
3844 union { bool f; uint32_t u32; uint8_t u8; } rc;
3845# if RT_INLINE_ASM_USES_INTRIN
3846 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3847
3848# elif RT_INLINE_ASM_GNU_STYLE
3849 __asm__ __volatile__ ("btcl %2, %1\n\t"
3850 "setc %b0\n\t"
3851 "andl $1, %0\n\t"
3852 : "=q" (rc.u32),
3853 "=m" (*(volatile long *)pvBitmap)
3854 : "Ir" (iBit)
3855 : "memory");
3856# else
3857 __asm
3858 {
3859 mov edx, [iBit]
3860# ifdef RT_ARCH_AMD64
3861 mov rax, [pvBitmap]
3862 btc [rax], edx
3863# else
3864 mov eax, [pvBitmap]
3865 btc [eax], edx
3866# endif
3867 setc al
3868 and eax, 1
3869 mov [rc.u32], eax
3870 }
3871# endif
3872 return rc.f;
3873}
3874#endif
3875
3876
3877/**
3878 * Atomically tests and toggles a bit in a bitmap.
3879 *
3880 * @returns true if the bit was set.
3881 * @returns false if the bit was clear.
3882 * @param pvBitmap Pointer to the bitmap.
3883 * @param iBit The bit to test and toggle.
3884 */
3885#if RT_INLINE_ASM_EXTERNAL
3886DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3887#else
3888DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3889{
3890 union { bool f; uint32_t u32; uint8_t u8; } rc;
3891# if RT_INLINE_ASM_GNU_STYLE
3892 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3893 "setc %b0\n\t"
3894 "andl $1, %0\n\t"
3895 : "=q" (rc.u32),
3896 "=m" (*(volatile long *)pvBitmap)
3897 : "Ir" (iBit)
3898 : "memory");
3899# else
3900 __asm
3901 {
3902 mov edx, [iBit]
3903# ifdef RT_ARCH_AMD64
3904 mov rax, [pvBitmap]
3905 lock btc [rax], edx
3906# else
3907 mov eax, [pvBitmap]
3908 lock btc [eax], edx
3909# endif
3910 setc al
3911 and eax, 1
3912 mov [rc.u32], eax
3913 }
3914# endif
3915 return rc.f;
3916}
3917#endif
3918
3919
3920/**
3921 * Tests if a bit in a bitmap is set.
3922 *
3923 * @returns true if the bit is set.
3924 * @returns false if the bit is clear.
3925 * @param pvBitmap Pointer to the bitmap.
3926 * @param iBit The bit to test.
3927 */
3928#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3929DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3930#else
3931DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3932{
3933 union { bool f; uint32_t u32; uint8_t u8; } rc;
3934# if RT_INLINE_ASM_USES_INTRIN
3935 rc.u32 = _bittest((long *)pvBitmap, iBit);
3936# elif RT_INLINE_ASM_GNU_STYLE
3937
3938 __asm__ __volatile__ ("btl %2, %1\n\t"
3939 "setc %b0\n\t"
3940 "andl $1, %0\n\t"
3941 : "=q" (rc.u32),
3942 "=m" (*(volatile long *)pvBitmap)
3943 : "Ir" (iBit)
3944 : "memory");
3945# else
3946 __asm
3947 {
3948 mov edx, [iBit]
3949# ifdef RT_ARCH_AMD64
3950 mov rax, [pvBitmap]
3951 bt [rax], edx
3952# else
3953 mov eax, [pvBitmap]
3954 bt [eax], edx
3955# endif
3956 setc al
3957 and eax, 1
3958 mov [rc.u32], eax
3959 }
3960# endif
3961 return rc.f;
3962}
3963#endif
3964
3965
3966/**
3967 * Clears a bit range within a bitmap.
3968 *
3969 * @param pvBitmap Pointer to the bitmap.
3970 * @param iBitStart The first bit to clear.
3971 * @param iBitEnd The first bit not to clear.
3972 */
3973DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3974{
3975 if (iBitStart < iBitEnd)
3976 {
3977 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3978 int iStart = iBitStart & ~31;
3979 int iEnd = iBitEnd & ~31;
3980 if (iStart == iEnd)
3981 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3982 else
3983 {
3984 /* bits in first dword. */
3985 if (iBitStart & 31)
3986 {
3987 *pu32 &= (1 << (iBitStart & 31)) - 1;
3988 pu32++;
3989 iBitStart = iStart + 32;
3990 }
3991
3992 /* whole dword. */
3993 if (iBitStart != iEnd)
3994 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3995
3996 /* bits in last dword. */
3997 if (iBitEnd & 31)
3998 {
3999 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4000 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4001 }
4002 }
4003 }
4004}
4005
4006
4007/**
4008 * Finds the first clear bit in a bitmap.
4009 *
4010 * @returns Index of the first zero bit.
4011 * @returns -1 if no clear bit was found.
4012 * @param pvBitmap Pointer to the bitmap.
4013 * @param cBits The number of bits in the bitmap. Multiple of 32.
4014 */
4015#if RT_INLINE_ASM_EXTERNAL
4016DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
4017#else
4018DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
4019{
4020 if (cBits)
4021 {
4022 int32_t iBit;
4023# if RT_INLINE_ASM_GNU_STYLE
4024 RTCCUINTREG uEAX, uECX, uEDI;
4025 cBits = RT_ALIGN_32(cBits, 32);
4026 __asm__ __volatile__("repe; scasl\n\t"
4027 "je 1f\n\t"
4028# ifdef RT_ARCH_AMD64
4029 "lea -4(%%rdi), %%rdi\n\t"
4030 "xorl (%%rdi), %%eax\n\t"
4031 "subq %5, %%rdi\n\t"
4032# else
4033 "lea -4(%%edi), %%edi\n\t"
4034 "xorl (%%edi), %%eax\n\t"
4035 "subl %5, %%edi\n\t"
4036# endif
4037 "shll $3, %%edi\n\t"
4038 "bsfl %%eax, %%edx\n\t"
4039 "addl %%edi, %%edx\n\t"
4040 "1:\t\n"
4041 : "=d" (iBit),
4042 "=&c" (uECX),
4043 "=&D" (uEDI),
4044 "=&a" (uEAX)
4045 : "0" (0xffffffff),
4046 "mr" (pvBitmap),
4047 "1" (cBits >> 5),
4048 "2" (pvBitmap),
4049 "3" (0xffffffff));
4050# else
4051 cBits = RT_ALIGN_32(cBits, 32);
4052 __asm
4053 {
4054# ifdef RT_ARCH_AMD64
4055 mov rdi, [pvBitmap]
4056 mov rbx, rdi
4057# else
4058 mov edi, [pvBitmap]
4059 mov ebx, edi
4060# endif
4061 mov edx, 0ffffffffh
4062 mov eax, edx
4063 mov ecx, [cBits]
4064 shr ecx, 5
4065 repe scasd
4066 je done
4067
4068# ifdef RT_ARCH_AMD64
4069 lea rdi, [rdi - 4]
4070 xor eax, [rdi]
4071 sub rdi, rbx
4072# else
4073 lea edi, [edi - 4]
4074 xor eax, [edi]
4075 sub edi, ebx
4076# endif
4077 shl edi, 3
4078 bsf edx, eax
4079 add edx, edi
4080 done:
4081 mov [iBit], edx
4082 }
4083# endif
4084 return iBit;
4085 }
4086 return -1;
4087}
4088#endif
4089
4090
4091/**
4092 * Finds the next clear bit in a bitmap.
4093 *
4094 * @returns Index of the first zero bit.
4095 * @returns -1 if no clear bit was found.
4096 * @param pvBitmap Pointer to the bitmap.
4097 * @param cBits The number of bits in the bitmap. Multiple of 32.
4098 * @param iBitPrev The bit returned from the last search.
4099 * The search will start at iBitPrev + 1.
4100 */
4101#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4102DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4103#else
4104DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4105{
4106 int iBit = ++iBitPrev & 31;
4107 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4108 cBits -= iBitPrev & ~31;
4109 if (iBit)
4110 {
4111 /* inspect the first dword. */
4112 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
4113# if RT_INLINE_ASM_USES_INTRIN
4114 unsigned long ulBit = 0;
4115 if (_BitScanForward(&ulBit, u32))
4116 return ulBit + iBitPrev;
4117 iBit = -1;
4118# else
4119# if RT_INLINE_ASM_GNU_STYLE
4120 __asm__ __volatile__("bsf %1, %0\n\t"
4121 "jnz 1f\n\t"
4122 "movl $-1, %0\n\t"
4123 "1:\n\t"
4124 : "=r" (iBit)
4125 : "r" (u32));
4126# else
4127 __asm
4128 {
4129 mov edx, [u32]
4130 bsf eax, edx
4131 jnz done
4132 mov eax, 0ffffffffh
4133 done:
4134 mov [iBit], eax
4135 }
4136# endif
4137 if (iBit >= 0)
4138 return iBit + iBitPrev;
4139# endif
4140 /* Search the rest of the bitmap, if there is anything. */
4141 if (cBits > 32)
4142 {
4143 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4144 if (iBit >= 0)
4145 return iBit + (iBitPrev & ~31) + 32;
4146 }
4147 }
4148 else
4149 {
4150 /* Search the rest of the bitmap. */
4151 iBit = ASMBitFirstClear(pvBitmap, cBits);
4152 if (iBit >= 0)
4153 return iBit + (iBitPrev & ~31);
4154 }
4155 return iBit;
4156}
4157#endif
4158
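/* Example (illustrative sketch, not part of the original header, compiled out):
 * a simple slot allocator over a 1024-bit bitmap, searching with
 * ASMBitFirstClear and claiming the slot atomically with
 * ASMAtomicBitTestAndSet.  The bitmap size and helper name are hypothetical. */
#if 0
static int32_t ExampleAllocSlot(volatile uint32_t *pau32Bitmap /* 1024 bits */)
{
    for (;;)
    {
        int32_t iBit = ASMBitFirstClear(pau32Bitmap, 1024);
        if (iBit < 0)
            return -1;                                  /* bitmap is full. */
        if (!ASMAtomicBitTestAndSet(pau32Bitmap, iBit))
            return iBit;                                /* the bit was clear and is now ours. */
        /* somebody raced us to this bit; search again. */
    }
}
#endif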
4159
4160/**
4161 * Finds the first set bit in a bitmap.
4162 *
4163 * @returns Index of the first set bit.
4164 * @returns -1 if no set bit was found.
4165 * @param pvBitmap Pointer to the bitmap.
4166 * @param cBits The number of bits in the bitmap. Multiple of 32.
4167 */
4168#if RT_INLINE_ASM_EXTERNAL
4169DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
4170#else
4171DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
4172{
4173 if (cBits)
4174 {
4175 int32_t iBit;
4176# if RT_INLINE_ASM_GNU_STYLE
4177 RTCCUINTREG uEAX, uECX, uEDI;
4178 cBits = RT_ALIGN_32(cBits, 32);
4179 __asm__ __volatile__("repe; scasl\n\t"
4180 "je 1f\n\t"
4181# ifdef RT_ARCH_AMD64
4182 "lea -4(%%rdi), %%rdi\n\t"
4183 "movl (%%rdi), %%eax\n\t"
4184 "subq %5, %%rdi\n\t"
4185# else
4186 "lea -4(%%edi), %%edi\n\t"
4187 "movl (%%edi), %%eax\n\t"
4188 "subl %5, %%edi\n\t"
4189# endif
4190 "shll $3, %%edi\n\t"
4191 "bsfl %%eax, %%edx\n\t"
4192 "addl %%edi, %%edx\n\t"
4193 "1:\t\n"
4194 : "=d" (iBit),
4195 "=&c" (uECX),
4196 "=&D" (uEDI),
4197 "=&a" (uEAX)
4198 : "0" (0xffffffff),
4199 "mr" (pvBitmap),
4200 "1" (cBits >> 5),
4201 "2" (pvBitmap),
4202 "3" (0));
4203# else
4204 cBits = RT_ALIGN_32(cBits, 32);
4205 __asm
4206 {
4207# ifdef RT_ARCH_AMD64
4208 mov rdi, [pvBitmap]
4209 mov rbx, rdi
4210# else
4211 mov edi, [pvBitmap]
4212 mov ebx, edi
4213# endif
4214 mov edx, 0ffffffffh
4215 xor eax, eax
4216 mov ecx, [cBits]
4217 shr ecx, 5
4218 repe scasd
4219 je done
4220# ifdef RT_ARCH_AMD64
4221 lea rdi, [rdi - 4]
4222 mov eax, [rdi]
4223 sub rdi, rbx
4224# else
4225 lea edi, [edi - 4]
4226 mov eax, [edi]
4227 sub edi, ebx
4228# endif
4229 shl edi, 3
4230 bsf edx, eax
4231 add edx, edi
4232 done:
4233 mov [iBit], edx
4234 }
4235# endif
4236 return iBit;
4237 }
4238 return -1;
4239}
4240#endif
4241
4242
4243/**
4244 * Finds the next set bit in a bitmap.
4245 *
4246 * @returns Index of the next set bit.
4247 * @returns -1 if no set bit was found.
4248 * @param pvBitmap Pointer to the bitmap.
4249 * @param cBits The number of bits in the bitmap. Multiple of 32.
4250 * @param iBitPrev The bit returned from the last search.
4251 * The search will start at iBitPrev + 1.
4252 */
4253#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4254DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4255#else
4256DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4257{
4258 int iBit = ++iBitPrev & 31;
4259 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4260 cBits -= iBitPrev & ~31;
4261 if (iBit)
4262 {
4263 /* inspect the first dword. */
4264 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
4265# if RT_INLINE_ASM_USES_INTRIN
4266 unsigned long ulBit = 0;
4267 if (_BitScanForward(&ulBit, u32))
4268 return ulBit + iBitPrev;
4269 iBit = -1;
4270# else
4271# if RT_INLINE_ASM_GNU_STYLE
4272 __asm__ __volatile__("bsf %1, %0\n\t"
4273 "jnz 1f\n\t"
4274 "movl $-1, %0\n\t"
4275 "1:\n\t"
4276 : "=r" (iBit)
4277 : "r" (u32));
4278# else
4279 __asm
4280 {
4281 mov edx, u32
4282 bsf eax, edx
4283 jnz done
4284 mov eax, 0ffffffffh
4285 done:
4286 mov [iBit], eax
4287 }
4288# endif
4289 if (iBit >= 0)
4290 return iBit + iBitPrev;
4291# endif
4292 /* Search the rest of the bitmap, if there is anything. */
4293 if (cBits > 32)
4294 {
4295 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4296 if (iBit >= 0)
4297 return iBit + (iBitPrev & ~31) + 32;
4298 }
4299
4300 }
4301 else
4302 {
4303 /* Search the rest of the bitmap. */
4304 iBit = ASMBitFirstSet(pvBitmap, cBits);
4305 if (iBit >= 0)
4306 return iBit + (iBitPrev & ~31);
4307 }
4308 return iBit;
4309}
4310#endif
4311
4312
4313/**
4314 * Finds the first bit which is set in the given 32-bit integer.
4315 * Bits are numbered from 1 (least significant) to 32.
4316 *
4317 * @returns index [1..32] of the first set bit.
4318 * @returns 0 if all bits are cleared.
4319 * @param u32 Integer to search for set bits.
4320 * @remark Similar to ffs() in BSD.
4321 */
4322DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4323{
4324# if RT_INLINE_ASM_USES_INTRIN
4325 unsigned long iBit;
4326 if (_BitScanForward(&iBit, u32))
4327 iBit++;
4328 else
4329 iBit = 0;
4330# elif RT_INLINE_ASM_GNU_STYLE
4331 uint32_t iBit;
4332 __asm__ __volatile__("bsf %1, %0\n\t"
4333 "jnz 1f\n\t"
4334 "xorl %0, %0\n\t"
4335 "jmp 2f\n"
4336 "1:\n\t"
4337 "incl %0\n"
4338 "2:\n\t"
4339 : "=r" (iBit)
4340 : "rm" (u32));
4341# else
4342 uint32_t iBit;
4343 _asm
4344 {
4345 bsf eax, [u32]
4346 jnz found
4347 xor eax, eax
4348 jmp done
4349 found:
4350 inc eax
4351 done:
4352 mov [iBit], eax
4353 }
4354# endif
4355 return iBit;
4356}
4357
4358
4359/**
4360 * Finds the first bit which is set in the given 32-bit integer.
4361 * Bits are numbered from 1 (least significant) to 32.
4362 *
4363 * @returns index [1..32] of the first set bit.
4364 * @returns 0 if all bits are cleared.
4365 * @param i32 Integer to search for set bits.
4366 * @remark Similar to ffs() in BSD.
4367 */
4368DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4369{
4370 return ASMBitFirstSetU32((uint32_t)i32);
4371}
4372
4373
4374/**
4375 * Finds the last bit which is set in the given 32-bit integer.
4376 * Bits are numbered from 1 (least significant) to 32.
4377 *
4378 * @returns index [1..32] of the last set bit.
4379 * @returns 0 if all bits are cleared.
4380 * @param u32 Integer to search for set bits.
4381 * @remark Similar to fls() in BSD.
4382 */
4383DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4384{
4385# if RT_INLINE_ASM_USES_INTRIN
4386 unsigned long iBit;
4387 if (_BitScanReverse(&iBit, u32))
4388 iBit++;
4389 else
4390 iBit = 0;
4391# elif RT_INLINE_ASM_GNU_STYLE
4392 uint32_t iBit;
4393 __asm__ __volatile__("bsrl %1, %0\n\t"
4394 "jnz 1f\n\t"
4395 "xorl %0, %0\n\t"
4396 "jmp 2f\n"
4397 "1:\n\t"
4398 "incl %0\n"
4399 "2:\n\t"
4400 : "=r" (iBit)
4401 : "rm" (u32));
4402# else
4403 uint32_t iBit;
4404 _asm
4405 {
4406 bsr eax, [u32]
4407 jnz found
4408 xor eax, eax
4409 jmp done
4410 found:
4411 inc eax
4412 done:
4413 mov [iBit], eax
4414 }
4415# endif
4416 return iBit;
4417}
4418
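/* Example (illustrative sketch, not part of the original header, compiled out):
 * because ASMBitLastSetU32 numbers bits 1..32 and returns 0 for a zero input,
 * the floor of the base-2 logarithm of a non-zero value is the result minus
 * one.  The helper name is hypothetical. */
#if 0
static unsigned ExampleLog2Floor(uint32_t u32 /* must be non-zero */)
{
    return ASMBitLastSetU32(u32) - 1;   /* e.g. 0x1000 -> bit 13 -> log2 == 12. */
}
#endif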
4419
4420/**
4421 * Finds the last bit which is set in the given 32-bit integer.
4422 * Bits are numbered from 1 (least significant) to 32.
4423 *
4424 * @returns index [1..32] of the last set bit.
4425 * @returns 0 if all bits are cleared.
4426 * @param i32 Integer to search for set bits.
4427 * @remark Similar to fls() in BSD.
4428 */
4429DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4430{
4431 return ASMBitLastSetU32((uint32_t)i32);
4432}
4433
4434
4435/**
4436 * Reverse the byte order of the given 32-bit integer.
4437 * @param u32 Integer
4438 */
4439DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4440{
4441#if RT_INLINE_ASM_USES_INTRIN
4442 u32 = _byteswap_ulong(u32);
4443#elif RT_INLINE_ASM_GNU_STYLE
4444 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4445#else
4446 _asm
4447 {
4448 mov eax, [u32]
4449 bswap eax
4450 mov [u32], eax
4451 }
4452#endif
4453 return u32;
4454}
4455
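/* Example (illustrative sketch, not part of the original header, compiled out):
 * converting a big-endian wire value to the little-endian host representation
 * used on the x86/AMD64 targets of this header.  The helper name is
 * hypothetical. */
#if 0
static uint32_t ExampleFromBigEndianU32(uint32_t u32Wire)
{
    return ASMByteSwapU32(u32Wire);     /* swaps 0x11223344 <-> 0x44332211. */
}
#endif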
4456/** @} */
4457
4458
4459/** @} */
4460#endif
4461