VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 6646

Last change on this file since 6646 was 6646, checked in by vboxsync, 17 years ago

Attempt to fix ASMAtomicCmpXchgExU64 with 32bit gcc. It compiles, but
produces testcase errors.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 111.4 KB
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31/** @todo #include <iprt/param.h> for PAGE_SIZE. */
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using _MSC_VER >= 1400 and thus the compiler intrinsics.
34 * Otherwise defined as 0.
35 */
36
37#ifdef _MSC_VER
38# if _MSC_VER >= 1400
39# define RT_INLINE_ASM_USES_INTRIN 1
40# include <intrin.h>
41 /* Emit the intrinsics at all optimization levels. */
42# pragma intrinsic(_ReadWriteBarrier)
43# pragma intrinsic(__cpuid)
44# pragma intrinsic(_enable)
45# pragma intrinsic(_disable)
46# pragma intrinsic(__rdtsc)
47# pragma intrinsic(__readmsr)
48# pragma intrinsic(__writemsr)
49# pragma intrinsic(__outbyte)
50# pragma intrinsic(__outword)
51# pragma intrinsic(__outdword)
52# pragma intrinsic(__inbyte)
53# pragma intrinsic(__inword)
54# pragma intrinsic(__indword)
55# pragma intrinsic(__invlpg)
56# pragma intrinsic(__stosd)
57# pragma intrinsic(__stosw)
58# pragma intrinsic(__stosb)
59# pragma intrinsic(__readcr0)
60# pragma intrinsic(__readcr2)
61# pragma intrinsic(__readcr3)
62# pragma intrinsic(__readcr4)
63# pragma intrinsic(__writecr0)
64# pragma intrinsic(__writecr3)
65# pragma intrinsic(__writecr4)
66# pragma intrinsic(_BitScanForward)
67# pragma intrinsic(_BitScanReverse)
68# pragma intrinsic(_bittest)
69# pragma intrinsic(_bittestandset)
70# pragma intrinsic(_bittestandreset)
71# pragma intrinsic(_bittestandcomplement)
72# pragma intrinsic(_byteswap_ushort)
73# pragma intrinsic(_byteswap_ulong)
74# pragma intrinsic(_interlockedbittestandset)
75# pragma intrinsic(_interlockedbittestandreset)
76# pragma intrinsic(_InterlockedAnd)
77# pragma intrinsic(_InterlockedOr)
78# pragma intrinsic(_InterlockedIncrement)
79# pragma intrinsic(_InterlockedDecrement)
80# pragma intrinsic(_InterlockedExchange)
81# pragma intrinsic(_InterlockedCompareExchange)
82# pragma intrinsic(_InterlockedCompareExchange64)
83# ifdef RT_ARCH_AMD64
84# pragma intrinsic(__stosq)
85# pragma intrinsic(__readcr8)
86# pragma intrinsic(__writecr8)
87# pragma intrinsic(_byteswap_uint64)
88# pragma intrinsic(_InterlockedExchange64)
89# endif
90# endif
91#endif
92#ifndef RT_INLINE_ASM_USES_INTRIN
93# define RT_INLINE_ASM_USES_INTRIN 0
94#endif
95
96
97
98/** @defgroup grp_asm ASM - Assembly Routines
99 * @ingroup grp_rt
100 * @{
101 */
102
103/** @def RT_INLINE_ASM_EXTERNAL
104 * Defined as 1 if the compiler does not support inline assembly.
105 * The ASM* functions will then be implemented in an external .asm file.
106 *
107 * @remark The Microsoft AMD64 compiler does not support inline assembly, so
108 * the ASM* functions are always external there.
109 */
110#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
111# define RT_INLINE_ASM_EXTERNAL 1
112#else
113# define RT_INLINE_ASM_EXTERNAL 0
114#endif
115
116/** @def RT_INLINE_ASM_GNU_STYLE
117 * Defined as 1 if the compiler understands GNU style inline assembly.
118 */
119#if defined(_MSC_VER)
120# define RT_INLINE_ASM_GNU_STYLE 0
121#else
122# define RT_INLINE_ASM_GNU_STYLE 1
123#endif
124
125
126/** @todo find a more proper place for this structure? */
127#pragma pack(1)
128/** IDTR */
129typedef struct RTIDTR
130{
131 /** Size of the IDT. */
132 uint16_t cbIdt;
133 /** Address of the IDT. */
134 uintptr_t pIdt;
135} RTIDTR, *PRTIDTR;
136#pragma pack()
137
138#pragma pack(1)
139/** GDTR */
140typedef struct RTGDTR
141{
142 /** Size of the GDT. */
143 uint16_t cbGdt;
144 /** Address of the GDT. */
145 uintptr_t pGdt;
146} RTGDTR, *PRTGDTR;
147#pragma pack()
148
149
150/** @def ASMReturnAddress
151 * Gets the return address of the current (or calling if you like) function or method.
152 */
153#ifdef _MSC_VER
154# ifdef __cplusplus
155extern "C"
156# endif
157void * _ReturnAddress(void);
158# pragma intrinsic(_ReturnAddress)
159# define ASMReturnAddress() _ReturnAddress()
160#elif defined(__GNUC__) || defined(__DOXYGEN__)
161# define ASMReturnAddress() __builtin_return_address(0)
162#else
163# error "Unsupported compiler."
164#endif
165
166
167/**
168 * Gets the content of the IDTR CPU register.
169 * @param pIdtr Where to store the IDTR contents.
170 */
171#if RT_INLINE_ASM_EXTERNAL
172DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
173#else
174DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
175{
176# if RT_INLINE_ASM_GNU_STYLE
177 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
178# else
179 __asm
180 {
181# ifdef RT_ARCH_AMD64
182 mov rax, [pIdtr]
183 sidt [rax]
184# else
185 mov eax, [pIdtr]
186 sidt [eax]
187# endif
188 }
189# endif
190}
191#endif
192
193
194/**
195 * Sets the content of the IDTR CPU register.
196 * @param pIdtr Where to load the IDTR contents from
197 */
198#if RT_INLINE_ASM_EXTERNAL
199DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
200#else
201DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
202{
203# if RT_INLINE_ASM_GNU_STYLE
204 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
205# else
206 __asm
207 {
208# ifdef RT_ARCH_AMD64
209 mov rax, [pIdtr]
210 lidt [rax]
211# else
212 mov eax, [pIdtr]
213 lidt [eax]
214# endif
215 }
216# endif
217}
218#endif
219
220
221/**
222 * Gets the content of the GDTR CPU register.
223 * @param pGdtr Where to store the GDTR contents.
224 */
225#if RT_INLINE_ASM_EXTERNAL
226DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
227#else
228DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
229{
230# if RT_INLINE_ASM_GNU_STYLE
231 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
232# else
233 __asm
234 {
235# ifdef RT_ARCH_AMD64
236 mov rax, [pGdtr]
237 sgdt [rax]
238# else
239 mov eax, [pGdtr]
240 sgdt [eax]
241# endif
242 }
243# endif
244}
245#endif
246
247/**
248 * Get the cs register.
249 * @returns cs.
250 */
251#if RT_INLINE_ASM_EXTERNAL
252DECLASM(RTSEL) ASMGetCS(void);
253#else
254DECLINLINE(RTSEL) ASMGetCS(void)
255{
256 RTSEL SelCS;
257# if RT_INLINE_ASM_GNU_STYLE
258 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
259# else
260 __asm
261 {
262 mov ax, cs
263 mov [SelCS], ax
264 }
265# endif
266 return SelCS;
267}
268#endif
269
270
271/**
272 * Get the DS register.
273 * @returns DS.
274 */
275#if RT_INLINE_ASM_EXTERNAL
276DECLASM(RTSEL) ASMGetDS(void);
277#else
278DECLINLINE(RTSEL) ASMGetDS(void)
279{
280 RTSEL SelDS;
281# if RT_INLINE_ASM_GNU_STYLE
282 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
283# else
284 __asm
285 {
286 mov ax, ds
287 mov [SelDS], ax
288 }
289# endif
290 return SelDS;
291}
292#endif
293
294
295/**
296 * Get the ES register.
297 * @returns ES.
298 */
299#if RT_INLINE_ASM_EXTERNAL
300DECLASM(RTSEL) ASMGetES(void);
301#else
302DECLINLINE(RTSEL) ASMGetES(void)
303{
304 RTSEL SelES;
305# if RT_INLINE_ASM_GNU_STYLE
306 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
307# else
308 __asm
309 {
310 mov ax, es
311 mov [SelES], ax
312 }
313# endif
314 return SelES;
315}
316#endif
317
318
319/**
320 * Get the FS register.
321 * @returns FS.
322 */
323#if RT_INLINE_ASM_EXTERNAL
324DECLASM(RTSEL) ASMGetFS(void);
325#else
326DECLINLINE(RTSEL) ASMGetFS(void)
327{
328 RTSEL SelFS;
329# if RT_INLINE_ASM_GNU_STYLE
330 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
331# else
332 __asm
333 {
334 mov ax, fs
335 mov [SelFS], ax
336 }
337# endif
338 return SelFS;
339}
340#endif
341
342
343/**
344 * Get the GS register.
345 * @returns GS.
346 */
347#if RT_INLINE_ASM_EXTERNAL
348DECLASM(RTSEL) ASMGetGS(void);
349#else
350DECLINLINE(RTSEL) ASMGetGS(void)
351{
352 RTSEL SelGS;
353# if RT_INLINE_ASM_GNU_STYLE
354 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
355# else
356 __asm
357 {
358 mov ax, gs
359 mov [SelGS], ax
360 }
361# endif
362 return SelGS;
363}
364#endif
365
366
367/**
368 * Get the SS register.
369 * @returns SS.
370 */
371#if RT_INLINE_ASM_EXTERNAL
372DECLASM(RTSEL) ASMGetSS(void);
373#else
374DECLINLINE(RTSEL) ASMGetSS(void)
375{
376 RTSEL SelSS;
377# if RT_INLINE_ASM_GNU_STYLE
378 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
379# else
380 __asm
381 {
382 mov ax, ss
383 mov [SelSS], ax
384 }
385# endif
386 return SelSS;
387}
388#endif
389
390
391/**
392 * Get the TR register.
393 * @returns TR.
394 */
395#if RT_INLINE_ASM_EXTERNAL
396DECLASM(RTSEL) ASMGetTR(void);
397#else
398DECLINLINE(RTSEL) ASMGetTR(void)
399{
400 RTSEL SelTR;
401# if RT_INLINE_ASM_GNU_STYLE
402 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
403# else
404 __asm
405 {
406 str ax
407 mov [SelTR], ax
408 }
409# endif
410 return SelTR;
411}
412#endif
413
414
415/**
416 * Get the [RE]FLAGS register.
417 * @returns [RE]FLAGS.
418 */
419#if RT_INLINE_ASM_EXTERNAL
420DECLASM(RTCCUINTREG) ASMGetFlags(void);
421#else
422DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
423{
424 RTCCUINTREG uFlags;
425# if RT_INLINE_ASM_GNU_STYLE
426# ifdef RT_ARCH_AMD64
427 __asm__ __volatile__("pushfq\n\t"
428 "popq %0\n\t"
429 : "=g" (uFlags));
430# else
431 __asm__ __volatile__("pushfl\n\t"
432 "popl %0\n\t"
433 : "=g" (uFlags));
434# endif
435# else
436 __asm
437 {
438# ifdef RT_ARCH_AMD64
439 pushfq
440 pop [uFlags]
441# else
442 pushfd
443 pop [uFlags]
444# endif
445 }
446# endif
447 return uFlags;
448}
449#endif
450
451
452/**
453 * Set the [RE]FLAGS register.
454 * @param uFlags The new [RE]FLAGS value.
455 */
456#if RT_INLINE_ASM_EXTERNAL
457DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
458#else
459DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
460{
461# if RT_INLINE_ASM_GNU_STYLE
462# ifdef RT_ARCH_AMD64
463 __asm__ __volatile__("pushq %0\n\t"
464 "popfq\n\t"
465 : : "g" (uFlags));
466# else
467 __asm__ __volatile__("pushl %0\n\t"
468 "popfl\n\t"
469 : : "g" (uFlags));
470# endif
471# else
472 __asm
473 {
474# ifdef RT_ARCH_AMD64
475 push [uFlags]
476 popfq
477# else
478 push [uFlags]
479 popfd
480# endif
481 }
482# endif
483}
484#endif
485
486
487/**
488 * Gets the content of the CPU timestamp counter register.
489 *
490 * @returns TSC.
491 */
492#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
493DECLASM(uint64_t) ASMReadTSC(void);
494#else
495DECLINLINE(uint64_t) ASMReadTSC(void)
496{
497 RTUINT64U u;
498# if RT_INLINE_ASM_GNU_STYLE
499 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
500# else
501# if RT_INLINE_ASM_USES_INTRIN
502 u.u = __rdtsc();
503# else
504 __asm
505 {
506 rdtsc
507 mov [u.s.Lo], eax
508 mov [u.s.Hi], edx
509 }
510# endif
511# endif
512 return u.u;
513}
514#endif
515
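/* Usage sketch (not part of the original header): measuring an interval in TSC
 * ticks. Converting ticks to time needs the TSC frequency, which this API does
 * not provide; DoSomethingExpensive() is a hypothetical workload.
 *
 *     uint64_t const uTscStart = ASMReadTSC();
 *     DoSomethingExpensive();
 *     uint64_t const cTicks    = ASMReadTSC() - uTscStart;
 */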
516
517/**
518 * Performs the cpuid instruction returning all registers.
519 *
520 * @param uOperator CPUID operation (eax).
521 * @param pvEAX Where to store eax.
522 * @param pvEBX Where to store ebx.
523 * @param pvECX Where to store ecx.
524 * @param pvEDX Where to store edx.
525 * @remark We're using void pointers to ease the use of special bitfield structures and such.
526 */
527#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
528DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
529#else
530DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
531{
532# if RT_INLINE_ASM_GNU_STYLE
533# ifdef RT_ARCH_AMD64
534 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
535 __asm__ ("cpuid\n\t"
536 : "=a" (uRAX),
537 "=b" (uRBX),
538 "=c" (uRCX),
539 "=d" (uRDX)
540 : "0" (uOperator));
541 *(uint32_t *)pvEAX = (uint32_t)uRAX;
542 *(uint32_t *)pvEBX = (uint32_t)uRBX;
543 *(uint32_t *)pvECX = (uint32_t)uRCX;
544 *(uint32_t *)pvEDX = (uint32_t)uRDX;
545# else
546 __asm__ ("xchgl %%ebx, %1\n\t"
547 "cpuid\n\t"
548 "xchgl %%ebx, %1\n\t"
549 : "=a" (*(uint32_t *)pvEAX),
550 "=r" (*(uint32_t *)pvEBX),
551 "=c" (*(uint32_t *)pvECX),
552 "=d" (*(uint32_t *)pvEDX)
553 : "0" (uOperator));
554# endif
555
556# elif RT_INLINE_ASM_USES_INTRIN
557 int aInfo[4];
558 __cpuid(aInfo, uOperator);
559 *(uint32_t *)pvEAX = aInfo[0];
560 *(uint32_t *)pvEBX = aInfo[1];
561 *(uint32_t *)pvECX = aInfo[2];
562 *(uint32_t *)pvEDX = aInfo[3];
563
564# else
565 uint32_t uEAX;
566 uint32_t uEBX;
567 uint32_t uECX;
568 uint32_t uEDX;
569 __asm
570 {
571 push ebx
572 mov eax, [uOperator]
573 cpuid
574 mov [uEAX], eax
575 mov [uEBX], ebx
576 mov [uECX], ecx
577 mov [uEDX], edx
578 pop ebx
579 }
580 *(uint32_t *)pvEAX = uEAX;
581 *(uint32_t *)pvEBX = uEBX;
582 *(uint32_t *)pvECX = uECX;
583 *(uint32_t *)pvEDX = uEDX;
584# endif
585}
586#endif
587
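/* Usage sketch (not part of the original header): fetching the CPU vendor
 * string via standard leaf 0. memcpy() from <string.h> is assumed; the
 * EBX/EDX/ECX ordering is the architectural layout of the 12-byte vendor id.
 *
 *     uint32_t uEAX, uEBX, uECX, uEDX;
 *     char     szVendor[13];
 *     ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *     memcpy(&szVendor[0], &uEBX, 4);
 *     memcpy(&szVendor[4], &uEDX, 4);
 *     memcpy(&szVendor[8], &uECX, 4);
 *     szVendor[12] = '\0';
 */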
588
589/**
590 * Performs the cpuid instruction returning all registers.
591 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
592 *
593 * @param uOperator CPUID operation (eax).
594 * @param uIdxECX ecx index
595 * @param pvEAX Where to store eax.
596 * @param pvEBX Where to store ebx.
597 * @param pvECX Where to store ecx.
598 * @param pvEDX Where to store edx.
599 * @remark We're using void pointers to ease the use of special bitfield structures and such.
600 */
601#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
602DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
603#else
604DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
605{
606# if RT_INLINE_ASM_GNU_STYLE
607# ifdef RT_ARCH_AMD64
608 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
609 __asm__ ("cpuid\n\t"
610 : "=a" (uRAX),
611 "=b" (uRBX),
612 "=c" (uRCX),
613 "=d" (uRDX)
614 : "0" (uOperator),
615 "2" (uIdxECX));
616 *(uint32_t *)pvEAX = (uint32_t)uRAX;
617 *(uint32_t *)pvEBX = (uint32_t)uRBX;
618 *(uint32_t *)pvECX = (uint32_t)uRCX;
619 *(uint32_t *)pvEDX = (uint32_t)uRDX;
620# else
621 __asm__ ("xchgl %%ebx, %1\n\t"
622 "cpuid\n\t"
623 "xchgl %%ebx, %1\n\t"
624 : "=a" (*(uint32_t *)pvEAX),
625 "=r" (*(uint32_t *)pvEBX),
626 "=c" (*(uint32_t *)pvECX),
627 "=d" (*(uint32_t *)pvEDX)
628 : "0" (uOperator),
629 "2" (uIdxECX));
630# endif
631
632# elif RT_INLINE_ASM_USES_INTRIN
633 int aInfo[4];
634 /** @todo This ignores uIdxECX; newer MSC versions provide __cpuidex(aInfo, uOperator, uIdxECX) for this. */
635 __cpuid(aInfo, uOperator);
636 *(uint32_t *)pvEAX = aInfo[0];
637 *(uint32_t *)pvEBX = aInfo[1];
638 *(uint32_t *)pvECX = aInfo[2];
639 *(uint32_t *)pvEDX = aInfo[3];
640
641# else
642 uint32_t uEAX;
643 uint32_t uEBX;
644 uint32_t uECX;
645 uint32_t uEDX;
646 __asm
647 {
648 push ebx
649 mov eax, [uOperator]
650 mov ecx, [uIdxECX]
651 cpuid
652 mov [uEAX], eax
653 mov [uEBX], ebx
654 mov [uECX], ecx
655 mov [uEDX], edx
656 pop ebx
657 }
658 *(uint32_t *)pvEAX = uEAX;
659 *(uint32_t *)pvEBX = uEBX;
660 *(uint32_t *)pvECX = uECX;
661 *(uint32_t *)pvEDX = uEDX;
662# endif
663}
664#endif
665
666
667/**
668 * Performs the cpuid instruction returning ecx and edx.
669 *
670 * @param uOperator CPUID operation (eax).
671 * @param pvECX Where to store ecx.
672 * @param pvEDX Where to store edx.
673 * @remark We're using void pointers to ease the use of special bitfield structures and such.
674 */
675#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
676DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
677#else
678DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
679{
680 uint32_t uEBX;
681 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
682}
683#endif
684
685
686/**
687 * Performs the cpuid instruction returning edx.
688 *
689 * @param uOperator CPUID operation (eax).
690 * @returns EDX after cpuid operation.
691 */
692#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
693DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
694#else
695DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
696{
697 RTCCUINTREG xDX;
698# if RT_INLINE_ASM_GNU_STYLE
699# ifdef RT_ARCH_AMD64
700 RTCCUINTREG uSpill;
701 __asm__ ("cpuid"
702 : "=a" (uSpill),
703 "=d" (xDX)
704 : "0" (uOperator)
705 : "rbx", "rcx");
706# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
707 __asm__ ("push %%ebx\n\t"
708 "cpuid\n\t"
709 "pop %%ebx\n\t"
710 : "=a" (uOperator),
711 "=d" (xDX)
712 : "0" (uOperator)
713 : "ecx");
714# else
715 __asm__ ("cpuid"
716 : "=a" (uOperator),
717 "=d" (xDX)
718 : "0" (uOperator)
719 : "ebx", "ecx");
720# endif
721
722# elif RT_INLINE_ASM_USES_INTRIN
723 int aInfo[4];
724 __cpuid(aInfo, uOperator);
725 xDX = aInfo[3];
726
727# else
728 __asm
729 {
730 push ebx
731 mov eax, [uOperator]
732 cpuid
733 mov [xDX], edx
734 pop ebx
735 }
736# endif
737 return (uint32_t)xDX;
738}
739#endif
740
741
742/**
743 * Performs the cpuid instruction returning ecx.
744 *
745 * @param uOperator CPUID operation (eax).
746 * @returns ECX after cpuid operation.
747 */
748#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
749DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
750#else
751DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
752{
753 RTCCUINTREG xCX;
754# if RT_INLINE_ASM_GNU_STYLE
755# ifdef RT_ARCH_AMD64
756 RTCCUINTREG uSpill;
757 __asm__ ("cpuid"
758 : "=a" (uSpill),
759 "=c" (xCX)
760 : "0" (uOperator)
761 : "rbx", "rdx");
762# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
763 __asm__ ("push %%ebx\n\t"
764 "cpuid\n\t"
765 "pop %%ebx\n\t"
766 : "=a" (uOperator),
767 "=c" (xCX)
768 : "0" (uOperator)
769 : "edx");
770# else
771 __asm__ ("cpuid"
772 : "=a" (uOperator),
773 "=c" (xCX)
774 : "0" (uOperator)
775 : "ebx", "edx");
776
777# endif
778
779# elif RT_INLINE_ASM_USES_INTRIN
780 int aInfo[4];
781 __cpuid(aInfo, uOperator);
782 xCX = aInfo[2];
783
784# else
785 __asm
786 {
787 push ebx
788 mov eax, [uOperator]
789 cpuid
790 mov [xCX], ecx
791 pop ebx
792 }
793# endif
794 return (uint32_t)xCX;
795}
796#endif
797
798
799/**
800 * Checks if the current CPU supports CPUID.
801 *
802 * @returns true if CPUID is supported.
803 */
804DECLINLINE(bool) ASMHasCpuId(void)
805{
806#ifdef RT_ARCH_AMD64
807 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
808#else /* !RT_ARCH_AMD64 */
809 bool fRet = false;
810# if RT_INLINE_ASM_GNU_STYLE
811 uint32_t u1;
812 uint32_t u2;
813 __asm__ ("pushf\n\t"
814 "pop %1\n\t"
815 "mov %1, %2\n\t"
816 "xorl $0x200000, %1\n\t"
817 "push %1\n\t"
818 "popf\n\t"
819 "pushf\n\t"
820 "pop %1\n\t"
821 "cmpl %1, %2\n\t"
822 "setne %0\n\t"
823 "push %2\n\t"
824 "popf\n\t"
825 : "=m" (fRet), "=r" (u1), "=r" (u2));
826# else
827 __asm
828 {
829 pushfd
830 pop eax
831 mov ebx, eax
832 xor eax, 0200000h
833 push eax
834 popfd
835 pushfd
836 pop eax
837 cmp eax, ebx
838 setne fRet
839 push ebx
840 popfd
841 }
842# endif
843 return fRet;
844#endif /* !RT_ARCH_AMD64 */
845}
846
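/* Usage sketch (not part of the original header): only issue CPUID after
 * checking that the CPU supports it (relevant for old 32-bit processors only).
 *
 *     uint32_t fFeaturesEdx = 0;
 *     if (ASMHasCpuId())
 *         fFeaturesEdx = ASMCpuId_EDX(1);       // standard feature flags
 */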
847
848/**
849 * Gets the APIC ID of the current CPU.
850 *
851 * @returns the APIC ID.
852 */
853#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
854DECLASM(uint8_t) ASMGetApicId(void);
855#else
856DECLINLINE(uint8_t) ASMGetApicId(void)
857{
858 RTCCUINTREG xBX;
859# if RT_INLINE_ASM_GNU_STYLE
860# ifdef RT_ARCH_AMD64
861 RTCCUINTREG uSpill;
862 __asm__ ("cpuid"
863 : "=a" (uSpill),
864 "=b" (xBX)
865 : "0" (1)
866 : "rcx", "rdx");
867# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
868 RTCCUINTREG uSpill;
869 __asm__ ("mov %%ebx,%1\n\t"
870 "cpuid\n\t"
871 "xchgl %%ebx,%1\n\t"
872 : "=a" (uSpill),
873 "=r" (xBX)
874 : "0" (1)
875 : "ecx", "edx");
876# else
877 RTCCUINTREG uSpill;
878 __asm__ ("cpuid"
879 : "=a" (uSpill),
880 "=b" (xBX)
881 : "0" (1)
882 : "ecx", "edx");
883# endif
884
885# elif RT_INLINE_ASM_USES_INTRIN
886 int aInfo[4];
887 __cpuid(aInfo, 1);
888 xBX = aInfo[1];
889
890# else
891 __asm
892 {
893 push ebx
894 mov eax, 1
895 cpuid
896 mov [xBX], ebx
897 pop ebx
898 }
899# endif
900 return (uint8_t)(xBX >> 24);
901}
902#endif
903
904/**
905 * Get cr0.
906 * @returns cr0.
907 */
908#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
909DECLASM(RTCCUINTREG) ASMGetCR0(void);
910#else
911DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
912{
913 RTCCUINTREG uCR0;
914# if RT_INLINE_ASM_USES_INTRIN
915 uCR0 = __readcr0();
916
917# elif RT_INLINE_ASM_GNU_STYLE
918# ifdef RT_ARCH_AMD64
919 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
920# else
921 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
922# endif
923# else
924 __asm
925 {
926# ifdef RT_ARCH_AMD64
927 mov rax, cr0
928 mov [uCR0], rax
929# else
930 mov eax, cr0
931 mov [uCR0], eax
932# endif
933 }
934# endif
935 return uCR0;
936}
937#endif
938
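/* Usage sketch (not part of the original header): testing whether paging is
 * enabled. The PG bit is bit 31 of CR0 per the x86 architecture; a symbolic
 * X86_CR0_PG constant would normally come from another x86 header.
 *
 *     bool const fPagingEnabled = (ASMGetCR0() & UINT32_C(0x80000000)) != 0;
 */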
939
940/**
941 * Sets the CR0 register.
942 * @param uCR0 The new CR0 value.
943 */
944#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
945DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
946#else
947DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
948{
949# if RT_INLINE_ASM_USES_INTRIN
950 __writecr0(uCR0);
951
952# elif RT_INLINE_ASM_GNU_STYLE
953# ifdef RT_ARCH_AMD64
954 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
955# else
956 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
957# endif
958# else
959 __asm
960 {
961# ifdef RT_ARCH_AMD64
962 mov rax, [uCR0]
963 mov cr0, rax
964# else
965 mov eax, [uCR0]
966 mov cr0, eax
967# endif
968 }
969# endif
970}
971#endif
972
973
974/**
975 * Get cr2.
976 * @returns cr2.
977 */
978#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
979DECLASM(RTCCUINTREG) ASMGetCR2(void);
980#else
981DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
982{
983 RTCCUINTREG uCR2;
984# if RT_INLINE_ASM_USES_INTRIN
985 uCR2 = __readcr2();
986
987# elif RT_INLINE_ASM_GNU_STYLE
988# ifdef RT_ARCH_AMD64
989 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
990# else
991 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
992# endif
993# else
994 __asm
995 {
996# ifdef RT_ARCH_AMD64
997 mov rax, cr2
998 mov [uCR2], rax
999# else
1000 mov eax, cr2
1001 mov [uCR2], eax
1002# endif
1003 }
1004# endif
1005 return uCR2;
1006}
1007#endif
1008
1009
1010/**
1011 * Sets the CR2 register.
1012 * @param uCR2 The new CR2 value.
1013 */
1014#if RT_INLINE_ASM_EXTERNAL
1015DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1016#else
1017DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1018{
1019# if RT_INLINE_ASM_GNU_STYLE
1020# ifdef RT_ARCH_AMD64
1021 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1022# else
1023 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1024# endif
1025# else
1026 __asm
1027 {
1028# ifdef RT_ARCH_AMD64
1029 mov rax, [uCR2]
1030 mov cr2, rax
1031# else
1032 mov eax, [uCR2]
1033 mov cr2, eax
1034# endif
1035 }
1036# endif
1037}
1038#endif
1039
1040
1041/**
1042 * Get cr3.
1043 * @returns cr3.
1044 */
1045#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1046DECLASM(RTCCUINTREG) ASMGetCR3(void);
1047#else
1048DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1049{
1050 RTCCUINTREG uCR3;
1051# if RT_INLINE_ASM_USES_INTRIN
1052 uCR3 = __readcr3();
1053
1054# elif RT_INLINE_ASM_GNU_STYLE
1055# ifdef RT_ARCH_AMD64
1056 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
1057# else
1058 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
1059# endif
1060# else
1061 __asm
1062 {
1063# ifdef RT_ARCH_AMD64
1064 mov rax, cr3
1065 mov [uCR3], rax
1066# else
1067 mov eax, cr3
1068 mov [uCR3], eax
1069# endif
1070 }
1071# endif
1072 return uCR3;
1073}
1074#endif
1075
1076
1077/**
1078 * Sets the CR3 register.
1079 *
1080 * @param uCR3 New CR3 value.
1081 */
1082#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1083DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1084#else
1085DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1086{
1087# if RT_INLINE_ASM_USES_INTRIN
1088 __writecr3(uCR3);
1089
1090# elif RT_INLINE_ASM_GNU_STYLE
1091# ifdef RT_ARCH_AMD64
1092 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1093# else
1094 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1095# endif
1096# else
1097 __asm
1098 {
1099# ifdef RT_ARCH_AMD64
1100 mov rax, [uCR3]
1101 mov cr3, rax
1102# else
1103 mov eax, [uCR3]
1104 mov cr3, eax
1105# endif
1106 }
1107# endif
1108}
1109#endif
1110
1111
1112/**
1113 * Reloads the CR3 register.
1114 */
1115#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1116DECLASM(void) ASMReloadCR3(void);
1117#else
1118DECLINLINE(void) ASMReloadCR3(void)
1119{
1120# if RT_INLINE_ASM_USES_INTRIN
1121 __writecr3(__readcr3());
1122
1123# elif RT_INLINE_ASM_GNU_STYLE
1124 RTCCUINTREG u;
1125# ifdef RT_ARCH_AMD64
1126 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1127 "movq %0, %%cr3\n\t"
1128 : "=r" (u));
1129# else
1130 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1131 "movl %0, %%cr3\n\t"
1132 : "=r" (u));
1133# endif
1134# else
1135 __asm
1136 {
1137# ifdef RT_ARCH_AMD64
1138 mov rax, cr3
1139 mov cr3, rax
1140# else
1141 mov eax, cr3
1142 mov cr3, eax
1143# endif
1144 }
1145# endif
1146}
1147#endif
1148
1149
1150/**
1151 * Get cr4.
1152 * @returns cr4.
1153 */
1154#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1155DECLASM(RTCCUINTREG) ASMGetCR4(void);
1156#else
1157DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1158{
1159 RTCCUINTREG uCR4;
1160# if RT_INLINE_ASM_USES_INTRIN
1161 uCR4 = __readcr4();
1162
1163# elif RT_INLINE_ASM_GNU_STYLE
1164# ifdef RT_ARCH_AMD64
1165 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1166# else
1167 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1168# endif
1169# else
1170 __asm
1171 {
1172# ifdef RT_ARCH_AMD64
1173 mov rax, cr4
1174 mov [uCR4], rax
1175# else
1176 push eax /* just in case */
1177 /*mov eax, cr4*/
1178 _emit 0x0f
1179 _emit 0x20
1180 _emit 0xe0
1181 mov [uCR4], eax
1182 pop eax
1183# endif
1184 }
1185# endif
1186 return uCR4;
1187}
1188#endif
1189
1190
1191/**
1192 * Sets the CR4 register.
1193 *
1194 * @param uCR4 New CR4 value.
1195 */
1196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1197DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1198#else
1199DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1200{
1201# if RT_INLINE_ASM_USES_INTRIN
1202 __writecr4(uCR4);
1203
1204# elif RT_INLINE_ASM_GNU_STYLE
1205# ifdef RT_ARCH_AMD64
1206 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1207# else
1208 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1209# endif
1210# else
1211 __asm
1212 {
1213# ifdef RT_ARCH_AMD64
1214 mov rax, [uCR4]
1215 mov cr4, rax
1216# else
1217 mov eax, [uCR4]
1218 _emit 0x0F
1219 _emit 0x22
1220 _emit 0xE0 /* mov cr4, eax */
1221# endif
1222 }
1223# endif
1224}
1225#endif
1226
1227
1228/**
1229 * Get cr8.
1230 * @returns cr8.
1231 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1232 */
1233#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1234DECLASM(RTCCUINTREG) ASMGetCR8(void);
1235#else
1236DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1237{
1238# ifdef RT_ARCH_AMD64
1239 RTCCUINTREG uCR8;
1240# if RT_INLINE_ASM_USES_INTRIN
1241 uCR8 = __readcr8();
1242
1243# elif RT_INLINE_ASM_GNU_STYLE
1244 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1245# else
1246 __asm
1247 {
1248 mov rax, cr8
1249 mov [uCR8], rax
1250 }
1251# endif
1252 return uCR8;
1253# else /* !RT_ARCH_AMD64 */
1254 return 0;
1255# endif /* !RT_ARCH_AMD64 */
1256}
1257#endif
1258
1259
1260/**
1261 * Enables interrupts (EFLAGS.IF).
1262 */
1263#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1264DECLASM(void) ASMIntEnable(void);
1265#else
1266DECLINLINE(void) ASMIntEnable(void)
1267{
1268# if RT_INLINE_ASM_GNU_STYLE
1269 __asm("sti\n");
1270# elif RT_INLINE_ASM_USES_INTRIN
1271 _enable();
1272# else
1273 __asm sti
1274# endif
1275}
1276#endif
1277
1278
1279/**
1280 * Disables interrupts (!EFLAGS.IF).
1281 */
1282#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1283DECLASM(void) ASMIntDisable(void);
1284#else
1285DECLINLINE(void) ASMIntDisable(void)
1286{
1287# if RT_INLINE_ASM_GNU_STYLE
1288 __asm("cli\n");
1289# elif RT_INLINE_ASM_USES_INTRIN
1290 _disable();
1291# else
1292 __asm cli
1293# endif
1294}
1295#endif
1296
1297
1298/**
1299 * Disables interrupts and returns previous xFLAGS.
1300 */
1301#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1302DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1303#else
1304DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1305{
1306 RTCCUINTREG xFlags;
1307# if RT_INLINE_ASM_GNU_STYLE
1308# ifdef RT_ARCH_AMD64
1309 __asm__ __volatile__("pushfq\n\t"
1310 "cli\n\t"
1311 "popq %0\n\t"
1312 : "=rm" (xFlags));
1313# else
1314 __asm__ __volatile__("pushfl\n\t"
1315 "cli\n\t"
1316 "popl %0\n\t"
1317 : "=rm" (xFlags));
1318# endif
1319# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1320 xFlags = ASMGetFlags();
1321 _disable();
1322# else
1323 __asm {
1324 pushfd
1325 cli
1326 pop [xFlags]
1327 }
1328# endif
1329 return xFlags;
1330}
1331#endif
1332
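/* Usage sketch (not part of the original header): a short critical section
 * with interrupts disabled, restoring the previous interrupt state afterwards
 * so that nested use stays correct. s_uPerCpuCounter is hypothetical.
 *
 *     RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *     s_uPerCpuCounter++;                       // hypothetical per-CPU state
 *     ASMSetFlags(fSavedFlags);
 */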
1333
1334/**
1335 * Reads a machine specific register.
1336 *
1337 * @returns Register content.
1338 * @param uRegister Register to read.
1339 */
1340#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1341DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1342#else
1343DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1344{
1345 RTUINT64U u;
1346# if RT_INLINE_ASM_GNU_STYLE
1347 __asm__ ("rdmsr\n\t"
1348 : "=a" (u.s.Lo),
1349 "=d" (u.s.Hi)
1350 : "c" (uRegister));
1351
1352# elif RT_INLINE_ASM_USES_INTRIN
1353 u.u = __readmsr(uRegister);
1354
1355# else
1356 __asm
1357 {
1358 mov ecx, [uRegister]
1359 rdmsr
1360 mov [u.s.Lo], eax
1361 mov [u.s.Hi], edx
1362 }
1363# endif
1364
1365 return u.u;
1366}
1367#endif
1368
1369
1370/**
1371 * Writes a machine specific register.
1372 *
1374 * @param uRegister Register to write to.
1375 * @param u64Val Value to write.
1376 */
1377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1378DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1379#else
1380DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1381{
1382 RTUINT64U u;
1383
1384 u.u = u64Val;
1385# if RT_INLINE_ASM_GNU_STYLE
1386 __asm__ __volatile__("wrmsr\n\t"
1387 ::"a" (u.s.Lo),
1388 "d" (u.s.Hi),
1389 "c" (uRegister));
1390
1391# elif RT_INLINE_ASM_USES_INTRIN
1392 __writemsr(uRegister, u.u);
1393
1394# else
1395 __asm
1396 {
1397 mov ecx, [uRegister]
1398 mov edx, [u.s.Hi]
1399 mov eax, [u.s.Lo]
1400 wrmsr
1401 }
1402# endif
1403}
1404#endif
1405
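/* Usage sketch (not part of the original header): read-modify-write of an MSR.
 * uMsr and the bit being set are placeholders; real code would use a defined
 * MSR number and mask.
 *
 *     uint64_t u64 = ASMRdMsr(uMsr);
 *     u64 |= UINT64_C(1);                       // set bit 0 (illustrative)
 *     ASMWrMsr(uMsr, u64);
 */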
1406
1407/**
1408 * Reads low part of a machine specific register.
1409 *
1410 * @returns Register content.
1411 * @param uRegister Register to read.
1412 */
1413#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1414DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1415#else
1416DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1417{
1418 uint32_t u32;
1419# if RT_INLINE_ASM_GNU_STYLE
1420 __asm__ ("rdmsr\n\t"
1421 : "=a" (u32)
1422 : "c" (uRegister)
1423 : "edx");
1424
1425# elif RT_INLINE_ASM_USES_INTRIN
1426 u32 = (uint32_t)__readmsr(uRegister);
1427
1428# else
1429 __asm
1430 {
1431 mov ecx, [uRegister]
1432 rdmsr
1433 mov [u32], eax
1434 }
1435# endif
1436
1437 return u32;
1438}
1439#endif
1440
1441
1442/**
1443 * Reads high part of a machine specific register.
1444 *
1445 * @returns Register content.
1446 * @param uRegister Register to read.
1447 */
1448#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1449DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1450#else
1451DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1452{
1453 uint32_t u32;
1454# if RT_INLINE_ASM_GNU_STYLE
1455 __asm__ ("rdmsr\n\t"
1456 : "=d" (u32)
1457 : "c" (uRegister)
1458 : "eax");
1459
1460# elif RT_INLINE_ASM_USES_INTRIN
1461 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1462
1463# else
1464 __asm
1465 {
1466 mov ecx, [uRegister]
1467 rdmsr
1468 mov [u32], edx
1469 }
1470# endif
1471
1472 return u32;
1473}
1474#endif
1475
1476
1477/**
1478 * Gets dr7.
1479 *
1480 * @returns dr7.
1481 */
1482#if RT_INLINE_ASM_EXTERNAL
1483DECLASM(RTCCUINTREG) ASMGetDR7(void);
1484#else
1485DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1486{
1487 RTCCUINTREG uDR7;
1488# if RT_INLINE_ASM_GNU_STYLE
1489# ifdef RT_ARCH_AMD64
1490 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1491# else
1492 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1493# endif
1494# else
1495 __asm
1496 {
1497# ifdef RT_ARCH_AMD64
1498 mov rax, dr7
1499 mov [uDR7], rax
1500# else
1501 mov eax, dr7
1502 mov [uDR7], eax
1503# endif
1504 }
1505# endif
1506 return uDR7;
1507}
1508#endif
1509
1510
1511/**
1512 * Gets dr6.
1513 *
1514 * @returns dr6.
1515 */
1516#if RT_INLINE_ASM_EXTERNAL
1517DECLASM(RTCCUINTREG) ASMGetDR6(void);
1518#else
1519DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1520{
1521 RTCCUINTREG uDR6;
1522# if RT_INLINE_ASM_GNU_STYLE
1523# ifdef RT_ARCH_AMD64
1524 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1525# else
1526 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1527# endif
1528# else
1529 __asm
1530 {
1531# ifdef RT_ARCH_AMD64
1532 mov rax, dr6
1533 mov [uDR6], rax
1534# else
1535 mov eax, dr6
1536 mov [uDR6], eax
1537# endif
1538 }
1539# endif
1540 return uDR6;
1541}
1542#endif
1543
1544
1545/**
1546 * Reads and clears DR6.
1547 *
1548 * @returns DR6.
1549 */
1550#if RT_INLINE_ASM_EXTERNAL
1551DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1552#else
1553DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1554{
1555 RTCCUINTREG uDR6;
1556# if RT_INLINE_ASM_GNU_STYLE
1557 RTCCUINTREG uNewValue = 0xffff0ff0; /* bits 31-16 and 11-4 are set; bits 63-32, 15-12 and 3-0 are clear. */
1558# ifdef RT_ARCH_AMD64
1559 __asm__ ("movq %%dr6, %0\n\t"
1560 "movq %1, %%dr6\n\t"
1561 : "=r" (uDR6)
1562 : "r" (uNewValue));
1563# else
1564 __asm__ ("movl %%dr6, %0\n\t"
1565 "movl %1, %%dr6\n\t"
1566 : "=r" (uDR6)
1567 : "r" (uNewValue));
1568# endif
1569# else
1570 __asm
1571 {
1572# ifdef RT_ARCH_AMD64
1573 mov rax, dr6
1574 mov [uDR6], rax
1575 mov rcx, rax
1576 mov ecx, 0ffff0ff0h; /* bits 31-16 and 11-4 set; bits 63-32, 15-12 and 3-0 clear. */
1577 mov dr6, rcx
1578# else
1579 mov eax, dr6
1580 mov [uDR6], eax
1581 mov ecx, 0ffff0ff0h; /* bits 31-16 and 11-4 set; bits 15-12 and 3-0 clear. */
1582 mov dr6, ecx
1583# endif
1584 }
1585# endif
1586 return uDR6;
1587}
1588#endif
1589
1590
1591/**
1592 * Compiler memory barrier.
1593 *
1594 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1595 * values or any outstanding writes when returning from this function.
1596 *
1597 * This function must be used if non-volatile data is modified by a
1598 * device or the VMM. Typical cases are port access, MMIO access,
1599 * trapping instruction, etc.
1600 */
1601#if RT_INLINE_ASM_GNU_STYLE
1602# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1603#elif RT_INLINE_ASM_USES_INTRIN
1604# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1605#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1606DECLINLINE(void) ASMCompilerBarrier(void)
1607{
1608 __asm
1609 {
1610 }
1611}
1612#endif
1613
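/* Usage sketch (not part of the original header): making sure the compiler has
 * flushed a non-volatile flag to memory before poking a device. The flag name
 * and port number are illustrative only.
 *
 *     g_fRequestPending = true;
 *     ASMCompilerBarrier();                     // no caching across this point
 *     ASMOutU8(0x510, 1);                       // hypothetical doorbell port
 */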
1614
1615/**
1616 * Writes a 8-bit unsigned integer to an I/O port.
1617 *
1618 * @param Port I/O port to read from.
1619 * @param u8 8-bit integer to write.
1620 */
1621#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1622DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1623#else
1624DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1625{
1626# if RT_INLINE_ASM_GNU_STYLE
1627 __asm__ __volatile__("outb %b1, %w0\n\t"
1628 :: "Nd" (Port),
1629 "a" (u8));
1630
1631# elif RT_INLINE_ASM_USES_INTRIN
1632 __outbyte(Port, u8);
1633
1634# else
1635 __asm
1636 {
1637 mov dx, [Port]
1638 mov al, [u8]
1639 out dx, al
1640 }
1641# endif
1642}
1643#endif
1644
1645
1646/**
1647 * Gets a 8-bit unsigned integer from an I/O port.
1648 *
1649 * @returns 8-bit integer.
1650 * @param Port I/O port to read from.
1651 */
1652#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1653DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1654#else
1655DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1656{
1657 uint8_t u8;
1658# if RT_INLINE_ASM_GNU_STYLE
1659 __asm__ __volatile__("inb %w1, %b0\n\t"
1660 : "=a" (u8)
1661 : "Nd" (Port));
1662
1663# elif RT_INLINE_ASM_USES_INTRIN
1664 u8 = __inbyte(Port);
1665
1666# else
1667 __asm
1668 {
1669 mov dx, [Port]
1670 in al, dx
1671 mov [u8], al
1672 }
1673# endif
1674 return u8;
1675}
1676#endif
1677
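/* Usage sketch (not part of the original header): reading a CMOS/RTC register
 * through the classic index/data port pair 0x70/0x71 (PC architecture, shown
 * for illustration only).
 *
 *     ASMOutU8(0x70, 0x0a);                     // select status register A
 *     uint8_t const bStatusA = ASMInU8(0x71);
 */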
1678
1679/**
1680 * Writes a 16-bit unsigned integer to an I/O port.
1681 *
1682 * @param Port I/O port to read from.
1683 * @param u16 16-bit integer to write.
1684 */
1685#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1686DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1687#else
1688DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1689{
1690# if RT_INLINE_ASM_GNU_STYLE
1691 __asm__ __volatile__("outw %w1, %w0\n\t"
1692 :: "Nd" (Port),
1693 "a" (u16));
1694
1695# elif RT_INLINE_ASM_USES_INTRIN
1696 __outword(Port, u16);
1697
1698# else
1699 __asm
1700 {
1701 mov dx, [Port]
1702 mov ax, [u16]
1703 out dx, ax
1704 }
1705# endif
1706}
1707#endif
1708
1709
1710/**
1711 * Gets a 16-bit unsigned integer from an I/O port.
1712 *
1713 * @returns 16-bit integer.
1714 * @param Port I/O port to read from.
1715 */
1716#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1717DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1718#else
1719DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1720{
1721 uint16_t u16;
1722# if RT_INLINE_ASM_GNU_STYLE
1723 __asm__ __volatile__("inw %w1, %w0\n\t"
1724 : "=a" (u16)
1725 : "Nd" (Port));
1726
1727# elif RT_INLINE_ASM_USES_INTRIN
1728 u16 = __inword(Port);
1729
1730# else
1731 __asm
1732 {
1733 mov dx, [Port]
1734 in ax, dx
1735 mov [u16], ax
1736 }
1737# endif
1738 return u16;
1739}
1740#endif
1741
1742
1743/**
1744 * Writes a 32-bit unsigned integer to an I/O port.
1745 *
1746 * @param Port I/O port to read from.
1747 * @param u32 32-bit integer to write.
1748 */
1749#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1750DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1751#else
1752DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1753{
1754# if RT_INLINE_ASM_GNU_STYLE
1755 __asm__ __volatile__("outl %1, %w0\n\t"
1756 :: "Nd" (Port),
1757 "a" (u32));
1758
1759# elif RT_INLINE_ASM_USES_INTRIN
1760 __outdword(Port, u32);
1761
1762# else
1763 __asm
1764 {
1765 mov dx, [Port]
1766 mov eax, [u32]
1767 out dx, eax
1768 }
1769# endif
1770}
1771#endif
1772
1773
1774/**
1775 * Gets a 32-bit unsigned integer from an I/O port.
1776 *
1777 * @returns 32-bit integer.
1778 * @param Port I/O port to read from.
1779 */
1780#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1781DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1782#else
1783DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1784{
1785 uint32_t u32;
1786# if RT_INLINE_ASM_GNU_STYLE
1787 __asm__ __volatile__("inl %w1, %0\n\t"
1788 : "=a" (u32)
1789 : "Nd" (Port));
1790
1791# elif RT_INLINE_ASM_USES_INTRIN
1792 u32 = __indword(Port);
1793
1794# else
1795 __asm
1796 {
1797 mov dx, [Port]
1798 in eax, dx
1799 mov [u32], eax
1800 }
1801# endif
1802 return u32;
1803}
1804#endif
1805
1806
1807/**
1808 * Atomically Exchange an unsigned 8-bit value.
1809 *
1810 * @returns Current *pu8 value
1811 * @param pu8 Pointer to the 8-bit variable to update.
1812 * @param u8 The 8-bit value to assign to *pu8.
1813 */
1814#if RT_INLINE_ASM_EXTERNAL
1815DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1816#else
1817DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1818{
1819# if RT_INLINE_ASM_GNU_STYLE
1820 __asm__ __volatile__("xchgb %0, %1\n\t"
1821 : "=m" (*pu8),
1822 "=r" (u8)
1823 : "1" (u8));
1824# else
1825 __asm
1826 {
1827# ifdef RT_ARCH_AMD64
1828 mov rdx, [pu8]
1829 mov al, [u8]
1830 xchg [rdx], al
1831 mov [u8], al
1832# else
1833 mov edx, [pu8]
1834 mov al, [u8]
1835 xchg [edx], al
1836 mov [u8], al
1837# endif
1838 }
1839# endif
1840 return u8;
1841}
1842#endif
1843
1844
1845/**
1846 * Atomically Exchange a signed 8-bit value.
1847 *
1848 * @returns Current *pi8 value
1849 * @param pi8 Pointer to the 8-bit variable to update.
1850 * @param i8 The 8-bit value to assign to *pi8.
1851 */
1852DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1853{
1854 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1855}
1856
1857
1858/**
1859 * Atomically Exchange a bool value.
1860 *
1861 * @returns Current *pf value
1862 * @param pf Pointer to the boolean variable to update.
1863 * @param f The boolean value to assign to *pf.
1864 */
1865DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1866{
1867#ifdef _MSC_VER
1868 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1869#else
1870 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1871#endif
1872}
1873
1874
1875/**
1876 * Atomically Exchange an unsigned 16-bit value.
1877 *
1878 * @returns Current *pu16 value
1879 * @param pu16 Pointer to the 16-bit variable to update.
1880 * @param u16 The 16-bit value to assign to *pu16.
1881 */
1882#if RT_INLINE_ASM_EXTERNAL
1883DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1884#else
1885DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1886{
1887# if RT_INLINE_ASM_GNU_STYLE
1888 __asm__ __volatile__("xchgw %0, %1\n\t"
1889 : "=m" (*pu16),
1890 "=r" (u16)
1891 : "1" (u16));
1892# else
1893 __asm
1894 {
1895# ifdef RT_ARCH_AMD64
1896 mov rdx, [pu16]
1897 mov ax, [u16]
1898 xchg [rdx], ax
1899 mov [u16], ax
1900# else
1901 mov edx, [pu16]
1902 mov ax, [u16]
1903 xchg [edx], ax
1904 mov [u16], ax
1905# endif
1906 }
1907# endif
1908 return u16;
1909}
1910#endif
1911
1912
1913/**
1914 * Atomically Exchange a signed 16-bit value.
1915 *
1916 * @returns Current *pi16 value
1917 * @param pi16 Pointer to the 16-bit variable to update.
1918 * @param i16 The 16-bit value to assign to *pi16.
1919 */
1920DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1921{
1922 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1923}
1924
1925
1926/**
1927 * Atomically Exchange an unsigned 32-bit value.
1928 *
1929 * @returns Current *pu32 value
1930 * @param pu32 Pointer to the 32-bit variable to update.
1931 * @param u32 The 32-bit value to assign to *pu32.
1932 */
1933#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1934DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1935#else
1936DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1937{
1938# if RT_INLINE_ASM_GNU_STYLE
1939 __asm__ __volatile__("xchgl %0, %1\n\t"
1940 : "=m" (*pu32),
1941 "=r" (u32)
1942 : "1" (u32));
1943
1944# elif RT_INLINE_ASM_USES_INTRIN
1945 u32 = _InterlockedExchange((long *)pu32, u32);
1946
1947# else
1948 __asm
1949 {
1950# ifdef RT_ARCH_AMD64
1951 mov rdx, [pu32]
1952 mov eax, u32
1953 xchg [rdx], eax
1954 mov [u32], eax
1955# else
1956 mov edx, [pu32]
1957 mov eax, u32
1958 xchg [edx], eax
1959 mov [u32], eax
1960# endif
1961 }
1962# endif
1963 return u32;
1964}
1965#endif
1966
1967
1968/**
1969 * Atomically Exchange a signed 32-bit value.
1970 *
1971 * @returns Current *pi32 value
1972 * @param pi32 Pointer to the 32-bit variable to update.
1973 * @param i32 The 32-bit value to assign to *pi32.
1974 */
1975DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1976{
1977 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1978}
1979
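/* Usage sketch (not part of the original header): claiming a one-shot job flag
 * so only the caller that observes the old value 1 runs the job. s_fJobPending
 * and ProcessJob() are hypothetical.
 *
 *     if (ASMAtomicXchgU32(&s_fJobPending, 0) == 1)
 *         ProcessJob();
 */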
1980
1981/**
1982 * Atomically Exchange an unsigned 64-bit value.
1983 *
1984 * @returns Current *pu64 value
1985 * @param pu64 Pointer to the 64-bit variable to update.
1986 * @param u64 The 64-bit value to assign to *pu64.
1987 */
1988#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1989DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1990#else
1991DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1992{
1993# if defined(RT_ARCH_AMD64)
1994# if RT_INLINE_ASM_USES_INTRIN
1995 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1996
1997# elif RT_INLINE_ASM_GNU_STYLE
1998 __asm__ __volatile__("xchgq %0, %1\n\t"
1999 : "=m" (*pu64),
2000 "=r" (u64)
2001 : "1" (u64));
2002# else
2003 __asm
2004 {
2005 mov rdx, [pu64]
2006 mov rax, [u64]
2007 xchg [rdx], rax
2008 mov [u64], rax
2009 }
2010# endif
2011# else /* !RT_ARCH_AMD64 */
2012# if RT_INLINE_ASM_GNU_STYLE
2013# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2014 uint32_t u32 = (uint32_t)u64;
2015 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2016 "xchgl %%ebx, %3\n\t"
2017 "1:\n\t"
2018 "lock; cmpxchg8b (%5)\n\t"
2019 "jnz 1b\n\t"
2020 "xchgl %%ebx, %3\n\t"
2021 /*"xchgl %%esi, %5\n\t"*/
2022 : "=A" (u64),
2023 "=m" (*pu64)
2024 : "0" (*pu64),
2025 "m" ( u32 ),
2026 "c" ( (uint32_t)(u64 >> 32) ),
2027 "S" (pu64) );
2028# else /* !PIC */
2029 __asm__ __volatile__("1:\n\t"
2030 "lock; cmpxchg8b %1\n\t"
2031 "jnz 1b\n\t"
2032 : "=A" (u64),
2033 "=m" (*pu64)
2034 : "0" (*pu64),
2035 "b" ( (uint32_t)u64 ),
2036 "c" ( (uint32_t)(u64 >> 32) ));
2037# endif
2038# else
2039 __asm
2040 {
2041 mov ebx, dword ptr [u64]
2042 mov ecx, dword ptr [u64 + 4]
2043 mov edi, pu64
2044 mov eax, dword ptr [edi]
2045 mov edx, dword ptr [edi + 4]
2046 retry:
2047 lock cmpxchg8b [edi]
2048 jnz retry
2049 mov dword ptr [u64], eax
2050 mov dword ptr [u64 + 4], edx
2051 }
2052# endif
2053# endif /* !RT_ARCH_AMD64 */
2054 return u64;
2055}
2056#endif
2057
2058
2059/**
2060 * Atomically Exchange a signed 64-bit value.
2061 *
2062 * @returns Current *pi64 value
2063 * @param pi64 Pointer to the 64-bit variable to update.
2064 * @param i64 The 64-bit value to assign to *pi64.
2065 */
2066DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2067{
2068 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2069}
2070
2071
2072#ifdef RT_ARCH_AMD64
2073/**
2074 * Atomically Exchange an unsigned 128-bit value.
2075 *
2076 * @returns Current *pu128.
2077 * @param pu128 Pointer to the 128-bit variable to update.
2078 * @param u128 The 128-bit value to assign to *pu128.
2079 *
2080 * @remark We cannot really assume that any hardware supports this. Nor do I have
2081 * GAS support for it. So, for the time being we'll BREAK the atomic
2082 * bit of this function and use two 64-bit exchanges instead.
2083 */
2084# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2085DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2086# else
2087DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2088{
2089 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2090 {
2091 /** @todo this is clumsy code */
2092 RTUINT128U u128Ret;
2093 u128Ret.u = u128;
2094 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2095 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2096 return u128Ret.u;
2097 }
2098#if 0 /* later? */
2099 else
2100 {
2101# if RT_INLINE_ASM_GNU_STYLE
2102 __asm__ __volatile__("1:\n\t"
2103 "lock; cmpxchg8b %1\n\t"
2104 "jnz 1b\n\t"
2105 : "=A" (u128),
2106 "=m" (*pu128)
2107 : "0" (*pu128),
2108 "b" ( (uint64_t)u128 ),
2109 "c" ( (uint64_t)(u128 >> 64) ));
2110# else
2111 __asm
2112 {
2113 mov rbx, dword ptr [u128]
2114 mov rcx, dword ptr [u128 + 4]
2115 mov rdi, pu128
2116 mov rax, dword ptr [rdi]
2117 mov rdx, dword ptr [rdi + 4]
2118 retry:
2119 lock cmpxchg16b [rdi]
2120 jnz retry
2121 mov dword ptr [u128], rax
2122 mov dword ptr [u128 + 4], rdx
2123 }
2124# endif
2125 }
2126 return u128;
2127#endif
2128}
2129# endif
2130#endif /* RT_ARCH_AMD64 */
2131
2132
2133/**
2134 * Atomically Reads an unsigned 64-bit value.
2135 *
2136 * @returns Current *pu64 value
2137 * @param pu64 Pointer to the 64-bit variable to read.
2138 * The memory pointed to must be writable.
2139 * @remark This will fault if the memory is read-only!
2140 */
2141#if RT_INLINE_ASM_EXTERNAL
2142DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2143#else
2144DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2145{
2146 uint64_t u64;
2147# ifdef RT_ARCH_AMD64
2148# if RT_INLINE_ASM_GNU_STYLE
2149 __asm__ __volatile__("movq %1, %0\n\t"
2150 : "=r" (u64)
2151 : "m" (*pu64));
2152# else
2153 __asm
2154 {
2155 mov rdx, [pu64]
2156 mov rax, [rdx]
2157 mov [u64], rax
2158 }
2159# endif
2160# else /* !RT_ARCH_AMD64 */
2161# if RT_INLINE_ASM_GNU_STYLE
2162# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2163 uint32_t u32EBX = 0;
2164 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2165 "lock; cmpxchg8b (%5)\n\t"
2166 "xchgl %%ebx, %3\n\t"
2167 : "=A" (u64),
2168 "=m" (*pu64)
2169 : "0" (0),
2170 "m" (u32EBX),
2171 "c" (0),
2172 "S" (pu64));
2173# else /* !PIC */
2174 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2175 : "=A" (u64),
2176 "=m" (*pu64)
2177 : "0" (0),
2178 "b" (0),
2179 "c" (0));
2180# endif
2181# else
2182 __asm
2183 {
2184 xor eax, eax
2185 xor edx, edx
2186 mov edi, pu64
2187 xor ecx, ecx
2188 xor ebx, ebx
2189 lock cmpxchg8b [edi]
2190 mov dword ptr [u64], eax
2191 mov dword ptr [u64 + 4], edx
2192 }
2193# endif
2194# endif /* !RT_ARCH_AMD64 */
2195 return u64;
2196}
2197#endif
2198
2199
2200/**
2201 * Atomically Reads a signed 64-bit value.
2202 *
2203 * @returns Current *pi64 value
2204 * @param pi64 Pointer to the 64-bit variable to read.
2205 * The memory pointed to must be writable.
2206 * @remark This will fault if the memory is read-only!
2207 */
2208DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2209{
2210 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2211}
2212
2213
2214/**
2215 * Atomically Exchange a value whose size might differ
2216 * between platforms or compilers.
2217 *
2218 * @param pu Pointer to the variable to update.
2219 * @param uNew The value to assign to *pu.
2220 */
2221#define ASMAtomicXchgSize(pu, uNew) \
2222 do { \
2223 switch (sizeof(*(pu))) { \
2224 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2225 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2226 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2227 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2228 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2229 } \
2230 } while (0)
2231
2232
2233/**
2234 * Atomically Exchange a pointer value.
2235 *
2236 * @returns Current *ppv value
2237 * @param ppv Pointer to the pointer variable to update.
2238 * @param pv The pointer value to assign to *ppv.
2239 */
2240DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2241{
2242#if ARCH_BITS == 32
2243 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2244#elif ARCH_BITS == 64
2245 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2246#else
2247# error "ARCH_BITS is bogus"
2248#endif
2249}
2250
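/* Usage sketch (not part of the original header): detaching a producer/consumer
 * list head in one atomic operation. s_pvPendingHead and ProcessList() are
 * hypothetical.
 *
 *     void *pvList = ASMAtomicXchgPtr(&s_pvPendingHead, NULL);
 *     if (pvList)
 *         ProcessList(pvList);
 */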
2251
2252/**
2253 * Atomically Compare and Exchange an unsigned 32-bit value.
2254 *
2255 * @returns true if xchg was done.
2256 * @returns false if xchg wasn't done.
2257 *
2258 * @param pu32 Pointer to the value to update.
2259 * @param u32New The new value to assign to *pu32.
2260 * @param u32Old The old value to *pu32 compare with.
2261 */
2262#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2263DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2264#else
2265DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2266{
2267# if RT_INLINE_ASM_GNU_STYLE
2268 uint32_t u32Ret;
2269 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2270 "setz %%al\n\t"
2271 "movzbl %%al, %%eax\n\t"
2272 : "=m" (*pu32),
2273 "=a" (u32Ret)
2274 : "r" (u32New),
2275 "1" (u32Old));
2276 return (bool)u32Ret;
2277
2278# elif RT_INLINE_ASM_USES_INTRIN
2279 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2280
2281# else
2282 uint32_t u32Ret;
2283 __asm
2284 {
2285# ifdef RT_ARCH_AMD64
2286 mov rdx, [pu32]
2287# else
2288 mov edx, [pu32]
2289# endif
2290 mov eax, [u32Old]
2291 mov ecx, [u32New]
2292# ifdef RT_ARCH_AMD64
2293 lock cmpxchg [rdx], ecx
2294# else
2295 lock cmpxchg [edx], ecx
2296# endif
2297 setz al
2298 movzx eax, al
2299 mov [u32Ret], eax
2300 }
2301 return !!u32Ret;
2302# endif
2303}
2304#endif
2305
2306
2307/**
2308 * Atomically Compare and Exchange a signed 32-bit value.
2309 *
2310 * @returns true if xchg was done.
2311 * @returns false if xchg wasn't done.
2312 *
2313 * @param pi32 Pointer to the value to update.
2314 * @param i32New The new value to assign to *pi32.
2315 * @param i32Old The old value to *pi32 compare with.
2316 */
2317DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2318{
2319 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2320}
2321
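/* Usage sketch (not part of the original header): a compare-and-swap retry
 * loop that increments a shared counter; s_cUsers is a hypothetical
 * volatile uint32_t.
 *
 *     uint32_t cOld;
 *     do
 *         cOld = s_cUsers;
 *     while (!ASMAtomicCmpXchgU32(&s_cUsers, cOld + 1, cOld));
 */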
2322
2323/**
2324 * Atomically Compare and exchange an unsigned 64-bit value.
2325 *
2326 * @returns true if xchg was done.
2327 * @returns false if xchg wasn't done.
2328 *
2329 * @param pu64 Pointer to the 64-bit variable to update.
2330 * @param u64New The 64-bit value to assign to *pu64.
2331 * @param u64Old The value to compare with.
2332 */
2333#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2334DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2335#else
2336DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2337{
2338# if RT_INLINE_ASM_USES_INTRIN
2339 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2340
2341# elif defined(RT_ARCH_AMD64)
2342# if RT_INLINE_ASM_GNU_STYLE
2343 uint64_t u64Ret;
2344 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2345 "setz %%al\n\t"
2346 "movzbl %%al, %%eax\n\t"
2347 : "=m" (*pu64),
2348 "=a" (u64Ret)
2349 : "r" (u64New),
2350 "1" (u64Old));
2351 return (bool)u64Ret;
2352# else
2353 bool fRet;
2354 __asm
2355 {
2356 mov rdx, [pu64]
2357 mov rax, [u64Old]
2358 mov rcx, [u64New]
2359 lock cmpxchg [rdx], rcx
2360 setz al
2361 mov [fRet], al
2362 }
2363 return fRet;
2364# endif
2365# else /* !RT_ARCH_AMD64 */
2366 uint32_t u32Ret;
2367# if RT_INLINE_ASM_GNU_STYLE
2368# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2369 uint32_t u32 = (uint32_t)u64New;
2370 uint32_t u32Spill;
2371 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2372 "lock; cmpxchg8b (%6)\n\t"
2373 "setz %%al\n\t"
2374 "xchgl %%ebx, %4\n\t"
2375 "movzbl %%al, %%eax\n\t"
2376 : "=a" (u32Ret),
2377 "=d" (u32Spill),
2378 "=m" (*pu64)
2379 : "A" (u64Old),
2380 "m" ( u32 ),
2381 "c" ( (uint32_t)(u64New >> 32) ),
2382 "S" (pu64) );
2383# else /* !PIC */
2384 uint32_t u32Spill;
2385 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2386 "setz %%al\n\t"
2387 "movzbl %%al, %%eax\n\t"
2388 : "=a" (u32Ret),
2389 "=d" (u32Spill),
2390 "=m" (*pu64)
2391 : "A" (u64Old),
2392 "b" ( (uint32_t)u64New ),
2393 "c" ( (uint32_t)(u64New >> 32) ));
2394# endif
2395 return (bool)u32Ret;
2396# else
2397 __asm
2398 {
2399 mov ebx, dword ptr [u64New]
2400 mov ecx, dword ptr [u64New + 4]
2401 mov edi, [pu64]
2402 mov eax, dword ptr [u64Old]
2403 mov edx, dword ptr [u64Old + 4]
2404 lock cmpxchg8b [edi]
2405 setz al
2406 movzx eax, al
2407 mov dword ptr [u32Ret], eax
2408 }
2409 return !!u32Ret;
2410# endif
2411# endif /* !RT_ARCH_AMD64 */
2412}
2413#endif
2414
2415
2416/**
2417 * Atomically Compare and exchange a signed 64-bit value.
2418 *
2419 * @returns true if xchg was done.
2420 * @returns false if xchg wasn't done.
2421 *
2422 * @param pi64 Pointer to the 64-bit variable to update.
2423 * @param i64 The 64-bit value to assign to *pi64.
2424 * @param i64Old The value to compare with.
2425 */
2426DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2427{
2428 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2429}
2430
2431
2432/** @def ASMAtomicCmpXchgSize
2433 * Atomically Compare and Exchange a value whose size might differ
2434 * between platforms or compilers.
2435 *
2436 * @param pu Pointer to the value to update.
2437 * @param uNew        The new value to assign to *pu.
2438 * @param uOld        The old value to compare *pu with.
2439 * @param fRc Where to store the result.
2440 */
2441#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2442 do { \
2443 switch (sizeof(*(pu))) { \
2444 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2445 break; \
2446 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2447 break; \
2448 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2449 (fRc) = false; \
2450 break; \
2451 } \
2452 } while (0)
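
/* Illustrative sketch (not from the original header): ASMAtomicCmpXchgSize
 * dispatches to the 32-bit or 64-bit worker based on sizeof(*(pu)), which is
 * convenient for types whose width depends on the target; size_t is used here
 * purely as an example, and the function name is invented. */
DECLINLINE(bool) ExampleCmpXchgSize(size_t volatile *pcb, size_t cbNew, size_t cbOld)
{
    bool fRc;
    /* Expands to ASMAtomicCmpXchgU32 or ASMAtomicCmpXchgU64 depending on sizeof(size_t). */
    ASMAtomicCmpXchgSize(pcb, cbNew, cbOld, fRc);
    return fRc;
}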
2453
2454
2455/**
2456 * Atomically Compare and Exchange a pointer value.
2457 *
2458 * @returns true if xchg was done.
2459 * @returns false if xchg wasn't done.
2460 *
2461 * @param ppv Pointer to the value to update.
2462 * @param pvNew       The new value to assign to *ppv.
2463 * @param pvOld       The old value to compare *ppv with.
2464 */
2465DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2466{
2467#if ARCH_BITS == 32
2468 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2469#elif ARCH_BITS == 64
2470 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2471#else
2472# error "ARCH_BITS is bogus"
2473#endif
2474}
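
/* Illustrative sketch (not part of the original header): a lock-free LIFO push
 * using the pointer compare-and-swap. The node type and function name are
 * invented for the example; a matching pop would additionally have to deal
 * with the ABA problem. */
typedef struct EXAMPLENODE
{
    struct EXAMPLENODE *pNext;
} EXAMPLENODE;

DECLINLINE(void) ExamplePushNode(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode)
{
    EXAMPLENODE *pHead;
    do
    {
        pHead = *ppHead;        /* snapshot the current head */
        pNode->pNext = pHead;   /* link the new node in front of it */
    } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pHead));
}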
2475
2476
2477/**
2478 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2479 * passing back the old value.
2480 *
2481 * @returns true if xchg was done.
2482 * @returns false if xchg wasn't done.
2483 *
2484 * @param pu32 Pointer to the value to update.
2485 * @param u32New      The new value to assign to *pu32.
2486 * @param u32Old      The old value to compare *pu32 with.
2487 * @param pu32Old     Pointer to store the old value at.
2488 */
2489#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2490DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2491#else
2492DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2493{
2494# if RT_INLINE_ASM_GNU_STYLE
2495 uint32_t u32Ret;
2496 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2497 "movl %%eax, %2\n\t"
2498 "setz %%al\n\t"
2499 "movzbl %%al, %%eax\n\t"
2500 : "=m" (*pu32),
2501 "=a" (u32Ret),
2502 "=m" (*pu32Old)
2503 : "r" (u32New),
2504 "1" (u32Old));
2505 return (bool)u32Ret;
2506
2507# elif RT_INLINE_ASM_USES_INTRIN
2508 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2509
2510# else
2511 uint32_t u32Ret;
2512 __asm
2513 {
2514# ifdef RT_ARCH_AMD64
2515 mov rdx, [pu32]
2516# else
2517 mov edx, [pu32]
2518# endif
2519 mov eax, [u32Old]
2520 mov ecx, [u32New]
2521# ifdef RT_ARCH_AMD64
2522 lock cmpxchg [rdx], ecx
2523 mov rdx, [pu32Old]
2524 mov [rdx], eax
2525# else
2526 lock cmpxchg [edx], ecx
2527 mov edx, [pu32Old]
2528 mov [edx], eax
2529# endif
2530 setz al
2531 movzx eax, al
2532 mov [u32Ret], eax
2533 }
2534 return !!u32Ret;
2535# endif
2536}
2537#endif
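
/* Usage sketch (illustrative only): the Ex variant hands back the value found
 * in *pu32, so a retry loop can reuse it as the next comparand instead of
 * re-reading the variable. The function name and the OR operation are
 * hypothetical. */
DECLINLINE(uint32_t) ExampleAtomicOrRetOldU32(volatile uint32_t *pu32, uint32_t fOrMask)
{
    uint32_t u32Old = *pu32;
    uint32_t u32Seen;
    /* On failure u32Seen already holds the current value, so no extra read is needed. */
    while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fOrMask, u32Old, &u32Seen))
        u32Old = u32Seen;
    return u32Old;
}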
2538
2539
2540/**
2541 * Atomically Compare and Exchange a signed 32-bit value, additionally
2542 * passing back the old value.
2543 *
2544 * @returns true if xchg was done.
2545 * @returns false if xchg wasn't done.
2546 *
2547 * @param pi32 Pointer to the value to update.
2548 * @param i32New      The new value to assign to *pi32.
2549 * @param i32Old      The old value to compare *pi32 with.
2550 * @param pi32Old     Pointer to store the old value at.
2551 */
2552DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2553{
2554 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2555}
2556
2557
2558/**
2559 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2560 * passing back old value.
2561 *
2562 * @returns true if xchg was done.
2563 * @returns false if xchg wasn't done.
2564 *
2565 * @param pu64 Pointer to the 64-bit variable to update.
2566 * @param u64New The 64-bit value to assign to *pu64.
2567 * @param u64Old The value to compare with.
2568 * @param pu64Old     Pointer to store the old value at.
2569 */
2570#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2571DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2572#else
2573DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2574{
2575# if RT_INLINE_ASM_USES_INTRIN
2576 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2577
2578# elif defined(RT_ARCH_AMD64)
2579# if RT_INLINE_ASM_GNU_STYLE
2580 uint64_t u64Ret;
2581 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2582 "movq %%rax, %2\n\t"
2583 "setz %%al\n\t"
2584 "movzbl %%al, %%eax\n\t"
2585 : "=m" (*pu64),
2586 "=a" (u64Ret),
2587 "=m" (*pu64Old)
2588 : "r" (u64New),
2589 "1" (u64Old));
2590 return (bool)u64Ret;
2591# else
2592 bool fRet;
2593 __asm
2594 {
2595         mov rdx, [pu64]
2596 mov rax, [u64Old]
2597 mov rcx, [u64New]
2598 lock cmpxchg [rdx], rcx
2599 mov rdx, [pu64Old]
2600 mov [rdx], rax
2601 setz al
2602 mov [fRet], al
2603 }
2604 return fRet;
2605# endif
2606# else /* !RT_ARCH_AMD64 */
2607# if RT_INLINE_ASM_GNU_STYLE
2608 uint64_t u64Ret;
2609# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2610 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2611 "lock; cmpxchg8b %3\n\t"
2612 "xchgl %%ebx, %1\n\t"
2613 : "=A" (u64Ret)
2614 : "DS" ((uint32_t)u64New),
2615 "c" ((uint32_t)(u64New >> 32)),
2616 "m" (*pu64),
2617 "0" (u64Old)
2618 : "memory" );
2619# else /* !PIC */
2620 __asm__ __volatile__("lock; cmpxchg8b %3\n\t"
2621 : "=A" (u64Ret)
2622 : "b" ((uint32_t)u64New),
2623 "c" ((uint32_t)(u64New >> 32)),
2624 "m" (*pu64),
2625 "0" (u64Old)
2626 : "memory" );
2627# endif
2628 *pu64Old = u64Ret;
2629 return u64Ret != u64Old;
2630# else
2631 uint32_t u32Ret;
2632 __asm
2633 {
2634 mov ebx, dword ptr [u64New]
2635 mov ecx, dword ptr [u64New + 4]
2636 mov edi, [pu64]
2637 mov eax, dword ptr [u64Old]
2638 mov edx, dword ptr [u64Old + 4]
2639 lock cmpxchg8b [edi]
2640 mov ebx, [pu64Old]
2641 mov [ebx], eax
2642         lea ebx, [ebx + 4] ; the high dword is at offset 4; 'add' would clobber the ZF set by cmpxchg8b
2643 mov [ebx], edx
2644 setz al
2645 movzx eax, al
2646 mov dword ptr [u32Ret], eax
2647 }
2648 return !!u32Ret;
2649# endif
2650# endif /* !RT_ARCH_AMD64 */
2651}
2652#endif
2653
2654
2655/**
2656 * Atomically Compare and exchange a signed 64-bit value, additionally
2657 * passing back old value.
2658 *
2659 * @returns true if xchg was done.
2660 * @returns false if xchg wasn't done.
2661 *
2662 * @param pi64 Pointer to the 64-bit variable to update.
2663 * @param i64         The 64-bit value to assign to *pi64.
2664 * @param i64Old      The value to compare with.
2665 * @param pi64Old     Pointer to store the old value at.
2666 */
2667DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2668{
2669 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2670}
2671
2672
2673/** @def ASMAtomicCmpXchgExSize
2674 * Atomically Compare and Exchange a value whose size might differ
2675 * between platforms or compilers. Additionally passes back the old value.
2676 *
2677 * @param pu Pointer to the value to update.
2678 * @param uNew        The new value to assign to *pu.
2679 * @param uOld        The old value to compare *pu with.
2680 * @param fRc Where to store the result.
2681 * @param uOldVal Where to store the old value.
2682 */
2683#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
2684 do { \
2685 switch (sizeof(*(pu))) { \
2686 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
2687 break; \
2688 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
2689 break; \
2690 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2691 (fRc) = false; \
2692 (uOldVal) = 0; \
2693 break; \
2694 } \
2695 } while (0)
2696
2697
2698/**
2699 * Atomically Compare and Exchange a pointer value, additionally
2700 * passing back old value.
2701 *
2702 * @returns true if xchg was done.
2703 * @returns false if xchg wasn't done.
2704 *
2705 * @param ppv Pointer to the value to update.
2706 * @param pvNew       The new value to assign to *ppv.
2707 * @param pvOld       The old value to compare *ppv with.
2708 * @param ppvOld      Pointer to store the old value at.
2709 */
2710DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2711{
2712#if ARCH_BITS == 32
2713 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2714#elif ARCH_BITS == 64
2715 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2716#else
2717# error "ARCH_BITS is bogus"
2718#endif
2719}
2720
2721
2722/**
2723 * Atomically increment an unsigned 32-bit value.
2724 *
2725 * @returns The new value.
2726 * @param pu32 Pointer to the value to increment.
2727 */
2728#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2729DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2730#else
2731DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2732{
2733 uint32_t u32;
2734# if RT_INLINE_ASM_USES_INTRIN
2735 u32 = _InterlockedIncrement((long *)pu32);
2736 return u32;
2737
2738# elif RT_INLINE_ASM_GNU_STYLE
2739 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2740 : "=r" (u32),
2741 "=m" (*pu32)
2742 : "0" (1)
2743 : "memory");
2744 return u32+1;
2745# else
2746 __asm
2747 {
2748 mov eax, 1
2749# ifdef RT_ARCH_AMD64
2750 mov rdx, [pu32]
2751 lock xadd [rdx], eax
2752# else
2753 mov edx, [pu32]
2754 lock xadd [edx], eax
2755# endif
2756 mov u32, eax
2757 }
2758 return u32+1;
2759# endif
2760}
2761#endif
2762
2763
2764/**
2765 * Atomically increment a signed 32-bit value.
2766 *
2767 * @returns The new value.
2768 * @param pi32 Pointer to the value to increment.
2769 */
2770DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2771{
2772 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2773}
2774
2775
2776/**
2777 * Atomically decrement an unsigned 32-bit value.
2778 *
2779 * @returns The new value.
2780 * @param pu32 Pointer to the value to decrement.
2781 */
2782#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2783DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2784#else
2785DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2786{
2787 uint32_t u32;
2788# if RT_INLINE_ASM_USES_INTRIN
2789 u32 = _InterlockedDecrement((long *)pu32);
2790 return u32;
2791
2792# elif RT_INLINE_ASM_GNU_STYLE
2793 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2794 : "=r" (u32),
2795 "=m" (*pu32)
2796 : "0" (-1)
2797 : "memory");
2798 return u32-1;
2799# else
2800 __asm
2801 {
2802 mov eax, -1
2803# ifdef RT_ARCH_AMD64
2804 mov rdx, [pu32]
2805 lock xadd [rdx], eax
2806# else
2807 mov edx, [pu32]
2808 lock xadd [edx], eax
2809# endif
2810 mov u32, eax
2811 }
2812 return u32-1;
2813# endif
2814}
2815#endif
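
/* Illustrative sketch (not part of the original header): a minimal reference
 * counter built on the atomic increment/decrement helpers. The structure and
 * function names are invented for the example. */
typedef struct EXAMPLEREFOBJ
{
    uint32_t volatile cRefs;    /* reference count, only touched atomically */
} EXAMPLEREFOBJ;

DECLINLINE(uint32_t) ExampleRetain(EXAMPLEREFOBJ *pObj)
{
    return ASMAtomicIncU32(&pObj->cRefs);
}

DECLINLINE(bool) ExampleRelease(EXAMPLEREFOBJ *pObj)
{
    /* The decrement returns the new count; zero means the last reference is gone. */
    return ASMAtomicDecU32(&pObj->cRefs) == 0;
}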
2816
2817
2818/**
2819 * Atomically decrement a signed 32-bit value.
2820 *
2821 * @returns The new value.
2822 * @param pi32 Pointer to the value to decrement.
2823 */
2824DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2825{
2826 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2827}
2828
2829
2830/**
2831 * Atomically Or an unsigned 32-bit value.
2832 *
2833 * @param pu32        Pointer to the variable to OR u32 with.
2834 * @param u32 The value to OR *pu32 with.
2835 */
2836#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2837DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2838#else
2839DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2840{
2841# if RT_INLINE_ASM_USES_INTRIN
2842 _InterlockedOr((long volatile *)pu32, (long)u32);
2843
2844# elif RT_INLINE_ASM_GNU_STYLE
2845 __asm__ __volatile__("lock; orl %1, %0\n\t"
2846 : "=m" (*pu32)
2847 : "ir" (u32));
2848# else
2849 __asm
2850 {
2851 mov eax, [u32]
2852# ifdef RT_ARCH_AMD64
2853 mov rdx, [pu32]
2854 lock or [rdx], eax
2855# else
2856 mov edx, [pu32]
2857 lock or [edx], eax
2858# endif
2859 }
2860# endif
2861}
2862#endif
2863
2864
2865/**
2866 * Atomically Or a signed 32-bit value.
2867 *
2868 * @param pi32        Pointer to the variable to OR i32 with.
2869 * @param i32         The value to OR *pi32 with.
2870 */
2871DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2872{
2873 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2874}
2875
2876
2877/**
2878 * Atomically And an unsigned 32-bit value.
2879 *
2880 * @param pu32        Pointer to the variable to AND u32 with.
2881 * @param u32 The value to AND *pu32 with.
2882 */
2883#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2884DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2885#else
2886DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2887{
2888# if RT_INLINE_ASM_USES_INTRIN
2889 _InterlockedAnd((long volatile *)pu32, u32);
2890
2891# elif RT_INLINE_ASM_GNU_STYLE
2892 __asm__ __volatile__("lock; andl %1, %0\n\t"
2893 : "=m" (*pu32)
2894 : "ir" (u32));
2895# else
2896 __asm
2897 {
2898 mov eax, [u32]
2899# ifdef RT_ARCH_AMD64
2900 mov rdx, [pu32]
2901 lock and [rdx], eax
2902# else
2903 mov edx, [pu32]
2904 lock and [edx], eax
2905# endif
2906 }
2907# endif
2908}
2909#endif
2910
2911
2912/**
2913 * Atomically And a signed 32-bit value.
2914 *
2915 * @param pi32        Pointer to the variable to AND i32 with.
2916 * @param i32 The value to AND *pi32 with.
2917 */
2918DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2919{
2920 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2921}
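
/* Usage sketch (illustrative): atomically setting and clearing a flag bit in a
 * shared 32-bit status word with the OR/AND helpers. The flag value and the
 * function names are hypothetical. */
#define EXAMPLE_STATUS_BUSY UINT32_C(0x00000001)   /* hypothetical flag bit */

DECLINLINE(void) ExampleSetBusy(uint32_t volatile *pfStatus)
{
    ASMAtomicOrU32(pfStatus, EXAMPLE_STATUS_BUSY);
}

DECLINLINE(void) ExampleClearBusy(uint32_t volatile *pfStatus)
{
    /* ANDing with the inverted mask clears just this bit and leaves the rest intact. */
    ASMAtomicAndU32(pfStatus, ~EXAMPLE_STATUS_BUSY);
}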
2922
2923
2924/**
2925 * Invalidate page.
2926 *
2927 * @param pv Address of the page to invalidate.
2928 */
2929#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2930DECLASM(void) ASMInvalidatePage(void *pv);
2931#else
2932DECLINLINE(void) ASMInvalidatePage(void *pv)
2933{
2934# if RT_INLINE_ASM_USES_INTRIN
2935 __invlpg(pv);
2936
2937# elif RT_INLINE_ASM_GNU_STYLE
2938 __asm__ __volatile__("invlpg %0\n\t"
2939 : : "m" (*(uint8_t *)pv));
2940# else
2941 __asm
2942 {
2943# ifdef RT_ARCH_AMD64
2944 mov rax, [pv]
2945 invlpg [rax]
2946# else
2947 mov eax, [pv]
2948 invlpg [eax]
2949# endif
2950 }
2951# endif
2952}
2953#endif
2954
2955
2956#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2957# if PAGE_SIZE != 0x1000
2958# error "PAGE_SIZE is not 0x1000!"
2959# endif
2960#endif
2961
2962/**
2963 * Zeros a 4K memory page.
2964 *
2965 * @param pv Pointer to the memory block. This must be page aligned.
2966 */
2967#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2968DECLASM(void) ASMMemZeroPage(volatile void *pv);
2969# else
2970DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2971{
2972# if RT_INLINE_ASM_USES_INTRIN
2973# ifdef RT_ARCH_AMD64
2974 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2975# else
2976 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2977# endif
2978
2979# elif RT_INLINE_ASM_GNU_STYLE
2980 RTUINTREG uDummy;
2981# ifdef RT_ARCH_AMD64
2982 __asm__ __volatile__ ("rep stosq"
2983 : "=D" (pv),
2984 "=c" (uDummy)
2985 : "0" (pv),
2986 "c" (0x1000 >> 3),
2987 "a" (0)
2988 : "memory");
2989# else
2990 __asm__ __volatile__ ("rep stosl"
2991 : "=D" (pv),
2992 "=c" (uDummy)
2993 : "0" (pv),
2994 "c" (0x1000 >> 2),
2995 "a" (0)
2996 : "memory");
2997# endif
2998# else
2999 __asm
3000 {
3001# ifdef RT_ARCH_AMD64
3002 xor rax, rax
3003 mov ecx, 0200h
3004 mov rdi, [pv]
3005 rep stosq
3006# else
3007 xor eax, eax
3008 mov ecx, 0400h
3009 mov edi, [pv]
3010 rep stosd
3011# endif
3012 }
3013# endif
3014}
3015# endif
3016
3017
3018/**
3019 * Zeros a memory block with a 32-bit aligned size.
3020 *
3021 * @param pv Pointer to the memory block.
3022 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3023 */
3024#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3025DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3026#else
3027DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3028{
3029# if RT_INLINE_ASM_USES_INTRIN
3030 __stosd((unsigned long *)pv, 0, cb >> 2);
3031
3032# elif RT_INLINE_ASM_GNU_STYLE
3033 __asm__ __volatile__ ("rep stosl"
3034 : "=D" (pv),
3035 "=c" (cb)
3036 : "0" (pv),
3037 "1" (cb >> 2),
3038 "a" (0)
3039 : "memory");
3040# else
3041 __asm
3042 {
3043 xor eax, eax
3044# ifdef RT_ARCH_AMD64
3045 mov rcx, [cb]
3046 shr rcx, 2
3047 mov rdi, [pv]
3048# else
3049 mov ecx, [cb]
3050 shr ecx, 2
3051 mov edi, [pv]
3052# endif
3053 rep stosd
3054 }
3055# endif
3056}
3057#endif
3058
3059
3060/**
3061 * Fills a memory block with a 32-bit aligned size.
3062 *
3063 * @param pv Pointer to the memory block.
3064 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3065 * @param u32 The value to fill with.
3066 */
3067#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3068DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3069#else
3070DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3071{
3072# if RT_INLINE_ASM_USES_INTRIN
3073 __stosd((unsigned long *)pv, u32, cb >> 2);
3074
3075# elif RT_INLINE_ASM_GNU_STYLE
3076 __asm__ __volatile__ ("rep stosl"
3077 : "=D" (pv),
3078 "=c" (cb)
3079 : "0" (pv),
3080 "1" (cb >> 2),
3081 "a" (u32)
3082 : "memory");
3083# else
3084 __asm
3085 {
3086# ifdef RT_ARCH_AMD64
3087 mov rcx, [cb]
3088 shr rcx, 2
3089 mov rdi, [pv]
3090# else
3091 mov ecx, [cb]
3092 shr ecx, 2
3093 mov edi, [pv]
3094# endif
3095 mov eax, [u32]
3096 rep stosd
3097 }
3098# endif
3099}
3100#endif
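
/* Illustrative sketch (not from the original header): initialising a table with
 * ASMMemFill32. The byte count is cEntries * sizeof(uint32_t) and therefore
 * always 32-bit aligned, as the contract requires; the fill pattern and the
 * function name are made up. */
DECLINLINE(void) ExampleInitTable(uint32_t *pau32Table, size_t cEntries)
{
    ASMMemFill32(pau32Table, cEntries * sizeof(uint32_t), UINT32_C(0xdeadbeef));
}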
3101
3102
3103/**
3104 * Checks if a memory block is filled with the specified byte.
3105 *
3106 * This is a sort of inverted memchr.
3107 *
3108 * @returns Pointer to the byte which doesn't equal u8.
3109 * @returns NULL if all equal to u8.
3110 *
3111 * @param pv Pointer to the memory block.
3112 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3113 * @param u8 The value it's supposed to be filled with.
3114 */
3115#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3116DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
3117#else
3118DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3119{
3120/** @todo rewrite this in inline assembly. */
3121 uint8_t const *pb = (uint8_t const *)pv;
3122 for (; cb; cb--, pb++)
3123 if (RT_UNLIKELY(*pb != u8))
3124 return (void *)pb;
3125 return NULL;
3126}
3127#endif
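
/* Usage sketch (illustrative): ASMMemIsAll8 as an inverted memchr, here used to
 * check that a 4K page really contains nothing but zero bytes. The helper name
 * is invented. */
DECLINLINE(bool) ExampleIsZeroPage(void const *pvPage)
{
    /* NULL means every byte matched the given value. */
    return ASMMemIsAll8(pvPage, 0x1000, 0) == NULL;
}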
3128
3129
3130
3131/**
3132 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
3133 *
3134 * @returns u32F1 * u32F2.
3135 */
3136#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3137DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
3138#else
3139DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
3140{
3141# ifdef RT_ARCH_AMD64
3142 return (uint64_t)u32F1 * u32F2;
3143# else /* !RT_ARCH_AMD64 */
3144 uint64_t u64;
3145# if RT_INLINE_ASM_GNU_STYLE
3146 __asm__ __volatile__("mull %%edx"
3147 : "=A" (u64)
3148 : "a" (u32F2), "d" (u32F1));
3149# else
3150 __asm
3151 {
3152 mov edx, [u32F1]
3153 mov eax, [u32F2]
3154 mul edx
3155 mov dword ptr [u64], eax
3156 mov dword ptr [u64 + 4], edx
3157 }
3158# endif
3159 return u64;
3160# endif /* !RT_ARCH_AMD64 */
3161}
3162#endif
3163
3164
3165/**
3166 * Multiplies two signed 32-bit values returning a signed 64-bit result.
3167 *
3168 * @returns i32F1 * i32F2.
3169 */
3170#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3171DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
3172#else
3173DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
3174{
3175# ifdef RT_ARCH_AMD64
3176 return (int64_t)i32F1 * i32F2;
3177# else /* !RT_ARCH_AMD64 */
3178 int64_t i64;
3179# if RT_INLINE_ASM_GNU_STYLE
3180 __asm__ __volatile__("imull %%edx"
3181 : "=A" (i64)
3182 : "a" (i32F2), "d" (i32F1));
3183# else
3184 __asm
3185 {
3186 mov edx, [i32F1]
3187 mov eax, [i32F2]
3188 imul edx
3189 mov dword ptr [i64], eax
3190 mov dword ptr [i64 + 4], edx
3191 }
3192# endif
3193 return i64;
3194# endif /* !RT_ARCH_AMD64 */
3195}
3196#endif
3197
3198
3199/**
3200 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
3201 *
3202 * @returns u64 / u32.
3203 */
3204#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3205DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
3206#else
3207DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
3208{
3209# ifdef RT_ARCH_AMD64
3210 return (uint32_t)(u64 / u32);
3211# else /* !RT_ARCH_AMD64 */
3212# if RT_INLINE_ASM_GNU_STYLE
3213 RTUINTREG uDummy;
3214 __asm__ __volatile__("divl %3"
3215 : "=a" (u32), "=d"(uDummy)
3216 : "A" (u64), "r" (u32));
3217# else
3218 __asm
3219 {
3220 mov eax, dword ptr [u64]
3221 mov edx, dword ptr [u64 + 4]
3222 mov ecx, [u32]
3223 div ecx
3224 mov [u32], eax
3225 }
3226# endif
3227 return u32;
3228# endif /* !RT_ARCH_AMD64 */
3229}
3230#endif
3231
3232
3233/**
3234 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
3235 *
3236 * @returns i64 / i32.
3237 */
3238#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3239DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
3240#else
3241DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
3242{
3243# ifdef RT_ARCH_AMD64
3244 return (int32_t)(i64 / i32);
3245# else /* !RT_ARCH_AMD64 */
3246# if RT_INLINE_ASM_GNU_STYLE
3247 RTUINTREG iDummy;
3248 __asm__ __volatile__("idivl %3"
3249 : "=a" (i32), "=d"(iDummy)
3250 : "A" (i64), "r" (i32));
3251# else
3252 __asm
3253 {
3254 mov eax, dword ptr [i64]
3255 mov edx, dword ptr [i64 + 4]
3256 mov ecx, [i32]
3257 idiv ecx
3258 mov [i32], eax
3259 }
3260# endif
3261 return i32;
3262# endif /* !RT_ARCH_AMD64 */
3263}
3264#endif
3265
3266
3267/**
3268 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
3269 * using a 96-bit intermediate result.
3270 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
3271 * __udivdi3 and __umoddi3 even if this inline function is not used.
3272 *
3273 * @returns (u64A * u32B) / u32C.
3274 * @param u64A The 64-bit value.
3275 * @param u32B The 32-bit value to multiply A by.
3276 * @param u32C The 32-bit value to divide A*B by.
3277 */
3278#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
3279DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
3280#else
3281DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
3282{
3283# if RT_INLINE_ASM_GNU_STYLE
3284# ifdef RT_ARCH_AMD64
3285 uint64_t u64Result, u64Spill;
3286 __asm__ __volatile__("mulq %2\n\t"
3287 "divq %3\n\t"
3288 : "=a" (u64Result),
3289 "=d" (u64Spill)
3290 : "r" ((uint64_t)u32B),
3291 "r" ((uint64_t)u32C),
3292 "0" (u64A),
3293 "1" (0));
3294 return u64Result;
3295# else
3296 uint32_t u32Dummy;
3297 uint64_t u64Result;
3298 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
3299 edx = u64Lo.hi = (u64A.lo * u32B).hi */
3300 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
3301 eax = u64A.hi */
3302 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
3303 edx = u32C */
3304 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
3305 edx = u32B */
3306 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
3307 edx = u64Hi.hi = (u64A.hi * u32B).hi */
3308 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
3309 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
3310 "divl %%ecx \n\t" /* eax = u64Hi / u32C
3311 edx = u64Hi % u32C */
3312 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
3313 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
3314 "divl %%ecx \n\t" /* u64Result.lo */
3315 "movl %%edi,%%edx \n\t" /* u64Result.hi */
3316 : "=A"(u64Result), "=c"(u32Dummy),
3317 "=S"(u32Dummy), "=D"(u32Dummy)
3318 : "a"((uint32_t)u64A),
3319 "S"((uint32_t)(u64A >> 32)),
3320 "c"(u32B),
3321 "D"(u32C));
3322 return u64Result;
3323# endif
3324# else
3325 RTUINT64U u;
3326 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
3327 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
3328 u64Hi += (u64Lo >> 32);
3329 u.s.Hi = (uint32_t)(u64Hi / u32C);
3330 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
3331 return u.u;
3332# endif
3333}
3334#endif
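
/* Usage sketch (illustrative, not from the original header): the multiply-then-
 * divide helper keeps the full 96-bit intermediate, so scaling a large 64-bit
 * tick count does not overflow the way a plain (u64A * u32B) / u32C expression
 * would. The conversion and its constant are invented for the example. */
DECLINLINE(uint64_t) ExampleTicksToNano(uint64_t cTicks, uint32_t uHz)
{
    /* nanoseconds = ticks * 10^9 / frequency. */
    return ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000), uHz);
}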
3335
3336
3337/**
3338 * Probes a byte pointer for read access.
3339 *
3340 * While the function will fault if the byte is not read accessible,
3341 * the idea is to do this in a safe place like before acquiring locks
3342 * and such like.
3343 *
3344 * Also, this function guarantees that an eager compiler is not going
3345 * to optimize the probing away.
3346 *
3347 * @param pvByte Pointer to the byte.
3348 */
3349#if RT_INLINE_ASM_EXTERNAL
3350DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3351#else
3352DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3353{
3354 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3355 uint8_t u8;
3356# if RT_INLINE_ASM_GNU_STYLE
3357 __asm__ __volatile__("movb (%1), %0\n\t"
3358 : "=r" (u8)
3359 : "r" (pvByte));
3360# else
3361 __asm
3362 {
3363# ifdef RT_ARCH_AMD64
3364 mov rax, [pvByte]
3365 mov al, [rax]
3366# else
3367 mov eax, [pvByte]
3368 mov al, [eax]
3369# endif
3370 mov [u8], al
3371 }
3372# endif
3373 return u8;
3374}
3375#endif
3376
3377/**
3378 * Probes a buffer for read access page by page.
3379 *
3380 * While the function will fault if the buffer is not fully read
3381 * accessible, the idea is to do this in a safe place like before
3382 * acquiring locks and such like.
3383 *
3384 * Also, this function guarantees that an eager compiler is not going
3385 * to optimize the probing away.
3386 *
3387 * @param pvBuf Pointer to the buffer.
3388 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3389 */
3390DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3391{
3392 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3393 /* the first byte */
3394 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3395 ASMProbeReadByte(pu8);
3396
3397 /* the whole pages in between. */
3398 while (cbBuf > /*PAGE_SIZE*/0x1000)
3399 {
3400 ASMProbeReadByte(pu8);
3401 cbBuf -= /*PAGE_SIZE*/0x1000;
3402 pu8 += /*PAGE_SIZE*/0x1000;
3403 }
3404
3405 /* the last byte */
3406 ASMProbeReadByte(pu8 + cbBuf - 1);
3407}
3408
3409
3410/** @def ASMBreakpoint
3411 * Debugger Breakpoint.
3412 * @remark In the gnu world we add a nop instruction after the int3 to
3413 * force gdb to remain at the int3 source line.
3414 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
3415 * @internal
3416 */
3417#if RT_INLINE_ASM_GNU_STYLE
3418# ifndef __L4ENV__
3419# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
3420# else
3421# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
3422# endif
3423#else
3424# define ASMBreakpoint() __debugbreak()
3425#endif
3426
3427
3428
3429/** @defgroup grp_inline_bits Bit Operations
3430 * @{
3431 */
3432
3433
3434/**
3435 * Sets a bit in a bitmap.
3436 *
3437 * @param pvBitmap Pointer to the bitmap.
3438 * @param iBit The bit to set.
3439 */
3440#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3441DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3442#else
3443DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3444{
3445# if RT_INLINE_ASM_USES_INTRIN
3446 _bittestandset((long *)pvBitmap, iBit);
3447
3448# elif RT_INLINE_ASM_GNU_STYLE
3449 __asm__ __volatile__ ("btsl %1, %0"
3450 : "=m" (*(volatile long *)pvBitmap)
3451 : "Ir" (iBit)
3452 : "memory");
3453# else
3454 __asm
3455 {
3456# ifdef RT_ARCH_AMD64
3457 mov rax, [pvBitmap]
3458 mov edx, [iBit]
3459 bts [rax], edx
3460# else
3461 mov eax, [pvBitmap]
3462 mov edx, [iBit]
3463 bts [eax], edx
3464# endif
3465 }
3466# endif
3467}
3468#endif
3469
3470
3471/**
3472 * Atomically sets a bit in a bitmap.
3473 *
3474 * @param pvBitmap Pointer to the bitmap.
3475 * @param iBit The bit to set.
3476 */
3477#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3478DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3479#else
3480DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3481{
3482# if RT_INLINE_ASM_USES_INTRIN
3483 _interlockedbittestandset((long *)pvBitmap, iBit);
3484# elif RT_INLINE_ASM_GNU_STYLE
3485 __asm__ __volatile__ ("lock; btsl %1, %0"
3486 : "=m" (*(volatile long *)pvBitmap)
3487 : "Ir" (iBit)
3488 : "memory");
3489# else
3490 __asm
3491 {
3492# ifdef RT_ARCH_AMD64
3493 mov rax, [pvBitmap]
3494 mov edx, [iBit]
3495 lock bts [rax], edx
3496# else
3497 mov eax, [pvBitmap]
3498 mov edx, [iBit]
3499 lock bts [eax], edx
3500# endif
3501 }
3502# endif
3503}
3504#endif
3505
3506
3507/**
3508 * Clears a bit in a bitmap.
3509 *
3510 * @param pvBitmap Pointer to the bitmap.
3511 * @param iBit The bit to clear.
3512 */
3513#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3514DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3515#else
3516DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3517{
3518# if RT_INLINE_ASM_USES_INTRIN
3519 _bittestandreset((long *)pvBitmap, iBit);
3520
3521# elif RT_INLINE_ASM_GNU_STYLE
3522 __asm__ __volatile__ ("btrl %1, %0"
3523 : "=m" (*(volatile long *)pvBitmap)
3524 : "Ir" (iBit)
3525 : "memory");
3526# else
3527 __asm
3528 {
3529# ifdef RT_ARCH_AMD64
3530 mov rax, [pvBitmap]
3531 mov edx, [iBit]
3532 btr [rax], edx
3533# else
3534 mov eax, [pvBitmap]
3535 mov edx, [iBit]
3536 btr [eax], edx
3537# endif
3538 }
3539# endif
3540}
3541#endif
3542
3543
3544/**
3545 * Atomically clears a bit in a bitmap.
3546 *
3547 * @param pvBitmap Pointer to the bitmap.
3548 * @param iBit The bit to clear.
3549 * @remark No memory barrier, take care on smp.
3550 */
3551#if RT_INLINE_ASM_EXTERNAL
3552DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3553#else
3554DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3555{
3556# if RT_INLINE_ASM_GNU_STYLE
3557 __asm__ __volatile__ ("lock; btrl %1, %0"
3558 : "=m" (*(volatile long *)pvBitmap)
3559 : "Ir" (iBit)
3560 : "memory");
3561# else
3562 __asm
3563 {
3564# ifdef RT_ARCH_AMD64
3565 mov rax, [pvBitmap]
3566 mov edx, [iBit]
3567 lock btr [rax], edx
3568# else
3569 mov eax, [pvBitmap]
3570 mov edx, [iBit]
3571 lock btr [eax], edx
3572# endif
3573 }
3574# endif
3575}
3576#endif
3577
3578
3579/**
3580 * Toggles a bit in a bitmap.
3581 *
3582 * @param pvBitmap Pointer to the bitmap.
3583 * @param iBit The bit to toggle.
3584 */
3585#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3586DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3587#else
3588DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3589{
3590# if RT_INLINE_ASM_USES_INTRIN
3591 _bittestandcomplement((long *)pvBitmap, iBit);
3592# elif RT_INLINE_ASM_GNU_STYLE
3593 __asm__ __volatile__ ("btcl %1, %0"
3594 : "=m" (*(volatile long *)pvBitmap)
3595 : "Ir" (iBit)
3596 : "memory");
3597# else
3598 __asm
3599 {
3600# ifdef RT_ARCH_AMD64
3601 mov rax, [pvBitmap]
3602 mov edx, [iBit]
3603 btc [rax], edx
3604# else
3605 mov eax, [pvBitmap]
3606 mov edx, [iBit]
3607 btc [eax], edx
3608# endif
3609 }
3610# endif
3611}
3612#endif
3613
3614
3615/**
3616 * Atomically toggles a bit in a bitmap.
3617 *
3618 * @param pvBitmap Pointer to the bitmap.
3619 * @param iBit The bit to toggle.
3620 */
3621#if RT_INLINE_ASM_EXTERNAL
3622DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3623#else
3624DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3625{
3626# if RT_INLINE_ASM_GNU_STYLE
3627 __asm__ __volatile__ ("lock; btcl %1, %0"
3628 : "=m" (*(volatile long *)pvBitmap)
3629 : "Ir" (iBit)
3630 : "memory");
3631# else
3632 __asm
3633 {
3634# ifdef RT_ARCH_AMD64
3635 mov rax, [pvBitmap]
3636 mov edx, [iBit]
3637 lock btc [rax], edx
3638# else
3639 mov eax, [pvBitmap]
3640 mov edx, [iBit]
3641 lock btc [eax], edx
3642# endif
3643 }
3644# endif
3645}
3646#endif
3647
3648
3649/**
3650 * Tests and sets a bit in a bitmap.
3651 *
3652 * @returns true if the bit was set.
3653 * @returns false if the bit was clear.
3654 * @param pvBitmap Pointer to the bitmap.
3655 * @param iBit The bit to test and set.
3656 */
3657#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3658DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3659#else
3660DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3661{
3662 union { bool f; uint32_t u32; uint8_t u8; } rc;
3663# if RT_INLINE_ASM_USES_INTRIN
3664 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3665
3666# elif RT_INLINE_ASM_GNU_STYLE
3667 __asm__ __volatile__ ("btsl %2, %1\n\t"
3668 "setc %b0\n\t"
3669 "andl $1, %0\n\t"
3670 : "=q" (rc.u32),
3671 "=m" (*(volatile long *)pvBitmap)
3672 : "Ir" (iBit)
3673 : "memory");
3674# else
3675 __asm
3676 {
3677 mov edx, [iBit]
3678# ifdef RT_ARCH_AMD64
3679 mov rax, [pvBitmap]
3680 bts [rax], edx
3681# else
3682 mov eax, [pvBitmap]
3683 bts [eax], edx
3684# endif
3685 setc al
3686 and eax, 1
3687 mov [rc.u32], eax
3688 }
3689# endif
3690 return rc.f;
3691}
3692#endif
3693
3694
3695/**
3696 * Atomically tests and sets a bit in a bitmap.
3697 *
3698 * @returns true if the bit was set.
3699 * @returns false if the bit was clear.
3700 * @param pvBitmap Pointer to the bitmap.
3701 * @param iBit The bit to test and set.
3702 */
3703#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3704DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3705#else
3706DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3707{
3708 union { bool f; uint32_t u32; uint8_t u8; } rc;
3709# if RT_INLINE_ASM_USES_INTRIN
3710 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3711# elif RT_INLINE_ASM_GNU_STYLE
3712 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3713 "setc %b0\n\t"
3714 "andl $1, %0\n\t"
3715 : "=q" (rc.u32),
3716 "=m" (*(volatile long *)pvBitmap)
3717 : "Ir" (iBit)
3718 : "memory");
3719# else
3720 __asm
3721 {
3722 mov edx, [iBit]
3723# ifdef RT_ARCH_AMD64
3724 mov rax, [pvBitmap]
3725 lock bts [rax], edx
3726# else
3727 mov eax, [pvBitmap]
3728 lock bts [eax], edx
3729# endif
3730 setc al
3731 and eax, 1
3732 mov [rc.u32], eax
3733 }
3734# endif
3735 return rc.f;
3736}
3737#endif
3738
3739
3740/**
3741 * Tests and clears a bit in a bitmap.
3742 *
3743 * @returns true if the bit was set.
3744 * @returns false if the bit was clear.
3745 * @param pvBitmap Pointer to the bitmap.
3746 * @param iBit The bit to test and clear.
3747 */
3748#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3749DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3750#else
3751DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3752{
3753 union { bool f; uint32_t u32; uint8_t u8; } rc;
3754# if RT_INLINE_ASM_USES_INTRIN
3755 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3756
3757# elif RT_INLINE_ASM_GNU_STYLE
3758 __asm__ __volatile__ ("btrl %2, %1\n\t"
3759 "setc %b0\n\t"
3760 "andl $1, %0\n\t"
3761 : "=q" (rc.u32),
3762 "=m" (*(volatile long *)pvBitmap)
3763 : "Ir" (iBit)
3764 : "memory");
3765# else
3766 __asm
3767 {
3768 mov edx, [iBit]
3769# ifdef RT_ARCH_AMD64
3770 mov rax, [pvBitmap]
3771 btr [rax], edx
3772# else
3773 mov eax, [pvBitmap]
3774 btr [eax], edx
3775# endif
3776 setc al
3777 and eax, 1
3778 mov [rc.u32], eax
3779 }
3780# endif
3781 return rc.f;
3782}
3783#endif
3784
3785
3786/**
3787 * Atomically tests and clears a bit in a bitmap.
3788 *
3789 * @returns true if the bit was set.
3790 * @returns false if the bit was clear.
3791 * @param pvBitmap Pointer to the bitmap.
3792 * @param iBit The bit to test and clear.
3793 * @remark No memory barrier, take care on smp.
3794 */
3795#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3796DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3797#else
3798DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3799{
3800 union { bool f; uint32_t u32; uint8_t u8; } rc;
3801# if RT_INLINE_ASM_USES_INTRIN
3802 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3803
3804# elif RT_INLINE_ASM_GNU_STYLE
3805 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3806 "setc %b0\n\t"
3807 "andl $1, %0\n\t"
3808 : "=q" (rc.u32),
3809 "=m" (*(volatile long *)pvBitmap)
3810 : "Ir" (iBit)
3811 : "memory");
3812# else
3813 __asm
3814 {
3815 mov edx, [iBit]
3816# ifdef RT_ARCH_AMD64
3817 mov rax, [pvBitmap]
3818 lock btr [rax], edx
3819# else
3820 mov eax, [pvBitmap]
3821 lock btr [eax], edx
3822# endif
3823 setc al
3824 and eax, 1
3825 mov [rc.u32], eax
3826 }
3827# endif
3828 return rc.f;
3829}
3830#endif
3831
3832
3833/**
3834 * Tests and toggles a bit in a bitmap.
3835 *
3836 * @returns true if the bit was set.
3837 * @returns false if the bit was clear.
3838 * @param pvBitmap Pointer to the bitmap.
3839 * @param iBit The bit to test and toggle.
3840 */
3841#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3842DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3843#else
3844DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3845{
3846 union { bool f; uint32_t u32; uint8_t u8; } rc;
3847# if RT_INLINE_ASM_USES_INTRIN
3848 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3849
3850# elif RT_INLINE_ASM_GNU_STYLE
3851 __asm__ __volatile__ ("btcl %2, %1\n\t"
3852 "setc %b0\n\t"
3853 "andl $1, %0\n\t"
3854 : "=q" (rc.u32),
3855 "=m" (*(volatile long *)pvBitmap)
3856 : "Ir" (iBit)
3857 : "memory");
3858# else
3859 __asm
3860 {
3861 mov edx, [iBit]
3862# ifdef RT_ARCH_AMD64
3863 mov rax, [pvBitmap]
3864 btc [rax], edx
3865# else
3866 mov eax, [pvBitmap]
3867 btc [eax], edx
3868# endif
3869 setc al
3870 and eax, 1
3871 mov [rc.u32], eax
3872 }
3873# endif
3874 return rc.f;
3875}
3876#endif
3877
3878
3879/**
3880 * Atomically tests and toggles a bit in a bitmap.
3881 *
3882 * @returns true if the bit was set.
3883 * @returns false if the bit was clear.
3884 * @param pvBitmap Pointer to the bitmap.
3885 * @param iBit The bit to test and toggle.
3886 */
3887#if RT_INLINE_ASM_EXTERNAL
3888DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3889#else
3890DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3891{
3892 union { bool f; uint32_t u32; uint8_t u8; } rc;
3893# if RT_INLINE_ASM_GNU_STYLE
3894 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3895 "setc %b0\n\t"
3896 "andl $1, %0\n\t"
3897 : "=q" (rc.u32),
3898 "=m" (*(volatile long *)pvBitmap)
3899 : "Ir" (iBit)
3900 : "memory");
3901# else
3902 __asm
3903 {
3904 mov edx, [iBit]
3905# ifdef RT_ARCH_AMD64
3906 mov rax, [pvBitmap]
3907 lock btc [rax], edx
3908# else
3909 mov eax, [pvBitmap]
3910 lock btc [eax], edx
3911# endif
3912 setc al
3913 and eax, 1
3914 mov [rc.u32], eax
3915 }
3916# endif
3917 return rc.f;
3918}
3919#endif
3920
3921
3922/**
3923 * Tests if a bit in a bitmap is set.
3924 *
3925 * @returns true if the bit is set.
3926 * @returns false if the bit is clear.
3927 * @param pvBitmap Pointer to the bitmap.
3928 * @param iBit The bit to test.
3929 */
3930#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3931DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3932#else
3933DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3934{
3935 union { bool f; uint32_t u32; uint8_t u8; } rc;
3936# if RT_INLINE_ASM_USES_INTRIN
3937 rc.u32 = _bittest((long *)pvBitmap, iBit);
3938# elif RT_INLINE_ASM_GNU_STYLE
3939
3940 __asm__ __volatile__ ("btl %2, %1\n\t"
3941 "setc %b0\n\t"
3942 "andl $1, %0\n\t"
3943 : "=q" (rc.u32),
3944 "=m" (*(volatile long *)pvBitmap)
3945 : "Ir" (iBit)
3946 : "memory");
3947# else
3948 __asm
3949 {
3950 mov edx, [iBit]
3951# ifdef RT_ARCH_AMD64
3952 mov rax, [pvBitmap]
3953 bt [rax], edx
3954# else
3955 mov eax, [pvBitmap]
3956 bt [eax], edx
3957# endif
3958 setc al
3959 and eax, 1
3960 mov [rc.u32], eax
3961 }
3962# endif
3963 return rc.f;
3964}
3965#endif
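
/* Illustrative sketch (not part of the original header): the bit operations
 * address a flat bitmap by bit index, so a 256-bit map is just eight 32-bit
 * words. The array and the bit index are invented for the example. */
DECLINLINE(bool) ExampleBitmapDemo(void)
{
    uint32_t au32Bitmap[256 / 32] = {0};    /* 256-bit bitmap, all bits clear */
    ASMBitSet(au32Bitmap, 42);              /* sets bit 10 of word 1 */
    ASMBitToggle(au32Bitmap, 42);           /* toggles it back off */
    return ASMBitTest(au32Bitmap, 42);      /* now reads false */
}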
3966
3967
3968/**
3969 * Clears a bit range within a bitmap.
3970 *
3971 * @param pvBitmap Pointer to the bitmap.
3972 * @param iBitStart The first bit to clear.
3973 * @param iBitEnd The first bit not to clear.
3974 */
3975DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3976{
3977 if (iBitStart < iBitEnd)
3978 {
3979 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3980 int iStart = iBitStart & ~31;
3981 int iEnd = iBitEnd & ~31;
3982 if (iStart == iEnd)
3983 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3984 else
3985 {
3986 /* bits in first dword. */
3987 if (iBitStart & 31)
3988 {
3989 *pu32 &= (1 << (iBitStart & 31)) - 1;
3990 pu32++;
3991 iBitStart = iStart + 32;
3992 }
3993
3994 /* whole dword. */
3995 if (iBitStart != iEnd)
3996 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3997
3998 /* bits in last dword. */
3999 if (iBitEnd & 31)
4000 {
4001 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4002 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4003 }
4004 }
4005 }
4006}
4007
4008
4009/**
4010 * Finds the first clear bit in a bitmap.
4011 *
4012 * @returns Index of the first zero bit.
4013 * @returns -1 if no clear bit was found.
4014 * @param pvBitmap Pointer to the bitmap.
4015 * @param cBits The number of bits in the bitmap. Multiple of 32.
4016 */
4017#if RT_INLINE_ASM_EXTERNAL
4018DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
4019#else
4020DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
4021{
4022 if (cBits)
4023 {
4024 int32_t iBit;
4025# if RT_INLINE_ASM_GNU_STYLE
4026 RTCCUINTREG uEAX, uECX, uEDI;
4027 cBits = RT_ALIGN_32(cBits, 32);
4028 __asm__ __volatile__("repe; scasl\n\t"
4029 "je 1f\n\t"
4030# ifdef RT_ARCH_AMD64
4031 "lea -4(%%rdi), %%rdi\n\t"
4032 "xorl (%%rdi), %%eax\n\t"
4033 "subq %5, %%rdi\n\t"
4034# else
4035 "lea -4(%%edi), %%edi\n\t"
4036 "xorl (%%edi), %%eax\n\t"
4037 "subl %5, %%edi\n\t"
4038# endif
4039 "shll $3, %%edi\n\t"
4040 "bsfl %%eax, %%edx\n\t"
4041 "addl %%edi, %%edx\n\t"
4042 "1:\t\n"
4043 : "=d" (iBit),
4044 "=&c" (uECX),
4045 "=&D" (uEDI),
4046 "=&a" (uEAX)
4047 : "0" (0xffffffff),
4048 "mr" (pvBitmap),
4049 "1" (cBits >> 5),
4050 "2" (pvBitmap),
4051 "3" (0xffffffff));
4052# else
4053 cBits = RT_ALIGN_32(cBits, 32);
4054 __asm
4055 {
4056# ifdef RT_ARCH_AMD64
4057 mov rdi, [pvBitmap]
4058 mov rbx, rdi
4059# else
4060 mov edi, [pvBitmap]
4061 mov ebx, edi
4062# endif
4063 mov edx, 0ffffffffh
4064 mov eax, edx
4065 mov ecx, [cBits]
4066 shr ecx, 5
4067 repe scasd
4068 je done
4069
4070# ifdef RT_ARCH_AMD64
4071 lea rdi, [rdi - 4]
4072 xor eax, [rdi]
4073 sub rdi, rbx
4074# else
4075 lea edi, [edi - 4]
4076 xor eax, [edi]
4077 sub edi, ebx
4078# endif
4079 shl edi, 3
4080 bsf edx, eax
4081 add edx, edi
4082 done:
4083 mov [iBit], edx
4084 }
4085# endif
4086 return iBit;
4087 }
4088 return -1;
4089}
4090#endif
4091
4092
4093/**
4094 * Finds the next clear bit in a bitmap.
4095 *
4096 * @returns Index of the first zero bit.
4097 * @returns -1 if no clear bit was found.
4098 * @param pvBitmap Pointer to the bitmap.
4099 * @param cBits The number of bits in the bitmap. Multiple of 32.
4100 * @param iBitPrev The bit returned from the last search.
4101 * The search will start at iBitPrev + 1.
4102 */
4103#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4104DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4105#else
4106DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4107{
4108 int iBit = ++iBitPrev & 31;
4109 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4110 cBits -= iBitPrev & ~31;
4111 if (iBit)
4112 {
4113 /* inspect the first dword. */
4114 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
4115# if RT_INLINE_ASM_USES_INTRIN
4116 unsigned long ulBit = 0;
4117 if (_BitScanForward(&ulBit, u32))
4118 return ulBit + iBitPrev;
4119 iBit = -1;
4120# else
4121# if RT_INLINE_ASM_GNU_STYLE
4122 __asm__ __volatile__("bsf %1, %0\n\t"
4123 "jnz 1f\n\t"
4124 "movl $-1, %0\n\t"
4125 "1:\n\t"
4126 : "=r" (iBit)
4127 : "r" (u32));
4128# else
4129 __asm
4130 {
4131 mov edx, [u32]
4132 bsf eax, edx
4133 jnz done
4134 mov eax, 0ffffffffh
4135 done:
4136 mov [iBit], eax
4137 }
4138# endif
4139 if (iBit >= 0)
4140 return iBit + iBitPrev;
4141# endif
4142 /* Search the rest of the bitmap, if there is anything. */
4143 if (cBits > 32)
4144 {
4145 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4146 if (iBit >= 0)
4147 return iBit + (iBitPrev & ~31) + 32;
4148 }
4149 }
4150 else
4151 {
4152 /* Search the rest of the bitmap. */
4153 iBit = ASMBitFirstClear(pvBitmap, cBits);
4154 if (iBit >= 0)
4155 return iBit + (iBitPrev & ~31);
4156 }
4157 return iBit;
4158}
4159#endif
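
/* Usage sketch (illustrative): a simple first-fit allocator loop over a bitmap,
 * using ASMBitFirstClear to find a free slot and ASMBitSet to claim it. Not
 * thread safe as written; the names are hypothetical and cSlots must be a
 * multiple of 32 as the search functions require. */
DECLINLINE(int) ExampleAllocSlot(uint32_t *pau32Bitmap, uint32_t cSlots)
{
    int iSlot = ASMBitFirstClear(pau32Bitmap, cSlots);  /* -1 when the map is full */
    if (iSlot >= 0)
        ASMBitSet(pau32Bitmap, iSlot);
    return iSlot;
}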
4160
4161
4162/**
4163 * Finds the first set bit in a bitmap.
4164 *
4165 * @returns Index of the first set bit.
4166 * @returns -1 if no set bit was found.
4167 * @param pvBitmap Pointer to the bitmap.
4168 * @param cBits The number of bits in the bitmap. Multiple of 32.
4169 */
4170#if RT_INLINE_ASM_EXTERNAL
4171DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
4172#else
4173DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
4174{
4175 if (cBits)
4176 {
4177 int32_t iBit;
4178# if RT_INLINE_ASM_GNU_STYLE
4179 RTCCUINTREG uEAX, uECX, uEDI;
4180 cBits = RT_ALIGN_32(cBits, 32);
4181 __asm__ __volatile__("repe; scasl\n\t"
4182 "je 1f\n\t"
4183# ifdef RT_ARCH_AMD64
4184 "lea -4(%%rdi), %%rdi\n\t"
4185 "movl (%%rdi), %%eax\n\t"
4186 "subq %5, %%rdi\n\t"
4187# else
4188 "lea -4(%%edi), %%edi\n\t"
4189 "movl (%%edi), %%eax\n\t"
4190 "subl %5, %%edi\n\t"
4191# endif
4192 "shll $3, %%edi\n\t"
4193 "bsfl %%eax, %%edx\n\t"
4194 "addl %%edi, %%edx\n\t"
4195 "1:\t\n"
4196 : "=d" (iBit),
4197 "=&c" (uECX),
4198 "=&D" (uEDI),
4199 "=&a" (uEAX)
4200 : "0" (0xffffffff),
4201 "mr" (pvBitmap),
4202 "1" (cBits >> 5),
4203 "2" (pvBitmap),
4204 "3" (0));
4205# else
4206 cBits = RT_ALIGN_32(cBits, 32);
4207 __asm
4208 {
4209# ifdef RT_ARCH_AMD64
4210 mov rdi, [pvBitmap]
4211 mov rbx, rdi
4212# else
4213 mov edi, [pvBitmap]
4214 mov ebx, edi
4215# endif
4216 mov edx, 0ffffffffh
4217 xor eax, eax
4218 mov ecx, [cBits]
4219 shr ecx, 5
4220 repe scasd
4221 je done
4222# ifdef RT_ARCH_AMD64
4223 lea rdi, [rdi - 4]
4224 mov eax, [rdi]
4225 sub rdi, rbx
4226# else
4227 lea edi, [edi - 4]
4228 mov eax, [edi]
4229 sub edi, ebx
4230# endif
4231 shl edi, 3
4232 bsf edx, eax
4233 add edx, edi
4234 done:
4235 mov [iBit], edx
4236 }
4237# endif
4238 return iBit;
4239 }
4240 return -1;
4241}
4242#endif
4243
4244
4245/**
4246 * Finds the next set bit in a bitmap.
4247 *
4248 * @returns Index of the next set bit.
4249 * @returns -1 if no set bit was found.
4250 * @param pvBitmap Pointer to the bitmap.
4251 * @param cBits The number of bits in the bitmap. Multiple of 32.
4252 * @param iBitPrev The bit returned from the last search.
4253 * The search will start at iBitPrev + 1.
4254 */
4255#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4256DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4257#else
4258DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4259{
4260 int iBit = ++iBitPrev & 31;
4261 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4262 cBits -= iBitPrev & ~31;
4263 if (iBit)
4264 {
4265 /* inspect the first dword. */
4266 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
4267# if RT_INLINE_ASM_USES_INTRIN
4268 unsigned long ulBit = 0;
4269 if (_BitScanForward(&ulBit, u32))
4270 return ulBit + iBitPrev;
4271 iBit = -1;
4272# else
4273# if RT_INLINE_ASM_GNU_STYLE
4274 __asm__ __volatile__("bsf %1, %0\n\t"
4275 "jnz 1f\n\t"
4276 "movl $-1, %0\n\t"
4277 "1:\n\t"
4278 : "=r" (iBit)
4279 : "r" (u32));
4280# else
4281 __asm
4282 {
4283 mov edx, u32
4284 bsf eax, edx
4285 jnz done
4286 mov eax, 0ffffffffh
4287 done:
4288 mov [iBit], eax
4289 }
4290# endif
4291 if (iBit >= 0)
4292 return iBit + iBitPrev;
4293# endif
4294 /* Search the rest of the bitmap, if there is anything. */
4295 if (cBits > 32)
4296 {
4297 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4298 if (iBit >= 0)
4299 return iBit + (iBitPrev & ~31) + 32;
4300 }
4301
4302 }
4303 else
4304 {
4305 /* Search the rest of the bitmap. */
4306 iBit = ASMBitFirstSet(pvBitmap, cBits);
4307 if (iBit >= 0)
4308 return iBit + (iBitPrev & ~31);
4309 }
4310 return iBit;
4311}
4312#endif
4313
4314
4315/**
4316 * Finds the first bit which is set in the given 32-bit integer.
4317 * Bits are numbered from 1 (least significant) to 32.
4318 *
4319 * @returns index [1..32] of the first set bit.
4320 * @returns 0 if all bits are cleared.
4321 * @param u32 Integer to search for set bits.
4322 * @remark Similar to ffs() in BSD.
4323 */
4324DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4325{
4326# if RT_INLINE_ASM_USES_INTRIN
4327 unsigned long iBit;
4328 if (_BitScanForward(&iBit, u32))
4329 iBit++;
4330 else
4331 iBit = 0;
4332# elif RT_INLINE_ASM_GNU_STYLE
4333 uint32_t iBit;
4334 __asm__ __volatile__("bsf %1, %0\n\t"
4335 "jnz 1f\n\t"
4336 "xorl %0, %0\n\t"
4337 "jmp 2f\n"
4338 "1:\n\t"
4339 "incl %0\n"
4340 "2:\n\t"
4341 : "=r" (iBit)
4342 : "rm" (u32));
4343# else
4344 uint32_t iBit;
4345 _asm
4346 {
4347 bsf eax, [u32]
4348 jnz found
4349 xor eax, eax
4350 jmp done
4351 found:
4352 inc eax
4353 done:
4354 mov [iBit], eax
4355 }
4356# endif
4357 return iBit;
4358}
4359
4360
4361/**
4362 * Finds the first bit which is set in the given 32-bit integer.
4363 * Bits are numbered from 1 (least significant) to 32.
4364 *
4365 * @returns index [1..32] of the first set bit.
4366 * @returns 0 if all bits are cleared.
4367 * @param i32 Integer to search for set bits.
4368 * @remark Similar to ffs() in BSD.
4369 */
4370DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4371{
4372 return ASMBitFirstSetU32((uint32_t)i32);
4373}
4374
4375
4376/**
4377 * Finds the last bit which is set in the given 32-bit integer.
4378 * Bits are numbered from 1 (least significant) to 32.
4379 *
4380 * @returns index [1..32] of the last set bit.
4381 * @returns 0 if all bits are cleared.
4382 * @param u32 Integer to search for set bits.
4383 * @remark Similar to fls() in BSD.
4384 */
4385DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4386{
4387# if RT_INLINE_ASM_USES_INTRIN
4388 unsigned long iBit;
4389 if (_BitScanReverse(&iBit, u32))
4390 iBit++;
4391 else
4392 iBit = 0;
4393# elif RT_INLINE_ASM_GNU_STYLE
4394 uint32_t iBit;
4395 __asm__ __volatile__("bsrl %1, %0\n\t"
4396 "jnz 1f\n\t"
4397 "xorl %0, %0\n\t"
4398 "jmp 2f\n"
4399 "1:\n\t"
4400 "incl %0\n"
4401 "2:\n\t"
4402 : "=r" (iBit)
4403 : "rm" (u32));
4404# else
4405 uint32_t iBit;
4406 _asm
4407 {
4408 bsr eax, [u32]
4409 jnz found
4410 xor eax, eax
4411 jmp done
4412 found:
4413 inc eax
4414 done:
4415 mov [iBit], eax
4416 }
4417# endif
4418 return iBit;
4419}
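
/* Illustrative sketch (not part of the original header): ASMBitLastSetU32
 * returns the 1-based index of the most significant set bit, so subtracting one
 * yields floor(log2(u32)) for non-zero input. The helper name is invented. */
DECLINLINE(unsigned) ExampleLog2(uint32_t u32)
{
    /* The caller must guarantee u32 != 0, otherwise the result underflows. */
    return ASMBitLastSetU32(u32) - 1;
}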
4420
4421
4422/**
4423 * Finds the last bit which is set in the given 32-bit integer.
4424 * Bits are numbered from 1 (least significant) to 32.
4425 *
4426 * @returns index [1..32] of the last set bit.
4427 * @returns 0 if all bits are cleared.
4428 * @param i32 Integer to search for set bits.
4429 * @remark Similar to fls() in BSD.
4430 */
4431DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4432{
4433 return ASMBitLastSetU32((uint32_t)i32);
4434}
4435
4436
4437/**
4438 * Reverse the byte order of the given 32-bit integer.
4439 * @param u32 Integer
4440 */
4441DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4442{
4443#if RT_INLINE_ASM_USES_INTRIN
4444 u32 = _byteswap_ulong(u32);
4445#elif RT_INLINE_ASM_GNU_STYLE
4446 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4447#else
4448 _asm
4449 {
4450 mov eax, [u32]
4451 bswap eax
4452 mov [u32], eax
4453 }
4454#endif
4455 return u32;
4456}
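
/* Usage sketch (illustrative): on x86/AMD64, which are little endian, a byte
 * swap converts a host value to big endian (network) order. The function name
 * is hypothetical. */
DECLINLINE(uint32_t) ExampleHostToNetU32(uint32_t u32Host)
{
    return ASMByteSwapU32(u32Host);
}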
4457
4458/** @} */
4459
4460
4461/** @} */
4462#endif
4463