VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 6650

Last change on this file since 6650 was 6650, checked in by vboxsync, 17 years ago

cmpxchg optimization

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 111.1 KB
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31/** @todo #include <iprt/param.h> for PAGE_SIZE. */
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a Microsoft compiler with _MSC_VER >= 1400 (Visual C++ 2005 or later).
34 * Otherwise defined as 0.
35 */
36
37#ifdef _MSC_VER
38# if _MSC_VER >= 1400
39# define RT_INLINE_ASM_USES_INTRIN 1
40# include <intrin.h>
41 /* Emit the intrinsics at all optimization levels. */
42# pragma intrinsic(_ReadWriteBarrier)
43# pragma intrinsic(__cpuid)
44# pragma intrinsic(_enable)
45# pragma intrinsic(_disable)
46# pragma intrinsic(__rdtsc)
47# pragma intrinsic(__readmsr)
48# pragma intrinsic(__writemsr)
49# pragma intrinsic(__outbyte)
50# pragma intrinsic(__outword)
51# pragma intrinsic(__outdword)
52# pragma intrinsic(__inbyte)
53# pragma intrinsic(__inword)
54# pragma intrinsic(__indword)
55# pragma intrinsic(__invlpg)
56# pragma intrinsic(__stosd)
57# pragma intrinsic(__stosw)
58# pragma intrinsic(__stosb)
59# pragma intrinsic(__readcr0)
60# pragma intrinsic(__readcr2)
61# pragma intrinsic(__readcr3)
62# pragma intrinsic(__readcr4)
63# pragma intrinsic(__writecr0)
64# pragma intrinsic(__writecr3)
65# pragma intrinsic(__writecr4)
66# pragma intrinsic(_BitScanForward)
67# pragma intrinsic(_BitScanReverse)
68# pragma intrinsic(_bittest)
69# pragma intrinsic(_bittestandset)
70# pragma intrinsic(_bittestandreset)
71# pragma intrinsic(_bittestandcomplement)
72# pragma intrinsic(_byteswap_ushort)
73# pragma intrinsic(_byteswap_ulong)
74# pragma intrinsic(_interlockedbittestandset)
75# pragma intrinsic(_interlockedbittestandreset)
76# pragma intrinsic(_InterlockedAnd)
77# pragma intrinsic(_InterlockedOr)
78# pragma intrinsic(_InterlockedIncrement)
79# pragma intrinsic(_InterlockedDecrement)
80# pragma intrinsic(_InterlockedExchange)
81# pragma intrinsic(_InterlockedCompareExchange)
82# pragma intrinsic(_InterlockedCompareExchange64)
83# ifdef RT_ARCH_AMD64
84# pragma intrinsic(__stosq)
85# pragma intrinsic(__readcr8)
86# pragma intrinsic(__writecr8)
87# pragma intrinsic(_byteswap_uint64)
88# pragma intrinsic(_InterlockedExchange64)
89# endif
90# endif
91#endif
92#ifndef RT_INLINE_ASM_USES_INTRIN
93# define RT_INLINE_ASM_USES_INTRIN 0
94#endif
95
96
97
98/** @defgroup grp_asm ASM - Assembly Routines
99 * @ingroup grp_rt
100 * @{
101 */
102
103/** @def RT_INLINE_ASM_EXTERNAL
104 * Defined as 1 if the compiler does not support inline assembly.
105 * The ASM* functions will then be implemented in an external .asm file.
106 *
107 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
108 * inline assembly support in their AMD64 compiler.
109 */
110#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
111# define RT_INLINE_ASM_EXTERNAL 1
112#else
113# define RT_INLINE_ASM_EXTERNAL 0
114#endif
115
116/** @def RT_INLINE_ASM_GNU_STYLE
117 * Defined as 1 if the compiler understands GNU style inline assembly.
118 */
119#if defined(_MSC_VER)
120# define RT_INLINE_ASM_GNU_STYLE 0
121#else
122# define RT_INLINE_ASM_GNU_STYLE 1
123#endif
124
125
126/** @todo find a better place for this structure? */
127#pragma pack(1)
128/** IDTR */
129typedef struct RTIDTR
130{
131 /** Size of the IDT. */
132 uint16_t cbIdt;
133 /** Address of the IDT. */
134 uintptr_t pIdt;
135} RTIDTR, *PRTIDTR;
136#pragma pack()
137
138#pragma pack(1)
139/** GDTR */
140typedef struct RTGDTR
141{
142 /** Size of the GDT. */
143 uint16_t cbGdt;
144 /** Address of the GDT. */
145 uintptr_t pGdt;
146} RTGDTR, *PRTGDTR;
147#pragma pack()
148
149
150/** @def ASMReturnAddress
151 * Gets the return address of the current function or method (that is, the address in the caller).
152 */
153#ifdef _MSC_VER
154# ifdef __cplusplus
155extern "C"
156# endif
157void * _ReturnAddress(void);
158# pragma intrinsic(_ReturnAddress)
159# define ASMReturnAddress() _ReturnAddress()
160#elif defined(__GNUC__) || defined(__DOXYGEN__)
161# define ASMReturnAddress() __builtin_return_address(0)
162#else
163# error "Unsupported compiler."
164#endif
165
166
167/**
168 * Gets the content of the IDTR CPU register.
169 * @param pIdtr Where to store the IDTR contents.
170 */
171#if RT_INLINE_ASM_EXTERNAL
172DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
173#else
174DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
175{
176# if RT_INLINE_ASM_GNU_STYLE
177 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
178# else
179 __asm
180 {
181# ifdef RT_ARCH_AMD64
182 mov rax, [pIdtr]
183 sidt [rax]
184# else
185 mov eax, [pIdtr]
186 sidt [eax]
187# endif
188 }
189# endif
190}
191#endif
192
193
194/**
195 * Sets the content of the IDTR CPU register.
196 * @param pIdtr Where to load the IDTR contents from
197 */
198#if RT_INLINE_ASM_EXTERNAL
199DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
200#else
201DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
202{
203# if RT_INLINE_ASM_GNU_STYLE
204 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
205# else
206 __asm
207 {
208# ifdef RT_ARCH_AMD64
209 mov rax, [pIdtr]
210 lidt [rax]
211# else
212 mov eax, [pIdtr]
213 lidt [eax]
214# endif
215 }
216# endif
217}
218#endif
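
/* Usage sketch (illustrative only; ExampleSaveRestoreIdtr and its body are
   hypothetical, not part of this header): pairing ASMGetIDTR/ASMSetIDTR to
   save and restore the interrupt descriptor table register. */
#if 0 /* example sketch */
static void ExampleSaveRestoreIdtr(void)
{
    RTIDTR Idtr;
    ASMGetIDTR(&Idtr);          /* capture the current IDT limit and base */
    /* ... temporarily switch to a different IDT here ... */
    ASMSetIDTR(&Idtr);          /* put the original IDT back */
}
#endif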
219
220
221/**
222 * Gets the content of the GDTR CPU register.
223 * @param pGdtr Where to store the GDTR contents.
224 */
225#if RT_INLINE_ASM_EXTERNAL
226DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
227#else
228DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
229{
230# if RT_INLINE_ASM_GNU_STYLE
231 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
232# else
233 __asm
234 {
235# ifdef RT_ARCH_AMD64
236 mov rax, [pGdtr]
237 sgdt [rax]
238# else
239 mov eax, [pGdtr]
240 sgdt [eax]
241# endif
242 }
243# endif
244}
245#endif
246
247/**
248 * Get the CS register.
249 * @returns CS.
250 */
251#if RT_INLINE_ASM_EXTERNAL
252DECLASM(RTSEL) ASMGetCS(void);
253#else
254DECLINLINE(RTSEL) ASMGetCS(void)
255{
256 RTSEL SelCS;
257# if RT_INLINE_ASM_GNU_STYLE
258 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
259# else
260 __asm
261 {
262 mov ax, cs
263 mov [SelCS], ax
264 }
265# endif
266 return SelCS;
267}
268#endif
269
270
271/**
272 * Get the DS register.
273 * @returns DS.
274 */
275#if RT_INLINE_ASM_EXTERNAL
276DECLASM(RTSEL) ASMGetDS(void);
277#else
278DECLINLINE(RTSEL) ASMGetDS(void)
279{
280 RTSEL SelDS;
281# if RT_INLINE_ASM_GNU_STYLE
282 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
283# else
284 __asm
285 {
286 mov ax, ds
287 mov [SelDS], ax
288 }
289# endif
290 return SelDS;
291}
292#endif
293
294
295/**
296 * Get the ES register.
297 * @returns ES.
298 */
299#if RT_INLINE_ASM_EXTERNAL
300DECLASM(RTSEL) ASMGetES(void);
301#else
302DECLINLINE(RTSEL) ASMGetES(void)
303{
304 RTSEL SelES;
305# if RT_INLINE_ASM_GNU_STYLE
306 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
307# else
308 __asm
309 {
310 mov ax, es
311 mov [SelES], ax
312 }
313# endif
314 return SelES;
315}
316#endif
317
318
319/**
320 * Get the FS register.
321 * @returns FS.
322 */
323#if RT_INLINE_ASM_EXTERNAL
324DECLASM(RTSEL) ASMGetFS(void);
325#else
326DECLINLINE(RTSEL) ASMGetFS(void)
327{
328 RTSEL SelFS;
329# if RT_INLINE_ASM_GNU_STYLE
330 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
331# else
332 __asm
333 {
334 mov ax, fs
335 mov [SelFS], ax
336 }
337# endif
338 return SelFS;
339}
340#endif
341
342
343/**
344 * Get the GS register.
345 * @returns GS.
346 */
347#if RT_INLINE_ASM_EXTERNAL
348DECLASM(RTSEL) ASMGetGS(void);
349#else
350DECLINLINE(RTSEL) ASMGetGS(void)
351{
352 RTSEL SelGS;
353# if RT_INLINE_ASM_GNU_STYLE
354 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
355# else
356 __asm
357 {
358 mov ax, gs
359 mov [SelGS], ax
360 }
361# endif
362 return SelGS;
363}
364#endif
365
366
367/**
368 * Get the SS register.
369 * @returns SS.
370 */
371#if RT_INLINE_ASM_EXTERNAL
372DECLASM(RTSEL) ASMGetSS(void);
373#else
374DECLINLINE(RTSEL) ASMGetSS(void)
375{
376 RTSEL SelSS;
377# if RT_INLINE_ASM_GNU_STYLE
378 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
379# else
380 __asm
381 {
382 mov ax, ss
383 mov [SelSS], ax
384 }
385# endif
386 return SelSS;
387}
388#endif
389
390
391/**
392 * Get the TR register.
393 * @returns TR.
394 */
395#if RT_INLINE_ASM_EXTERNAL
396DECLASM(RTSEL) ASMGetTR(void);
397#else
398DECLINLINE(RTSEL) ASMGetTR(void)
399{
400 RTSEL SelTR;
401# if RT_INLINE_ASM_GNU_STYLE
402 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
403# else
404 __asm
405 {
406 str ax
407 mov [SelTR], ax
408 }
409# endif
410 return SelTR;
411}
412#endif
413
414
415/**
416 * Get the [RE]FLAGS register.
417 * @returns [RE]FLAGS.
418 */
419#if RT_INLINE_ASM_EXTERNAL
420DECLASM(RTCCUINTREG) ASMGetFlags(void);
421#else
422DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
423{
424 RTCCUINTREG uFlags;
425# if RT_INLINE_ASM_GNU_STYLE
426# ifdef RT_ARCH_AMD64
427 __asm__ __volatile__("pushfq\n\t"
428 "popq %0\n\t"
429 : "=g" (uFlags));
430# else
431 __asm__ __volatile__("pushfl\n\t"
432 "popl %0\n\t"
433 : "=g" (uFlags));
434# endif
435# else
436 __asm
437 {
438# ifdef RT_ARCH_AMD64
439 pushfq
440 pop [uFlags]
441# else
442 pushfd
443 pop [uFlags]
444# endif
445 }
446# endif
447 return uFlags;
448}
449#endif
450
451
452/**
453 * Set the [RE]FLAGS register.
454 * @param uFlags The new [RE]FLAGS value.
455 */
456#if RT_INLINE_ASM_EXTERNAL
457DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
458#else
459DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
460{
461# if RT_INLINE_ASM_GNU_STYLE
462# ifdef RT_ARCH_AMD64
463 __asm__ __volatile__("pushq %0\n\t"
464 "popfq\n\t"
465 : : "g" (uFlags));
466# else
467 __asm__ __volatile__("pushl %0\n\t"
468 "popfl\n\t"
469 : : "g" (uFlags));
470# endif
471# else
472 __asm
473 {
474# ifdef RT_ARCH_AMD64
475 push [uFlags]
476 popfq
477# else
478 push [uFlags]
479 popfd
480# endif
481 }
482# endif
483}
484#endif
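
/* Usage sketch (illustrative only; the function name and body are hypothetical):
   the usual save/modify/restore pattern for the [RE]FLAGS register. */
#if 0 /* example sketch */
static void ExampleFlagsSaveRestore(void)
{
    RTCCUINTREG const fSavedFlags = ASMGetFlags();  /* remember IF and friends */
    /* ... code that may clobber the flags register ... */
    ASMSetFlags(fSavedFlags);                       /* restore the saved value */
}
#endif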
485
486
487/**
488 * Gets the content of the CPU timestamp counter register.
489 *
490 * @returns TSC.
491 */
492#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
493DECLASM(uint64_t) ASMReadTSC(void);
494#else
495DECLINLINE(uint64_t) ASMReadTSC(void)
496{
497 RTUINT64U u;
498# if RT_INLINE_ASM_GNU_STYLE
499 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
500# else
501# if RT_INLINE_ASM_USES_INTRIN
502 u.u = __rdtsc();
503# else
504 __asm
505 {
506 rdtsc
507 mov [u.s.Lo], eax
508 mov [u.s.Hi], edx
509 }
510# endif
511# endif
512 return u.u;
513}
514#endif
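
/* Usage sketch (illustrative only; the callback and its use are hypothetical):
   timing an operation in raw TSC ticks by taking the difference of two reads. */
#if 0 /* example sketch */
static uint64_t ExampleMeasureTscTicks(void (*pfnWork)(void))
{
    uint64_t const uStart = ASMReadTSC();
    pfnWork();
    return ASMReadTSC() - uStart;   /* elapsed timestamp counter ticks */
}
#endif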
515
516
517/**
518 * Performs the cpuid instruction returning all registers.
519 *
520 * @param uOperator CPUID operation (eax).
521 * @param pvEAX Where to store eax.
522 * @param pvEBX Where to store ebx.
523 * @param pvECX Where to store ecx.
524 * @param pvEDX Where to store edx.
525 * @remark We're using void pointers to ease the use of special bitfield structures and such.
526 */
527#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
528DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
529#else
530DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
531{
532# if RT_INLINE_ASM_GNU_STYLE
533# ifdef RT_ARCH_AMD64
534 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
535 __asm__ ("cpuid\n\t"
536 : "=a" (uRAX),
537 "=b" (uRBX),
538 "=c" (uRCX),
539 "=d" (uRDX)
540 : "0" (uOperator));
541 *(uint32_t *)pvEAX = (uint32_t)uRAX;
542 *(uint32_t *)pvEBX = (uint32_t)uRBX;
543 *(uint32_t *)pvECX = (uint32_t)uRCX;
544 *(uint32_t *)pvEDX = (uint32_t)uRDX;
545# else
546 __asm__ ("xchgl %%ebx, %1\n\t"
547 "cpuid\n\t"
548 "xchgl %%ebx, %1\n\t"
549 : "=a" (*(uint32_t *)pvEAX),
550 "=r" (*(uint32_t *)pvEBX),
551 "=c" (*(uint32_t *)pvECX),
552 "=d" (*(uint32_t *)pvEDX)
553 : "0" (uOperator));
554# endif
555
556# elif RT_INLINE_ASM_USES_INTRIN
557 int aInfo[4];
558 __cpuid(aInfo, uOperator);
559 *(uint32_t *)pvEAX = aInfo[0];
560 *(uint32_t *)pvEBX = aInfo[1];
561 *(uint32_t *)pvECX = aInfo[2];
562 *(uint32_t *)pvEDX = aInfo[3];
563
564# else
565 uint32_t uEAX;
566 uint32_t uEBX;
567 uint32_t uECX;
568 uint32_t uEDX;
569 __asm
570 {
571 push ebx
572 mov eax, [uOperator]
573 cpuid
574 mov [uEAX], eax
575 mov [uEBX], ebx
576 mov [uECX], ecx
577 mov [uEDX], edx
578 pop ebx
579 }
580 *(uint32_t *)pvEAX = uEAX;
581 *(uint32_t *)pvEBX = uEBX;
582 *(uint32_t *)pvECX = uECX;
583 *(uint32_t *)pvEDX = uEDX;
584# endif
585}
586#endif
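
/* Usage sketch (illustrative only; the helper is hypothetical and assumes
   <string.h> for memcpy): reading the 12-character CPU vendor string, which
   CPUID leaf 0 returns in EBX, EDX and ECX (in that byte order). */
#if 0 /* example sketch */
static void ExampleGetCpuVendor(char *pszVendor /* 13 bytes */)
{
    uint32_t uEAX;
    uint32_t au32[3];
    ASMCpuId(0, &uEAX, &au32[0] /*ebx*/, &au32[2] /*ecx*/, &au32[1] /*edx*/);
    memcpy(pszVendor, &au32[0], 12);
    pszVendor[12] = '\0';
}
#endif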
587
588
589/**
590 * Performs the cpuid instruction returning all registers.
591 * Some cpuid subfunctions take ECX as an additional input parameter (currently known to be the case for EAX=4).
592 *
593 * @param uOperator CPUID operation (eax).
594 * @param uIdxECX The ECX value (sub-leaf index) to pass to cpuid.
595 * @param pvEAX Where to store eax.
596 * @param pvEBX Where to store ebx.
597 * @param pvECX Where to store ecx.
598 * @param pvEDX Where to store edx.
599 * @remark We're using void pointers to ease the use of special bitfield structures and such.
600 */
601#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
602DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
603#else
604DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
605{
606# if RT_INLINE_ASM_GNU_STYLE
607# ifdef RT_ARCH_AMD64
608 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
609 __asm__ ("cpuid\n\t"
610 : "=a" (uRAX),
611 "=b" (uRBX),
612 "=c" (uRCX),
613 "=d" (uRDX)
614 : "0" (uOperator),
615 "2" (uIdxECX));
616 *(uint32_t *)pvEAX = (uint32_t)uRAX;
617 *(uint32_t *)pvEBX = (uint32_t)uRBX;
618 *(uint32_t *)pvECX = (uint32_t)uRCX;
619 *(uint32_t *)pvEDX = (uint32_t)uRDX;
620# else
621 __asm__ ("xchgl %%ebx, %1\n\t"
622 "cpuid\n\t"
623 "xchgl %%ebx, %1\n\t"
624 : "=a" (*(uint32_t *)pvEAX),
625 "=r" (*(uint32_t *)pvEBX),
626 "=c" (*(uint32_t *)pvECX),
627 "=d" (*(uint32_t *)pvEDX)
628 : "0" (uOperator),
629 "2" (uIdxECX));
630# endif
631
632# elif RT_INLINE_ASM_USES_INTRIN
633 int aInfo[4];
634 /** @todo find/use an intrinsic that takes the ECX sub-leaf; __cpuid does not, so uIdxECX isn't passed on here. */
635 __cpuid(aInfo, uOperator);
636 *(uint32_t *)pvEAX = aInfo[0];
637 *(uint32_t *)pvEBX = aInfo[1];
638 *(uint32_t *)pvECX = aInfo[2];
639 *(uint32_t *)pvEDX = aInfo[3];
640
641# else
642 uint32_t uEAX;
643 uint32_t uEBX;
644 uint32_t uECX;
645 uint32_t uEDX;
646 __asm
647 {
648 push ebx
649 mov eax, [uOperator]
650 mov ecx, [uIdxECX]
651 cpuid
652 mov [uEAX], eax
653 mov [uEBX], ebx
654 mov [uECX], ecx
655 mov [uEDX], edx
656 pop ebx
657 }
658 *(uint32_t *)pvEAX = uEAX;
659 *(uint32_t *)pvEBX = uEBX;
660 *(uint32_t *)pvECX = uECX;
661 *(uint32_t *)pvEDX = uEDX;
662# endif
663}
664#endif
665
666
667/**
668 * Performs the cpuid instruction returning ecx and edx.
669 *
670 * @param uOperator CPUID operation (eax).
671 * @param pvECX Where to store ecx.
672 * @param pvEDX Where to store edx.
673 * @remark We're using void pointers to ease the use of special bitfield structures and such.
674 */
675#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
676DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
677#else
678DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
679{
680 uint32_t uEBX;
681 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
682}
683#endif
684
685
686/**
687 * Performs the cpuid instruction returning edx.
688 *
689 * @param uOperator CPUID operation (eax).
690 * @returns EDX after cpuid operation.
691 */
692#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
693DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
694#else
695DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
696{
697 RTCCUINTREG xDX;
698# if RT_INLINE_ASM_GNU_STYLE
699# ifdef RT_ARCH_AMD64
700 RTCCUINTREG uSpill;
701 __asm__ ("cpuid"
702 : "=a" (uSpill),
703 "=d" (xDX)
704 : "0" (uOperator)
705 : "rbx", "rcx");
706# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
707 __asm__ ("push %%ebx\n\t"
708 "cpuid\n\t"
709 "pop %%ebx\n\t"
710 : "=a" (uOperator),
711 "=d" (xDX)
712 : "0" (uOperator)
713 : "ecx");
714# else
715 __asm__ ("cpuid"
716 : "=a" (uOperator),
717 "=d" (xDX)
718 : "0" (uOperator)
719 : "ebx", "ecx");
720# endif
721
722# elif RT_INLINE_ASM_USES_INTRIN
723 int aInfo[4];
724 __cpuid(aInfo, uOperator);
725 xDX = aInfo[3];
726
727# else
728 __asm
729 {
730 push ebx
731 mov eax, [uOperator]
732 cpuid
733 mov [xDX], edx
734 pop ebx
735 }
736# endif
737 return (uint32_t)xDX;
738}
739#endif
740
741
742/**
743 * Performs the cpuid instruction returning ecx.
744 *
745 * @param uOperator CPUID operation (eax).
746 * @returns ECX after cpuid operation.
747 */
748#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
749DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
750#else
751DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
752{
753 RTCCUINTREG xCX;
754# if RT_INLINE_ASM_GNU_STYLE
755# ifdef RT_ARCH_AMD64
756 RTCCUINTREG uSpill;
757 __asm__ ("cpuid"
758 : "=a" (uSpill),
759 "=c" (xCX)
760 : "0" (uOperator)
761 : "rbx", "rdx");
762# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
763 __asm__ ("push %%ebx\n\t"
764 "cpuid\n\t"
765 "pop %%ebx\n\t"
766 : "=a" (uOperator),
767 "=c" (xCX)
768 : "0" (uOperator)
769 : "edx");
770# else
771 __asm__ ("cpuid"
772 : "=a" (uOperator),
773 "=c" (xCX)
774 : "0" (uOperator)
775 : "ebx", "edx");
776
777# endif
778
779# elif RT_INLINE_ASM_USES_INTRIN
780 int aInfo[4];
781 __cpuid(aInfo, uOperator);
782 xCX = aInfo[2];
783
784# else
785 __asm
786 {
787 push ebx
788 mov eax, [uOperator]
789 cpuid
790 mov [xCX], ecx
791 pop ebx
792 }
793# endif
794 return (uint32_t)xCX;
795}
796#endif
797
798
799/**
800 * Checks if the current CPU supports CPUID.
801 *
802 * @returns true if CPUID is supported.
803 */
804DECLINLINE(bool) ASMHasCpuId(void)
805{
806#ifdef RT_ARCH_AMD64
807 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
808#else /* !RT_ARCH_AMD64 */
809 bool fRet = false;
810# if RT_INLINE_ASM_GNU_STYLE
811 uint32_t u1;
812 uint32_t u2;
813 __asm__ ("pushf\n\t"
814 "pop %1\n\t"
815 "mov %1, %2\n\t"
816 "xorl $0x200000, %1\n\t"
817 "push %1\n\t"
818 "popf\n\t"
819 "pushf\n\t"
820 "pop %1\n\t"
821 "cmpl %1, %2\n\t"
822 "setne %0\n\t"
823 "push %2\n\t"
824 "popf\n\t"
825 : "=m" (fRet), "=r" (u1), "=r" (u2));
826# else
827 __asm
828 {
829 pushfd
830 pop eax
831 mov ebx, eax
832 xor eax, 0200000h
833 push eax
834 popfd
835 pushfd
836 pop eax
837 cmp eax, ebx
838 setne fRet
839 push ebx
840 popfd
841 }
842# endif
843 return fRet;
844#endif /* !RT_ARCH_AMD64 */
845}
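
/* Usage sketch (illustrative only; the wrapper is hypothetical): guarding a
   CPUID feature test with ASMHasCpuId; bit 4 of leaf 1 EDX is the TSC flag. */
#if 0 /* example sketch */
static bool ExampleHasTsc(void)
{
    if (!ASMHasCpuId())
        return false;
    return (ASMCpuId_EDX(1) & RT_BIT(4)) != 0;  /* EDX bit 4 = time stamp counter */
}
#endif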
846
847
848/**
849 * Gets the APIC ID of the current CPU.
850 *
851 * @returns the APIC ID.
852 */
853#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
854DECLASM(uint8_t) ASMGetApicId(void);
855#else
856DECLINLINE(uint8_t) ASMGetApicId(void)
857{
858 RTCCUINTREG xBX;
859# if RT_INLINE_ASM_GNU_STYLE
860# ifdef RT_ARCH_AMD64
861 RTCCUINTREG uSpill;
862 __asm__ ("cpuid"
863 : "=a" (uSpill),
864 "=b" (xBX)
865 : "0" (1)
866 : "rcx", "rdx");
867# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
868 RTCCUINTREG uSpill;
869 __asm__ ("mov %%ebx,%1\n\t"
870 "cpuid\n\t"
871 "xchgl %%ebx,%1\n\t"
872 : "=a" (uSpill),
873 "=r" (xBX)
874 : "0" (1)
875 : "ecx", "edx");
876# else
877 RTCCUINTREG uSpill;
878 __asm__ ("cpuid"
879 : "=a" (uSpill),
880 "=b" (xBX)
881 : "0" (1)
882 : "ecx", "edx");
883# endif
884
885# elif RT_INLINE_ASM_USES_INTRIN
886 int aInfo[4];
887 __cpuid(aInfo, 1);
888 xBX = aInfo[1];
889
890# else
891 __asm
892 {
893 push ebx
894 mov eax, 1
895 cpuid
896 mov [xBX], ebx
897 pop ebx
898 }
899# endif
900 return (uint8_t)(xBX >> 24);
901}
902#endif
903
904/**
905 * Get cr0.
906 * @returns cr0.
907 */
908#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
909DECLASM(RTCCUINTREG) ASMGetCR0(void);
910#else
911DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
912{
913 RTCCUINTREG uCR0;
914# if RT_INLINE_ASM_USES_INTRIN
915 uCR0 = __readcr0();
916
917# elif RT_INLINE_ASM_GNU_STYLE
918# ifdef RT_ARCH_AMD64
919 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
920# else
921 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
922# endif
923# else
924 __asm
925 {
926# ifdef RT_ARCH_AMD64
927 mov rax, cr0
928 mov [uCR0], rax
929# else
930 mov eax, cr0
931 mov [uCR0], eax
932# endif
933 }
934# endif
935 return uCR0;
936}
937#endif
938
939
940/**
941 * Sets the CR0 register.
942 * @param uCR0 The new CR0 value.
943 */
944#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
945DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
946#else
947DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
948{
949# if RT_INLINE_ASM_USES_INTRIN
950 __writecr0(uCR0);
951
952# elif RT_INLINE_ASM_GNU_STYLE
953# ifdef RT_ARCH_AMD64
954 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
955# else
956 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
957# endif
958# else
959 __asm
960 {
961# ifdef RT_ARCH_AMD64
962 mov rax, [uCR0]
963 mov cr0, rax
964# else
965 mov eax, [uCR0]
966 mov cr0, eax
967# endif
968 }
969# endif
970}
971#endif
972
973
974/**
975 * Get cr2.
976 * @returns cr2.
977 */
978#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
979DECLASM(RTCCUINTREG) ASMGetCR2(void);
980#else
981DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
982{
983 RTCCUINTREG uCR2;
984# if RT_INLINE_ASM_USES_INTRIN
985 uCR2 = __readcr2();
986
987# elif RT_INLINE_ASM_GNU_STYLE
988# ifdef RT_ARCH_AMD64
989 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
990# else
991 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
992# endif
993# else
994 __asm
995 {
996# ifdef RT_ARCH_AMD64
997 mov rax, cr2
998 mov [uCR2], rax
999# else
1000 mov eax, cr2
1001 mov [uCR2], eax
1002# endif
1003 }
1004# endif
1005 return uCR2;
1006}
1007#endif
1008
1009
1010/**
1011 * Sets the CR2 register.
1012 * @param uCR2 The new CR2 value.
1013 */
1014#if RT_INLINE_ASM_EXTERNAL
1015DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1016#else
1017DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1018{
1019# if RT_INLINE_ASM_GNU_STYLE
1020# ifdef RT_ARCH_AMD64
1021 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1022# else
1023 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1024# endif
1025# else
1026 __asm
1027 {
1028# ifdef RT_ARCH_AMD64
1029 mov rax, [uCR2]
1030 mov cr2, rax
1031# else
1032 mov eax, [uCR2]
1033 mov cr2, eax
1034# endif
1035 }
1036# endif
1037}
1038#endif
1039
1040
1041/**
1042 * Get cr3.
1043 * @returns cr3.
1044 */
1045#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1046DECLASM(RTCCUINTREG) ASMGetCR3(void);
1047#else
1048DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1049{
1050 RTCCUINTREG uCR3;
1051# if RT_INLINE_ASM_USES_INTRIN
1052 uCR3 = __readcr3();
1053
1054# elif RT_INLINE_ASM_GNU_STYLE
1055# ifdef RT_ARCH_AMD64
1056 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
1057# else
1058 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
1059# endif
1060# else
1061 __asm
1062 {
1063# ifdef RT_ARCH_AMD64
1064 mov rax, cr3
1065 mov [uCR3], rax
1066# else
1067 mov eax, cr3
1068 mov [uCR3], eax
1069# endif
1070 }
1071# endif
1072 return uCR3;
1073}
1074#endif
1075
1076
1077/**
1078 * Sets the CR3 register.
1079 *
1080 * @param uCR3 New CR3 value.
1081 */
1082#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1083DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1084#else
1085DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1086{
1087# if RT_INLINE_ASM_USES_INTRIN
1088 __writecr3(uCR3);
1089
1090# elif RT_INLINE_ASM_GNU_STYLE
1091# ifdef RT_ARCH_AMD64
1092 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1093# else
1094 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1095# endif
1096# else
1097 __asm
1098 {
1099# ifdef RT_ARCH_AMD64
1100 mov rax, [uCR3]
1101 mov cr3, rax
1102# else
1103 mov eax, [uCR3]
1104 mov cr3, eax
1105# endif
1106 }
1107# endif
1108}
1109#endif
1110
1111
1112/**
1113 * Reloads the CR3 register.
1114 */
1115#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1116DECLASM(void) ASMReloadCR3(void);
1117#else
1118DECLINLINE(void) ASMReloadCR3(void)
1119{
1120# if RT_INLINE_ASM_USES_INTRIN
1121 __writecr3(__readcr3());
1122
1123# elif RT_INLINE_ASM_GNU_STYLE
1124 RTCCUINTREG u;
1125# ifdef RT_ARCH_AMD64
1126 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1127 "movq %0, %%cr3\n\t"
1128 : "=r" (u));
1129# else
1130 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1131 "movl %0, %%cr3\n\t"
1132 : "=r" (u));
1133# endif
1134# else
1135 __asm
1136 {
1137# ifdef RT_ARCH_AMD64
1138 mov rax, cr3
1139 mov cr3, rax
1140# else
1141 mov eax, cr3
1142 mov cr3, eax
1143# endif
1144 }
1145# endif
1146}
1147#endif
1148
1149
1150/**
1151 * Get cr4.
1152 * @returns cr4.
1153 */
1154#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1155DECLASM(RTCCUINTREG) ASMGetCR4(void);
1156#else
1157DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1158{
1159 RTCCUINTREG uCR4;
1160# if RT_INLINE_ASM_USES_INTRIN
1161 uCR4 = __readcr4();
1162
1163# elif RT_INLINE_ASM_GNU_STYLE
1164# ifdef RT_ARCH_AMD64
1165 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1166# else
1167 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1168# endif
1169# else
1170 __asm
1171 {
1172# ifdef RT_ARCH_AMD64
1173 mov rax, cr4
1174 mov [uCR4], rax
1175# else
1176 push eax /* just in case */
1177 /*mov eax, cr4*/
1178 _emit 0x0f
1179 _emit 0x20
1180 _emit 0xe0
1181 mov [uCR4], eax
1182 pop eax
1183# endif
1184 }
1185# endif
1186 return uCR4;
1187}
1188#endif
1189
1190
1191/**
1192 * Sets the CR4 register.
1193 *
1194 * @param uCR4 New CR4 value.
1195 */
1196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1197DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1198#else
1199DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1200{
1201# if RT_INLINE_ASM_USES_INTRIN
1202 __writecr4(uCR4);
1203
1204# elif RT_INLINE_ASM_GNU_STYLE
1205# ifdef RT_ARCH_AMD64
1206 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1207# else
1208 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1209# endif
1210# else
1211 __asm
1212 {
1213# ifdef RT_ARCH_AMD64
1214 mov rax, [uCR4]
1215 mov cr4, rax
1216# else
1217 mov eax, [uCR4]
1218 _emit 0x0F
1219 _emit 0x22
1220 _emit 0xE0 /* mov cr4, eax */
1221# endif
1222 }
1223# endif
1224}
1225#endif
1226
1227
1228/**
1229 * Get cr8.
1230 * @returns cr8.
1231 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1232 */
1233#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1234DECLASM(RTCCUINTREG) ASMGetCR8(void);
1235#else
1236DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1237{
1238# ifdef RT_ARCH_AMD64
1239 RTCCUINTREG uCR8;
1240# if RT_INLINE_ASM_USES_INTRIN
1241 uCR8 = __readcr8();
1242
1243# elif RT_INLINE_ASM_GNU_STYLE
1244 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1245# else
1246 __asm
1247 {
1248 mov rax, cr8
1249 mov [uCR8], rax
1250 }
1251# endif
1252 return uCR8;
1253# else /* !RT_ARCH_AMD64 */
1254 return 0;
1255# endif /* !RT_ARCH_AMD64 */
1256}
1257#endif
1258
1259
1260/**
1261 * Enables interrupts (EFLAGS.IF).
1262 */
1263#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1264DECLASM(void) ASMIntEnable(void);
1265#else
1266DECLINLINE(void) ASMIntEnable(void)
1267{
1268# if RT_INLINE_ASM_GNU_STYLE
1269 __asm("sti\n");
1270# elif RT_INLINE_ASM_USES_INTRIN
1271 _enable();
1272# else
1273 __asm sti
1274# endif
1275}
1276#endif
1277
1278
1279/**
1280 * Disables interrupts (!EFLAGS.IF).
1281 */
1282#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1283DECLASM(void) ASMIntDisable(void);
1284#else
1285DECLINLINE(void) ASMIntDisable(void)
1286{
1287# if RT_INLINE_ASM_GNU_STYLE
1288 __asm("cli\n");
1289# elif RT_INLINE_ASM_USES_INTRIN
1290 _disable();
1291# else
1292 __asm cli
1293# endif
1294}
1295#endif
1296
1297
1298/**
1299 * Disables interrupts and returns previous xFLAGS.
1300 */
1301#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1302DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1303#else
1304DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1305{
1306 RTCCUINTREG xFlags;
1307# if RT_INLINE_ASM_GNU_STYLE
1308# ifdef RT_ARCH_AMD64
1309 __asm__ __volatile__("pushfq\n\t"
1310 "cli\n\t"
1311 "popq %0\n\t"
1312 : "=rm" (xFlags));
1313# else
1314 __asm__ __volatile__("pushfl\n\t"
1315 "cli\n\t"
1316 "popl %0\n\t"
1317 : "=rm" (xFlags));
1318# endif
1319# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1320 xFlags = ASMGetFlags();
1321 _disable();
1322# else
1323 __asm {
1324 pushfd
1325 cli
1326 pop [xFlags]
1327 }
1328# endif
1329 return xFlags;
1330}
1331#endif
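
/* Usage sketch (illustrative only; the critical-section body is hypothetical):
   disabling interrupts for a short stretch and restoring the previous state,
   which only re-enables them if they were enabled to begin with. */
#if 0 /* example sketch */
static void ExampleNoInterruptSection(void)
{
    RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
    /* ... touch data that an interrupt handler on this CPU also uses ... */
    ASMSetFlags(fSavedFlags);
}
#endif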
1332
1333
1334/**
1335 * Reads a machine specific register.
1336 *
1337 * @returns Register content.
1338 * @param uRegister Register to read.
1339 */
1340#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1341DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1342#else
1343DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1344{
1345 RTUINT64U u;
1346# if RT_INLINE_ASM_GNU_STYLE
1347 __asm__ ("rdmsr\n\t"
1348 : "=a" (u.s.Lo),
1349 "=d" (u.s.Hi)
1350 : "c" (uRegister));
1351
1352# elif RT_INLINE_ASM_USES_INTRIN
1353 u.u = __readmsr(uRegister);
1354
1355# else
1356 __asm
1357 {
1358 mov ecx, [uRegister]
1359 rdmsr
1360 mov [u.s.Lo], eax
1361 mov [u.s.Hi], edx
1362 }
1363# endif
1364
1365 return u.u;
1366}
1367#endif
1368
1369
1370/**
1371 * Writes a machine specific register.
1372 *
1374 * @param uRegister Register to write to.
1375 * @param u64Val Value to write.
1376 */
1377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1378DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1379#else
1380DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1381{
1382 RTUINT64U u;
1383
1384 u.u = u64Val;
1385# if RT_INLINE_ASM_GNU_STYLE
1386 __asm__ __volatile__("wrmsr\n\t"
1387 ::"a" (u.s.Lo),
1388 "d" (u.s.Hi),
1389 "c" (uRegister));
1390
1391# elif RT_INLINE_ASM_USES_INTRIN
1392 __writemsr(uRegister, u.u);
1393
1394# else
1395 __asm
1396 {
1397 mov ecx, [uRegister]
1398 mov edx, [u.s.Hi]
1399 mov eax, [u.s.Lo]
1400 wrmsr
1401 }
1402# endif
1403}
1404#endif
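
/* Usage sketch (illustrative only; the MSR index and bit number are
   placeholders supplied by the caller): a read-modify-write of an MSR. */
#if 0 /* example sketch */
static void ExampleSetMsrBit(uint32_t uMsr, unsigned iBit)
{
    uint64_t u64 = ASMRdMsr(uMsr);
    u64 |= (uint64_t)1 << iBit;     /* set the requested bit */
    ASMWrMsr(uMsr, u64);
}
#endif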
1405
1406
1407/**
1408 * Reads low part of a machine specific register.
1409 *
1410 * @returns Register content.
1411 * @param uRegister Register to read.
1412 */
1413#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1414DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1415#else
1416DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1417{
1418 uint32_t u32;
1419# if RT_INLINE_ASM_GNU_STYLE
1420 __asm__ ("rdmsr\n\t"
1421 : "=a" (u32)
1422 : "c" (uRegister)
1423 : "edx");
1424
1425# elif RT_INLINE_ASM_USES_INTRIN
1426 u32 = (uint32_t)__readmsr(uRegister);
1427
1428# else
1429 __asm
1430 {
1431 mov ecx, [uRegister]
1432 rdmsr
1433 mov [u32], eax
1434 }
1435# endif
1436
1437 return u32;
1438}
1439#endif
1440
1441
1442/**
1443 * Reads high part of a machine specific register.
1444 *
1445 * @returns Register content.
1446 * @param uRegister Register to read.
1447 */
1448#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1449DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1450#else
1451DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1452{
1453 uint32_t u32;
1454# if RT_INLINE_ASM_GNU_STYLE
1455 __asm__ ("rdmsr\n\t"
1456 : "=d" (u32)
1457 : "c" (uRegister)
1458 : "eax");
1459
1460# elif RT_INLINE_ASM_USES_INTRIN
1461 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1462
1463# else
1464 __asm
1465 {
1466 mov ecx, [uRegister]
1467 rdmsr
1468 mov [u32], edx
1469 }
1470# endif
1471
1472 return u32;
1473}
1474#endif
1475
1476
1477/**
1478 * Gets dr7.
1479 *
1480 * @returns dr7.
1481 */
1482#if RT_INLINE_ASM_EXTERNAL
1483DECLASM(RTCCUINTREG) ASMGetDR7(void);
1484#else
1485DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1486{
1487 RTCCUINTREG uDR7;
1488# if RT_INLINE_ASM_GNU_STYLE
1489# ifdef RT_ARCH_AMD64
1490 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1491# else
1492 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1493# endif
1494# else
1495 __asm
1496 {
1497# ifdef RT_ARCH_AMD64
1498 mov rax, dr7
1499 mov [uDR7], rax
1500# else
1501 mov eax, dr7
1502 mov [uDR7], eax
1503# endif
1504 }
1505# endif
1506 return uDR7;
1507}
1508#endif
1509
1510
1511/**
1512 * Gets dr6.
1513 *
1514 * @returns dr6.
1515 */
1516#if RT_INLINE_ASM_EXTERNAL
1517DECLASM(RTCCUINTREG) ASMGetDR6(void);
1518#else
1519DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1520{
1521 RTCCUINTREG uDR6;
1522# if RT_INLINE_ASM_GNU_STYLE
1523# ifdef RT_ARCH_AMD64
1524 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1525# else
1526 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1527# endif
1528# else
1529 __asm
1530 {
1531# ifdef RT_ARCH_AMD64
1532 mov rax, dr6
1533 mov [uDR6], rax
1534# else
1535 mov eax, dr6
1536 mov [uDR6], eax
1537# endif
1538 }
1539# endif
1540 return uDR6;
1541}
1542#endif
1543
1544
1545/**
1546 * Reads and clears DR6.
1547 *
1548 * @returns DR6.
1549 */
1550#if RT_INLINE_ASM_EXTERNAL
1551DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1552#else
1553DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1554{
1555 RTCCUINTREG uDR6;
1556# if RT_INLINE_ASM_GNU_STYLE
1557 RTCCUINTREG uNewValue = 0xffff0ff0; /* bits 31-16 and 11-4 are 1's; bits 15-12, 3-0 and 63-32 are zero. */
1558# ifdef RT_ARCH_AMD64
1559 __asm__ ("movq %%dr6, %0\n\t"
1560 "movq %1, %%dr6\n\t"
1561 : "=r" (uDR6)
1562 : "r" (uNewValue));
1563# else
1564 __asm__ ("movl %%dr6, %0\n\t"
1565 "movl %1, %%dr6\n\t"
1566 : "=r" (uDR6)
1567 : "r" (uNewValue));
1568# endif
1569# else
1570 __asm
1571 {
1572# ifdef RT_ARCH_AMD64
1573 mov rax, dr6
1574 mov [uDR6], rax
1575 mov rcx, rax
1576 mov ecx, 0ffff0ff0h; /* bits 31-16 and 11-4 are 1's; bits 15-12, 3-0 and 63-32 are zero. */
1577 mov dr6, rcx
1578# else
1579 mov eax, dr6
1580 mov [uDR6], eax
1581 mov ecx, 0ffff0ff0h; /* bits 31-16 and 11-4 are 1's; bits 15-12 and 3-0 are zero. */
1582 mov dr6, ecx
1583# endif
1584 }
1585# endif
1586 return uDR6;
1587}
1588#endif
1589
1590
1591/**
1592 * Compiler memory barrier.
1593 *
1594 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1595 * values or any outstanding writes when returning from this function.
1596 *
1597 * This function must be used if non-volatile data is modified by a
1598 * device or the VMM. Typical cases are port access, MMIO access,
1599 * trapping instructions, etc.
1600 */
1601#if RT_INLINE_ASM_GNU_STYLE
1602# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1603#elif RT_INLINE_ASM_USES_INTRIN
1604# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1605#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1606DECLINLINE(void) ASMCompilerBarrier(void)
1607{
1608 __asm
1609 {
1610 }
1611}
1612#endif
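
/* Usage sketch (illustrative only; the flag and the polling loop are
   hypothetical): forcing the compiler to re-read memory that an interrupt
   handler or a device may change behind its back. */
#if 0 /* example sketch */
static void ExampleWaitForFlag(uint32_t *pfDone)
{
    while (!*pfDone)
        ASMCompilerBarrier();   /* don't let *pfDone be cached in a register */
}
#endif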
1613
1614
1615/**
1616 * Writes an 8-bit unsigned integer to an I/O port.
1617 *
1618 * @param Port I/O port to write to.
1619 * @param u8 8-bit integer to write.
1620 */
1621#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1622DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1623#else
1624DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1625{
1626# if RT_INLINE_ASM_GNU_STYLE
1627 __asm__ __volatile__("outb %b1, %w0\n\t"
1628 :: "Nd" (Port),
1629 "a" (u8));
1630
1631# elif RT_INLINE_ASM_USES_INTRIN
1632 __outbyte(Port, u8);
1633
1634# else
1635 __asm
1636 {
1637 mov dx, [Port]
1638 mov al, [u8]
1639 out dx, al
1640 }
1641# endif
1642}
1643#endif
1644
1645
1646/**
1647 * Gets an 8-bit unsigned integer from an I/O port.
1648 *
1649 * @returns 8-bit integer.
1650 * @param Port I/O port to read from.
1651 */
1652#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1653DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1654#else
1655DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1656{
1657 uint8_t u8;
1658# if RT_INLINE_ASM_GNU_STYLE
1659 __asm__ __volatile__("inb %w1, %b0\n\t"
1660 : "=a" (u8)
1661 : "Nd" (Port));
1662
1663# elif RT_INLINE_ASM_USES_INTRIN
1664 u8 = __inbyte(Port);
1665
1666# else
1667 __asm
1668 {
1669 mov dx, [Port]
1670 in al, dx
1671 mov [u8], al
1672 }
1673# endif
1674 return u8;
1675}
1676#endif
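
/* Usage sketch (illustrative only; assumes the legacy CMOS/RTC index/data
   ports 70h/71h): selecting a CMOS register and reading it back. */
#if 0 /* example sketch */
static uint8_t ExampleReadCmos(uint8_t bReg)
{
    ASMOutU8(0x70, bReg);       /* select the CMOS register */
    return ASMInU8(0x71);       /* read its current value */
}
#endif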
1677
1678
1679/**
1680 * Writes a 16-bit unsigned integer to an I/O port.
1681 *
1682 * @param Port I/O port to write to.
1683 * @param u16 16-bit integer to write.
1684 */
1685#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1686DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1687#else
1688DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1689{
1690# if RT_INLINE_ASM_GNU_STYLE
1691 __asm__ __volatile__("outw %w1, %w0\n\t"
1692 :: "Nd" (Port),
1693 "a" (u16));
1694
1695# elif RT_INLINE_ASM_USES_INTRIN
1696 __outword(Port, u16);
1697
1698# else
1699 __asm
1700 {
1701 mov dx, [Port]
1702 mov ax, [u16]
1703 out dx, ax
1704 }
1705# endif
1706}
1707#endif
1708
1709
1710/**
1711 * Gets a 16-bit unsigned integer from an I/O port.
1712 *
1713 * @returns 16-bit integer.
1714 * @param Port I/O port to read from.
1715 */
1716#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1717DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1718#else
1719DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1720{
1721 uint16_t u16;
1722# if RT_INLINE_ASM_GNU_STYLE
1723 __asm__ __volatile__("inw %w1, %w0\n\t"
1724 : "=a" (u16)
1725 : "Nd" (Port));
1726
1727# elif RT_INLINE_ASM_USES_INTRIN
1728 u16 = __inword(Port);
1729
1730# else
1731 __asm
1732 {
1733 mov dx, [Port]
1734 in ax, dx
1735 mov [u16], ax
1736 }
1737# endif
1738 return u16;
1739}
1740#endif
1741
1742
1743/**
1744 * Writes a 32-bit unsigned integer to an I/O port.
1745 *
1746 * @param Port I/O port to write to.
1747 * @param u32 32-bit integer to write.
1748 */
1749#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1750DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1751#else
1752DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1753{
1754# if RT_INLINE_ASM_GNU_STYLE
1755 __asm__ __volatile__("outl %1, %w0\n\t"
1756 :: "Nd" (Port),
1757 "a" (u32));
1758
1759# elif RT_INLINE_ASM_USES_INTRIN
1760 __outdword(Port, u32);
1761
1762# else
1763 __asm
1764 {
1765 mov dx, [Port]
1766 mov eax, [u32]
1767 out dx, eax
1768 }
1769# endif
1770}
1771#endif
1772
1773
1774/**
1775 * Gets a 32-bit unsigned integer from an I/O port.
1776 *
1777 * @returns 32-bit integer.
1778 * @param Port I/O port to read from.
1779 */
1780#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1781DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1782#else
1783DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1784{
1785 uint32_t u32;
1786# if RT_INLINE_ASM_GNU_STYLE
1787 __asm__ __volatile__("inl %w1, %0\n\t"
1788 : "=a" (u32)
1789 : "Nd" (Port));
1790
1791# elif RT_INLINE_ASM_USES_INTRIN
1792 u32 = __indword(Port);
1793
1794# else
1795 __asm
1796 {
1797 mov dx, [Port]
1798 in eax, dx
1799 mov [u32], eax
1800 }
1801# endif
1802 return u32;
1803}
1804#endif
1805
1806
1807/**
1808 * Atomically Exchange an unsigned 8-bit value.
1809 *
1810 * @returns Current *pu8 value
1811 * @param pu8 Pointer to the 8-bit variable to update.
1812 * @param u8 The 8-bit value to assign to *pu8.
1813 */
1814#if RT_INLINE_ASM_EXTERNAL
1815DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1816#else
1817DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1818{
1819# if RT_INLINE_ASM_GNU_STYLE
1820 __asm__ __volatile__("xchgb %0, %1\n\t"
1821 : "=m" (*pu8),
1822 "=r" (u8)
1823 : "1" (u8));
1824# else
1825 __asm
1826 {
1827# ifdef RT_ARCH_AMD64
1828 mov rdx, [pu8]
1829 mov al, [u8]
1830 xchg [rdx], al
1831 mov [u8], al
1832# else
1833 mov edx, [pu8]
1834 mov al, [u8]
1835 xchg [edx], al
1836 mov [u8], al
1837# endif
1838 }
1839# endif
1840 return u8;
1841}
1842#endif
1843
1844
1845/**
1846 * Atomically Exchange a signed 8-bit value.
1847 *
1848 * @returns Current *pi8 value
1849 * @param pi8 Pointer to the 8-bit variable to update.
1850 * @param i8 The 8-bit value to assign to *pi8.
1851 */
1852DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1853{
1854 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1855}
1856
1857
1858/**
1859 * Atomically Exchange a bool value.
1860 *
1861 * @returns Current *pf value
1862 * @param pf Pointer to the boolean variable to update.
1863 * @param f The boolean value to assign to *pf.
1864 */
1865DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1866{
1867#ifdef _MSC_VER
1868 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1869#else
1870 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1871#endif
1872}
1873
1874
1875/**
1876 * Atomically Exchange an unsigned 16-bit value.
1877 *
1878 * @returns Current *pu16 value
1879 * @param pu16 Pointer to the 16-bit variable to update.
1880 * @param u16 The 16-bit value to assign to *pu16.
1881 */
1882#if RT_INLINE_ASM_EXTERNAL
1883DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1884#else
1885DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1886{
1887# if RT_INLINE_ASM_GNU_STYLE
1888 __asm__ __volatile__("xchgw %0, %1\n\t"
1889 : "=m" (*pu16),
1890 "=r" (u16)
1891 : "1" (u16));
1892# else
1893 __asm
1894 {
1895# ifdef RT_ARCH_AMD64
1896 mov rdx, [pu16]
1897 mov ax, [u16]
1898 xchg [rdx], ax
1899 mov [u16], ax
1900# else
1901 mov edx, [pu16]
1902 mov ax, [u16]
1903 xchg [edx], ax
1904 mov [u16], ax
1905# endif
1906 }
1907# endif
1908 return u16;
1909}
1910#endif
1911
1912
1913/**
1914 * Atomically Exchange a signed 16-bit value.
1915 *
1916 * @returns Current *pi16 value
1917 * @param pi16 Pointer to the 16-bit variable to update.
1918 * @param i16 The 16-bit value to assign to *pi16.
1919 */
1920DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1921{
1922 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1923}
1924
1925
1926/**
1927 * Atomically Exchange an unsigned 32-bit value.
1928 *
1929 * @returns Current *pu32 value
1930 * @param pu32 Pointer to the 32-bit variable to update.
1931 * @param u32 The 32-bit value to assign to *pu32.
1932 */
1933#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1934DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1935#else
1936DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1937{
1938# if RT_INLINE_ASM_GNU_STYLE
1939 __asm__ __volatile__("xchgl %0, %1\n\t"
1940 : "=m" (*pu32),
1941 "=r" (u32)
1942 : "1" (u32));
1943
1944# elif RT_INLINE_ASM_USES_INTRIN
1945 u32 = _InterlockedExchange((long *)pu32, u32);
1946
1947# else
1948 __asm
1949 {
1950# ifdef RT_ARCH_AMD64
1951 mov rdx, [pu32]
1952 mov eax, u32
1953 xchg [rdx], eax
1954 mov [u32], eax
1955# else
1956 mov edx, [pu32]
1957 mov eax, u32
1958 xchg [edx], eax
1959 mov [u32], eax
1960# endif
1961 }
1962# endif
1963 return u32;
1964}
1965#endif
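
/* Usage sketch (illustrative only; the pending-event word is hypothetical):
   using the unconditional exchange to fetch and clear a set of event bits in
   a single atomic step. */
#if 0 /* example sketch */
static uint32_t ExampleTakePendingEvents(volatile uint32_t *pfPending)
{
    return ASMAtomicXchgU32(pfPending, 0);  /* returns the old bits, leaves zero behind */
}
#endif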
1966
1967
1968/**
1969 * Atomically Exchange a signed 32-bit value.
1970 *
1971 * @returns Current *pi32 value
1972 * @param pi32 Pointer to the 32-bit variable to update.
1973 * @param i32 The 32-bit value to assign to *pi32.
1974 */
1975DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1976{
1977 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1978}
1979
1980
1981/**
1982 * Atomically Exchange an unsigned 64-bit value.
1983 *
1984 * @returns Current *pu64 value
1985 * @param pu64 Pointer to the 64-bit variable to update.
1986 * @param u64 The 64-bit value to assign to *pu64.
1987 */
1988#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1989DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1990#else
1991DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1992{
1993# if defined(RT_ARCH_AMD64)
1994# if RT_INLINE_ASM_USES_INTRIN
1995 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1996
1997# elif RT_INLINE_ASM_GNU_STYLE
1998 __asm__ __volatile__("xchgq %0, %1\n\t"
1999 : "=m" (*pu64),
2000 "=r" (u64)
2001 : "1" (u64));
2002# else
2003 __asm
2004 {
2005 mov rdx, [pu64]
2006 mov rax, [u64]
2007 xchg [rdx], rax
2008 mov [u64], rax
2009 }
2010# endif
2011# else /* !RT_ARCH_AMD64 */
2012# if RT_INLINE_ASM_GNU_STYLE
2013# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2014 uint32_t u32 = (uint32_t)u64;
2015 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2016 "xchgl %%ebx, %3\n\t"
2017 "1:\n\t"
2018 "lock; cmpxchg8b (%5)\n\t"
2019 "jnz 1b\n\t"
2020 "xchgl %%ebx, %3\n\t"
2021 /*"xchgl %%esi, %5\n\t"*/
2022 : "=A" (u64),
2023 "=m" (*pu64)
2024 : "0" (*pu64),
2025 "m" ( u32 ),
2026 "c" ( (uint32_t)(u64 >> 32) ),
2027 "S" (pu64) );
2028# else /* !PIC */
2029 __asm__ __volatile__("1:\n\t"
2030 "lock; cmpxchg8b %1\n\t"
2031 "jnz 1b\n\t"
2032 : "=A" (u64),
2033 "=m" (*pu64)
2034 : "0" (*pu64),
2035 "b" ( (uint32_t)u64 ),
2036 "c" ( (uint32_t)(u64 >> 32) ));
2037# endif
2038# else
2039 __asm
2040 {
2041 mov ebx, dword ptr [u64]
2042 mov ecx, dword ptr [u64 + 4]
2043 mov edi, pu64
2044 mov eax, dword ptr [edi]
2045 mov edx, dword ptr [edi + 4]
2046 retry:
2047 lock cmpxchg8b [edi]
2048 jnz retry
2049 mov dword ptr [u64], eax
2050 mov dword ptr [u64 + 4], edx
2051 }
2052# endif
2053# endif /* !RT_ARCH_AMD64 */
2054 return u64;
2055}
2056#endif
2057
2058
2059/**
2060 * Atomically Exchange a signed 64-bit value.
2061 *
2062 * @returns Current *pi64 value
2063 * @param pi64 Pointer to the 64-bit variable to update.
2064 * @param i64 The 64-bit value to assign to *pi64.
2065 */
2066DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2067{
2068 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2069}
2070
2071
2072#ifdef RT_ARCH_AMD64
2073/**
2074 * Atomically Exchange an unsigned 128-bit value.
2075 *
2076 * @returns Current *pu128.
2077 * @param pu128 Pointer to the 128-bit variable to update.
2078 * @param u128 The 128-bit value to assign to *pu128.
2079 *
2080 * @remark We cannot really assume that any hardware supports this. Nor do I have
2081 * GAS support for it. So, for the time being we'll BREAK the atomic
2082 * bit of this function and use two 64-bit exchanges instead.
2083 */
2084# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2085DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2086# else
2087DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2088{
2089 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2090 {
2091 /** @todo this is clumsy code */
2092 RTUINT128U u128Ret;
2093 u128Ret.u = u128;
2094 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2095 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2096 return u128Ret.u;
2097 }
2098#if 0 /* later? */
2099 else
2100 {
2101# if RT_INLINE_ASM_GNU_STYLE
2102 __asm__ __volatile__("1:\n\t"
2103 "lock; cmpxchg8b %1\n\t"
2104 "jnz 1b\n\t"
2105 : "=A" (u128),
2106 "=m" (*pu128)
2107 : "0" (*pu128),
2108 "b" ( (uint64_t)u128 ),
2109 "c" ( (uint64_t)(u128 >> 64) ));
2110# else
2111 __asm
2112 {
2113 mov rbx, dword ptr [u128]
2114 mov rcx, dword ptr [u128 + 4]
2115 mov rdi, pu128
2116 mov rax, dword ptr [rdi]
2117 mov rdx, dword ptr [rdi + 4]
2118 retry:
2119 lock cmpxchg16b [rdi]
2120 jnz retry
2121 mov dword ptr [u128], rax
2122 mov dword ptr [u128 + 4], rdx
2123 }
2124# endif
2125 }
2126 return u128;
2127#endif
2128}
2129# endif
2130#endif /* RT_ARCH_AMD64 */
2131
2132
2133/**
2134 * Atomically Reads an unsigned 64-bit value.
2135 *
2136 * @returns Current *pu64 value
2137 * @param pu64 Pointer to the 64-bit variable to read.
2138 * The memory pointed to must be writable.
2139 * @remark This will fault if the memory is read-only!
2140 */
2141#if RT_INLINE_ASM_EXTERNAL
2142DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2143#else
2144DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2145{
2146 uint64_t u64;
2147# ifdef RT_ARCH_AMD64
2148# if RT_INLINE_ASM_GNU_STYLE
2149 __asm__ __volatile__("movq %1, %0\n\t"
2150 : "=r" (u64)
2151 : "m" (*pu64));
2152# else
2153 __asm
2154 {
2155 mov rdx, [pu64]
2156 mov rax, [rdx]
2157 mov [u64], rax
2158 }
2159# endif
2160# else /* !RT_ARCH_AMD64 */
2161# if RT_INLINE_ASM_GNU_STYLE
2162# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2163 uint32_t u32EBX = 0;
2164 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2165 "lock; cmpxchg8b (%5)\n\t"
2166 "xchgl %%ebx, %3\n\t"
2167 : "=A" (u64),
2168 "=m" (*pu64)
2169 : "0" (0),
2170 "m" (u32EBX),
2171 "c" (0),
2172 "S" (pu64));
2173# else /* !PIC */
2174 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2175 : "=A" (u64),
2176 "=m" (*pu64)
2177 : "0" (0),
2178 "b" (0),
2179 "c" (0));
2180# endif
2181# else
2182 __asm
2183 {
2184 xor eax, eax
2185 xor edx, edx
2186 mov edi, pu64
2187 xor ecx, ecx
2188 xor ebx, ebx
2189 lock cmpxchg8b [edi]
2190 mov dword ptr [u64], eax
2191 mov dword ptr [u64 + 4], edx
2192 }
2193# endif
2194# endif /* !RT_ARCH_AMD64 */
2195 return u64;
2196}
2197#endif
2198
2199
2200/**
2201 * Atomically Reads a signed 64-bit value.
2202 *
2203 * @returns Current *pi64 value
2204 * @param pi64 Pointer to the 64-bit variable to read.
2205 * The memory pointed to must be writable.
2206 * @remark This will fault if the memory is read-only!
2207 */
2208DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2209{
2210 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2211}
2212
2213
2214/**
2215 * Atomically Exchange a value whose size might differ
2216 * between platforms or compilers.
2217 *
2218 * @param pu Pointer to the variable to update.
2219 * @param uNew The value to assign to *pu.
2220 */
2221#define ASMAtomicXchgSize(pu, uNew) \
2222 do { \
2223 switch (sizeof(*(pu))) { \
2224 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2225 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2226 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2227 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2228 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2229 } \
2230 } while (0)
2231
2232
2233/**
2234 * Atomically Exchange a pointer value.
2235 *
2236 * @returns Current *ppv value
2237 * @param ppv Pointer to the pointer variable to update.
2238 * @param pv The pointer value to assign to *ppv.
2239 */
2240DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2241{
2242#if ARCH_BITS == 32
2243 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2244#elif ARCH_BITS == 64
2245 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2246#else
2247# error "ARCH_BITS is bogus"
2248#endif
2249}
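
/* Usage sketch (illustrative only; the node type is hypothetical): atomically
   detaching a single-linked list from a shared head pointer. */
#if 0 /* example sketch */
typedef struct EXAMPLENODE
{
    struct EXAMPLENODE *pNext;
} EXAMPLENODE;

static EXAMPLENODE *ExampleDetachList(EXAMPLENODE * volatile *ppHead)
{
    return (EXAMPLENODE *)ASMAtomicXchgPtr((void * volatile *)ppHead, NULL);
}
#endif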
2250
2251
2252/**
2253 * Atomically Compare and Exchange an unsigned 32-bit value.
2254 *
2255 * @returns true if xchg was done.
2256 * @returns false if xchg wasn't done.
2257 *
2258 * @param pu32 Pointer to the value to update.
2259 * @param u32New The new value to assign to *pu32.
2260 * @param u32Old The old value to compare *pu32 with.
2261 */
2262#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2263DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2264#else
2265DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2266{
2267# if RT_INLINE_ASM_GNU_STYLE
2268 uint8_t u8Ret;
2269 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2270 "setz %1\n\t"
2271 : "=m" (*pu32),
2272 "=qm" (u8Ret)
2273 : "r" (u32New),
2274 "a" (u32Old));
2275 return (bool)u8Ret;
2276
2277# elif RT_INLINE_ASM_USES_INTRIN
2278 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2279
2280# else
2281 uint32_t u32Ret;
2282 __asm
2283 {
2284# ifdef RT_ARCH_AMD64
2285 mov rdx, [pu32]
2286# else
2287 mov edx, [pu32]
2288# endif
2289 mov eax, [u32Old]
2290 mov ecx, [u32New]
2291# ifdef RT_ARCH_AMD64
2292 lock cmpxchg [rdx], ecx
2293# else
2294 lock cmpxchg [edx], ecx
2295# endif
2296 setz al
2297 movzx eax, al
2298 mov [u32Ret], eax
2299 }
2300 return !!u32Ret;
2301# endif
2302}
2303#endif
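
/* Usage sketch (illustrative only; the helper is hypothetical): the classic
   compare-and-exchange retry loop, here OR-ing a mask into a shared word. */
#if 0 /* example sketch */
static void ExampleAtomicOrU32(volatile uint32_t *pu32, uint32_t fOrMask)
{
    uint32_t u32Old;
    do
        u32Old = *pu32;
    while (!ASMAtomicCmpXchgU32(pu32, u32Old | fOrMask, u32Old));
}
#endif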
2304
2305
2306/**
2307 * Atomically Compare and Exchange a signed 32-bit value.
2308 *
2309 * @returns true if xchg was done.
2310 * @returns false if xchg wasn't done.
2311 *
2312 * @param pi32 Pointer to the value to update.
2313 * @param i32New The new value to assign to *pi32.
2314 * @param i32Old The old value to compare *pi32 with.
2315 */
2316DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2317{
2318 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2319}
2320
2321
2322/**
2323 * Atomically Compare and exchange an unsigned 64-bit value.
2324 *
2325 * @returns true if xchg was done.
2326 * @returns false if xchg wasn't done.
2327 *
2328 * @param pu64 Pointer to the 64-bit variable to update.
2329 * @param u64New The 64-bit value to assign to *pu64.
2330 * @param u64Old The value to compare with.
2331 */
2332#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2333DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2334#else
2335DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2336{
2337# if RT_INLINE_ASM_USES_INTRIN
2338 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2339
2340# elif defined(RT_ARCH_AMD64)
2341# if RT_INLINE_ASM_GNU_STYLE
2342 uint8_t u8Ret;
2343 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2344 "setz %1\n\t"
2345 : "=m" (*pu64),
2346 "=qm" (u8Ret)
2347 : "r" (u64New),
2348 "a" (u64Old));
2349 return (bool)u8Ret;
2350# else
2351 bool fRet;
2352 __asm
2353 {
2354 mov rdx, [pu64]
2355 mov rax, [u64Old]
2356 mov rcx, [u64New]
2357 lock cmpxchg [rdx], rcx
2358 setz al
2359 mov [fRet], al
2360 }
2361 return fRet;
2362# endif
2363# else /* !RT_ARCH_AMD64 */
2364 uint32_t u32Ret;
2365# if RT_INLINE_ASM_GNU_STYLE
2366# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2367 uint32_t u32 = (uint32_t)u64New;
2368 uint32_t u32Spill;
2369 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2370 "lock; cmpxchg8b (%6)\n\t"
2371 "setz %%al\n\t"
2372 "xchgl %%ebx, %4\n\t"
2373 "movzbl %%al, %%eax\n\t"
2374 : "=a" (u32Ret),
2375 "=d" (u32Spill),
2376 "=m" (*pu64)
2377 : "A" (u64Old),
2378 "m" ( u32 ),
2379 "c" ( (uint32_t)(u64New >> 32) ),
2380 "S" (pu64) );
2381# else /* !PIC */
2382 uint32_t u32Spill;
2383 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2384 "setz %%al\n\t"
2385 "movzbl %%al, %%eax\n\t"
2386 : "=a" (u32Ret),
2387 "=d" (u32Spill),
2388 "=m" (*pu64)
2389 : "A" (u64Old),
2390 "b" ( (uint32_t)u64New ),
2391 "c" ( (uint32_t)(u64New >> 32) ));
2392# endif
2393 return (bool)u32Ret;
2394# else
2395 __asm
2396 {
2397 mov ebx, dword ptr [u64New]
2398 mov ecx, dword ptr [u64New + 4]
2399 mov edi, [pu64]
2400 mov eax, dword ptr [u64Old]
2401 mov edx, dword ptr [u64Old + 4]
2402 lock cmpxchg8b [edi]
2403 setz al
2404 movzx eax, al
2405 mov dword ptr [u32Ret], eax
2406 }
2407 return !!u32Ret;
2408# endif
2409# endif /* !RT_ARCH_AMD64 */
2410}
2411#endif
2412
2413
2414/**
2415 * Atomically Compare and exchange a signed 64-bit value.
2416 *
2417 * @returns true if xchg was done.
2418 * @returns false if xchg wasn't done.
2419 *
2420 * @param pi64 Pointer to the 64-bit variable to update.
2421 * @param i64 The 64-bit value to assign to *pi64.
2422 * @param i64Old The value to compare with.
2423 */
2424DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2425{
2426 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2427}
2428
2429
2430/** @def ASMAtomicCmpXchgSize
2431 * Atomically Compare and Exchange a value whose size might differ
2432 * between platforms or compilers.
2433 *
2434 * @param pu Pointer to the value to update.
2435 * @param uNew The new value to assign to *pu.
2436 * @param uOld The old value to compare *pu with.
2437 * @param fRc Where to store the result.
2438 */
2439#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2440 do { \
2441 switch (sizeof(*(pu))) { \
2442 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2443 break; \
2444 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2445 break; \
2446 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2447 (fRc) = false; \
2448 break; \
2449 } \
2450 } while (0)
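/*
 * Usage sketch (not from the original sources): how the size dispatching macro
 * is typically invoked; note that the result comes back through the fRc
 * argument.  The helper and its parameters are hypothetical.
 */
#if 0 /* example only, not compiled */
DECLINLINE(bool) exampleTryUpdateSize(size_t volatile *puShared, size_t uOld, size_t uNew)
{
    bool fSucceeded;
    ASMAtomicCmpXchgSize(puShared, uNew, uOld, fSucceeded);         /* dispatches on sizeof(*puShared) */
    return fSucceeded;
}
#endif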
2451
2452
2453/**
2454 * Atomically Compare and Exchange a pointer value.
2455 *
2456 * @returns true if xchg was done.
2457 * @returns false if xchg wasn't done.
2458 *
2459 * @param ppv Pointer to the value to update.
2460 * @param pvNew The new value to assign to *ppv.
2461 * @param pvOld The old value to compare *ppv with.
2462 */
2463DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2464{
2465#if ARCH_BITS == 32
2466 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2467#elif ARCH_BITS == 64
2468 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2469#else
2470# error "ARCH_BITS is bogus"
2471#endif
2472}
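/*
 * Usage sketch (not from the original sources): racing threads installing a
 * lazily created object; only the first CAS from NULL wins.  The factory and
 * destructor names are hypothetical.
 */
#if 0 /* example only, not compiled */
static void *exampleCreateCache(void);                              /* hypothetical factory */
static void  exampleDestroyCache(void *pv);                         /* hypothetical destructor */
static void * volatile g_pvExampleCache = NULL;

static void *exampleGetCache(void)
{
    void *pv = g_pvExampleCache;
    if (!pv)
    {
        void *pvNew = exampleCreateCache();
        if (ASMAtomicCmpXchgPtr(&g_pvExampleCache, pvNew, NULL))
            pv = pvNew;                                             /* we won the race */
        else
        {
            exampleDestroyCache(pvNew);                             /* somebody else won; discard ours */
            pv = g_pvExampleCache;
        }
    }
    return pv;
}
#endif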
2473
2474
2475/**
2476 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2477 * passes back old value.
2478 *
2479 * @returns true if xchg was done.
2480 * @returns false if xchg wasn't done.
2481 *
2482 * @param pu32 Pointer to the value to update.
2483 * @param u32New The new value to assign to *pu32.
2484 * @param u32Old The old value to compare *pu32 with.
2485 * @param pu32Old Pointer to store the old value at.
2486 */
2487#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2488DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2489#else
2490DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2491{
2492# if RT_INLINE_ASM_GNU_STYLE
2493 uint8_t u8Ret;
2494 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2495 "setz %1\n\t"
2496 : "=m" (*pu32),
2497 "=qm" (u8Ret),
2498 "=a" (*pu32Old)
2499 : "r" (u32New),
2500 "a" (u32Old));
2501 return (bool)u8Ret;
2502
2503# elif RT_INLINE_ASM_USES_INTRIN
2504 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2505
2506# else
2507 uint32_t u32Ret;
2508 __asm
2509 {
2510# ifdef RT_ARCH_AMD64
2511 mov rdx, [pu32]
2512# else
2513 mov edx, [pu32]
2514# endif
2515 mov eax, [u32Old]
2516 mov ecx, [u32New]
2517# ifdef RT_ARCH_AMD64
2518 lock cmpxchg [rdx], ecx
2519 mov rdx, [pu32Old]
2520 mov [rdx], eax
2521# else
2522 lock cmpxchg [edx], ecx
2523 mov edx, [pu32Old]
2524 mov [edx], eax
2525# endif
2526 setz al
2527 movzx eax, al
2528 mov [u32Ret], eax
2529 }
2530 return !!u32Ret;
2531# endif
2532}
2533#endif
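/*
 * Usage sketch (not from the original sources): the Ex variant hands back the
 * value actually found in memory, so a retry loop needs no extra read per
 * iteration.  The helper name is made up.
 */
#if 0 /* example only, not compiled */
DECLINLINE(uint32_t) exampleAtomicOrRetOldU32(volatile uint32_t *pu32, uint32_t fFlags)
{
    uint32_t u32Old = *pu32;
    uint32_t u32Seen;
    while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fFlags, u32Old, &u32Seen))
        u32Old = u32Seen;                   /* use the value reported by the failed cmpxchg */
    return u32Old;                          /* the value before the flags were set */
}
#endif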
2534
2535
2536/**
2537 * Atomically Compare and Exchange a signed 32-bit value, additionally
2538 * passes back old value.
2539 *
2540 * @returns true if xchg was done.
2541 * @returns false if xchg wasn't done.
2542 *
2543 * @param pi32 Pointer to the value to update.
2544 * @param i32New The new value to assign to *pi32.
2545 * @param i32Old The old value to compare *pi32 with.
2546 * @param pi32Old Pointer to store the old value at.
2547 */
2548DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2549{
2550 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2551}
2552
2553
2554/**
2555 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2556 * passing back old value.
2557 *
2558 * @returns true if xchg was done.
2559 * @returns false if xchg wasn't done.
2560 *
2561 * @param pu64 Pointer to the 64-bit variable to update.
2562 * @param u64New The 64-bit value to assign to *pu64.
2563 * @param u64Old The value to compare with.
2564 * @param pu64Old Pointer to store the old value at.
2565 */
2566#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2567DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2568#else
2569DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2570{
2571# if RT_INLINE_ASM_USES_INTRIN
2572 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2573
2574# elif defined(RT_ARCH_AMD64)
2575# if RT_INLINE_ASM_GNU_STYLE
2576 uint8_t u8Ret;
2577 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2578 "setz %1\n\t"
2579 : "=m" (*pu64),
2580 "=qm" (u8Ret),
2581 "=a" (*pu64Old)
2582 : "r" (u64New),
2583 "a" (u64Old));
2584 return (bool)u8Ret;
2585# else
2586 bool fRet;
2587 __asm
2588 {
2589 mov rdx, [pu64]
2590 mov rax, [u64Old]
2591 mov rcx, [u64New]
2592 lock cmpxchg [rdx], rcx
2593 mov rdx, [pu64Old]
2594 mov [rdx], rax
2595 setz al
2596 mov [fRet], al
2597 }
2598 return fRet;
2599# endif
2600# else /* !RT_ARCH_AMD64 */
2601# if RT_INLINE_ASM_GNU_STYLE
2602 uint64_t u64Ret;
2603# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2604 __asm__ __volatile__("xchgl %%ebx, %2\n\t"
2605 "lock; cmpxchg8b %4\n\t"
2606 "xchgl %%ebx, %2\n\t"
2607 : "=A" (u64Ret),
2608 "=m" (*pu64)
2609 : "DS" ((uint32_t)u64New),
2610 "c" ((uint32_t)(u64New >> 32)),
2611 "m" (*pu64),
2612 "0" (u64Old));
2613# else /* !PIC */
2614 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2615 : "=A" (u64Ret),
2616 "=m" (*pu64)
2617 : "b" ((uint32_t)u64New),
2618 "c" ((uint32_t)(u64New >> 32)),
2619 "m" (*pu64),
2620 "0" (u64Old));
2621# endif
2622 *pu64Old = u64Ret;
2623 return u64Ret == u64Old;
2624# else
2625 uint32_t u32Ret;
2626 __asm
2627 {
2628 mov ebx, dword ptr [u64New]
2629 mov ecx, dword ptr [u64New + 4]
2630 mov edi, [pu64]
2631 mov eax, dword ptr [u64Old]
2632 mov edx, dword ptr [u64Old + 4]
2633 lock cmpxchg8b [edi]
2634 mov ebx, [pu64Old]
2635 mov [ebx], eax
2636 setz al
2637 movzx eax, al
2638 add ebx, 4
2639 mov [ebx], edx
2640 mov dword ptr [u32Ret], eax
2641 }
2642 return !!u32Ret;
2643# endif
2644# endif /* !RT_ARCH_AMD64 */
2645}
2646#endif
2647
2648
2649/**
2650 * Atomically Compare and exchange a signed 64-bit value, additionally
2651 * passing back old value.
2652 *
2653 * @returns true if xchg was done.
2654 * @returns false if xchg wasn't done.
2655 *
2656 * @param pi64 Pointer to the 64-bit variable to update.
2657 * @param i64 The 64-bit value to assign to *pi64.
2658 * @param i64Old The value to compare with.
2659 * @param pi64Old Pointer to store the old value at.
2660 */
2661DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2662{
2663 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2664}
2665
2666
2667/** @def ASMAtomicCmpXchgExSize
2668 * Atomically Compare and Exchange a value whose size might differ
2669 * between platforms or compilers. Additionally passes back old value.
2670 *
2671 * @param pu Pointer to the value to update.
2672 * @param uNew The new value to assign to *pu.
2673 * @param uOld The old value to compare *pu with.
2674 * @param fRc Where to store the result.
2675 * @param uOldVal Where to store the old value.
2676 */
2677#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
2678 do { \
2679 switch (sizeof(*(pu))) { \
2680 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
2681 break; \
2682 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
2683 break; \
2684 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2685 (fRc) = false; \
2686 (uOldVal) = 0; \
2687 break; \
2688 } \
2689 } while (0)
2690
2691
2692/**
2693 * Atomically Compare and Exchange a pointer value, additionally
2694 * passing back old value.
2695 *
2696 * @returns true if xchg was done.
2697 * @returns false if xchg wasn't done.
2698 *
2699 * @param ppv Pointer to the value to update.
2700 * @param pvNew The new value to assign to *ppv.
2701 * @param pvOld The old value to compare *ppv with.
2702 * @param ppvOld Pointer to store the old value at.
2703 */
2704DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2705{
2706#if ARCH_BITS == 32
2707 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2708#elif ARCH_BITS == 64
2709 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2710#else
2711# error "ARCH_BITS is bogus"
2712#endif
2713}
2714
2715
2716/**
2717 * Atomically increment a 32-bit value.
2718 *
2719 * @returns The new value.
2720 * @param pu32 Pointer to the value to increment.
2721 */
2722#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2723DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2724#else
2725DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2726{
2727 uint32_t u32;
2728# if RT_INLINE_ASM_USES_INTRIN
2729 u32 = _InterlockedIncrement((long *)pu32);
2730 return u32;
2731
2732# elif RT_INLINE_ASM_GNU_STYLE
2733 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2734 : "=r" (u32),
2735 "=m" (*pu32)
2736 : "0" (1)
2737 : "memory");
2738 return u32+1;
2739# else
2740 __asm
2741 {
2742 mov eax, 1
2743# ifdef RT_ARCH_AMD64
2744 mov rdx, [pu32]
2745 lock xadd [rdx], eax
2746# else
2747 mov edx, [pu32]
2748 lock xadd [edx], eax
2749# endif
2750 mov u32, eax
2751 }
2752 return u32+1;
2753# endif
2754}
2755#endif
2756
2757
2758/**
2759 * Atomically increment a signed 32-bit value.
2760 *
2761 * @returns The new value.
2762 * @param pi32 Pointer to the value to increment.
2763 */
2764DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2765{
2766 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2767}
2768
2769
2770/**
2771 * Atomically decrement an unsigned 32-bit value.
2772 *
2773 * @returns The new value.
2774 * @param pu32 Pointer to the value to decrement.
2775 */
2776#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2777DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2778#else
2779DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2780{
2781 uint32_t u32;
2782# if RT_INLINE_ASM_USES_INTRIN
2783 u32 = _InterlockedDecrement((long *)pu32);
2784 return u32;
2785
2786# elif RT_INLINE_ASM_GNU_STYLE
2787 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2788 : "=r" (u32),
2789 "=m" (*pu32)
2790 : "0" (-1)
2791 : "memory");
2792 return u32-1;
2793# else
2794 __asm
2795 {
2796 mov eax, -1
2797# ifdef RT_ARCH_AMD64
2798 mov rdx, [pu32]
2799 lock xadd [rdx], eax
2800# else
2801 mov edx, [pu32]
2802 lock xadd [edx], eax
2803# endif
2804 mov u32, eax
2805 }
2806 return u32-1;
2807# endif
2808}
2809#endif
2810
2811
2812/**
2813 * Atomically decrement a signed 32-bit value.
2814 *
2815 * @returns The new value.
2816 * @param pi32 Pointer to the value to decrement.
2817 */
2818DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2819{
2820 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2821}
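/*
 * Usage sketch (not from the original sources): straightforward reference
 * counting.  Both functions return the *new* count, so the releaser that sees
 * zero owns the destruction.  The structure and destructor are hypothetical.
 */
#if 0 /* example only, not compiled */
typedef struct EXAMPLEOBJ
{
    uint32_t volatile cRefs;
    /* ... payload ... */
} EXAMPLEOBJ;

static void exampleDestroy(EXAMPLEOBJ *pObj);                       /* hypothetical destructor */

DECLINLINE(void) exampleRetain(EXAMPLEOBJ *pObj)
{
    ASMAtomicIncU32(&pObj->cRefs);
}

DECLINLINE(void) exampleRelease(EXAMPLEOBJ *pObj)
{
    if (ASMAtomicDecU32(&pObj->cRefs) == 0)
        exampleDestroy(pObj);                                       /* last reference dropped */
}
#endif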
2822
2823
2824/**
2825 * Atomically Or an unsigned 32-bit value.
2826 *
2827 * @param pu32 Pointer to the variable to OR u32 with.
2828 * @param u32 The value to OR *pu32 with.
2829 */
2830#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2831DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2832#else
2833DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2834{
2835# if RT_INLINE_ASM_USES_INTRIN
2836 _InterlockedOr((long volatile *)pu32, (long)u32);
2837
2838# elif RT_INLINE_ASM_GNU_STYLE
2839 __asm__ __volatile__("lock; orl %1, %0\n\t"
2840 : "=m" (*pu32)
2841 : "ir" (u32));
2842# else
2843 __asm
2844 {
2845 mov eax, [u32]
2846# ifdef RT_ARCH_AMD64
2847 mov rdx, [pu32]
2848 lock or [rdx], eax
2849# else
2850 mov edx, [pu32]
2851 lock or [edx], eax
2852# endif
2853 }
2854# endif
2855}
2856#endif
2857
2858
2859/**
2860 * Atomically Or a signed 32-bit value.
2861 *
2862 * @param pi32 Pointer to the variable to OR i32 with.
2863 * @param i32 The value to OR *pi32 with.
2864 */
2865DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2866{
2867 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2868}
2869
2870
2871/**
2872 * Atomically And an unsigned 32-bit value.
2873 *
2874 * @param pu32 Pointer to the variable to AND u32 with.
2875 * @param u32 The value to AND *pu32 with.
2876 */
2877#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2878DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2879#else
2880DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2881{
2882# if RT_INLINE_ASM_USES_INTRIN
2883 _InterlockedAnd((long volatile *)pu32, u32);
2884
2885# elif RT_INLINE_ASM_GNU_STYLE
2886 __asm__ __volatile__("lock; andl %1, %0\n\t"
2887 : "=m" (*pu32)
2888 : "ir" (u32));
2889# else
2890 __asm
2891 {
2892 mov eax, [u32]
2893# ifdef RT_ARCH_AMD64
2894 mov rdx, [pu32]
2895 lock and [rdx], eax
2896# else
2897 mov edx, [pu32]
2898 lock and [edx], eax
2899# endif
2900 }
2901# endif
2902}
2903#endif
2904
2905
2906/**
2907 * Atomically And a signed 32-bit value.
2908 *
2909 * @param pi32 Pointer to the variable to AND i32 with.
2910 * @param i32 The value to AND *pi32 with.
2911 */
2912DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2913{
2914 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2915}
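/*
 * Usage sketch (not from the original sources): ASMAtomicOrU32 sets flag bits
 * and ASMAtomicAndU32 clears them without disturbing the other bits.  The flag
 * names and helpers are made up.
 */
#if 0 /* example only, not compiled */
#define EXAMPLE_F_BUSY  0x00000001U
#define EXAMPLE_F_DIRTY 0x00000002U

DECLINLINE(void) exampleMarkBusyAndDirty(uint32_t volatile *pfStatus)
{
    ASMAtomicOrU32(pfStatus, EXAMPLE_F_BUSY | EXAMPLE_F_DIRTY);     /* set both bits */
}

DECLINLINE(void) exampleClearBusy(uint32_t volatile *pfStatus)
{
    ASMAtomicAndU32(pfStatus, ~EXAMPLE_F_BUSY);                     /* clear just the busy bit */
}
#endif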
2916
2917
2918/**
2919 * Invalidate page.
2920 *
2921 * @param pv Address of the page to invalidate.
2922 */
2923#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2924DECLASM(void) ASMInvalidatePage(void *pv);
2925#else
2926DECLINLINE(void) ASMInvalidatePage(void *pv)
2927{
2928# if RT_INLINE_ASM_USES_INTRIN
2929 __invlpg(pv);
2930
2931# elif RT_INLINE_ASM_GNU_STYLE
2932 __asm__ __volatile__("invlpg %0\n\t"
2933 : : "m" (*(uint8_t *)pv));
2934# else
2935 __asm
2936 {
2937# ifdef RT_ARCH_AMD64
2938 mov rax, [pv]
2939 invlpg [rax]
2940# else
2941 mov eax, [pv]
2942 invlpg [eax]
2943# endif
2944 }
2945# endif
2946}
2947#endif
2948
2949
2950#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2951# if PAGE_SIZE != 0x1000
2952# error "PAGE_SIZE is not 0x1000!"
2953# endif
2954#endif
2955
2956/**
2957 * Zeros a 4K memory page.
2958 *
2959 * @param pv Pointer to the memory block. This must be page aligned.
2960 */
2961#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2962DECLASM(void) ASMMemZeroPage(volatile void *pv);
2963# else
2964DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2965{
2966# if RT_INLINE_ASM_USES_INTRIN
2967# ifdef RT_ARCH_AMD64
2968 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2969# else
2970 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2971# endif
2972
2973# elif RT_INLINE_ASM_GNU_STYLE
2974 RTUINTREG uDummy;
2975# ifdef RT_ARCH_AMD64
2976 __asm__ __volatile__ ("rep stosq"
2977 : "=D" (pv),
2978 "=c" (uDummy)
2979 : "0" (pv),
2980 "c" (0x1000 >> 3),
2981 "a" (0)
2982 : "memory");
2983# else
2984 __asm__ __volatile__ ("rep stosl"
2985 : "=D" (pv),
2986 "=c" (uDummy)
2987 : "0" (pv),
2988 "c" (0x1000 >> 2),
2989 "a" (0)
2990 : "memory");
2991# endif
2992# else
2993 __asm
2994 {
2995# ifdef RT_ARCH_AMD64
2996 xor rax, rax
2997 mov ecx, 0200h
2998 mov rdi, [pv]
2999 rep stosq
3000# else
3001 xor eax, eax
3002 mov ecx, 0400h
3003 mov edi, [pv]
3004 rep stosd
3005# endif
3006 }
3007# endif
3008}
3009# endif
3010
3011
3012/**
3013 * Zeros a memory block with a 32-bit aligned size.
3014 *
3015 * @param pv Pointer to the memory block.
3016 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3017 */
3018#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3019DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3020#else
3021DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3022{
3023# if RT_INLINE_ASM_USES_INTRIN
3024 __stosd((unsigned long *)pv, 0, cb >> 2);
3025
3026# elif RT_INLINE_ASM_GNU_STYLE
3027 __asm__ __volatile__ ("rep stosl"
3028 : "=D" (pv),
3029 "=c" (cb)
3030 : "0" (pv),
3031 "1" (cb >> 2),
3032 "a" (0)
3033 : "memory");
3034# else
3035 __asm
3036 {
3037 xor eax, eax
3038# ifdef RT_ARCH_AMD64
3039 mov rcx, [cb]
3040 shr rcx, 2
3041 mov rdi, [pv]
3042# else
3043 mov ecx, [cb]
3044 shr ecx, 2
3045 mov edi, [pv]
3046# endif
3047 rep stosd
3048 }
3049# endif
3050}
3051#endif
3052
3053
3054/**
3055 * Fills a memory block with a 32-bit value. The block size must be 32-bit aligned.
3056 *
3057 * @param pv Pointer to the memory block.
3058 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3059 * @param u32 The value to fill with.
3060 */
3061#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3062DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3063#else
3064DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3065{
3066# if RT_INLINE_ASM_USES_INTRIN
3067 __stosd((unsigned long *)pv, u32, cb >> 2);
3068
3069# elif RT_INLINE_ASM_GNU_STYLE
3070 __asm__ __volatile__ ("rep stosl"
3071 : "=D" (pv),
3072 "=c" (cb)
3073 : "0" (pv),
3074 "1" (cb >> 2),
3075 "a" (u32)
3076 : "memory");
3077# else
3078 __asm
3079 {
3080# ifdef RT_ARCH_AMD64
3081 mov rcx, [cb]
3082 shr rcx, 2
3083 mov rdi, [pv]
3084# else
3085 mov ecx, [cb]
3086 shr ecx, 2
3087 mov edi, [pv]
3088# endif
3089 mov eax, [u32]
3090 rep stosd
3091 }
3092# endif
3093}
3094#endif
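/*
 * Usage sketch (not from the original sources): both helpers require the byte
 * count to be a multiple of four, which sizeof() of a 32-bit aligned structure
 * satisfies.  The structure is hypothetical.
 */
#if 0 /* example only, not compiled */
typedef struct EXAMPLEHDR
{
    uint32_t u32Magic;
    uint32_t cbPayload;
    uint32_t au32Reserved[6];
} EXAMPLEHDR;

DECLINLINE(void) exampleInitHdr(EXAMPLEHDR *pHdr)
{
    ASMMemZero32(pHdr, sizeof(*pHdr));                              /* 32 bytes, a multiple of 4 */
    ASMMemFill32(pHdr->au32Reserved, sizeof(pHdr->au32Reserved), 0xffffffffU);
}
#endif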
3095
3096
3097/**
3098 * Checks if a memory block is filled with the specified byte.
3099 *
3100 * This is a sort of inverted memchr.
3101 *
3102 * @returns Pointer to the byte which doesn't equal u8.
3103 * @returns NULL if all equal to u8.
3104 *
3105 * @param pv Pointer to the memory block.
3106 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3107 * @param u8 The value it's supposed to be filled with.
3108 */
3109#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3110DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
3111#else
3112DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3113{
3114/** @todo rewrite this in inline assembly. */
3115 uint8_t const *pb = (uint8_t const *)pv;
3116 for (; cb; cb--, pb++)
3117 if (RT_UNLIKELY(*pb != u8))
3118 return (void *)pb;
3119 return NULL;
3120}
3121#endif
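/*
 * Usage sketch (not from the original sources): checking that a sector sized
 * buffer is still blank before bothering to write it out.  NULL means every
 * byte matched.  The helper name is made up.
 */
#if 0 /* example only, not compiled */
DECLINLINE(bool) exampleIsSectorBlank(const uint8_t *pbSector)
{
    return ASMMemIsAll8(pbSector, 512, 0) == NULL;                  /* 512 is a multiple of 4 */
}
#endif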
3122
3123
3124
3125/**
3126 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
3127 *
3128 * @returns u32F1 * u32F2.
3129 */
3130#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3131DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
3132#else
3133DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
3134{
3135# ifdef RT_ARCH_AMD64
3136 return (uint64_t)u32F1 * u32F2;
3137# else /* !RT_ARCH_AMD64 */
3138 uint64_t u64;
3139# if RT_INLINE_ASM_GNU_STYLE
3140 __asm__ __volatile__("mull %%edx"
3141 : "=A" (u64)
3142 : "a" (u32F2), "d" (u32F1));
3143# else
3144 __asm
3145 {
3146 mov edx, [u32F1]
3147 mov eax, [u32F2]
3148 mul edx
3149 mov dword ptr [u64], eax
3150 mov dword ptr [u64 + 4], edx
3151 }
3152# endif
3153 return u64;
3154# endif /* !RT_ARCH_AMD64 */
3155}
3156#endif
3157
3158
3159/**
3160 * Multiplies two signed 32-bit values returning a signed 64-bit result.
3161 *
3162 * @returns i32F1 * i32F2.
3163 */
3164#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3165DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
3166#else
3167DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
3168{
3169# ifdef RT_ARCH_AMD64
3170 return (int64_t)i32F1 * i32F2;
3171# else /* !RT_ARCH_AMD64 */
3172 int64_t i64;
3173# if RT_INLINE_ASM_GNU_STYLE
3174 __asm__ __volatile__("imull %%edx"
3175 : "=A" (i64)
3176 : "a" (i32F2), "d" (i32F1));
3177# else
3178 __asm
3179 {
3180 mov edx, [i32F1]
3181 mov eax, [i32F2]
3182 imul edx
3183 mov dword ptr [i64], eax
3184 mov dword ptr [i64 + 4], edx
3185 }
3186# endif
3187 return i64;
3188# endif /* !RT_ARCH_AMD64 */
3189}
3190#endif
3191
3192
3193/**
3194 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
3195 *
3196 * @returns u64 / u32.
3197 */
3198#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3199DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
3200#else
3201DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
3202{
3203# ifdef RT_ARCH_AMD64
3204 return (uint32_t)(u64 / u32);
3205# else /* !RT_ARCH_AMD64 */
3206# if RT_INLINE_ASM_GNU_STYLE
3207 RTUINTREG uDummy;
3208 __asm__ __volatile__("divl %3"
3209 : "=a" (u32), "=d"(uDummy)
3210 : "A" (u64), "r" (u32));
3211# else
3212 __asm
3213 {
3214 mov eax, dword ptr [u64]
3215 mov edx, dword ptr [u64 + 4]
3216 mov ecx, [u32]
3217 div ecx
3218 mov [u32], eax
3219 }
3220# endif
3221 return u32;
3222# endif /* !RT_ARCH_AMD64 */
3223}
3224#endif
3225
3226
3227/**
3228 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
3229 *
3230 * @returns i64 / i32.
3231 */
3232#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3233DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
3234#else
3235DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
3236{
3237# ifdef RT_ARCH_AMD64
3238 return (int32_t)(i64 / i32);
3239# else /* !RT_ARCH_AMD64 */
3240# if RT_INLINE_ASM_GNU_STYLE
3241 RTUINTREG iDummy;
3242 __asm__ __volatile__("idivl %3"
3243 : "=a" (i32), "=d"(iDummy)
3244 : "A" (i64), "r" (i32));
3245# else
3246 __asm
3247 {
3248 mov eax, dword ptr [i64]
3249 mov edx, dword ptr [i64 + 4]
3250 mov ecx, [i32]
3251 idiv ecx
3252 mov [i32], eax
3253 }
3254# endif
3255 return i32;
3256# endif /* !RT_ARCH_AMD64 */
3257}
3258#endif
3259
3260
3261/**
3262 * Multiplies a 64-bit value by a 32-bit integer and divides the result by a 32-bit
3263 * integer, using a 96-bit intermediate result.
3264 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
3265 * __udivdi3 and __umoddi3 even if this inline function is not used.
3266 *
3267 * @returns (u64A * u32B) / u32C.
3268 * @param u64A The 64-bit value.
3269 * @param u32B The 32-bit value to multiply A by.
3270 * @param u32C The 32-bit value to divide A*B by.
3271 */
3272#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
3273DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
3274#else
3275DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
3276{
3277# if RT_INLINE_ASM_GNU_STYLE
3278# ifdef RT_ARCH_AMD64
3279 uint64_t u64Result, u64Spill;
3280 __asm__ __volatile__("mulq %2\n\t"
3281 "divq %3\n\t"
3282 : "=a" (u64Result),
3283 "=d" (u64Spill)
3284 : "r" ((uint64_t)u32B),
3285 "r" ((uint64_t)u32C),
3286 "0" (u64A),
3287 "1" (0));
3288 return u64Result;
3289# else
3290 uint32_t u32Dummy;
3291 uint64_t u64Result;
3292 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
3293 edx = u64Lo.hi = (u64A.lo * u32B).hi */
3294 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
3295 eax = u64A.hi */
3296 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
3297 edx = u32C */
3298 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
3299 edx = u32B */
3300 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
3301 edx = u64Hi.hi = (u64A.hi * u32B).hi */
3302 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
3303 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
3304 "divl %%ecx \n\t" /* eax = u64Hi / u32C
3305 edx = u64Hi % u32C */
3306 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
3307 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
3308 "divl %%ecx \n\t" /* u64Result.lo */
3309 "movl %%edi,%%edx \n\t" /* u64Result.hi */
3310 : "=A"(u64Result), "=c"(u32Dummy),
3311 "=S"(u32Dummy), "=D"(u32Dummy)
3312 : "a"((uint32_t)u64A),
3313 "S"((uint32_t)(u64A >> 32)),
3314 "c"(u32B),
3315 "D"(u32C));
3316 return u64Result;
3317# endif
3318# else
3319 RTUINT64U u;
3320 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
3321 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
3322 u64Hi += (u64Lo >> 32);
3323 u.s.Hi = (uint32_t)(u64Hi / u32C);
3324 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
3325 return u.u;
3326# endif
3327}
3328#endif
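/*
 * Usage sketch (not from the original sources): the classic use is rescaling a
 * 64-bit tick count without overflowing the intermediate product, e.g. turning
 * TSC ticks into nanoseconds.  The frequency value is an assumption.
 */
#if 0 /* example only, not compiled */
DECLINLINE(uint64_t) exampleTicksToNano(uint64_t cTicks)
{
    uint32_t uTscHz = 2000000000;                                   /* assumed 2 GHz CPU */
    return ASMMultU64ByU32DivByU32(cTicks, 1000000000, uTscHz);     /* cTicks * 1e9 / Hz */
}
#endif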
3329
3330
3331/**
3332 * Probes a byte pointer for read access.
3333 *
3334 * While the function will fault if the byte is not read accessible,
3335 * the idea is to do this in a safe place like before acquiring locks
3336 * and such like.
3337 *
3338 * Also, this function guarantees that an eager compiler is not going
3339 * to optimize the probing away.
3340 *
3341 * @param pvByte Pointer to the byte.
3342 */
3343#if RT_INLINE_ASM_EXTERNAL
3344DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3345#else
3346DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3347{
3348 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3349 uint8_t u8;
3350# if RT_INLINE_ASM_GNU_STYLE
3351 __asm__ __volatile__("movb (%1), %0\n\t"
3352 : "=r" (u8)
3353 : "r" (pvByte));
3354# else
3355 __asm
3356 {
3357# ifdef RT_ARCH_AMD64
3358 mov rax, [pvByte]
3359 mov al, [rax]
3360# else
3361 mov eax, [pvByte]
3362 mov al, [eax]
3363# endif
3364 mov [u8], al
3365 }
3366# endif
3367 return u8;
3368}
3369#endif
3370
3371/**
3372 * Probes a buffer for read access page by page.
3373 *
3374 * While the function will fault if the buffer is not fully read
3375 * accessible, the idea is to do this in a safe place like before
3376 * acquiring locks and such like.
3377 *
3378 * Also, this function guarantees that an eager compiler is not going
3379 * to optimize the probing away.
3380 *
3381 * @param pvBuf Pointer to the buffer.
3382 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3383 */
3384DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3385{
3386 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3387 /* the first byte */
3388 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3389 ASMProbeReadByte(pu8);
3390
3391 /* the pages in between. */
3392 while (cbBuf > /*PAGE_SIZE*/0x1000)
3393 {
3394 ASMProbeReadByte(pu8);
3395 cbBuf -= /*PAGE_SIZE*/0x1000;
3396 pu8 += /*PAGE_SIZE*/0x1000;
3397 }
3398
3399 /* the last byte */
3400 ASMProbeReadByte(pu8 + cbBuf - 1);
3401}
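/*
 * Usage sketch (not from the original sources): touch a caller supplied buffer
 * before entering a critical section so a bad pointer faults here, in a safe
 * place, rather than while a lock is held.  The helper name is made up.
 */
#if 0 /* example only, not compiled */
static int exampleQueueWrite(const void *pvUser, size_t cbUser)
{
    ASMProbeReadBuffer(pvUser, cbUser);     /* faults now if the buffer is bogus */
    /* enter critical section, copy the data, leave critical section... */
    return 0;
}
#endif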
3402
3403
3404/** @def ASMBreakpoint
3405 * Debugger Breakpoint.
3406 * @remark In the gnu world we add a nop instruction after the int3 to
3407 * force gdb to remain at the int3 source line.
3408 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
3409 * @internal
3410 */
3411#if RT_INLINE_ASM_GNU_STYLE
3412# ifndef __L4ENV__
3413# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
3414# else
3415# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
3416# endif
3417#else
3418# define ASMBreakpoint() __debugbreak()
3419#endif
3420
3421
3422
3423/** @defgroup grp_inline_bits Bit Operations
3424 * @{
3425 */
3426
3427
3428/**
3429 * Sets a bit in a bitmap.
3430 *
3431 * @param pvBitmap Pointer to the bitmap.
3432 * @param iBit The bit to set.
3433 */
3434#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3435DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3436#else
3437DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3438{
3439# if RT_INLINE_ASM_USES_INTRIN
3440 _bittestandset((long *)pvBitmap, iBit);
3441
3442# elif RT_INLINE_ASM_GNU_STYLE
3443 __asm__ __volatile__ ("btsl %1, %0"
3444 : "=m" (*(volatile long *)pvBitmap)
3445 : "Ir" (iBit)
3446 : "memory");
3447# else
3448 __asm
3449 {
3450# ifdef RT_ARCH_AMD64
3451 mov rax, [pvBitmap]
3452 mov edx, [iBit]
3453 bts [rax], edx
3454# else
3455 mov eax, [pvBitmap]
3456 mov edx, [iBit]
3457 bts [eax], edx
3458# endif
3459 }
3460# endif
3461}
3462#endif
3463
3464
3465/**
3466 * Atomically sets a bit in a bitmap.
3467 *
3468 * @param pvBitmap Pointer to the bitmap.
3469 * @param iBit The bit to set.
3470 */
3471#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3472DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3473#else
3474DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3475{
3476# if RT_INLINE_ASM_USES_INTRIN
3477 _interlockedbittestandset((long *)pvBitmap, iBit);
3478# elif RT_INLINE_ASM_GNU_STYLE
3479 __asm__ __volatile__ ("lock; btsl %1, %0"
3480 : "=m" (*(volatile long *)pvBitmap)
3481 : "Ir" (iBit)
3482 : "memory");
3483# else
3484 __asm
3485 {
3486# ifdef RT_ARCH_AMD64
3487 mov rax, [pvBitmap]
3488 mov edx, [iBit]
3489 lock bts [rax], edx
3490# else
3491 mov eax, [pvBitmap]
3492 mov edx, [iBit]
3493 lock bts [eax], edx
3494# endif
3495 }
3496# endif
3497}
3498#endif
3499
3500
3501/**
3502 * Clears a bit in a bitmap.
3503 *
3504 * @param pvBitmap Pointer to the bitmap.
3505 * @param iBit The bit to clear.
3506 */
3507#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3508DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3509#else
3510DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3511{
3512# if RT_INLINE_ASM_USES_INTRIN
3513 _bittestandreset((long *)pvBitmap, iBit);
3514
3515# elif RT_INLINE_ASM_GNU_STYLE
3516 __asm__ __volatile__ ("btrl %1, %0"
3517 : "=m" (*(volatile long *)pvBitmap)
3518 : "Ir" (iBit)
3519 : "memory");
3520# else
3521 __asm
3522 {
3523# ifdef RT_ARCH_AMD64
3524 mov rax, [pvBitmap]
3525 mov edx, [iBit]
3526 btr [rax], edx
3527# else
3528 mov eax, [pvBitmap]
3529 mov edx, [iBit]
3530 btr [eax], edx
3531# endif
3532 }
3533# endif
3534}
3535#endif
3536
3537
3538/**
3539 * Atomically clears a bit in a bitmap.
3540 *
3541 * @param pvBitmap Pointer to the bitmap.
3542 * @param iBit The bit to clear.
3543 * @remark No memory barrier, take care on SMP.
3544 */
3545#if RT_INLINE_ASM_EXTERNAL
3546DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3547#else
3548DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3549{
3550# if RT_INLINE_ASM_GNU_STYLE
3551 __asm__ __volatile__ ("lock; btrl %1, %0"
3552 : "=m" (*(volatile long *)pvBitmap)
3553 : "Ir" (iBit)
3554 : "memory");
3555# else
3556 __asm
3557 {
3558# ifdef RT_ARCH_AMD64
3559 mov rax, [pvBitmap]
3560 mov edx, [iBit]
3561 lock btr [rax], edx
3562# else
3563 mov eax, [pvBitmap]
3564 mov edx, [iBit]
3565 lock btr [eax], edx
3566# endif
3567 }
3568# endif
3569}
3570#endif
3571
3572
3573/**
3574 * Toggles a bit in a bitmap.
3575 *
3576 * @param pvBitmap Pointer to the bitmap.
3577 * @param iBit The bit to toggle.
3578 */
3579#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3580DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3581#else
3582DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3583{
3584# if RT_INLINE_ASM_USES_INTRIN
3585 _bittestandcomplement((long *)pvBitmap, iBit);
3586# elif RT_INLINE_ASM_GNU_STYLE
3587 __asm__ __volatile__ ("btcl %1, %0"
3588 : "=m" (*(volatile long *)pvBitmap)
3589 : "Ir" (iBit)
3590 : "memory");
3591# else
3592 __asm
3593 {
3594# ifdef RT_ARCH_AMD64
3595 mov rax, [pvBitmap]
3596 mov edx, [iBit]
3597 btc [rax], edx
3598# else
3599 mov eax, [pvBitmap]
3600 mov edx, [iBit]
3601 btc [eax], edx
3602# endif
3603 }
3604# endif
3605}
3606#endif
3607
3608
3609/**
3610 * Atomically toggles a bit in a bitmap.
3611 *
3612 * @param pvBitmap Pointer to the bitmap.
3613 * @param iBit The bit to toggle.
3614 */
3615#if RT_INLINE_ASM_EXTERNAL
3616DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3617#else
3618DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3619{
3620# if RT_INLINE_ASM_GNU_STYLE
3621 __asm__ __volatile__ ("lock; btcl %1, %0"
3622 : "=m" (*(volatile long *)pvBitmap)
3623 : "Ir" (iBit)
3624 : "memory");
3625# else
3626 __asm
3627 {
3628# ifdef RT_ARCH_AMD64
3629 mov rax, [pvBitmap]
3630 mov edx, [iBit]
3631 lock btc [rax], edx
3632# else
3633 mov eax, [pvBitmap]
3634 mov edx, [iBit]
3635 lock btc [eax], edx
3636# endif
3637 }
3638# endif
3639}
3640#endif
3641
3642
3643/**
3644 * Tests and sets a bit in a bitmap.
3645 *
3646 * @returns true if the bit was set.
3647 * @returns false if the bit was clear.
3648 * @param pvBitmap Pointer to the bitmap.
3649 * @param iBit The bit to test and set.
3650 */
3651#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3652DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3653#else
3654DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3655{
3656 union { bool f; uint32_t u32; uint8_t u8; } rc;
3657# if RT_INLINE_ASM_USES_INTRIN
3658 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3659
3660# elif RT_INLINE_ASM_GNU_STYLE
3661 __asm__ __volatile__ ("btsl %2, %1\n\t"
3662 "setc %b0\n\t"
3663 "andl $1, %0\n\t"
3664 : "=q" (rc.u32),
3665 "=m" (*(volatile long *)pvBitmap)
3666 : "Ir" (iBit)
3667 : "memory");
3668# else
3669 __asm
3670 {
3671 mov edx, [iBit]
3672# ifdef RT_ARCH_AMD64
3673 mov rax, [pvBitmap]
3674 bts [rax], edx
3675# else
3676 mov eax, [pvBitmap]
3677 bts [eax], edx
3678# endif
3679 setc al
3680 and eax, 1
3681 mov [rc.u32], eax
3682 }
3683# endif
3684 return rc.f;
3685}
3686#endif
3687
3688
3689/**
3690 * Atomically tests and sets a bit in a bitmap.
3691 *
3692 * @returns true if the bit was set.
3693 * @returns false if the bit was clear.
3694 * @param pvBitmap Pointer to the bitmap.
3695 * @param iBit The bit to test and set.
3696 */
3697#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3698DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3699#else
3700DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3701{
3702 union { bool f; uint32_t u32; uint8_t u8; } rc;
3703# if RT_INLINE_ASM_USES_INTRIN
3704 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3705# elif RT_INLINE_ASM_GNU_STYLE
3706 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3707 "setc %b0\n\t"
3708 "andl $1, %0\n\t"
3709 : "=q" (rc.u32),
3710 "=m" (*(volatile long *)pvBitmap)
3711 : "Ir" (iBit)
3712 : "memory");
3713# else
3714 __asm
3715 {
3716 mov edx, [iBit]
3717# ifdef RT_ARCH_AMD64
3718 mov rax, [pvBitmap]
3719 lock bts [rax], edx
3720# else
3721 mov eax, [pvBitmap]
3722 lock bts [eax], edx
3723# endif
3724 setc al
3725 and eax, 1
3726 mov [rc.u32], eax
3727 }
3728# endif
3729 return rc.f;
3730}
3731#endif
3732
3733
3734/**
3735 * Tests and clears a bit in a bitmap.
3736 *
3737 * @returns true if the bit was set.
3738 * @returns false if the bit was clear.
3739 * @param pvBitmap Pointer to the bitmap.
3740 * @param iBit The bit to test and clear.
3741 */
3742#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3743DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3744#else
3745DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3746{
3747 union { bool f; uint32_t u32; uint8_t u8; } rc;
3748# if RT_INLINE_ASM_USES_INTRIN
3749 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3750
3751# elif RT_INLINE_ASM_GNU_STYLE
3752 __asm__ __volatile__ ("btrl %2, %1\n\t"
3753 "setc %b0\n\t"
3754 "andl $1, %0\n\t"
3755 : "=q" (rc.u32),
3756 "=m" (*(volatile long *)pvBitmap)
3757 : "Ir" (iBit)
3758 : "memory");
3759# else
3760 __asm
3761 {
3762 mov edx, [iBit]
3763# ifdef RT_ARCH_AMD64
3764 mov rax, [pvBitmap]
3765 btr [rax], edx
3766# else
3767 mov eax, [pvBitmap]
3768 btr [eax], edx
3769# endif
3770 setc al
3771 and eax, 1
3772 mov [rc.u32], eax
3773 }
3774# endif
3775 return rc.f;
3776}
3777#endif
3778
3779
3780/**
3781 * Atomically tests and clears a bit in a bitmap.
3782 *
3783 * @returns true if the bit was set.
3784 * @returns false if the bit was clear.
3785 * @param pvBitmap Pointer to the bitmap.
3786 * @param iBit The bit to test and clear.
3787 * @remark No memory barrier, take care on SMP.
3788 */
3789#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3790DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3791#else
3792DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3793{
3794 union { bool f; uint32_t u32; uint8_t u8; } rc;
3795# if RT_INLINE_ASM_USES_INTRIN
3796 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3797
3798# elif RT_INLINE_ASM_GNU_STYLE
3799 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3800 "setc %b0\n\t"
3801 "andl $1, %0\n\t"
3802 : "=q" (rc.u32),
3803 "=m" (*(volatile long *)pvBitmap)
3804 : "Ir" (iBit)
3805 : "memory");
3806# else
3807 __asm
3808 {
3809 mov edx, [iBit]
3810# ifdef RT_ARCH_AMD64
3811 mov rax, [pvBitmap]
3812 lock btr [rax], edx
3813# else
3814 mov eax, [pvBitmap]
3815 lock btr [eax], edx
3816# endif
3817 setc al
3818 and eax, 1
3819 mov [rc.u32], eax
3820 }
3821# endif
3822 return rc.f;
3823}
3824#endif
3825
3826
3827/**
3828 * Tests and toggles a bit in a bitmap.
3829 *
3830 * @returns true if the bit was set.
3831 * @returns false if the bit was clear.
3832 * @param pvBitmap Pointer to the bitmap.
3833 * @param iBit The bit to test and toggle.
3834 */
3835#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3836DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3837#else
3838DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3839{
3840 union { bool f; uint32_t u32; uint8_t u8; } rc;
3841# if RT_INLINE_ASM_USES_INTRIN
3842 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3843
3844# elif RT_INLINE_ASM_GNU_STYLE
3845 __asm__ __volatile__ ("btcl %2, %1\n\t"
3846 "setc %b0\n\t"
3847 "andl $1, %0\n\t"
3848 : "=q" (rc.u32),
3849 "=m" (*(volatile long *)pvBitmap)
3850 : "Ir" (iBit)
3851 : "memory");
3852# else
3853 __asm
3854 {
3855 mov edx, [iBit]
3856# ifdef RT_ARCH_AMD64
3857 mov rax, [pvBitmap]
3858 btc [rax], edx
3859# else
3860 mov eax, [pvBitmap]
3861 btc [eax], edx
3862# endif
3863 setc al
3864 and eax, 1
3865 mov [rc.u32], eax
3866 }
3867# endif
3868 return rc.f;
3869}
3870#endif
3871
3872
3873/**
3874 * Atomically tests and toggles a bit in a bitmap.
3875 *
3876 * @returns true if the bit was set.
3877 * @returns false if the bit was clear.
3878 * @param pvBitmap Pointer to the bitmap.
3879 * @param iBit The bit to test and toggle.
3880 */
3881#if RT_INLINE_ASM_EXTERNAL
3882DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3883#else
3884DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3885{
3886 union { bool f; uint32_t u32; uint8_t u8; } rc;
3887# if RT_INLINE_ASM_GNU_STYLE
3888 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3889 "setc %b0\n\t"
3890 "andl $1, %0\n\t"
3891 : "=q" (rc.u32),
3892 "=m" (*(volatile long *)pvBitmap)
3893 : "Ir" (iBit)
3894 : "memory");
3895# else
3896 __asm
3897 {
3898 mov edx, [iBit]
3899# ifdef RT_ARCH_AMD64
3900 mov rax, [pvBitmap]
3901 lock btc [rax], edx
3902# else
3903 mov eax, [pvBitmap]
3904 lock btc [eax], edx
3905# endif
3906 setc al
3907 and eax, 1
3908 mov [rc.u32], eax
3909 }
3910# endif
3911 return rc.f;
3912}
3913#endif
3914
3915
3916/**
3917 * Tests if a bit in a bitmap is set.
3918 *
3919 * @returns true if the bit is set.
3920 * @returns false if the bit is clear.
3921 * @param pvBitmap Pointer to the bitmap.
3922 * @param iBit The bit to test.
3923 */
3924#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3925DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3926#else
3927DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3928{
3929 union { bool f; uint32_t u32; uint8_t u8; } rc;
3930# if RT_INLINE_ASM_USES_INTRIN
3931 rc.u32 = _bittest((long *)pvBitmap, iBit);
3932# elif RT_INLINE_ASM_GNU_STYLE
3933
3934 __asm__ __volatile__ ("btl %2, %1\n\t"
3935 "setc %b0\n\t"
3936 "andl $1, %0\n\t"
3937 : "=q" (rc.u32),
3938 "=m" (*(volatile long *)pvBitmap)
3939 : "Ir" (iBit)
3940 : "memory");
3941# else
3942 __asm
3943 {
3944 mov edx, [iBit]
3945# ifdef RT_ARCH_AMD64
3946 mov rax, [pvBitmap]
3947 bt [rax], edx
3948# else
3949 mov eax, [pvBitmap]
3950 bt [eax], edx
3951# endif
3952 setc al
3953 and eax, 1
3954 mov [rc.u32], eax
3955 }
3956# endif
3957 return rc.f;
3958}
3959#endif
3960
3961
3962/**
3963 * Clears a bit range within a bitmap.
3964 *
3965 * @param pvBitmap Pointer to the bitmap.
3966 * @param iBitStart The first bit to clear.
3967 * @param iBitEnd The first bit not to clear.
3968 */
3969DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3970{
3971 if (iBitStart < iBitEnd)
3972 {
3973 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3974 int iStart = iBitStart & ~31;
3975 int iEnd = iBitEnd & ~31;
3976 if (iStart == iEnd)
3977 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3978 else
3979 {
3980 /* bits in first dword. */
3981 if (iBitStart & 31)
3982 {
3983 *pu32 &= (1 << (iBitStart & 31)) - 1;
3984 pu32++;
3985 iBitStart = iStart + 32;
3986 }
3987
3988 /* whole dword. */
3989 if (iBitStart != iEnd)
3990 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3991
3992 /* bits in last dword. */
3993 if (iBitEnd & 31)
3994 {
3995 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3996 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3997 }
3998 }
3999 }
4000}
4001
4002
4003/**
4004 * Finds the first clear bit in a bitmap.
4005 *
4006 * @returns Index of the first zero bit.
4007 * @returns -1 if no clear bit was found.
4008 * @param pvBitmap Pointer to the bitmap.
4009 * @param cBits The number of bits in the bitmap. Multiple of 32.
4010 */
4011#if RT_INLINE_ASM_EXTERNAL
4012DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
4013#else
4014DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
4015{
4016 if (cBits)
4017 {
4018 int32_t iBit;
4019# if RT_INLINE_ASM_GNU_STYLE
4020 RTCCUINTREG uEAX, uECX, uEDI;
4021 cBits = RT_ALIGN_32(cBits, 32);
4022 __asm__ __volatile__("repe; scasl\n\t"
4023 "je 1f\n\t"
4024# ifdef RT_ARCH_AMD64
4025 "lea -4(%%rdi), %%rdi\n\t"
4026 "xorl (%%rdi), %%eax\n\t"
4027 "subq %5, %%rdi\n\t"
4028# else
4029 "lea -4(%%edi), %%edi\n\t"
4030 "xorl (%%edi), %%eax\n\t"
4031 "subl %5, %%edi\n\t"
4032# endif
4033 "shll $3, %%edi\n\t"
4034 "bsfl %%eax, %%edx\n\t"
4035 "addl %%edi, %%edx\n\t"
4036 "1:\t\n"
4037 : "=d" (iBit),
4038 "=&c" (uECX),
4039 "=&D" (uEDI),
4040 "=&a" (uEAX)
4041 : "0" (0xffffffff),
4042 "mr" (pvBitmap),
4043 "1" (cBits >> 5),
4044 "2" (pvBitmap),
4045 "3" (0xffffffff));
4046# else
4047 cBits = RT_ALIGN_32(cBits, 32);
4048 __asm
4049 {
4050# ifdef RT_ARCH_AMD64
4051 mov rdi, [pvBitmap]
4052 mov rbx, rdi
4053# else
4054 mov edi, [pvBitmap]
4055 mov ebx, edi
4056# endif
4057 mov edx, 0ffffffffh
4058 mov eax, edx
4059 mov ecx, [cBits]
4060 shr ecx, 5
4061 repe scasd
4062 je done
4063
4064# ifdef RT_ARCH_AMD64
4065 lea rdi, [rdi - 4]
4066 xor eax, [rdi]
4067 sub rdi, rbx
4068# else
4069 lea edi, [edi - 4]
4070 xor eax, [edi]
4071 sub edi, ebx
4072# endif
4073 shl edi, 3
4074 bsf edx, eax
4075 add edx, edi
4076 done:
4077 mov [iBit], edx
4078 }
4079# endif
4080 return iBit;
4081 }
4082 return -1;
4083}
4084#endif
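/*
 * Usage sketch (not from the original sources): a simple slot allocator that
 * scans for a clear bit and claims it atomically, retrying when another thread
 * grabs the same slot first.  The bitmap size is made up.
 */
#if 0 /* example only, not compiled */
static uint32_t volatile g_bmExampleSlots[256 / 32];                /* 256 slots, multiple of 32 bits */

static int exampleAllocSlot(void)
{
    for (;;)
    {
        int iSlot = ASMBitFirstClear(g_bmExampleSlots, 256);
        if (iSlot < 0)
            return -1;                                              /* all slots taken */
        if (!ASMAtomicBitTestAndSet(g_bmExampleSlots, iSlot))
            return iSlot;                                           /* the bit was clear and is now ours */
        /* somebody beat us to this slot; scan again. */
    }
}
#endif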
4085
4086
4087/**
4088 * Finds the next clear bit in a bitmap.
4089 *
4090 * @returns Index of the next clear bit.
4091 * @returns -1 if no clear bit was found.
4092 * @param pvBitmap Pointer to the bitmap.
4093 * @param cBits The number of bits in the bitmap. Multiple of 32.
4094 * @param iBitPrev The bit returned from the last search.
4095 * The search will start at iBitPrev + 1.
4096 */
4097#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4098DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4099#else
4100DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4101{
4102 int iBit = ++iBitPrev & 31;
4103 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4104 cBits -= iBitPrev & ~31;
4105 if (iBit)
4106 {
4107 /* inspect the first dword. */
4108 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
4109# if RT_INLINE_ASM_USES_INTRIN
4110 unsigned long ulBit = 0;
4111 if (_BitScanForward(&ulBit, u32))
4112 return ulBit + iBitPrev;
4113 iBit = -1;
4114# else
4115# if RT_INLINE_ASM_GNU_STYLE
4116 __asm__ __volatile__("bsf %1, %0\n\t"
4117 "jnz 1f\n\t"
4118 "movl $-1, %0\n\t"
4119 "1:\n\t"
4120 : "=r" (iBit)
4121 : "r" (u32));
4122# else
4123 __asm
4124 {
4125 mov edx, [u32]
4126 bsf eax, edx
4127 jnz done
4128 mov eax, 0ffffffffh
4129 done:
4130 mov [iBit], eax
4131 }
4132# endif
4133 if (iBit >= 0)
4134 return iBit + iBitPrev;
4135# endif
4136 /* Search the rest of the bitmap, if there is anything. */
4137 if (cBits > 32)
4138 {
4139 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4140 if (iBit >= 0)
4141 return iBit + (iBitPrev & ~31) + 32;
4142 }
4143 }
4144 else
4145 {
4146 /* Search the rest of the bitmap. */
4147 iBit = ASMBitFirstClear(pvBitmap, cBits);
4148 if (iBit >= 0)
4149 return iBit + (iBitPrev & ~31);
4150 }
4151 return iBit;
4152}
4153#endif
4154
4155
4156/**
4157 * Finds the first set bit in a bitmap.
4158 *
4159 * @returns Index of the first set bit.
4160 * @returns -1 if no set bit was found.
4161 * @param pvBitmap Pointer to the bitmap.
4162 * @param cBits The number of bits in the bitmap. Multiple of 32.
4163 */
4164#if RT_INLINE_ASM_EXTERNAL
4165DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
4166#else
4167DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
4168{
4169 if (cBits)
4170 {
4171 int32_t iBit;
4172# if RT_INLINE_ASM_GNU_STYLE
4173 RTCCUINTREG uEAX, uECX, uEDI;
4174 cBits = RT_ALIGN_32(cBits, 32);
4175 __asm__ __volatile__("repe; scasl\n\t"
4176 "je 1f\n\t"
4177# ifdef RT_ARCH_AMD64
4178 "lea -4(%%rdi), %%rdi\n\t"
4179 "movl (%%rdi), %%eax\n\t"
4180 "subq %5, %%rdi\n\t"
4181# else
4182 "lea -4(%%edi), %%edi\n\t"
4183 "movl (%%edi), %%eax\n\t"
4184 "subl %5, %%edi\n\t"
4185# endif
4186 "shll $3, %%edi\n\t"
4187 "bsfl %%eax, %%edx\n\t"
4188 "addl %%edi, %%edx\n\t"
4189 "1:\t\n"
4190 : "=d" (iBit),
4191 "=&c" (uECX),
4192 "=&D" (uEDI),
4193 "=&a" (uEAX)
4194 : "0" (0xffffffff),
4195 "mr" (pvBitmap),
4196 "1" (cBits >> 5),
4197 "2" (pvBitmap),
4198 "3" (0));
4199# else
4200 cBits = RT_ALIGN_32(cBits, 32);
4201 __asm
4202 {
4203# ifdef RT_ARCH_AMD64
4204 mov rdi, [pvBitmap]
4205 mov rbx, rdi
4206# else
4207 mov edi, [pvBitmap]
4208 mov ebx, edi
4209# endif
4210 mov edx, 0ffffffffh
4211 xor eax, eax
4212 mov ecx, [cBits]
4213 shr ecx, 5
4214 repe scasd
4215 je done
4216# ifdef RT_ARCH_AMD64
4217 lea rdi, [rdi - 4]
4218 mov eax, [rdi]
4219 sub rdi, rbx
4220# else
4221 lea edi, [edi - 4]
4222 mov eax, [edi]
4223 sub edi, ebx
4224# endif
4225 shl edi, 3
4226 bsf edx, eax
4227 add edx, edi
4228 done:
4229 mov [iBit], edx
4230 }
4231# endif
4232 return iBit;
4233 }
4234 return -1;
4235}
4236#endif
4237
4238
4239/**
4240 * Finds the next set bit in a bitmap.
4241 *
4242 * @returns Index of the next set bit.
4243 * @returns -1 if no set bit was found.
4244 * @param pvBitmap Pointer to the bitmap.
4245 * @param cBits The number of bits in the bitmap. Multiple of 32.
4246 * @param iBitPrev The bit returned from the last search.
4247 * The search will start at iBitPrev + 1.
4248 */
4249#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4250DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4251#else
4252DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4253{
4254 int iBit = ++iBitPrev & 31;
4255 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4256 cBits -= iBitPrev & ~31;
4257 if (iBit)
4258 {
4259 /* inspect the first dword. */
4260 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
4261# if RT_INLINE_ASM_USES_INTRIN
4262 unsigned long ulBit = 0;
4263 if (_BitScanForward(&ulBit, u32))
4264 return ulBit + iBitPrev;
4265 iBit = -1;
4266# else
4267# if RT_INLINE_ASM_GNU_STYLE
4268 __asm__ __volatile__("bsf %1, %0\n\t"
4269 "jnz 1f\n\t"
4270 "movl $-1, %0\n\t"
4271 "1:\n\t"
4272 : "=r" (iBit)
4273 : "r" (u32));
4274# else
4275 __asm
4276 {
4277 mov edx, u32
4278 bsf eax, edx
4279 jnz done
4280 mov eax, 0ffffffffh
4281 done:
4282 mov [iBit], eax
4283 }
4284# endif
4285 if (iBit >= 0)
4286 return iBit + iBitPrev;
4287# endif
4288 /* Search the rest of the bitmap, if there is anything. */
4289 if (cBits > 32)
4290 {
4291 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4292 if (iBit >= 0)
4293 return iBit + (iBitPrev & ~31) + 32;
4294 }
4295
4296 }
4297 else
4298 {
4299 /* Search the rest of the bitmap. */
4300 iBit = ASMBitFirstSet(pvBitmap, cBits);
4301 if (iBit >= 0)
4302 return iBit + (iBitPrev & ~31);
4303 }
4304 return iBit;
4305}
4306#endif
4307
4308
4309/**
4310 * Finds the first bit which is set in the given 32-bit integer.
4311 * Bits are numbered from 1 (least significant) to 32.
4312 *
4313 * @returns index [1..32] of the first set bit.
4314 * @returns 0 if all bits are cleared.
4315 * @param u32 Integer to search for set bits.
4316 * @remark Similar to ffs() in BSD.
4317 */
4318DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4319{
4320# if RT_INLINE_ASM_USES_INTRIN
4321 unsigned long iBit;
4322 if (_BitScanForward(&iBit, u32))
4323 iBit++;
4324 else
4325 iBit = 0;
4326# elif RT_INLINE_ASM_GNU_STYLE
4327 uint32_t iBit;
4328 __asm__ __volatile__("bsf %1, %0\n\t"
4329 "jnz 1f\n\t"
4330 "xorl %0, %0\n\t"
4331 "jmp 2f\n"
4332 "1:\n\t"
4333 "incl %0\n"
4334 "2:\n\t"
4335 : "=r" (iBit)
4336 : "rm" (u32));
4337# else
4338 uint32_t iBit;
4339 _asm
4340 {
4341 bsf eax, [u32]
4342 jnz found
4343 xor eax, eax
4344 jmp done
4345 found:
4346 inc eax
4347 done:
4348 mov [iBit], eax
4349 }
4350# endif
4351 return iBit;
4352}
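/*
 * Usage sketch (not from the original sources): iterating over the set bits of
 * a mask, lowest first, remembering that the returned index is 1-based.  The
 * helper name is made up.
 */
#if 0 /* example only, not compiled */
DECLINLINE(void) exampleForEachSetBit(uint32_t fPending)
{
    unsigned iBit;
    while ((iBit = ASMBitFirstSetU32(fPending)) != 0)
    {
        unsigned iFlag = iBit - 1;                                  /* convert 1-based index to bit number */
        fPending &= ~(1U << iFlag);                                 /* clear the bit... */
        /* ...and handle flag number iFlag here. */
    }
}
#endif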
4353
4354
4355/**
4356 * Finds the first bit which is set in the given 32-bit integer.
4357 * Bits are numbered from 1 (least significant) to 32.
4358 *
4359 * @returns index [1..32] of the first set bit.
4360 * @returns 0 if all bits are cleared.
4361 * @param i32 Integer to search for set bits.
4362 * @remark Similar to ffs() in BSD.
4363 */
4364DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4365{
4366 return ASMBitFirstSetU32((uint32_t)i32);
4367}
4368
4369
4370/**
4371 * Finds the last bit which is set in the given 32-bit integer.
4372 * Bits are numbered from 1 (least significant) to 32.
4373 *
4374 * @returns index [1..32] of the last set bit.
4375 * @returns 0 if all bits are cleared.
4376 * @param u32 Integer to search for set bits.
4377 * @remark Similar to fls() in BSD.
4378 */
4379DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4380{
4381# if RT_INLINE_ASM_USES_INTRIN
4382 unsigned long iBit;
4383 if (_BitScanReverse(&iBit, u32))
4384 iBit++;
4385 else
4386 iBit = 0;
4387# elif RT_INLINE_ASM_GNU_STYLE
4388 uint32_t iBit;
4389 __asm__ __volatile__("bsrl %1, %0\n\t"
4390 "jnz 1f\n\t"
4391 "xorl %0, %0\n\t"
4392 "jmp 2f\n"
4393 "1:\n\t"
4394 "incl %0\n"
4395 "2:\n\t"
4396 : "=r" (iBit)
4397 : "rm" (u32));
4398# else
4399 uint32_t iBit;
4400 _asm
4401 {
4402 bsr eax, [u32]
4403 jnz found
4404 xor eax, eax
4405 jmp done
4406 found:
4407 inc eax
4408 done:
4409 mov [iBit], eax
4410 }
4411# endif
4412 return iBit;
4413}
4414
4415
4416/**
4417 * Finds the last bit which is set in the given 32-bit integer.
4418 * Bits are numbered from 1 (least significant) to 32.
4419 *
4420 * @returns index [1..32] of the last set bit.
4421 * @returns 0 if all bits are cleared.
4422 * @param i32 Integer to search for set bits.
4423 * @remark Similar to fls() in BSD.
4424 */
4425DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4426{
4427 return ASMBitLastSetU32((uint32_t)i32);
4428}
4429
4430
4431/**
4432 * Reverses the byte order of the given 32-bit integer.
4433 * @param u32 The integer whose byte order should be reversed.
4434 */
4435DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4436{
4437#if RT_INLINE_ASM_USES_INTRIN
4438 u32 = _byteswap_ulong(u32);
4439#elif RT_INLINE_ASM_GNU_STYLE
4440 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4441#else
4442 _asm
4443 {
4444 mov eax, [u32]
4445 bswap eax
4446 mov [u32], eax
4447 }
4448#endif
4449 return u32;
4450}
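/*
 * Usage sketch (not from the original sources): x86 is little endian, so a
 * big-endian field read straight out of a packet buffer must be byte swapped
 * to recover the intended value.  The helper name is made up.
 */
#if 0 /* example only, not compiled */
DECLINLINE(uint32_t) exampleBigEndianToHostU32(uint32_t u32Wire)
{
    return ASMByteSwapU32(u32Wire);                                 /* 0x12345678 <-> 0x78563412 */
}
#endif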
4451
4452/** @} */
4453
4454
4455/** @} */
4456#endif
4457