VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 7606

Last change on this file since 7606 was 7182, checked in by vboxsync, 17 years ago

Worked around ASMAtomicXchgU8 breakage on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 131.5 KB
Line 
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @todo #include <iprt/param.h> for PAGE_SIZE. */
/** @def RT_INLINE_ASM_USES_INTRIN
 * Defined as 1 when compiling with a Microsoft compiler whose _MSC_VER is
 * 1400 or higher; in that case <intrin.h> is included and the intrinsics
 * listed below are enabled.  Otherwise defined as 0.
 */

#ifdef _MSC_VER
# if _MSC_VER >= 1400
#  define RT_INLINE_ASM_USES_INTRIN 1
#  include <intrin.h>
   /* Emit the intrinsics at all optimization levels. */
#  pragma intrinsic(_ReadWriteBarrier)
#  pragma intrinsic(__cpuid)
#  pragma intrinsic(_enable)
#  pragma intrinsic(_disable)
#  pragma intrinsic(__rdtsc)
#  pragma intrinsic(__readmsr)
#  pragma intrinsic(__writemsr)
#  pragma intrinsic(__outbyte)
#  pragma intrinsic(__outword)
#  pragma intrinsic(__outdword)
#  pragma intrinsic(__inbyte)
#  pragma intrinsic(__inword)
#  pragma intrinsic(__indword)
#  pragma intrinsic(__invlpg)
#  pragma intrinsic(__stosd)
#  pragma intrinsic(__stosw)
#  pragma intrinsic(__stosb)
#  pragma intrinsic(__readcr0)
#  pragma intrinsic(__readcr2)
#  pragma intrinsic(__readcr3)
#  pragma intrinsic(__readcr4)
#  pragma intrinsic(__writecr0)
#  pragma intrinsic(__writecr3)
#  pragma intrinsic(__writecr4)
#  pragma intrinsic(_BitScanForward)
#  pragma intrinsic(_BitScanReverse)
#  pragma intrinsic(_bittest)
#  pragma intrinsic(_bittestandset)
#  pragma intrinsic(_bittestandreset)
#  pragma intrinsic(_bittestandcomplement)
#  pragma intrinsic(_byteswap_ushort)
#  pragma intrinsic(_byteswap_ulong)
#  pragma intrinsic(_interlockedbittestandset)
#  pragma intrinsic(_interlockedbittestandreset)
#  pragma intrinsic(_InterlockedAnd)
#  pragma intrinsic(_InterlockedOr)
#  pragma intrinsic(_InterlockedIncrement)
#  pragma intrinsic(_InterlockedDecrement)
#  pragma intrinsic(_InterlockedExchange)
#  pragma intrinsic(_InterlockedExchangeAdd)
#  pragma intrinsic(_InterlockedCompareExchange)
#  pragma intrinsic(_InterlockedCompareExchange64)
#  ifdef RT_ARCH_AMD64
    /* Intrinsics that are only enabled for AMD64 builds. */
#   pragma intrinsic(__stosq)
#   pragma intrinsic(__readcr8)
#   pragma intrinsic(__writecr8)
#   pragma intrinsic(_byteswap_uint64)
#   pragma intrinsic(_InterlockedExchange64)
#  endif
# endif
#endif
/* Any other compiler (or an older MSC) gets the explicit 0 so that plain
   '#if RT_INLINE_ASM_USES_INTRIN' tests work everywhere. */
#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
97
98
99
/** @defgroup grp_asm   ASM - Assembly Routines
 * @ingroup grp_rt
 *
 * @remarks The difference between ordered and unordered atomic operations is that
 *          the former will complete outstanding reads and writes before continuing,
 *          while the latter doesn't make any promises about the order. Ordered
 *          operations don't, it seems, make any 100% promise with regard to whether
 *          the operation will complete before any subsequent memory access.
 *          (Please correct this if it is wrong.)
 *
 *          ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
 *          are unordered (note the Uo).
 *
 * @{
 */
115
/** @def RT_INLINE_ASM_EXTERNAL
 * Defined as 1 if the compiler does not support inline assembly.
 * The ASM* functions will then be implemented in an external .asm file.
 *
 * The value is 1 only for the combination Microsoft compiler + AMD64 target
 * and 0 in every other configuration.
 *
 * @remark  At the present time it's unconfirmed whether or not Microsoft skipped
 *          inline assembly in their AMD64 compiler.
 */
#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
# define RT_INLINE_ASM_EXTERNAL 1
#else
# define RT_INLINE_ASM_EXTERNAL 0
#endif
128
/** @def RT_INLINE_ASM_GNU_STYLE
 * Defined as 1 if the compiler understands GNU style inline assembly.
 * Every compiler other than Microsoft's is assumed to do so; for _MSC_VER
 * builds the value is 0.
 */
#if defined(_MSC_VER)
# define RT_INLINE_ASM_GNU_STYLE 0
#else
# define RT_INLINE_ASM_GNU_STYLE 1
#endif
137
138
/** @todo find a more proper place for this structure? */
#pragma pack(1)
/**
 * IDTR - buffer layout used by ASMGetIDTR() and ASMSetIDTR().
 *
 * Packed to 1 byte so that pIdt follows cbIdt directly, without padding.
 */
typedef struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
} RTIDTR, *PRTIDTR;
#pragma pack()
150
#pragma pack(1)
/**
 * GDTR - buffer layout used by ASMGetGDTR().
 *
 * Packed to 1 byte so that pGdt follows cbGdt directly, without padding.
 */
typedef struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
} RTGDTR, *PRTGDTR;
#pragma pack()
161
162
/** @def ASMReturnAddress
 * Gets the return address of the current (or calling if you like) function or method.
 *
 * Maps to the _ReturnAddress() intrinsic on Microsoft compilers and to
 * __builtin_return_address(0) on GNU compatible ones (also used when doxygen
 * is running); any other compiler is rejected at compile time.
 */
#ifdef _MSC_VER
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
178
179
180/**
181 * Gets the content of the IDTR CPU register.
182 * @param pIdtr Where to store the IDTR contents.
183 */
184#if RT_INLINE_ASM_EXTERNAL
185DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
186#else
187DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
188{
189# if RT_INLINE_ASM_GNU_STYLE
190 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
191# else
192 __asm
193 {
194# ifdef RT_ARCH_AMD64
195 mov rax, [pIdtr]
196 sidt [rax]
197# else
198 mov eax, [pIdtr]
199 sidt [eax]
200# endif
201 }
202# endif
203}
204#endif
205
206
207/**
208 * Sets the content of the IDTR CPU register.
209 * @param pIdtr Where to load the IDTR contents from
210 */
211#if RT_INLINE_ASM_EXTERNAL
212DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
213#else
214DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
215{
216# if RT_INLINE_ASM_GNU_STYLE
217 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
218# else
219 __asm
220 {
221# ifdef RT_ARCH_AMD64
222 mov rax, [pIdtr]
223 lidt [rax]
224# else
225 mov eax, [pIdtr]
226 lidt [eax]
227# endif
228 }
229# endif
230}
231#endif
232
233
234/**
235 * Gets the content of the GDTR CPU register.
236 * @param pGdtr Where to store the GDTR contents.
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
240#else
241DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pGdtr]
250 sgdt [rax]
251# else
252 mov eax, [pGdtr]
253 sgdt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260/**
261 * Get the cs register.
262 * @returns cs.
263 */
264#if RT_INLINE_ASM_EXTERNAL
265DECLASM(RTSEL) ASMGetCS(void);
266#else
267DECLINLINE(RTSEL) ASMGetCS(void)
268{
269 RTSEL SelCS;
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
272# else
273 __asm
274 {
275 mov ax, cs
276 mov [SelCS], ax
277 }
278# endif
279 return SelCS;
280}
281#endif
282
283
284/**
285 * Get the DS register.
286 * @returns DS.
287 */
288#if RT_INLINE_ASM_EXTERNAL
289DECLASM(RTSEL) ASMGetDS(void);
290#else
291DECLINLINE(RTSEL) ASMGetDS(void)
292{
293 RTSEL SelDS;
294# if RT_INLINE_ASM_GNU_STYLE
295 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
296# else
297 __asm
298 {
299 mov ax, ds
300 mov [SelDS], ax
301 }
302# endif
303 return SelDS;
304}
305#endif
306
307
308/**
309 * Get the ES register.
310 * @returns ES.
311 */
312#if RT_INLINE_ASM_EXTERNAL
313DECLASM(RTSEL) ASMGetES(void);
314#else
315DECLINLINE(RTSEL) ASMGetES(void)
316{
317 RTSEL SelES;
318# if RT_INLINE_ASM_GNU_STYLE
319 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
320# else
321 __asm
322 {
323 mov ax, es
324 mov [SelES], ax
325 }
326# endif
327 return SelES;
328}
329#endif
330
331
332/**
333 * Get the FS register.
334 * @returns FS.
335 */
336#if RT_INLINE_ASM_EXTERNAL
337DECLASM(RTSEL) ASMGetFS(void);
338#else
339DECLINLINE(RTSEL) ASMGetFS(void)
340{
341 RTSEL SelFS;
342# if RT_INLINE_ASM_GNU_STYLE
343 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
344# else
345 __asm
346 {
347 mov ax, fs
348 mov [SelFS], ax
349 }
350# endif
351 return SelFS;
352}
353# endif
354
355
356/**
357 * Get the GS register.
358 * @returns GS.
359 */
360#if RT_INLINE_ASM_EXTERNAL
361DECLASM(RTSEL) ASMGetGS(void);
362#else
363DECLINLINE(RTSEL) ASMGetGS(void)
364{
365 RTSEL SelGS;
366# if RT_INLINE_ASM_GNU_STYLE
367 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
368# else
369 __asm
370 {
371 mov ax, gs
372 mov [SelGS], ax
373 }
374# endif
375 return SelGS;
376}
377#endif
378
379
380/**
381 * Get the SS register.
382 * @returns SS.
383 */
384#if RT_INLINE_ASM_EXTERNAL
385DECLASM(RTSEL) ASMGetSS(void);
386#else
387DECLINLINE(RTSEL) ASMGetSS(void)
388{
389 RTSEL SelSS;
390# if RT_INLINE_ASM_GNU_STYLE
391 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
392# else
393 __asm
394 {
395 mov ax, ss
396 mov [SelSS], ax
397 }
398# endif
399 return SelSS;
400}
401#endif
402
403
404/**
405 * Get the TR register.
406 * @returns TR.
407 */
408#if RT_INLINE_ASM_EXTERNAL
409DECLASM(RTSEL) ASMGetTR(void);
410#else
411DECLINLINE(RTSEL) ASMGetTR(void)
412{
413 RTSEL SelTR;
414# if RT_INLINE_ASM_GNU_STYLE
415 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
416# else
417 __asm
418 {
419 str ax
420 mov [SelTR], ax
421 }
422# endif
423 return SelTR;
424}
425#endif
426
427
428/**
429 * Get the [RE]FLAGS register.
430 * @returns [RE]FLAGS.
431 */
432#if RT_INLINE_ASM_EXTERNAL
433DECLASM(RTCCUINTREG) ASMGetFlags(void);
434#else
435DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
436{
437 RTCCUINTREG uFlags;
438# if RT_INLINE_ASM_GNU_STYLE
439# ifdef RT_ARCH_AMD64
440 __asm__ __volatile__("pushfq\n\t"
441 "popq %0\n\t"
442 : "=g" (uFlags));
443# else
444 __asm__ __volatile__("pushfl\n\t"
445 "popl %0\n\t"
446 : "=g" (uFlags));
447# endif
448# else
449 __asm
450 {
451# ifdef RT_ARCH_AMD64
452 pushfq
453 pop [uFlags]
454# else
455 pushfd
456 pop [uFlags]
457# endif
458 }
459# endif
460 return uFlags;
461}
462#endif
463
464
465/**
466 * Set the [RE]FLAGS register.
467 * @param uFlags The new [RE]FLAGS value.
468 */
469#if RT_INLINE_ASM_EXTERNAL
470DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
471#else
472DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
473{
474# if RT_INLINE_ASM_GNU_STYLE
475# ifdef RT_ARCH_AMD64
476 __asm__ __volatile__("pushq %0\n\t"
477 "popfq\n\t"
478 : : "g" (uFlags));
479# else
480 __asm__ __volatile__("pushl %0\n\t"
481 "popfl\n\t"
482 : : "g" (uFlags));
483# endif
484# else
485 __asm
486 {
487# ifdef RT_ARCH_AMD64
488 push [uFlags]
489 popfq
490# else
491 push [uFlags]
492 popfd
493# endif
494 }
495# endif
496}
497#endif
498
499
500/**
501 * Gets the content of the CPU timestamp counter register.
502 *
503 * @returns TSC.
504 */
505#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
506DECLASM(uint64_t) ASMReadTSC(void);
507#else
508DECLINLINE(uint64_t) ASMReadTSC(void)
509{
510 RTUINT64U u;
511# if RT_INLINE_ASM_GNU_STYLE
512 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
513# else
514# if RT_INLINE_ASM_USES_INTRIN
515 u.u = __rdtsc();
516# else
517 __asm
518 {
519 rdtsc
520 mov [u.s.Lo], eax
521 mov [u.s.Hi], edx
522 }
523# endif
524# endif
525 return u.u;
526}
527#endif
528
529
530/**
531 * Performs the cpuid instruction returning all registers.
532 *
533 * @param uOperator CPUID operation (eax).
534 * @param pvEAX Where to store eax.
535 * @param pvEBX Where to store ebx.
536 * @param pvECX Where to store ecx.
537 * @param pvEDX Where to store edx.
538 * @remark We're using void pointers to ease the use of special bitfield structures and such.
539 */
540#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
541DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
542#else
543DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
544{
545# if RT_INLINE_ASM_GNU_STYLE
546# ifdef RT_ARCH_AMD64
547 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
548 __asm__ ("cpuid\n\t"
549 : "=a" (uRAX),
550 "=b" (uRBX),
551 "=c" (uRCX),
552 "=d" (uRDX)
553 : "0" (uOperator));
554 *(uint32_t *)pvEAX = (uint32_t)uRAX;
555 *(uint32_t *)pvEBX = (uint32_t)uRBX;
556 *(uint32_t *)pvECX = (uint32_t)uRCX;
557 *(uint32_t *)pvEDX = (uint32_t)uRDX;
558# else
559 __asm__ ("xchgl %%ebx, %1\n\t"
560 "cpuid\n\t"
561 "xchgl %%ebx, %1\n\t"
562 : "=a" (*(uint32_t *)pvEAX),
563 "=r" (*(uint32_t *)pvEBX),
564 "=c" (*(uint32_t *)pvECX),
565 "=d" (*(uint32_t *)pvEDX)
566 : "0" (uOperator));
567# endif
568
569# elif RT_INLINE_ASM_USES_INTRIN
570 int aInfo[4];
571 __cpuid(aInfo, uOperator);
572 *(uint32_t *)pvEAX = aInfo[0];
573 *(uint32_t *)pvEBX = aInfo[1];
574 *(uint32_t *)pvECX = aInfo[2];
575 *(uint32_t *)pvEDX = aInfo[3];
576
577# else
578 uint32_t uEAX;
579 uint32_t uEBX;
580 uint32_t uECX;
581 uint32_t uEDX;
582 __asm
583 {
584 push ebx
585 mov eax, [uOperator]
586 cpuid
587 mov [uEAX], eax
588 mov [uEBX], ebx
589 mov [uECX], ecx
590 mov [uEDX], edx
591 pop ebx
592 }
593 *(uint32_t *)pvEAX = uEAX;
594 *(uint32_t *)pvEBX = uEBX;
595 *(uint32_t *)pvECX = uECX;
596 *(uint32_t *)pvEDX = uEDX;
597# endif
598}
599#endif
600
601
602/**
603 * Performs the cpuid instruction returning all registers.
604 * Some subfunctions of cpuid take ECX as additional parameter (currently known for EAX=4)
605 *
606 * @param uOperator CPUID operation (eax).
607 * @param uIdxECX ecx index
608 * @param pvEAX Where to store eax.
609 * @param pvEBX Where to store ebx.
610 * @param pvECX Where to store ecx.
611 * @param pvEDX Where to store edx.
612 * @remark We're using void pointers to ease the use of special bitfield structures and such.
613 */
614#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
615DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
616#else
617DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
618{
619# if RT_INLINE_ASM_GNU_STYLE
620# ifdef RT_ARCH_AMD64
621 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
622 __asm__ ("cpuid\n\t"
623 : "=a" (uRAX),
624 "=b" (uRBX),
625 "=c" (uRCX),
626 "=d" (uRDX)
627 : "0" (uOperator),
628 "2" (uIdxECX));
629 *(uint32_t *)pvEAX = (uint32_t)uRAX;
630 *(uint32_t *)pvEBX = (uint32_t)uRBX;
631 *(uint32_t *)pvECX = (uint32_t)uRCX;
632 *(uint32_t *)pvEDX = (uint32_t)uRDX;
633# else
634 __asm__ ("xchgl %%ebx, %1\n\t"
635 "cpuid\n\t"
636 "xchgl %%ebx, %1\n\t"
637 : "=a" (*(uint32_t *)pvEAX),
638 "=r" (*(uint32_t *)pvEBX),
639 "=c" (*(uint32_t *)pvECX),
640 "=d" (*(uint32_t *)pvEDX)
641 : "0" (uOperator),
642 "2" (uIdxECX));
643# endif
644
645# elif RT_INLINE_ASM_USES_INTRIN
646 int aInfo[4];
647 /* ??? another intrinsic ??? */
648 __cpuid(aInfo, uOperator);
649 *(uint32_t *)pvEAX = aInfo[0];
650 *(uint32_t *)pvEBX = aInfo[1];
651 *(uint32_t *)pvECX = aInfo[2];
652 *(uint32_t *)pvEDX = aInfo[3];
653
654# else
655 uint32_t uEAX;
656 uint32_t uEBX;
657 uint32_t uECX;
658 uint32_t uEDX;
659 __asm
660 {
661 push ebx
662 mov eax, [uOperator]
663 mov ecx, [uIdxECX]
664 cpuid
665 mov [uEAX], eax
666 mov [uEBX], ebx
667 mov [uECX], ecx
668 mov [uEDX], edx
669 pop ebx
670 }
671 *(uint32_t *)pvEAX = uEAX;
672 *(uint32_t *)pvEBX = uEBX;
673 *(uint32_t *)pvECX = uECX;
674 *(uint32_t *)pvEDX = uEDX;
675# endif
676}
677#endif
678
679
680/**
681 * Performs the cpuid instruction returning ecx and edx.
682 *
683 * @param uOperator CPUID operation (eax).
684 * @param pvECX Where to store ecx.
685 * @param pvEDX Where to store edx.
686 * @remark We're using void pointers to ease the use of special bitfield structures and such.
687 */
688#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
689DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
690#else
691DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
692{
693 uint32_t uEBX;
694 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
695}
696#endif
697
698
699/**
700 * Performs the cpuid instruction returning edx.
701 *
702 * @param uOperator CPUID operation (eax).
703 * @returns EDX after cpuid operation.
704 */
705#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
706DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
707#else
708DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
709{
710 RTCCUINTREG xDX;
711# if RT_INLINE_ASM_GNU_STYLE
712# ifdef RT_ARCH_AMD64
713 RTCCUINTREG uSpill;
714 __asm__ ("cpuid"
715 : "=a" (uSpill),
716 "=d" (xDX)
717 : "0" (uOperator)
718 : "rbx", "rcx");
719# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
720 __asm__ ("push %%ebx\n\t"
721 "cpuid\n\t"
722 "pop %%ebx\n\t"
723 : "=a" (uOperator),
724 "=d" (xDX)
725 : "0" (uOperator)
726 : "ecx");
727# else
728 __asm__ ("cpuid"
729 : "=a" (uOperator),
730 "=d" (xDX)
731 : "0" (uOperator)
732 : "ebx", "ecx");
733# endif
734
735# elif RT_INLINE_ASM_USES_INTRIN
736 int aInfo[4];
737 __cpuid(aInfo, uOperator);
738 xDX = aInfo[3];
739
740# else
741 __asm
742 {
743 push ebx
744 mov eax, [uOperator]
745 cpuid
746 mov [xDX], edx
747 pop ebx
748 }
749# endif
750 return (uint32_t)xDX;
751}
752#endif
753
754
755/**
756 * Performs the cpuid instruction returning ecx.
757 *
758 * @param uOperator CPUID operation (eax).
759 * @returns ECX after cpuid operation.
760 */
761#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
762DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
763#else
764DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
765{
766 RTCCUINTREG xCX;
767# if RT_INLINE_ASM_GNU_STYLE
768# ifdef RT_ARCH_AMD64
769 RTCCUINTREG uSpill;
770 __asm__ ("cpuid"
771 : "=a" (uSpill),
772 "=c" (xCX)
773 : "0" (uOperator)
774 : "rbx", "rdx");
775# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
776 __asm__ ("push %%ebx\n\t"
777 "cpuid\n\t"
778 "pop %%ebx\n\t"
779 : "=a" (uOperator),
780 "=c" (xCX)
781 : "0" (uOperator)
782 : "edx");
783# else
784 __asm__ ("cpuid"
785 : "=a" (uOperator),
786 "=c" (xCX)
787 : "0" (uOperator)
788 : "ebx", "edx");
789
790# endif
791
792# elif RT_INLINE_ASM_USES_INTRIN
793 int aInfo[4];
794 __cpuid(aInfo, uOperator);
795 xCX = aInfo[2];
796
797# else
798 __asm
799 {
800 push ebx
801 mov eax, [uOperator]
802 cpuid
803 mov [xCX], ecx
804 pop ebx
805 }
806# endif
807 return (uint32_t)xCX;
808}
809#endif
810
811
812/**
813 * Checks if the current CPU supports CPUID.
814 *
815 * @returns true if CPUID is supported.
816 */
817DECLINLINE(bool) ASMHasCpuId(void)
818{
819#ifdef RT_ARCH_AMD64
820 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
821#else /* !RT_ARCH_AMD64 */
822 bool fRet = false;
823# if RT_INLINE_ASM_GNU_STYLE
824 uint32_t u1;
825 uint32_t u2;
826 __asm__ ("pushf\n\t"
827 "pop %1\n\t"
828 "mov %1, %2\n\t"
829 "xorl $0x200000, %1\n\t"
830 "push %1\n\t"
831 "popf\n\t"
832 "pushf\n\t"
833 "pop %1\n\t"
834 "cmpl %1, %2\n\t"
835 "setne %0\n\t"
836 "push %2\n\t"
837 "popf\n\t"
838 : "=m" (fRet), "=r" (u1), "=r" (u2));
839# else
840 __asm
841 {
842 pushfd
843 pop eax
844 mov ebx, eax
845 xor eax, 0200000h
846 push eax
847 popfd
848 pushfd
849 pop eax
850 cmp eax, ebx
851 setne fRet
852 push ebx
853 popfd
854 }
855# endif
856 return fRet;
857#endif /* !RT_ARCH_AMD64 */
858}
859
860
861/**
862 * Gets the APIC ID of the current CPU.
863 *
864 * @returns the APIC ID.
865 */
866#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
867DECLASM(uint8_t) ASMGetApicId(void);
868#else
869DECLINLINE(uint8_t) ASMGetApicId(void)
870{
871 RTCCUINTREG xBX;
872# if RT_INLINE_ASM_GNU_STYLE
873# ifdef RT_ARCH_AMD64
874 RTCCUINTREG uSpill;
875 __asm__ ("cpuid"
876 : "=a" (uSpill),
877 "=b" (xBX)
878 : "0" (1)
879 : "rcx", "rdx");
880# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
881 RTCCUINTREG uSpill;
882 __asm__ ("mov %%ebx,%1\n\t"
883 "cpuid\n\t"
884 "xchgl %%ebx,%1\n\t"
885 : "=a" (uSpill),
886 "=r" (xBX)
887 : "0" (1)
888 : "ecx", "edx");
889# else
890 RTCCUINTREG uSpill;
891 __asm__ ("cpuid"
892 : "=a" (uSpill),
893 "=b" (xBX)
894 : "0" (1)
895 : "ecx", "edx");
896# endif
897
898# elif RT_INLINE_ASM_USES_INTRIN
899 int aInfo[4];
900 __cpuid(aInfo, 1);
901 xBX = aInfo[1];
902
903# else
904 __asm
905 {
906 push ebx
907 mov eax, 1
908 cpuid
909 mov [xBX], ebx
910 pop ebx
911 }
912# endif
913 return (uint8_t)(xBX >> 24);
914}
915#endif
916
917/**
918 * Get cr0.
919 * @returns cr0.
920 */
921#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
922DECLASM(RTCCUINTREG) ASMGetCR0(void);
923#else
924DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
925{
926 RTCCUINTREG uCR0;
927# if RT_INLINE_ASM_USES_INTRIN
928 uCR0 = __readcr0();
929
930# elif RT_INLINE_ASM_GNU_STYLE
931# ifdef RT_ARCH_AMD64
932 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
933# else
934 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
935# endif
936# else
937 __asm
938 {
939# ifdef RT_ARCH_AMD64
940 mov rax, cr0
941 mov [uCR0], rax
942# else
943 mov eax, cr0
944 mov [uCR0], eax
945# endif
946 }
947# endif
948 return uCR0;
949}
950#endif
951
952
953/**
954 * Sets the CR0 register.
955 * @param uCR0 The new CR0 value.
956 */
957#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
958DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
959#else
960DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
961{
962# if RT_INLINE_ASM_USES_INTRIN
963 __writecr0(uCR0);
964
965# elif RT_INLINE_ASM_GNU_STYLE
966# ifdef RT_ARCH_AMD64
967 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
968# else
969 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
970# endif
971# else
972 __asm
973 {
974# ifdef RT_ARCH_AMD64
975 mov rax, [uCR0]
976 mov cr0, rax
977# else
978 mov eax, [uCR0]
979 mov cr0, eax
980# endif
981 }
982# endif
983}
984#endif
985
986
987/**
988 * Get cr2.
989 * @returns cr2.
990 */
991#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
992DECLASM(RTCCUINTREG) ASMGetCR2(void);
993#else
994DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
995{
996 RTCCUINTREG uCR2;
997# if RT_INLINE_ASM_USES_INTRIN
998 uCR2 = __readcr2();
999
1000# elif RT_INLINE_ASM_GNU_STYLE
1001# ifdef RT_ARCH_AMD64
1002 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
1003# else
1004 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
1005# endif
1006# else
1007 __asm
1008 {
1009# ifdef RT_ARCH_AMD64
1010 mov rax, cr2
1011 mov [uCR2], rax
1012# else
1013 mov eax, cr2
1014 mov [uCR2], eax
1015# endif
1016 }
1017# endif
1018 return uCR2;
1019}
1020#endif
1021
1022
1023/**
1024 * Sets the CR2 register.
1025 * @param uCR2 The new CR0 value.
1026 */
1027#if RT_INLINE_ASM_EXTERNAL
1028DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1029#else
1030DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1031{
1032# if RT_INLINE_ASM_GNU_STYLE
1033# ifdef RT_ARCH_AMD64
1034 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1035# else
1036 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1037# endif
1038# else
1039 __asm
1040 {
1041# ifdef RT_ARCH_AMD64
1042 mov rax, [uCR2]
1043 mov cr2, rax
1044# else
1045 mov eax, [uCR2]
1046 mov cr2, eax
1047# endif
1048 }
1049# endif
1050}
1051#endif
1052
1053
1054/**
1055 * Get cr3.
1056 * @returns cr3.
1057 */
1058#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1059DECLASM(RTCCUINTREG) ASMGetCR3(void);
1060#else
1061DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1062{
1063 RTCCUINTREG uCR3;
1064# if RT_INLINE_ASM_USES_INTRIN
1065 uCR3 = __readcr3();
1066
1067# elif RT_INLINE_ASM_GNU_STYLE
1068# ifdef RT_ARCH_AMD64
1069 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
1070# else
1071 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
1072# endif
1073# else
1074 __asm
1075 {
1076# ifdef RT_ARCH_AMD64
1077 mov rax, cr3
1078 mov [uCR3], rax
1079# else
1080 mov eax, cr3
1081 mov [uCR3], eax
1082# endif
1083 }
1084# endif
1085 return uCR3;
1086}
1087#endif
1088
1089
1090/**
1091 * Sets the CR3 register.
1092 *
1093 * @param uCR3 New CR3 value.
1094 */
1095#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1096DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1097#else
1098DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1099{
1100# if RT_INLINE_ASM_USES_INTRIN
1101 __writecr3(uCR3);
1102
1103# elif RT_INLINE_ASM_GNU_STYLE
1104# ifdef RT_ARCH_AMD64
1105 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1106# else
1107 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1108# endif
1109# else
1110 __asm
1111 {
1112# ifdef RT_ARCH_AMD64
1113 mov rax, [uCR3]
1114 mov cr3, rax
1115# else
1116 mov eax, [uCR3]
1117 mov cr3, eax
1118# endif
1119 }
1120# endif
1121}
1122#endif
1123
1124
1125/**
1126 * Reloads the CR3 register.
1127 */
1128#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1129DECLASM(void) ASMReloadCR3(void);
1130#else
1131DECLINLINE(void) ASMReloadCR3(void)
1132{
1133# if RT_INLINE_ASM_USES_INTRIN
1134 __writecr3(__readcr3());
1135
1136# elif RT_INLINE_ASM_GNU_STYLE
1137 RTCCUINTREG u;
1138# ifdef RT_ARCH_AMD64
1139 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1140 "movq %0, %%cr3\n\t"
1141 : "=r" (u));
1142# else
1143 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1144 "movl %0, %%cr3\n\t"
1145 : "=r" (u));
1146# endif
1147# else
1148 __asm
1149 {
1150# ifdef RT_ARCH_AMD64
1151 mov rax, cr3
1152 mov cr3, rax
1153# else
1154 mov eax, cr3
1155 mov cr3, eax
1156# endif
1157 }
1158# endif
1159}
1160#endif
1161
1162
1163/**
1164 * Get cr4.
1165 * @returns cr4.
1166 */
1167#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1168DECLASM(RTCCUINTREG) ASMGetCR4(void);
1169#else
1170DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1171{
1172 RTCCUINTREG uCR4;
1173# if RT_INLINE_ASM_USES_INTRIN
1174 uCR4 = __readcr4();
1175
1176# elif RT_INLINE_ASM_GNU_STYLE
1177# ifdef RT_ARCH_AMD64
1178 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1179# else
1180 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1181# endif
1182# else
1183 __asm
1184 {
1185# ifdef RT_ARCH_AMD64
1186 mov rax, cr4
1187 mov [uCR4], rax
1188# else
1189 push eax /* just in case */
1190 /*mov eax, cr4*/
1191 _emit 0x0f
1192 _emit 0x20
1193 _emit 0xe0
1194 mov [uCR4], eax
1195 pop eax
1196# endif
1197 }
1198# endif
1199 return uCR4;
1200}
1201#endif
1202
1203
1204/**
1205 * Sets the CR4 register.
1206 *
1207 * @param uCR4 New CR4 value.
1208 */
1209#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1210DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1211#else
1212DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1213{
1214# if RT_INLINE_ASM_USES_INTRIN
1215 __writecr4(uCR4);
1216
1217# elif RT_INLINE_ASM_GNU_STYLE
1218# ifdef RT_ARCH_AMD64
1219 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1220# else
1221 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1222# endif
1223# else
1224 __asm
1225 {
1226# ifdef RT_ARCH_AMD64
1227 mov rax, [uCR4]
1228 mov cr4, rax
1229# else
1230 mov eax, [uCR4]
1231 _emit 0x0F
1232 _emit 0x22
1233 _emit 0xE0 /* mov cr4, eax */
1234# endif
1235 }
1236# endif
1237}
1238#endif
1239
1240
1241/**
1242 * Get cr8.
1243 * @returns cr8.
1244 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1245 */
1246#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1247DECLASM(RTCCUINTREG) ASMGetCR8(void);
1248#else
1249DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1250{
1251# ifdef RT_ARCH_AMD64
1252 RTCCUINTREG uCR8;
1253# if RT_INLINE_ASM_USES_INTRIN
1254 uCR8 = __readcr8();
1255
1256# elif RT_INLINE_ASM_GNU_STYLE
1257 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1258# else
1259 __asm
1260 {
1261 mov rax, cr8
1262 mov [uCR8], rax
1263 }
1264# endif
1265 return uCR8;
1266# else /* !RT_ARCH_AMD64 */
1267 return 0;
1268# endif /* !RT_ARCH_AMD64 */
1269}
1270#endif
1271
1272
1273/**
1274 * Enables interrupts (EFLAGS.IF).
1275 */
1276#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1277DECLASM(void) ASMIntEnable(void);
1278#else
1279DECLINLINE(void) ASMIntEnable(void)
1280{
1281# if RT_INLINE_ASM_GNU_STYLE
1282 __asm("sti\n");
1283# elif RT_INLINE_ASM_USES_INTRIN
1284 _enable();
1285# else
1286 __asm sti
1287# endif
1288}
1289#endif
1290
1291
1292/**
1293 * Disables interrupts (!EFLAGS.IF).
1294 */
1295#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1296DECLASM(void) ASMIntDisable(void);
1297#else
1298DECLINLINE(void) ASMIntDisable(void)
1299{
1300# if RT_INLINE_ASM_GNU_STYLE
1301 __asm("cli\n");
1302# elif RT_INLINE_ASM_USES_INTRIN
1303 _disable();
1304# else
1305 __asm cli
1306# endif
1307}
1308#endif
1309
1310
1311/**
1312 * Disables interrupts and returns previous xFLAGS.
1313 */
1314#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1315DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1316#else
1317DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1318{
1319 RTCCUINTREG xFlags;
1320# if RT_INLINE_ASM_GNU_STYLE
1321# ifdef RT_ARCH_AMD64
1322 __asm__ __volatile__("pushfq\n\t"
1323 "cli\n\t"
1324 "popq %0\n\t"
1325 : "=rm" (xFlags));
1326# else
1327 __asm__ __volatile__("pushfl\n\t"
1328 "cli\n\t"
1329 "popl %0\n\t"
1330 : "=rm" (xFlags));
1331# endif
1332# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1333 xFlags = ASMGetFlags();
1334 _disable();
1335# else
1336 __asm {
1337 pushfd
1338 cli
1339 pop [xFlags]
1340 }
1341# endif
1342 return xFlags;
1343}
1344#endif
1345
1346
1347/**
1348 * Reads a machine specific register.
1349 *
1350 * @returns Register content.
1351 * @param uRegister Register to read.
1352 */
1353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1354DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1355#else
1356DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1357{
1358 RTUINT64U u;
1359# if RT_INLINE_ASM_GNU_STYLE
1360 __asm__ ("rdmsr\n\t"
1361 : "=a" (u.s.Lo),
1362 "=d" (u.s.Hi)
1363 : "c" (uRegister));
1364
1365# elif RT_INLINE_ASM_USES_INTRIN
1366 u.u = __readmsr(uRegister);
1367
1368# else
1369 __asm
1370 {
1371 mov ecx, [uRegister]
1372 rdmsr
1373 mov [u.s.Lo], eax
1374 mov [u.s.Hi], edx
1375 }
1376# endif
1377
1378 return u.u;
1379}
1380#endif
1381
1382
1383/**
1384 * Writes a machine specific register.
1385 *
1386 * @returns Register content.
1387 * @param uRegister Register to write to.
1388 * @param u64Val Value to write.
1389 */
1390#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1391DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1392#else
1393DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1394{
1395 RTUINT64U u;
1396
1397 u.u = u64Val;
1398# if RT_INLINE_ASM_GNU_STYLE
1399 __asm__ __volatile__("wrmsr\n\t"
1400 ::"a" (u.s.Lo),
1401 "d" (u.s.Hi),
1402 "c" (uRegister));
1403
1404# elif RT_INLINE_ASM_USES_INTRIN
1405 __writemsr(uRegister, u.u);
1406
1407# else
1408 __asm
1409 {
1410 mov ecx, [uRegister]
1411 mov edx, [u.s.Hi]
1412 mov eax, [u.s.Lo]
1413 wrmsr
1414 }
1415# endif
1416}
1417#endif
1418
1419
1420/**
1421 * Reads low part of a machine specific register.
1422 *
1423 * @returns Register content.
1424 * @param uRegister Register to read.
1425 */
1426#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1427DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1428#else
1429DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1430{
1431 uint32_t u32;
1432# if RT_INLINE_ASM_GNU_STYLE
1433 __asm__ ("rdmsr\n\t"
1434 : "=a" (u32)
1435 : "c" (uRegister)
1436 : "edx");
1437
1438# elif RT_INLINE_ASM_USES_INTRIN
1439 u32 = (uint32_t)__readmsr(uRegister);
1440
1441#else
1442 __asm
1443 {
1444 mov ecx, [uRegister]
1445 rdmsr
1446 mov [u32], eax
1447 }
1448# endif
1449
1450 return u32;
1451}
1452#endif
1453
1454
1455/**
1456 * Reads high part of a machine specific register.
1457 *
1458 * @returns Register content.
1459 * @param uRegister Register to read.
1460 */
1461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1462DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1463#else
1464DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1465{
1466 uint32_t u32;
1467# if RT_INLINE_ASM_GNU_STYLE
1468 __asm__ ("rdmsr\n\t"
1469 : "=d" (u32)
1470 : "c" (uRegister)
1471 : "eax");
1472
1473# elif RT_INLINE_ASM_USES_INTRIN
1474 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1475
1476# else
1477 __asm
1478 {
1479 mov ecx, [uRegister]
1480 rdmsr
1481 mov [u32], edx
1482 }
1483# endif
1484
1485 return u32;
1486}
1487#endif
1488
1489
1490/**
1491 * Gets dr7.
1492 *
1493 * @returns dr7.
1494 */
1495#if RT_INLINE_ASM_EXTERNAL
1496DECLASM(RTCCUINTREG) ASMGetDR7(void);
1497#else
1498DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1499{
1500 RTCCUINTREG uDR7;
1501# if RT_INLINE_ASM_GNU_STYLE
1502# ifdef RT_ARCH_AMD64
1503 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1504# else
1505 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1506# endif
1507# else
1508 __asm
1509 {
1510# ifdef RT_ARCH_AMD64
1511 mov rax, dr7
1512 mov [uDR7], rax
1513# else
1514 mov eax, dr7
1515 mov [uDR7], eax
1516# endif
1517 }
1518# endif
1519 return uDR7;
1520}
1521#endif
1522
1523
1524/**
1525 * Gets dr6.
1526 *
1527 * @returns dr6.
1528 */
1529#if RT_INLINE_ASM_EXTERNAL
1530DECLASM(RTCCUINTREG) ASMGetDR6(void);
1531#else
1532DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1533{
1534 RTCCUINTREG uDR6;
1535# if RT_INLINE_ASM_GNU_STYLE
1536# ifdef RT_ARCH_AMD64
1537 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1538# else
1539 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1540# endif
1541# else
1542 __asm
1543 {
1544# ifdef RT_ARCH_AMD64
1545 mov rax, dr6
1546 mov [uDR6], rax
1547# else
1548 mov eax, dr6
1549 mov [uDR6], eax
1550# endif
1551 }
1552# endif
1553 return uDR6;
1554}
1555#endif
1556
1557
1558/**
1559 * Reads and clears DR6.
1560 *
1561 * @returns DR6.
1562 */
1563#if RT_INLINE_ASM_EXTERNAL
1564DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1565#else
1566DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1567{
1568 RTCCUINTREG uDR6;
1569# if RT_INLINE_ASM_GNU_STYLE
1570 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1571# ifdef RT_ARCH_AMD64
1572 __asm__ ("movq %%dr6, %0\n\t"
1573 "movq %1, %%dr6\n\t"
1574 : "=r" (uDR6)
1575 : "r" (uNewValue));
1576# else
1577 __asm__ ("movl %%dr6, %0\n\t"
1578 "movl %1, %%dr6\n\t"
1579 : "=r" (uDR6)
1580 : "r" (uNewValue));
1581# endif
1582# else
1583 __asm
1584 {
1585# ifdef RT_ARCH_AMD64
1586 mov rax, dr6
1587 mov [uDR6], rax
1588 mov rcx, rax
1589 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1590 mov dr6, rcx
1591# else
1592 mov eax, dr6
1593 mov [uDR6], eax
1594 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1595 mov dr6, ecx
1596# endif
1597 }
1598# endif
1599 return uDR6;
1600}
1601#endif
1602
1603
/**
 * Compiler memory barrier.
 *
 * Ensure that the compiler does not use any cached (register/tmp stack) memory
 * values or any outstanding writes when returning from this function.
 *
 * This function must be used if non-volatile data is modified by a
 * device or the VMM. Typical cases are port access, MMIO access,
 * trapping instruction, etc.
 *
 * Depending on the compiler this is either a macro or an inline function;
 * all three variants are invoked as ASMCompilerBarrier().
 */
#if RT_INLINE_ASM_GNU_STYLE
/* Empty asm statement carrying a "memory" clobber. */
# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
#elif RT_INLINE_ASM_USES_INTRIN
/* The _ReadWriteBarrier() compiler intrinsic. */
# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
/* Fallback: an inline function containing nothing but an empty __asm block. */
DECLINLINE(void) ASMCompilerBarrier(void)
{
    __asm
    {
    }
}
#endif
1626
1627
/**
 * Writes a 8-bit unsigned integer to an I/O port, ordered.
 *
 * @param   Port    I/O port to write to.
 * @param   u8      8-bit integer to write.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
#else
DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* "Nd": port as an 8-bit immediate or in DX; "a": value in AL. */
    __asm__ __volatile__("outb %b1, %w0\n\t"
                         :: "Nd" (Port),
                            "a" (u8));

# elif RT_INLINE_ASM_USES_INTRIN
    __outbyte(Port, u8);

# else
    __asm
    {
        mov dx, [Port]
        mov al, [u8]
        out dx, al
    }
# endif
}
#endif
1657
1658
/**
 * Gets a 8-bit unsigned integer from an I/O port, ordered.
 *
 * @returns 8-bit integer read from the port.
 * @param   Port    I/O port to read from.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
#else
DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
{
    uint8_t u8;
# if RT_INLINE_ASM_GNU_STYLE
    /* "=a": result arrives in AL; "Nd": port as an 8-bit immediate or in DX. */
    __asm__ __volatile__("inb %w1, %b0\n\t"
                         : "=a" (u8)
                         : "Nd" (Port));

# elif RT_INLINE_ASM_USES_INTRIN
    u8 = __inbyte(Port);

# else
    __asm
    {
        mov dx, [Port]
        in al, dx
        mov [u8], al
    }
# endif
    return u8;
}
#endif
1690
1691
/**
 * Writes a 16-bit unsigned integer to an I/O port, ordered.
 *
 * @param   Port    I/O port to write to.
 * @param   u16     16-bit integer to write.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
#else
DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* "Nd": port as an 8-bit immediate or in DX; "a": value in AX. */
    __asm__ __volatile__("outw %w1, %w0\n\t"
                         :: "Nd" (Port),
                            "a" (u16));

# elif RT_INLINE_ASM_USES_INTRIN
    __outword(Port, u16);

# else
    __asm
    {
        mov dx, [Port]
        mov ax, [u16]
        out dx, ax
    }
# endif
}
#endif
1721
1722
/**
 * Gets a 16-bit unsigned integer from an I/O port, ordered.
 *
 * @returns 16-bit integer read from the port.
 * @param   Port    I/O port to read from.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
#else
DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
{
    uint16_t u16;
# if RT_INLINE_ASM_GNU_STYLE
    /* "=a": result arrives in AX; "Nd": port as an 8-bit immediate or in DX. */
    __asm__ __volatile__("inw %w1, %w0\n\t"
                         : "=a" (u16)
                         : "Nd" (Port));

# elif RT_INLINE_ASM_USES_INTRIN
    u16 = __inword(Port);

# else
    __asm
    {
        mov dx, [Port]
        in ax, dx
        mov [u16], ax
    }
# endif
    return u16;
}
#endif
1754
1755
/**
 * Writes a 32-bit unsigned integer to an I/O port, ordered.
 *
 * @param   Port    I/O port to write to.
 * @param   u32     32-bit integer to write.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
#else
DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* "Nd": port as an 8-bit immediate or in DX; "a": value in EAX. */
    __asm__ __volatile__("outl %1, %w0\n\t"
                         :: "Nd" (Port),
                            "a" (u32));

# elif RT_INLINE_ASM_USES_INTRIN
    __outdword(Port, u32);

# else
    __asm
    {
        mov dx, [Port]
        mov eax, [u32]
        out dx, eax
    }
# endif
}
#endif
1785
1786
/**
 * Gets a 32-bit unsigned integer from an I/O port, ordered.
 *
 * @returns 32-bit integer read from the port.
 * @param   Port    I/O port to read from.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
#else
DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
{
    uint32_t u32;
# if RT_INLINE_ASM_GNU_STYLE
    /* "=a": result arrives in EAX; "Nd": port as an 8-bit immediate or in DX. */
    __asm__ __volatile__("inl %w1, %0\n\t"
                         : "=a" (u32)
                         : "Nd" (Port));

# elif RT_INLINE_ASM_USES_INTRIN
    u32 = __indword(Port);

# else
    __asm
    {
        mov dx, [Port]
        in eax, dx
        mov [u32], eax
    }
# endif
    return u32;
}
#endif
1818
1819/** @todo string i/o */
1820
1821
1822/**
1823 * Atomically Exchange an unsigned 8-bit value, ordered.
1824 *
1825 * @returns Current *pu8 value
1826 * @param pu8 Pointer to the 8-bit variable to update.
1827 * @param u8 The 8-bit value to assign to *pu8.
1828 */
1829#if RT_INLINE_ASM_EXTERNAL
1830DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1831#else
1832DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1833{
1834# if RT_INLINE_ASM_GNU_STYLE
1835 __asm__ __volatile__("xchgb %0, %1\n\t"
1836 : "=m" (*pu8),
1837 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
1838 : "1" (u8));
1839# else
1840 __asm
1841 {
1842# ifdef RT_ARCH_AMD64
1843 mov rdx, [pu8]
1844 mov al, [u8]
1845 xchg [rdx], al
1846 mov [u8], al
1847# else
1848 mov edx, [pu8]
1849 mov al, [u8]
1850 xchg [edx], al
1851 mov [u8], al
1852# endif
1853 }
1854# endif
1855 return u8;
1856}
1857#endif
1858
1859
1860/**
1861 * Atomically Exchange a signed 8-bit value, ordered.
1862 *
1863 * @returns Current *pu8 value
1864 * @param pi8 Pointer to the 8-bit variable to update.
1865 * @param i8 The 8-bit value to assign to *pi8.
1866 */
1867DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1868{
1869 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1870}
1871
1872
1873/**
1874 * Atomically Exchange a bool value, ordered.
1875 *
1876 * @returns Current *pf value
1877 * @param pf Pointer to the 8-bit variable to update.
1878 * @param f The 8-bit value to assign to *pi8.
1879 */
1880DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1881{
1882#ifdef _MSC_VER
1883 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1884#else
1885 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1886#endif
1887}
1888
1889
1890/**
1891 * Atomically Exchange an unsigned 16-bit value, ordered.
1892 *
1893 * @returns Current *pu16 value
1894 * @param pu16 Pointer to the 16-bit variable to update.
1895 * @param u16 The 16-bit value to assign to *pu16.
1896 */
1897#if RT_INLINE_ASM_EXTERNAL
1898DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1899#else
1900DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1901{
1902# if RT_INLINE_ASM_GNU_STYLE
1903 __asm__ __volatile__("xchgw %0, %1\n\t"
1904 : "=m" (*pu16),
1905 "=r" (u16)
1906 : "1" (u16));
1907# else
1908 __asm
1909 {
1910# ifdef RT_ARCH_AMD64
1911 mov rdx, [pu16]
1912 mov ax, [u16]
1913 xchg [rdx], ax
1914 mov [u16], ax
1915# else
1916 mov edx, [pu16]
1917 mov ax, [u16]
1918 xchg [edx], ax
1919 mov [u16], ax
1920# endif
1921 }
1922# endif
1923 return u16;
1924}
1925#endif
1926
1927
1928/**
1929 * Atomically Exchange a signed 16-bit value, ordered.
1930 *
1931 * @returns Current *pu16 value
1932 * @param pi16 Pointer to the 16-bit variable to update.
1933 * @param i16 The 16-bit value to assign to *pi16.
1934 */
1935DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1936{
1937 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1938}
1939
1940
1941/**
1942 * Atomically Exchange an unsigned 32-bit value, ordered.
1943 *
1944 * @returns Current *pu32 value
1945 * @param pu32 Pointer to the 32-bit variable to update.
1946 * @param u32 The 32-bit value to assign to *pu32.
1947 */
1948#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1949DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1950#else
1951DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1952{
1953# if RT_INLINE_ASM_GNU_STYLE
1954 __asm__ __volatile__("xchgl %0, %1\n\t"
1955 : "=m" (*pu32),
1956 "=r" (u32)
1957 : "1" (u32));
1958
1959# elif RT_INLINE_ASM_USES_INTRIN
1960 u32 = _InterlockedExchange((long *)pu32, u32);
1961
1962# else
1963 __asm
1964 {
1965# ifdef RT_ARCH_AMD64
1966 mov rdx, [pu32]
1967 mov eax, u32
1968 xchg [rdx], eax
1969 mov [u32], eax
1970# else
1971 mov edx, [pu32]
1972 mov eax, u32
1973 xchg [edx], eax
1974 mov [u32], eax
1975# endif
1976 }
1977# endif
1978 return u32;
1979}
1980#endif
1981
1982
1983/**
1984 * Atomically Exchange a signed 32-bit value, ordered.
1985 *
1986 * @returns Current *pu32 value
1987 * @param pi32 Pointer to the 32-bit variable to update.
1988 * @param i32 The 32-bit value to assign to *pi32.
1989 */
1990DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1991{
1992 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1993}
1994
1995
1996/**
1997 * Atomically Exchange an unsigned 64-bit value, ordered.
1998 *
1999 * @returns Current *pu64 value
2000 * @param pu64 Pointer to the 64-bit variable to update.
2001 * @param u64 The 64-bit value to assign to *pu64.
2002 */
2003#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2004DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2005#else
2006DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2007{
2008# if defined(RT_ARCH_AMD64)
2009# if RT_INLINE_ASM_USES_INTRIN
2010 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2011
2012# elif RT_INLINE_ASM_GNU_STYLE
2013 __asm__ __volatile__("xchgq %0, %1\n\t"
2014 : "=m" (*pu64),
2015 "=r" (u64)
2016 : "1" (u64));
2017# else
2018 __asm
2019 {
2020 mov rdx, [pu64]
2021 mov rax, [u64]
2022 xchg [rdx], rax
2023 mov [u64], rax
2024 }
2025# endif
2026# else /* !RT_ARCH_AMD64 */
2027# if RT_INLINE_ASM_GNU_STYLE
2028# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2029 uint32_t u32 = (uint32_t)u64;
2030 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2031 "xchgl %%ebx, %3\n\t"
2032 "1:\n\t"
2033 "lock; cmpxchg8b (%5)\n\t"
2034 "jnz 1b\n\t"
2035 "xchgl %%ebx, %3\n\t"
2036 /*"xchgl %%esi, %5\n\t"*/
2037 : "=A" (u64),
2038 "=m" (*pu64)
2039 : "0" (*pu64),
2040 "m" ( u32 ),
2041 "c" ( (uint32_t)(u64 >> 32) ),
2042 "S" (pu64) );
2043# else /* !PIC */
2044 __asm__ __volatile__("1:\n\t"
2045 "lock; cmpxchg8b %1\n\t"
2046 "jnz 1b\n\t"
2047 : "=A" (u64),
2048 "=m" (*pu64)
2049 : "0" (*pu64),
2050 "b" ( (uint32_t)u64 ),
2051 "c" ( (uint32_t)(u64 >> 32) ));
2052# endif
2053# else
2054 __asm
2055 {
2056 mov ebx, dword ptr [u64]
2057 mov ecx, dword ptr [u64 + 4]
2058 mov edi, pu64
2059 mov eax, dword ptr [edi]
2060 mov edx, dword ptr [edi + 4]
2061 retry:
2062 lock cmpxchg8b [edi]
2063 jnz retry
2064 mov dword ptr [u64], eax
2065 mov dword ptr [u64 + 4], edx
2066 }
2067# endif
2068# endif /* !RT_ARCH_AMD64 */
2069 return u64;
2070}
2071#endif
2072
2073
2074/**
2075 * Atomically Exchange an signed 64-bit value, ordered.
2076 *
2077 * @returns Current *pi64 value
2078 * @param pi64 Pointer to the 64-bit variable to update.
2079 * @param i64 The 64-bit value to assign to *pi64.
2080 */
2081DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2082{
2083 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2084}
2085
2086
#ifdef RT_ARCH_AMD64
/**
 * Atomically Exchange an unsigned 128-bit value, ordered.
 *
 * @returns Current *pu128.
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to assign to *pu128.
 *
 * @remark  We cannot really assume that any hardware supports this. Nor do I have
 *          GAS support for it. So, for the time being we'll BREAK the atomic
 *          bit of this function and use two 64-bit exchanges instead.
 *          The low and the high half are therefore exchanged one after the
 *          other and the 128-bit value as a whole is NOT swapped atomically.
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    /* The condition is hard-wired to true; the CPUID test it replaced is kept
       in the trailing comment. */
    if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
    {
        /** @todo this is clumsy code */
        RTUINT128U u128Ret;
        u128Ret.u = u128;
        /* Exchange the low half first, then the high half. */
        u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
        u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
        return u128Ret.u;
    }
    /* Everything below is compiled out.
       NOTE(review): the GNU-style variant still says cmpxchg8b and the MSC
       variant uses dword ptr with 64-bit registers - this needs reworking
       before it can be enabled. */
#if 0 /* later? */
    else
    {
# if RT_INLINE_ASM_GNU_STYLE
        __asm__ __volatile__("1:\n\t"
                             "lock; cmpxchg8b %1\n\t"
                             "jnz 1b\n\t"
                             : "=A" (u128),
                               "=m" (*pu128)
                             : "0" (*pu128),
                               "b" ( (uint64_t)u128 ),
                               "c" ( (uint64_t)(u128 >> 64) ));
# else
        __asm
        {
            mov rbx, dword ptr [u128]
            mov rcx, dword ptr [u128 + 8]
            mov rdi, pu128
            mov rax, dword ptr [rdi]
            mov rdx, dword ptr [rdi + 8]
        retry:
            lock cmpxchg16b [rdi]
            jnz retry
            mov dword ptr [u128], rax
            mov dword ptr [u128 + 8], rdx
        }
# endif
    }
    return u128;
#endif
}
# endif
#endif /* RT_ARCH_AMD64 */
2146
2147
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function;
 * any other size triggers an assertion.  The previous value is discarded.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
2165
2166
2167/**
2168 * Atomically Exchange a pointer value, ordered.
2169 *
2170 * @returns Current *ppv value
2171 * @param ppv Pointer to the pointer variable to update.
2172 * @param pv The pointer value to assign to *ppv.
2173 */
2174DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2175{
2176#if ARCH_BITS == 32
2177 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2178#elif ARCH_BITS == 64
2179 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2180#else
2181# error "ARCH_BITS is bogus"
2182#endif
2183}
2184
2185
2186/**
2187 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2188 *
2189 * @returns true if xchg was done.
2190 * @returns false if xchg wasn't done.
2191 *
2192 * @param pu32 Pointer to the value to update.
2193 * @param u32New The new value to assigned to *pu32.
2194 * @param u32Old The old value to *pu32 compare with.
2195 */
2196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2197DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2198#else
2199DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2200{
2201# if RT_INLINE_ASM_GNU_STYLE
2202 uint8_t u8Ret;
2203 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2204 "setz %1\n\t"
2205 : "=m" (*pu32),
2206 "=qm" (u8Ret)
2207 : "r" (u32New),
2208 "a" (u32Old));
2209 return (bool)u8Ret;
2210
2211# elif RT_INLINE_ASM_USES_INTRIN
2212 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2213
2214# else
2215 uint32_t u32Ret;
2216 __asm
2217 {
2218# ifdef RT_ARCH_AMD64
2219 mov rdx, [pu32]
2220# else
2221 mov edx, [pu32]
2222# endif
2223 mov eax, [u32Old]
2224 mov ecx, [u32New]
2225# ifdef RT_ARCH_AMD64
2226 lock cmpxchg [rdx], ecx
2227# else
2228 lock cmpxchg [edx], ecx
2229# endif
2230 setz al
2231 movzx eax, al
2232 mov [u32Ret], eax
2233 }
2234 return !!u32Ret;
2235# endif
2236}
2237#endif
2238
2239
2240/**
2241 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2242 *
2243 * @returns true if xchg was done.
2244 * @returns false if xchg wasn't done.
2245 *
2246 * @param pi32 Pointer to the value to update.
2247 * @param i32New The new value to assigned to *pi32.
2248 * @param i32Old The old value to *pi32 compare with.
2249 */
2250DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2251{
2252 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2253}
2254
2255
2256/**
2257 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2258 *
2259 * @returns true if xchg was done.
2260 * @returns false if xchg wasn't done.
2261 *
2262 * @param pu64 Pointer to the 64-bit variable to update.
2263 * @param u64New The 64-bit value to assign to *pu64.
2264 * @param u64Old The value to compare with.
2265 */
2266#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2267DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2268#else
2269DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2270{
2271# if RT_INLINE_ASM_USES_INTRIN
2272 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2273
2274# elif defined(RT_ARCH_AMD64)
2275# if RT_INLINE_ASM_GNU_STYLE
2276 uint8_t u8Ret;
2277 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2278 "setz %1\n\t"
2279 : "=m" (*pu64),
2280 "=qm" (u8Ret)
2281 : "r" (u64New),
2282 "a" (u64Old));
2283 return (bool)u8Ret;
2284# else
2285 bool fRet;
2286 __asm
2287 {
2288 mov rdx, [pu32]
2289 mov rax, [u64Old]
2290 mov rcx, [u64New]
2291 lock cmpxchg [rdx], rcx
2292 setz al
2293 mov [fRet], al
2294 }
2295 return fRet;
2296# endif
2297# else /* !RT_ARCH_AMD64 */
2298 uint32_t u32Ret;
2299# if RT_INLINE_ASM_GNU_STYLE
2300# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2301 uint32_t u32 = (uint32_t)u64New;
2302 uint32_t u32Spill;
2303 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2304 "lock; cmpxchg8b (%6)\n\t"
2305 "setz %%al\n\t"
2306 "xchgl %%ebx, %4\n\t"
2307 "movzbl %%al, %%eax\n\t"
2308 : "=a" (u32Ret),
2309 "=d" (u32Spill),
2310 "=m" (*pu64)
2311 : "A" (u64Old),
2312 "m" ( u32 ),
2313 "c" ( (uint32_t)(u64New >> 32) ),
2314 "S" (pu64) );
2315# else /* !PIC */
2316 uint32_t u32Spill;
2317 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2318 "setz %%al\n\t"
2319 "movzbl %%al, %%eax\n\t"
2320 : "=a" (u32Ret),
2321 "=d" (u32Spill),
2322 "=m" (*pu64)
2323 : "A" (u64Old),
2324 "b" ( (uint32_t)u64New ),
2325 "c" ( (uint32_t)(u64New >> 32) ));
2326# endif
2327 return (bool)u32Ret;
2328# else
2329 __asm
2330 {
2331 mov ebx, dword ptr [u64New]
2332 mov ecx, dword ptr [u64New + 4]
2333 mov edi, [pu64]
2334 mov eax, dword ptr [u64Old]
2335 mov edx, dword ptr [u64Old + 4]
2336 lock cmpxchg8b [edi]
2337 setz al
2338 movzx eax, al
2339 mov dword ptr [u32Ret], eax
2340 }
2341 return !!u32Ret;
2342# endif
2343# endif /* !RT_ARCH_AMD64 */
2344}
2345#endif
2346
2347
2348/**
2349 * Atomically Compare and exchange a signed 64-bit value, ordered.
2350 *
2351 * @returns true if xchg was done.
2352 * @returns false if xchg wasn't done.
2353 *
2354 * @param pi64 Pointer to the 64-bit variable to update.
2355 * @param i64 The 64-bit value to assign to *pu64.
2356 * @param i64Old The value to compare with.
2357 */
2358DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2359{
2360 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2361}
2362
2363
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 32 or 64-bit compare-and-exchange
 * function; any other size triggers an assertion and sets fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assigned to *pu.
 * @param   uOld    The old value to *pu compare with.
 * @param   fRc     Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
2385
2386
2387/**
2388 * Atomically Compare and Exchange a pointer value, ordered.
2389 *
2390 * @returns true if xchg was done.
2391 * @returns false if xchg wasn't done.
2392 *
2393 * @param ppv Pointer to the value to update.
2394 * @param pvNew The new value to assigned to *ppv.
2395 * @param pvOld The old value to *ppv compare with.
2396 */
2397DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2398{
2399#if ARCH_BITS == 32
2400 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2401#elif ARCH_BITS == 64
2402 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2403#else
2404# error "ARCH_BITS is bogus"
2405#endif
2406}
2407
2408
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
 * passes back old value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assigned to *pu32.
 * @param   u32Old      The old value to *pu32 compare with.
 * @param   pu32Old     Pointer store the old value at.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
#else
DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
{
# if RT_INLINE_ASM_GNU_STYLE
    uint8_t u8Ret;
    /* EAX goes in holding u32Old and comes out as the value stored to
       *pu32Old; setz captures the zero flag left by cmpxchg. */
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz %1\n\t"
                         : "=m" (*pu32),
                           "=qm" (u8Ret),
                           "=a" (*pu32Old)
                         : "r" (u32New),
                           "a" (u32Old));
    return (bool)u8Ret;

# elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value is stored to *pu32Old and then compared
       with u32Old. */
    return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;

# else
    uint32_t u32Ret;
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov rdx, [pu32]
#  else
        mov edx, [pu32]
#  endif
        mov eax, [u32Old]
        mov ecx, [u32New]
        /* EAX is stored to *pu32Old with plain mov instructions before setz
           reads the zero flag produced by cmpxchg. */
#  ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
        mov rdx, [pu32Old]
        mov [rdx], eax
#  else
        lock cmpxchg [edx], ecx
        mov edx, [pu32Old]
        mov [edx], eax
#  endif
        setz al
        movzx eax, al
        mov [u32Ret], eax
    }
    return !!u32Ret;
# endif
}
#endif
2468
2469
2470/**
2471 * Atomically Compare and Exchange a signed 32-bit value, additionally
2472 * passes back old value, ordered.
2473 *
2474 * @returns true if xchg was done.
2475 * @returns false if xchg wasn't done.
2476 *
2477 * @param pi32 Pointer to the value to update.
2478 * @param i32New The new value to assigned to *pi32.
2479 * @param i32Old The old value to *pi32 compare with.
2480 * @param pi32Old Pointer store the old value at.
2481 */
2482DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2483{
2484 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2485}
2486
2487
2488/**
2489 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2490 * passing back old value, ordered.
2491 *
2492 * @returns true if xchg was done.
2493 * @returns false if xchg wasn't done.
2494 *
2495 * @param pu64 Pointer to the 64-bit variable to update.
2496 * @param u64New The 64-bit value to assign to *pu64.
2497 * @param u64Old The value to compare with.
2498 * @param pu64Old Pointer store the old value at.
2499 */
2500#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2501DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2502#else
2503DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2504{
2505# if RT_INLINE_ASM_USES_INTRIN
2506 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2507
2508# elif defined(RT_ARCH_AMD64)
2509# if RT_INLINE_ASM_GNU_STYLE
2510 uint8_t u8Ret;
2511 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2512 "setz %1\n\t"
2513 : "=m" (*pu64),
2514 "=qm" (u8Ret),
2515 "=a" (*pu64Old)
2516 : "r" (u64New),
2517 "a" (u64Old));
2518 return (bool)u8Ret;
2519# else
2520 bool fRet;
2521 __asm
2522 {
2523 mov rdx, [pu32]
2524 mov rax, [u64Old]
2525 mov rcx, [u64New]
2526 lock cmpxchg [rdx], rcx
2527 mov rdx, [pu64Old]
2528 mov [rdx], rax
2529 setz al
2530 mov [fRet], al
2531 }
2532 return fRet;
2533# endif
2534# else /* !RT_ARCH_AMD64 */
2535# if RT_INLINE_ASM_GNU_STYLE
2536 uint64_t u64Ret;
2537# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2538 /* NB: this code uses a memory clobber description, because the clean
2539 * solution with an output value for *pu64 makes gcc run out of registers.
2540 * This will cause suboptimal code, and anyone with a better solution is
2541 * welcome to improve this. */
2542 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2543 "lock; cmpxchg8b %3\n\t"
2544 "xchgl %%ebx, %1\n\t"
2545 : "=A" (u64Ret)
2546 : "DS" ((uint32_t)u64New),
2547 "c" ((uint32_t)(u64New >> 32)),
2548 "m" (*pu64),
2549 "0" (u64Old)
2550 : "memory" );
2551# else /* !PIC */
2552 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2553 : "=A" (u64Ret),
2554 "=m" (*pu64)
2555 : "b" ((uint32_t)u64New),
2556 "c" ((uint32_t)(u64New >> 32)),
2557 "m" (*pu64),
2558 "0" (u64Old));
2559# endif
2560 *pu64Old = u64Ret;
2561 return u64Ret == u64Old;
2562# else
2563 uint32_t u32Ret;
2564 __asm
2565 {
2566 mov ebx, dword ptr [u64New]
2567 mov ecx, dword ptr [u64New + 4]
2568 mov edi, [pu64]
2569 mov eax, dword ptr [u64Old]
2570 mov edx, dword ptr [u64Old + 4]
2571 lock cmpxchg8b [edi]
2572 mov ebx, [pu64Old]
2573 mov [ebx], eax
2574 setz al
2575 movzx eax, al
2576 add ebx, 4
2577 mov [ebx], edx
2578 mov dword ptr [u32Ret], eax
2579 }
2580 return !!u32Ret;
2581# endif
2582# endif /* !RT_ARCH_AMD64 */
2583}
2584#endif
2585
2586
2587/**
2588 * Atomically Compare and exchange a signed 64-bit value, additionally
2589 * passing back old value, ordered.
2590 *
2591 * @returns true if xchg was done.
2592 * @returns false if xchg wasn't done.
2593 *
2594 * @param pi64 Pointer to the 64-bit variable to update.
2595 * @param i64 The 64-bit value to assign to *pu64.
2596 * @param i64Old The value to compare with.
2597 * @param pi64Old Pointer store the old value at.
2598 */
2599DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2600{
2601 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2602}
2603
2604
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Dispatches on sizeof(*(pu)) to the 32 or 64-bit function; any other size
 * triggers an assertion, sets fRc to false and uOldVal to 0.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assigned to *pu.
 * @param   uOld    The old value to *pu compare with.
 * @param   fRc     Where to store the result.
 * @param   uOldVal Where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                (uOldVal) = 0; \
                break; \
        } \
    } while (0)
2628
2629
2630/**
2631 * Atomically Compare and Exchange a pointer value, additionally
2632 * passing back old value, ordered.
2633 *
2634 * @returns true if xchg was done.
2635 * @returns false if xchg wasn't done.
2636 *
2637 * @param ppv Pointer to the value to update.
2638 * @param pvNew The new value to assigned to *ppv.
2639 * @param pvOld The old value to *ppv compare with.
2640 * @param ppvOld Pointer store the old value at.
2641 */
2642DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2643{
2644#if ARCH_BITS == 32
2645 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2646#elif ARCH_BITS == 64
2647 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2648#else
2649# error "ARCH_BITS is bogus"
2650#endif
2651}
2652
2653
2654/**
2655 * Atomically exchanges and adds to a 32-bit value, ordered.
2656 *
2657 * @returns The old value.
2658 * @param pu32 Pointer to the value.
2659 * @param u32 Number to add.
2660 */
2661#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2662DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2663#else
2664DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2665{
2666# if RT_INLINE_ASM_USES_INTRIN
2667 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2668 return u32;
2669
2670# elif RT_INLINE_ASM_GNU_STYLE
2671 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2672 : "=r" (u32),
2673 "=m" (*pu32)
2674 : "0" (u32)
2675 : "memory");
2676 return u32;
2677# else
2678 __asm
2679 {
2680 mov eax, [u32]
2681# ifdef RT_ARCH_AMD64
2682 mov rdx, [pu32]
2683 lock xadd [rdx], eax
2684# else
2685 mov edx, [pu32]
2686 lock xadd [edx], eax
2687# endif
2688 mov [u32], eax
2689 }
2690 return u32;
2691# endif
2692}
2693#endif
2694
2695
2696/**
2697 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2698 *
2699 * @returns The old value.
2700 * @param pi32 Pointer to the value.
2701 * @param i32 Number to add.
2702 */
2703DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2704{
2705 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2706}
2707
2708
2709/**
2710 * Atomically increment a 32-bit value, ordered.
2711 *
2712 * @returns The new value.
2713 * @param pu32 Pointer to the value to increment.
2714 */
2715#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2716DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2717#else
2718DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2719{
2720 uint32_t u32;
2721# if RT_INLINE_ASM_USES_INTRIN
2722 u32 = _InterlockedIncrement((long *)pu32);
2723 return u32;
2724
2725# elif RT_INLINE_ASM_GNU_STYLE
2726 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2727 : "=r" (u32),
2728 "=m" (*pu32)
2729 : "0" (1)
2730 : "memory");
2731 return u32+1;
2732# else
2733 __asm
2734 {
2735 mov eax, 1
2736# ifdef RT_ARCH_AMD64
2737 mov rdx, [pu32]
2738 lock xadd [rdx], eax
2739# else
2740 mov edx, [pu32]
2741 lock xadd [edx], eax
2742# endif
2743 mov u32, eax
2744 }
2745 return u32+1;
2746# endif
2747}
2748#endif
2749
2750
2751/**
2752 * Atomically increment a signed 32-bit value, ordered.
2753 *
2754 * @returns The new value.
2755 * @param pi32 Pointer to the value to increment.
2756 */
2757DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2758{
2759 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2760}
2761
2762
2763/**
2764 * Atomically decrement an unsigned 32-bit value, ordered.
2765 *
2766 * @returns The new value.
2767 * @param pu32 Pointer to the value to decrement.
2768 */
2769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2770DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2771#else
2772DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2773{
2774 uint32_t u32;
2775# if RT_INLINE_ASM_USES_INTRIN
2776 u32 = _InterlockedDecrement((long *)pu32);
2777 return u32;
2778
2779# elif RT_INLINE_ASM_GNU_STYLE
2780 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2781 : "=r" (u32),
2782 "=m" (*pu32)
2783 : "0" (-1)
2784 : "memory");
2785 return u32-1;
2786# else
2787 __asm
2788 {
2789 mov eax, -1
2790# ifdef RT_ARCH_AMD64
2791 mov rdx, [pu32]
2792 lock xadd [rdx], eax
2793# else
2794 mov edx, [pu32]
2795 lock xadd [edx], eax
2796# endif
2797 mov u32, eax
2798 }
2799 return u32-1;
2800# endif
2801}
2802#endif
2803
2804
2805/**
2806 * Atomically decrement a signed 32-bit value, ordered.
2807 *
2808 * @returns The new value.
2809 * @param pi32 Pointer to the value to decrement.
2810 */
2811DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2812{
2813 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2814}
2815
2816
2817/**
2818 * Atomically Or an unsigned 32-bit value, ordered.
2819 *
2820 * @param pu32 Pointer to the pointer variable to OR u32 with.
2821 * @param u32 The value to OR *pu32 with.
2822 */
2823#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2824DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2825#else
2826DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2827{
2828# if RT_INLINE_ASM_USES_INTRIN
2829 _InterlockedOr((long volatile *)pu32, (long)u32);
2830
2831# elif RT_INLINE_ASM_GNU_STYLE
2832 __asm__ __volatile__("lock; orl %1, %0\n\t"
2833 : "=m" (*pu32)
2834 : "ir" (u32));
2835# else
2836 __asm
2837 {
2838 mov eax, [u32]
2839# ifdef RT_ARCH_AMD64
2840 mov rdx, [pu32]
2841 lock or [rdx], eax
2842# else
2843 mov edx, [pu32]
2844 lock or [edx], eax
2845# endif
2846 }
2847# endif
2848}
2849#endif
2850
2851
2852/**
2853 * Atomically Or a signed 32-bit value, ordered.
2854 *
2855 * @param pi32 Pointer to the pointer variable to OR u32 with.
2856 * @param i32 The value to OR *pu32 with.
2857 */
2858DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2859{
2860 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2861}
2862
2863
2864/**
2865 * Atomically And an unsigned 32-bit value, ordered.
2866 *
2867 * @param pu32 Pointer to the pointer variable to AND u32 with.
2868 * @param u32 The value to AND *pu32 with.
2869 */
2870#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2871DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2872#else
2873DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2874{
2875# if RT_INLINE_ASM_USES_INTRIN
2876 _InterlockedAnd((long volatile *)pu32, u32);
2877
2878# elif RT_INLINE_ASM_GNU_STYLE
2879 __asm__ __volatile__("lock; andl %1, %0\n\t"
2880 : "=m" (*pu32)
2881 : "ir" (u32));
2882# else
2883 __asm
2884 {
2885 mov eax, [u32]
2886# ifdef RT_ARCH_AMD64
2887 mov rdx, [pu32]
2888 lock and [rdx], eax
2889# else
2890 mov edx, [pu32]
2891 lock and [edx], eax
2892# endif
2893 }
2894# endif
2895}
2896#endif
2897
2898
2899/**
2900 * Atomically And a signed 32-bit value, ordered.
2901 *
2902 * @param pi32 Pointer to the pointer variable to AND i32 with.
2903 * @param i32 The value to AND *pi32 with.
2904 */
2905DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2906{
2907 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2908}
2909
2910
2911/**
2912 * Memory fence, waits for any pending writes and reads to complete.
2913 */
2914DECLINLINE(void) ASMMemoryFence(void)
2915{
2916 /** @todo use mfence? check if all cpus we care for support it. */
2917 uint32_t volatile u32;
2918 ASMAtomicXchgU32(&u32, 0);
2919}
2920
2921
2922/**
2923 * Write fence, waits for any pending writes to complete.
2924 */
2925DECLINLINE(void) ASMWriteFence(void)
2926{
2927 /** @todo use sfence? check if all cpus we care for support it. */
2928 ASMMemoryFence();
2929}
2930
2931
2932/**
2933 * Read fence, waits for any pending reads to complete.
2934 */
2935DECLINLINE(void) ASMReadFence(void)
2936{
2937 /** @todo use lfence? check if all cpus we care for support it. */
2938 ASMMemoryFence();
2939}
2940
2941
2942/**
2943 * Atomically reads an unsigned 8-bit value, ordered.
2944 *
2945 * @returns Current *pu8 value
2946 * @param pu8 Pointer to the 8-bit variable to read.
2947 */
2948DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
2949{
2950 ASMMemoryFence();
2951 return *pu8; /* byte reads are atomic on x86 */
2952}
2953
2954
2955/**
2956 * Atomically reads an unsigned 8-bit value, unordered.
2957 *
2958 * @returns Current *pu8 value
2959 * @param pu8 Pointer to the 8-bit variable to read.
2960 */
2961DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
2962{
2963 return *pu8; /* byte reads are atomic on x86 */
2964}
2965
2966
2967/**
2968 * Atomically reads a signed 8-bit value, ordered.
2969 *
2970 * @returns Current *pi8 value
2971 * @param pi8 Pointer to the 8-bit variable to read.
2972 */
2973DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
2974{
2975 ASMMemoryFence();
2976 return *pi8; /* byte reads are atomic on x86 */
2977}
2978
2979
2980/**
2981 * Atomically reads a signed 8-bit value, unordered.
2982 *
2983 * @returns Current *pi8 value
2984 * @param pi8 Pointer to the 8-bit variable to read.
2985 */
2986DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
2987{
2988 return *pi8; /* byte reads are atomic on x86 */
2989}
2990
2991
2992/**
2993 * Atomically reads an unsigned 16-bit value, ordered.
2994 *
2995 * @returns Current *pu16 value
2996 * @param pu16 Pointer to the 16-bit variable to read.
2997 */
2998DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
2999{
3000 ASMMemoryFence();
3001 Assert(!((uintptr_t)pu16 & 1));
3002 return *pu16;
3003}
3004
3005
3006/**
3007 * Atomically reads an unsigned 16-bit value, unordered.
3008 *
3009 * @returns Current *pu16 value
3010 * @param pu16 Pointer to the 16-bit variable to read.
3011 */
3012DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3013{
3014 Assert(!((uintptr_t)pu16 & 1));
3015 return *pu16;
3016}
3017
3018
3019/**
3020 * Atomically reads a signed 16-bit value, ordered.
3021 *
3022 * @returns Current *pi16 value
3023 * @param pi16 Pointer to the 16-bit variable to read.
3024 */
3025DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3026{
3027 ASMMemoryFence();
3028 Assert(!((uintptr_t)pi16 & 1));
3029 return *pi16;
3030}
3031
3032
3033/**
3034 * Atomically reads a signed 16-bit value, unordered.
3035 *
3036 * @returns Current *pi16 value
3037 * @param pi16 Pointer to the 16-bit variable to read.
3038 */
3039DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3040{
3041 Assert(!((uintptr_t)pi16 & 1));
3042 return *pi16;
3043}
3044
3045
3046/**
3047 * Atomically reads an unsigned 32-bit value, ordered.
3048 *
3049 * @returns Current *pu32 value
3050 * @param pu32 Pointer to the 32-bit variable to read.
3051 */
3052DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3053{
3054 ASMMemoryFence();
3055 Assert(!((uintptr_t)pu32 & 3));
3056 return *pu32;
3057}
3058
3059
3060/**
3061 * Atomically reads an unsigned 32-bit value, unordered.
3062 *
3063 * @returns Current *pu32 value
3064 * @param pu32 Pointer to the 32-bit variable to read.
3065 */
3066DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3067{
3068 Assert(!((uintptr_t)pu32 & 3));
3069 return *pu32;
3070}
3071
3072
3073/**
3074 * Atomically reads a signed 32-bit value, ordered.
3075 *
3076 * @returns Current *pi32 value
3077 * @param pi32 Pointer to the 32-bit variable to read.
3078 */
3079DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3080{
3081 ASMMemoryFence();
3082 Assert(!((uintptr_t)pi32 & 3));
3083 return *pi32;
3084}
3085
3086
3087/**
3088 * Atomically reads a signed 32-bit value, unordered.
3089 *
3090 * @returns Current *pi32 value
3091 * @param pi32 Pointer to the 32-bit variable to read.
3092 */
3093DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3094{
3095 Assert(!((uintptr_t)pi32 & 3));
3096 return *pi32;
3097}
3098
3099
3100/**
3101 * Atomically reads an unsigned 64-bit value, ordered.
3102 *
3103 * @returns Current *pu64 value
3104 * @param pu64 Pointer to the 64-bit variable to read.
3105 * The memory pointed to must be writable.
3106 * @remark This will fault if the memory is read-only!
3107 */
3108#if RT_INLINE_ASM_EXTERNAL
3109DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3110#else
3111DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3112{
3113 uint64_t u64;
3114# ifdef RT_ARCH_AMD64
3115# if RT_INLINE_ASM_GNU_STYLE
3116 Assert(!((uintptr_t)pu64 & 7));
3117 __asm__ __volatile__( "mfence\n\t"
3118 "movq %1, %0\n\t"
3119 : "=r" (u64)
3120 : "m" (*pu64));
3121# else
3122 __asm
3123 {
3124 mfence
3125 mov rdx, [pu64]
3126 mov rax, [rdx]
3127 mov [u64], rax
3128 }
3129# endif
3130# else /* !RT_ARCH_AMD64 */
3131# if RT_INLINE_ASM_GNU_STYLE
3132# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3133 uint32_t u32EBX = 0;
3134 Assert(!((uintptr_t)pu64 & 7));
3135 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3136 "lock; cmpxchg8b (%5)\n\t"
3137 "xchgl %%ebx, %3\n\t"
3138 : "=A" (u64),
3139 "=m" (*pu64)
3140 : "0" (0),
3141 "m" (u32EBX),
3142 "c" (0),
3143 "S" (pu64));
3144# else /* !PIC */
3145 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3146 : "=A" (u64),
3147 "=m" (*pu64)
3148 : "0" (0),
3149 "b" (0),
3150 "c" (0));
3151# endif
3152# else
3153 Assert(!((uintptr_t)pu64 & 7));
3154 __asm
3155 {
3156 xor eax, eax
3157 xor edx, edx
3158 mov edi, pu64
3159 xor ecx, ecx
3160 xor ebx, ebx
3161 lock cmpxchg8b [edi]
3162 mov dword ptr [u64], eax
3163 mov dword ptr [u64 + 4], edx
3164 }
3165# endif
3166# endif /* !RT_ARCH_AMD64 */
3167 return u64;
3168}
3169#endif
3170
3171
3172/**
3173 * Atomically reads an unsigned 64-bit value, unordered.
3174 *
3175 * @returns Current *pu64 value
3176 * @param pu64 Pointer to the 64-bit variable to read.
3177 * The memory pointed to must be writable.
3178 * @remark This will fault if the memory is read-only!
3179 */
3180#if RT_INLINE_ASM_EXTERNAL
3181DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3182#else
3183DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3184{
3185 uint64_t u64;
3186# ifdef RT_ARCH_AMD64
3187# if RT_INLINE_ASM_GNU_STYLE
3188 Assert(!((uintptr_t)pu64 & 7));
3189 __asm__ __volatile__("movq %1, %0\n\t"
3190 : "=r" (u64)
3191 : "m" (*pu64));
3192# else
3193 __asm
3194 {
3195 mov rdx, [pu64]
3196 mov rax, [rdx]
3197 mov [u64], rax
3198 }
3199# endif
3200# else /* !RT_ARCH_AMD64 */
3201# if RT_INLINE_ASM_GNU_STYLE
3202# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3203 uint32_t u32EBX = 0;
3204 Assert(!((uintptr_t)pu64 & 7));
3205 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3206 "lock; cmpxchg8b (%5)\n\t"
3207 "xchgl %%ebx, %3\n\t"
3208 : "=A" (u64),
3209 "=m" (*pu64)
3210 : "0" (0),
3211 "m" (u32EBX),
3212 "c" (0),
3213 "S" (pu64));
3214# else /* !PIC */
3215 __asm__ __volatile__("cmpxchg8b %1\n\t"
3216 : "=A" (u64),
3217 "=m" (*pu64)
3218 : "0" (0),
3219 "b" (0),
3220 "c" (0));
3221# endif
3222# else
3223 Assert(!((uintptr_t)pu64 & 7));
3224 __asm
3225 {
3226 xor eax, eax
3227 xor edx, edx
3228 mov edi, pu64
3229 xor ecx, ecx
3230 xor ebx, ebx
3231 lock cmpxchg8b [edi]
3232 mov dword ptr [u64], eax
3233 mov dword ptr [u64 + 4], edx
3234 }
3235# endif
3236# endif /* !RT_ARCH_AMD64 */
3237 return u64;
3238}
3239#endif
3240
3241
3242/**
3243 * Atomically reads a signed 64-bit value, ordered.
3244 *
3245 * @returns Current *pi64 value
3246 * @param pi64 Pointer to the 64-bit variable to read.
3247 * The memory pointed to must be writable.
3248 * @remark This will fault if the memory is read-only!
3249 */
3250DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3251{
3252 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3253}
3254
3255
3256/**
3257 * Atomically reads a signed 64-bit value, unordered.
3258 *
3259 * @returns Current *pi64 value
3260 * @param pi64 Pointer to the 64-bit variable to read.
3261 * The memory pointed to must be writable.
3262 * @remark This will fault if the memory is read-only!
3263 */
3264DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3265{
3266 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3267}
3268
3269
3270/**
3271 * Atomically reads a pointer value, ordered.
3272 *
3273 * @returns Current *pv value
3274 * @param ppv Pointer to the pointer variable to read.
3275 */
3276DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3277{
3278#if ARCH_BITS == 32
3279 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3280#elif ARCH_BITS == 64
3281 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3282#else
3283# error "ARCH_BITS is bogus"
3284#endif
3285}
3286
3287
3288/**
3289 * Atomically reads a pointer value, unordered.
3290 *
3291 * @returns Current *pv value
3292 * @param ppv Pointer to the pointer variable to read.
3293 */
3294DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3295{
3296#if ARCH_BITS == 32
3297 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3298#elif ARCH_BITS == 64
3299 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3300#else
3301# error "ARCH_BITS is bogus"
3302#endif
3303}
3304
3305
3306/**
3307 * Atomically reads a boolean value, ordered.
3308 *
3309 * @returns Current *pf value
3310 * @param pf Pointer to the boolean variable to read.
3311 */
3312DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3313{
3314 ASMMemoryFence();
3315 return *pf; /* byte reads are atomic on x86 */
3316}
3317
3318
3319/**
3320 * Atomically reads a boolean value, unordered.
3321 *
3322 * @returns Current *pf value
3323 * @param pf Pointer to the boolean variable to read.
3324 */
3325DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3326{
3327 return *pf; /* byte reads are atomic on x86 */
3328}
3329
3330
/**
 * Atomically reads a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicReadU8/U16/U32/U64 and stores
 * the result through puRes.  Any other size triggers an assertion and
 * leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3348
3349
/**
 * Atomically reads a value whose size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicUoReadU8/U16/U32/U64 and stores
 * the result through puRes.  Any other size triggers an assertion and
 * leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3367
3368
3369/**
3370 * Atomically writes an unsigned 8-bit value, ordered.
3371 *
3372 * @param pu8 Pointer to the 8-bit variable.
3373 * @param u8 The 8-bit value to assign to *pu8.
3374 */
3375DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3376{
3377 ASMAtomicXchgU8(pu8, u8);
3378}
3379
3380
3381/**
3382 * Atomically writes an unsigned 8-bit value, unordered.
3383 *
3384 * @param pu8 Pointer to the 8-bit variable.
3385 * @param u8 The 8-bit value to assign to *pu8.
3386 */
3387DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3388{
3389 *pu8 = u8; /* byte writes are atomic on x86 */
3390}
3391
3392
3393/**
3394 * Atomically writes a signed 8-bit value, ordered.
3395 *
3396 * @param pi8 Pointer to the 8-bit variable to read.
3397 * @param i8 The 8-bit value to assign to *pi8.
3398 */
3399DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3400{
3401 ASMAtomicXchgS8(pi8, i8);
3402}
3403
3404
3405/**
3406 * Atomically writes a signed 8-bit value, unordered.
3407 *
3408 * @param pi8 Pointer to the 8-bit variable to read.
3409 * @param i8 The 8-bit value to assign to *pi8.
3410 */
3411DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3412{
3413 *pi8 = i8; /* byte writes are atomic on x86 */
3414}
3415
3416
3417/**
3418 * Atomically writes an unsigned 16-bit value, ordered.
3419 *
3420 * @param pu16 Pointer to the 16-bit variable.
3421 * @param u16 The 16-bit value to assign to *pu16.
3422 */
3423DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3424{
3425 ASMAtomicXchgU16(pu16, u16);
3426}
3427
3428
3429/**
3430 * Atomically writes an unsigned 16-bit value, unordered.
3431 *
3432 * @param pu16 Pointer to the 16-bit variable.
3433 * @param u16 The 16-bit value to assign to *pu16.
3434 */
3435DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3436{
3437 Assert(!((uintptr_t)pu16 & 1));
3438 *pu16 = u16;
3439}
3440
3441
3442/**
3443 * Atomically writes a signed 16-bit value, ordered.
3444 *
3445 * @param pi16 Pointer to the 16-bit variable to read.
3446 * @param i16 The 16-bit value to assign to *pi16.
3447 */
3448DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3449{
3450 ASMAtomicXchgS16(pi16, i16);
3451}
3452
3453
3454/**
3455 * Atomically writes a signed 16-bit value, unordered.
3456 *
3457 * @param pi16 Pointer to the 16-bit variable to read.
3458 * @param i16 The 16-bit value to assign to *pi16.
3459 */
3460DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
3461{
3462 Assert(!((uintptr_t)pi16 & 1));
3463 *pi16 = i16;
3464}
3465
3466
3467/**
3468 * Atomically writes an unsigned 32-bit value, ordered.
3469 *
3470 * @param pu32 Pointer to the 32-bit variable.
3471 * @param u32 The 32-bit value to assign to *pu32.
3472 */
3473DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3474{
3475 ASMAtomicXchgU32(pu32, u32);
3476}
3477
3478
3479/**
3480 * Atomically writes an unsigned 32-bit value, unordered.
3481 *
3482 * @param pu32 Pointer to the 32-bit variable.
3483 * @param u32 The 32-bit value to assign to *pu32.
3484 */
3485DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
3486{
3487 Assert(!((uintptr_t)pu32 & 3));
3488 *pu32 = u32;
3489}
3490
3491
3492/**
3493 * Atomically writes a signed 32-bit value, ordered.
3494 *
3495 * @param pi32 Pointer to the 32-bit variable to read.
3496 * @param i32 The 32-bit value to assign to *pi32.
3497 */
3498DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3499{
3500 ASMAtomicXchgS32(pi32, i32);
3501}
3502
3503
3504/**
3505 * Atomically writes a signed 32-bit value, unordered.
3506 *
3507 * @param pi32 Pointer to the 32-bit variable to read.
3508 * @param i32 The 32-bit value to assign to *pi32.
3509 */
3510DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
3511{
3512 Assert(!((uintptr_t)pi32 & 3));
3513 *pi32 = i32;
3514}
3515
3516
3517/**
3518 * Atomically writes an unsigned 64-bit value, ordered.
3519 *
3520 * @param pu64 Pointer to the 64-bit variable.
3521 * @param u64 The 64-bit value to assign to *pu64.
3522 */
3523DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3524{
3525 ASMAtomicXchgU64(pu64, u64);
3526}
3527
3528
3529/**
3530 * Atomically writes an unsigned 64-bit value, unordered.
3531 *
3532 * @param pu64 Pointer to the 64-bit variable.
3533 * @param u64 The 64-bit value to assign to *pu64.
3534 */
3535DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
3536{
3537 Assert(!((uintptr_t)pu64 & 7));
3538#if ARCH_BITS == 64
3539 *pu64 = u64;
3540#else
3541 ASMAtomicXchgU64(pu64, u64);
3542#endif
3543}
3544
3545
3546/**
3547 * Atomically writes a signed 64-bit value, ordered.
3548 *
3549 * @param pi64 Pointer to the 64-bit variable.
3550 * @param i64 The 64-bit value to assign to *pi64.
3551 */
3552DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3553{
3554 ASMAtomicXchgS64(pi64, i64);
3555}
3556
3557
3558/**
3559 * Atomically writes a signed 64-bit value, unordered.
3560 *
3561 * @param pi64 Pointer to the 64-bit variable.
3562 * @param i64 The 64-bit value to assign to *pi64.
3563 */
3564DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
3565{
3566 Assert(!((uintptr_t)pi64 & 7));
3567#if ARCH_BITS == 64
3568 *pi64 = i64;
3569#else
3570 ASMAtomicXchgS64(pi64, i64);
3571#endif
3572}
3573
3574
3575/**
3576 * Atomically writes a boolean value, unordered.
3577 *
3578 * @param pf Pointer to the boolean variable.
3579 * @param f The boolean value to assign to *pf.
3580 */
3581DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
3582{
3583 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
3584}
3585
3586
3587/**
3588 * Atomically writes a boolean value, unordered.
3589 *
3590 * @param pf Pointer to the boolean variable.
3591 * @param f The boolean value to assign to *pf.
3592 */
3593DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
3594{
3595 *pf = f; /* byte writes are atomic on x86 */
3596}
3597
3598
3599/**
3600 * Atomically writes a pointer value, ordered.
3601 *
3602 * @returns Current *pv value
3603 * @param ppv Pointer to the pointer variable.
3604 * @param pv The pointer value to assigne to *ppv.
3605 */
3606DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
3607{
3608#if ARCH_BITS == 32
3609 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3610#elif ARCH_BITS == 64
3611 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3612#else
3613# error "ARCH_BITS is bogus"
3614#endif
3615}
3616
3617
3618/**
3619 * Atomically writes a pointer value, unordered.
3620 *
3621 * @returns Current *pv value
3622 * @param ppv Pointer to the pointer variable.
3623 * @param pv The pointer value to assigne to *ppv.
3624 */
3625DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
3626{
3627#if ARCH_BITS == 32
3628 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3629#elif ARCH_BITS == 64
3630 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3631#else
3632# error "ARCH_BITS is bogus"
3633#endif
3634}
3635
3636
/**
 * Atomically writes a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicWriteU8/U16/U32/U64.  Any other
 * size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3654
/**
 * Atomically writes a value whose size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicUoWriteU8/U16/U32/U64.  Any
 * other size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3672
3673
3674
3675
3676/**
3677 * Invalidate page.
3678 *
3679 * @param pv Address of the page to invalidate.
3680 */
3681#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3682DECLASM(void) ASMInvalidatePage(void *pv);
3683#else
3684DECLINLINE(void) ASMInvalidatePage(void *pv)
3685{
3686# if RT_INLINE_ASM_USES_INTRIN
3687 __invlpg(pv);
3688
3689# elif RT_INLINE_ASM_GNU_STYLE
3690 __asm__ __volatile__("invlpg %0\n\t"
3691 : : "m" (*(uint8_t *)pv));
3692# else
3693 __asm
3694 {
3695# ifdef RT_ARCH_AMD64
3696 mov rax, [pv]
3697 invlpg [rax]
3698# else
3699 mov eax, [pv]
3700 invlpg [eax]
3701# endif
3702 }
3703# endif
3704}
3705#endif
3706
3707
3708#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3709# if PAGE_SIZE != 0x1000
3710# error "PAGE_SIZE is not 0x1000!"
3711# endif
3712#endif
3713
3714/**
3715 * Zeros a 4K memory page.
3716 *
3717 * @param pv Pointer to the memory block. This must be page aligned.
3718 */
3719#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3720DECLASM(void) ASMMemZeroPage(volatile void *pv);
3721# else
3722DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3723{
3724# if RT_INLINE_ASM_USES_INTRIN
3725# ifdef RT_ARCH_AMD64
3726 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
3727# else
3728 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
3729# endif
3730
3731# elif RT_INLINE_ASM_GNU_STYLE
3732 RTUINTREG uDummy;
3733# ifdef RT_ARCH_AMD64
3734 __asm__ __volatile__ ("rep stosq"
3735 : "=D" (pv),
3736 "=c" (uDummy)
3737 : "0" (pv),
3738 "c" (0x1000 >> 3),
3739 "a" (0)
3740 : "memory");
3741# else
3742 __asm__ __volatile__ ("rep stosl"
3743 : "=D" (pv),
3744 "=c" (uDummy)
3745 : "0" (pv),
3746 "c" (0x1000 >> 2),
3747 "a" (0)
3748 : "memory");
3749# endif
3750# else
3751 __asm
3752 {
3753# ifdef RT_ARCH_AMD64
3754 xor rax, rax
3755 mov ecx, 0200h
3756 mov rdi, [pv]
3757 rep stosq
3758# else
3759 xor eax, eax
3760 mov ecx, 0400h
3761 mov edi, [pv]
3762 rep stosd
3763# endif
3764 }
3765# endif
3766}
3767# endif
3768
3769
3770/**
3771 * Zeros a memory block with a 32-bit aligned size.
3772 *
3773 * @param pv Pointer to the memory block.
3774 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3775 */
3776#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3777DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3778#else
3779DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3780{
3781# if RT_INLINE_ASM_USES_INTRIN
3782 __stosd((unsigned long *)pv, 0, cb >> 2);
3783
3784# elif RT_INLINE_ASM_GNU_STYLE
3785 __asm__ __volatile__ ("rep stosl"
3786 : "=D" (pv),
3787 "=c" (cb)
3788 : "0" (pv),
3789 "1" (cb >> 2),
3790 "a" (0)
3791 : "memory");
3792# else
3793 __asm
3794 {
3795 xor eax, eax
3796# ifdef RT_ARCH_AMD64
3797 mov rcx, [cb]
3798 shr rcx, 2
3799 mov rdi, [pv]
3800# else
3801 mov ecx, [cb]
3802 shr ecx, 2
3803 mov edi, [pv]
3804# endif
3805 rep stosd
3806 }
3807# endif
3808}
3809#endif
3810
3811
3812/**
3813 * Fills a memory block with a 32-bit aligned size.
3814 *
3815 * @param pv Pointer to the memory block.
3816 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3817 * @param u32 The value to fill with.
3818 */
3819#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3820DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3821#else
3822DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3823{
3824# if RT_INLINE_ASM_USES_INTRIN
3825 __stosd((unsigned long *)pv, 0, cb >> 2);
3826
3827# elif RT_INLINE_ASM_GNU_STYLE
3828 __asm__ __volatile__ ("rep stosl"
3829 : "=D" (pv),
3830 "=c" (cb)
3831 : "0" (pv),
3832 "1" (cb >> 2),
3833 "a" (u32)
3834 : "memory");
3835# else
3836 __asm
3837 {
3838# ifdef RT_ARCH_AMD64
3839 mov rcx, [cb]
3840 shr rcx, 2
3841 mov rdi, [pv]
3842# else
3843 mov ecx, [cb]
3844 shr ecx, 2
3845 mov edi, [pv]
3846# endif
3847 mov eax, [u32]
3848 rep stosd
3849 }
3850# endif
3851}
3852#endif
3853
3854
3855/**
3856 * Checks if a memory block is filled with the specified byte.
3857 *
3858 * This is a sort of inverted memchr.
3859 *
3860 * @returns Pointer to the byte which doesn't equal u8.
3861 * @returns NULL if all equal to u8.
3862 *
3863 * @param pv Pointer to the memory block.
3864 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3865 * @param u8 The value it's supposed to be filled with.
3866 */
3867#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3868DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
3869#else
3870DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3871{
3872/** @todo rewrite this in inline assembly. */
3873 uint8_t const *pb = (uint8_t const *)pv;
3874 for (; cb; cb--, pb++)
3875 if (RT_UNLIKELY(*pb != u8))
3876 return (void *)pb;
3877 return NULL;
3878}
3879#endif
3880
3881
3882
3883/**
 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
 *
 * On AMD64 this is a plain C widening multiplication.  On 32-bit x86 the
 * operands are placed in eax and edx and a single 'mul' produces the 64-bit
 * product in edx:eax (the "=A" constraint in the GNU variant, two dword
 * stores in the MSC variant).
 *
 * @returns u32F1 * u32F2.
 * @param u32F1 The first factor.
 * @param u32F2 The second factor.
 */
3888#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3889DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
3890#else
3891DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
3892{
3893# ifdef RT_ARCH_AMD64
3894 return (uint64_t)u32F1 * u32F2;
3895# else /* !RT_ARCH_AMD64 */
3896 uint64_t u64;
3897# if RT_INLINE_ASM_GNU_STYLE
3898 __asm__ __volatile__("mull %%edx"
3899 : "=A" (u64)
3900 : "a" (u32F2), "d" (u32F1));
3901# else
3902 __asm
3903 {
3904 mov edx, [u32F1]
3905 mov eax, [u32F2]
3906 mul edx
3907 mov dword ptr [u64], eax
3908 mov dword ptr [u64 + 4], edx
3909 }
3910# endif
3911 return u64;
3912# endif /* !RT_ARCH_AMD64 */
3913}
3914#endif
3915
3916
3917/**
 * Multiplies two signed 32-bit values returning a signed 64-bit result.
 *
 * On AMD64 this is a plain C widening multiplication.  On 32-bit x86 the
 * operands are placed in eax and edx and a single one-operand 'imul'
 * produces the 64-bit product in edx:eax.
 *
 * @returns i32F1 * i32F2.
 * @param i32F1 The first factor.
 * @param i32F2 The second factor.
 */
3922#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3923DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
3924#else
3925DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
3926{
3927# ifdef RT_ARCH_AMD64
3928 return (int64_t)i32F1 * i32F2;
3929# else /* !RT_ARCH_AMD64 */
3930 int64_t i64;
3931# if RT_INLINE_ASM_GNU_STYLE
3932 __asm__ __volatile__("imull %%edx"
3933 : "=A" (i64)
3934 : "a" (i32F2), "d" (i32F1));
3935# else
3936 __asm
3937 {
3938 mov edx, [i32F1]
3939 mov eax, [i32F2]
3940 imul edx
3941 mov dword ptr [i64], eax
3942 mov dword ptr [i64 + 4], edx
3943 }
3944# endif
3945 return i64;
3946# endif /* !RT_ARCH_AMD64 */
3947}
3948#endif
3949
3950
3951/**
 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
 *
 * The AMD64 variant does the division in C and truncates the quotient to
 * 32 bits.  The 32-bit x86 variants load the dividend into edx:eax and use
 * a single 'div'; the quotient comes back in eax and is stored over the
 * u32 parameter, the remainder in edx is discarded.
 *
 * NOTE(review): with a single 32-bit 'div' the quotient has to fit in
 * 32 bits and u32 must be non-zero, otherwise the CPU is expected to raise
 * a divide error - callers should confirm they stay within that range.
 *
 * @returns u64 / u32.
 * @param u64 The dividend.
 * @param u32 The divisor.
 */
3956#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3957DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
3958#else
3959DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
3960{
3961# ifdef RT_ARCH_AMD64
3962 return (uint32_t)(u64 / u32);
3963# else /* !RT_ARCH_AMD64 */
3964# if RT_INLINE_ASM_GNU_STYLE
3965 RTUINTREG uDummy;
3966 __asm__ __volatile__("divl %3"
3967 : "=a" (u32), "=d"(uDummy)
3968 : "A" (u64), "r" (u32));
3969# else
3970 __asm
3971 {
3972 mov eax, dword ptr [u64]
3973 mov edx, dword ptr [u64 + 4]
3974 mov ecx, [u32]
3975 div ecx
3976 mov [u32], eax
3977 }
3978# endif
3979 return u32;
3980# endif /* !RT_ARCH_AMD64 */
3981}
3982#endif
3983
3984
3985/**
 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
 *
 * The AMD64 variant does the division in C and truncates the quotient to
 * 32 bits.  The 32-bit x86 variants load the dividend into edx:eax and use
 * a single 'idiv'; the quotient comes back in eax and is stored over the
 * i32 parameter, the remainder in edx is discarded.
 *
 * NOTE(review): with a single 32-bit 'idiv' the quotient has to fit in a
 * signed 32-bit value and i32 must be non-zero, otherwise the CPU is
 * expected to raise a divide error - callers should confirm they stay
 * within that range.
 *
 * @returns i64 / i32.
 * @param i64 The dividend.
 * @param i32 The divisor.
 */
3990#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3991DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
3992#else
3993DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
3994{
3995# ifdef RT_ARCH_AMD64
3996 return (int32_t)(i64 / i32);
3997# else /* !RT_ARCH_AMD64 */
3998# if RT_INLINE_ASM_GNU_STYLE
3999 RTUINTREG iDummy;
4000 __asm__ __volatile__("idivl %3"
4001 : "=a" (i32), "=d"(iDummy)
4002 : "A" (i64), "r" (i32));
4003# else
4004 __asm
4005 {
4006 mov eax, dword ptr [i64]
4007 mov edx, dword ptr [i64 + 4]
4008 mov ecx, [i32]
4009 idiv ecx
4010 mov [i32], eax
4011 }
4012# endif
4013 return i32;
4014# endif /* !RT_ARCH_AMD64 */
4015}
4016#endif
4017
4018
4019/**
 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit
 * integer using a 96 bit intermediate result.
 *
 * Three variants exist inside the inline version:
 *   - AMD64, GNU style: 'mulq' leaves the 128-bit product in rdx:rax and
 *     'divq' divides that by u32C.
 *   - 32-bit x86, GNU style: the product is built from two 32x32 'mull's
 *     (low and high half of u64A), then divided by u32C with two chained
 *     'divl's (high dword first, its remainder feeding the low dword).
 *   - Plain C fallback doing the same long multiplication/division with
 *     64-bit arithmetic; only reached when __GNUC__ is defined but GNU style
 *     inline assembly is not enabled.
 *
 * NOTE(review): all variants appear to rely on the final quotient fitting
 * in 64 bits and on u32C being non-zero - confirm with callers.
 *
 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
 * __udivdi3 and __umoddi3 even if this inline function is not used.
 *
 * @returns (u64A * u32B) / u32C.
 * @param u64A The 64-bit value.
 * @param u32B The 32-bit value to multiply A by.
 * @param u32C The 32-bit value to divide A*B by.
 */
4030#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4031DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4032#else
4033DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4034{
4035# if RT_INLINE_ASM_GNU_STYLE
4036# ifdef RT_ARCH_AMD64
4037 uint64_t u64Result, u64Spill;
4038 __asm__ __volatile__("mulq %2\n\t"
4039 "divq %3\n\t"
4040 : "=a" (u64Result),
4041 "=d" (u64Spill)
4042 : "r" ((uint64_t)u32B),
4043 "r" ((uint64_t)u32C),
4044 "0" (u64A),
4045 "1" (0));
4046 return u64Result;
4047# else
4048 uint32_t u32Dummy;
4049 uint64_t u64Result;
4050 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4052 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
 eax = u64A.hi */
4054 "xchg %%edx,%%edi \n\t" /* edi = u64Lo.hi
 edx = u32C */
4056 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
 edx = u32B */
4058 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4060 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4061 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4062 "divl %%ecx \n\t" /* eax = u64Hi / u32C
 edx = u64Hi % u32C */
4064 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4065 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4066 "divl %%ecx \n\t" /* u64Result.lo */
4067 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4068 : "=A"(u64Result), "=c"(u32Dummy),
4069 "=S"(u32Dummy), "=D"(u32Dummy)
4070 : "a"((uint32_t)u64A),
4071 "S"((uint32_t)(u64A >> 32)),
4072 "c"(u32B),
4073 "D"(u32C));
4074 return u64Result;
4075# endif
4076# else
4077 RTUINT64U u;
4078 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4079 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4080 u64Hi += (u64Lo >> 32);
4081 u.s.Hi = (uint32_t)(u64Hi / u32C);
4082 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4083 return u.u;
4084# endif
4085}
4086#endif
4087
4088
4089/**
4090 * Probes a byte pointer for read access.
4091 *
4092 * While the function will not fault if the byte is not read accessible,
4093 * the idea is to do this in a safe place like before acquiring locks
4094 * and such like.
4095 *
4096 * Also, this functions guarantees that an eager compiler is not going
4097 * to optimize the probing away.
4098 *
4099 * @param pvByte Pointer to the byte.
4100 */
4101#if RT_INLINE_ASM_EXTERNAL
4102DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4103#else
4104DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4105{
4106 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4107 uint8_t u8;
4108# if RT_INLINE_ASM_GNU_STYLE
4109 __asm__ __volatile__("movb (%1), %0\n\t"
4110 : "=r" (u8)
4111 : "r" (pvByte));
4112# else
4113 __asm
4114 {
4115# ifdef RT_ARCH_AMD64
4116 mov rax, [pvByte]
4117 mov al, [rax]
4118# else
4119 mov eax, [pvByte]
4120 mov al, [eax]
4121# endif
4122 mov [u8], al
4123 }
4124# endif
4125 return u8;
4126}
4127#endif
4128
4129/**
4130 * Probes a buffer for read access page by page.
4131 *
4132 * While the function will fault if the buffer is not fully read
4133 * accessible, the idea is to do this in a safe place like before
4134 * acquiring locks and such like.
4135 *
4136 * Also, this functions guarantees that an eager compiler is not going
4137 * to optimize the probing away.
4138 *
4139 * @param pvBuf Pointer to the buffer.
4140 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4141 */
4142DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4143{
4144 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4145 /* the first byte */
4146 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4147 ASMProbeReadByte(pu8);
4148
4149 /* the pages in between pages. */
4150 while (cbBuf > /*PAGE_SIZE*/0x1000)
4151 {
4152 ASMProbeReadByte(pu8);
4153 cbBuf -= /*PAGE_SIZE*/0x1000;
4154 pu8 += /*PAGE_SIZE*/0x1000;
4155 }
4156
4157 /* the last byte */
4158 ASMProbeReadByte(pu8 + cbBuf - 1);
4159}
4160
4161
4162/** @def ASMBreakpoint
 * Debugger Breakpoint.
 *
 * Expands to an 'int3' instruction with GNU style inline assembly and to
 * the __debugbreak() intrinsic otherwise.  The GNU variants are wrapped in
 * do { } while (0) so the macro behaves like a single statement.
 *
 * @remark In the gnu world we add a nop instruction after the int3 to
 * force gdb to remain at the int3 source line.
 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
 * @internal
 */
4169#if RT_INLINE_ASM_GNU_STYLE
4170# ifndef __L4ENV__
4171# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4172# else
4173# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4174# endif
4175#else
4176# define ASMBreakpoint() __debugbreak()
4177#endif
4178
4179
4180
4181/** @defgroup grp_inline_bits Bit Operations
4182 * @{
4183 */
4184
4185
4186/**
 * Sets a bit in a bitmap.
 *
 * Non-atomic: none of the variants uses a lock prefix or an interlocked
 * intrinsic (see ASMAtomicBitSet for that).  Implemented with
 * _bittestandset, 'btsl' or 'bts' depending on the compiler.
 *
 * @param pvBitmap Pointer to the bitmap.
 * @param iBit The bit to set.
 */
4192#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4193DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4194#else
4195DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4196{
4197# if RT_INLINE_ASM_USES_INTRIN
4198 _bittestandset((long *)pvBitmap, iBit);
4199
4200# elif RT_INLINE_ASM_GNU_STYLE
4201 __asm__ __volatile__ ("btsl %1, %0"
4202 : "=m" (*(volatile long *)pvBitmap)
4203 : "Ir" (iBit)
4204 : "memory");
4205# else
4206 __asm
4207 {
4208# ifdef RT_ARCH_AMD64
4209 mov rax, [pvBitmap]
4210 mov edx, [iBit]
4211 bts [rax], edx
4212# else
4213 mov eax, [pvBitmap]
4214 mov edx, [iBit]
4215 bts [eax], edx
4216# endif
4217 }
4218# endif
4219}
4220#endif
4221
4222
4223/**
 * Atomically sets a bit in a bitmap, ordered.
 *
 * Same operation as ASMBitSet, but performed with the interlocked
 * intrinsic or a lock-prefixed 'bts'.
 *
 * @param pvBitmap Pointer to the bitmap.
 * @param iBit The bit to set.
 */
4229#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4230DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4231#else
4232DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4233{
4234# if RT_INLINE_ASM_USES_INTRIN
4235 _interlockedbittestandset((long *)pvBitmap, iBit);
4236# elif RT_INLINE_ASM_GNU_STYLE
4237 __asm__ __volatile__ ("lock; btsl %1, %0"
4238 : "=m" (*(volatile long *)pvBitmap)
4239 : "Ir" (iBit)
4240 : "memory");
4241# else
4242 __asm
4243 {
4244# ifdef RT_ARCH_AMD64
4245 mov rax, [pvBitmap]
4246 mov edx, [iBit]
4247 lock bts [rax], edx
4248# else
4249 mov eax, [pvBitmap]
4250 mov edx, [iBit]
4251 lock bts [eax], edx
4252# endif
4253 }
4254# endif
4255}
4256#endif
4257
4258
4259/**
 * Clears a bit in a bitmap.
 *
 * Non-atomic: none of the variants uses a lock prefix or an interlocked
 * intrinsic (see ASMAtomicBitClear for that).  Implemented with
 * _bittestandreset, 'btrl' or 'btr' depending on the compiler.
 *
 * @param pvBitmap Pointer to the bitmap.
 * @param iBit The bit to clear.
 */
4265#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4266DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4267#else
4268DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4269{
4270# if RT_INLINE_ASM_USES_INTRIN
4271 _bittestandreset((long *)pvBitmap, iBit);
4272
4273# elif RT_INLINE_ASM_GNU_STYLE
4274 __asm__ __volatile__ ("btrl %1, %0"
4275 : "=m" (*(volatile long *)pvBitmap)
4276 : "Ir" (iBit)
4277 : "memory");
4278# else
4279 __asm
4280 {
4281# ifdef RT_ARCH_AMD64
4282 mov rax, [pvBitmap]
4283 mov edx, [iBit]
4284 btr [rax], edx
4285# else
4286 mov eax, [pvBitmap]
4287 mov edx, [iBit]
4288 btr [eax], edx
4289# endif
4290 }
4291# endif
4292}
4293#endif
4294
4295
4296/**
 * Atomically clears a bit in a bitmap, ordered.
 *
 * Same operation as ASMBitClear, but performed with a lock-prefixed 'btr'.
 * There is no intrinsic variant of this one; it is either external
 * assembly or inline assembly.
 *
 * @param pvBitmap Pointer to the bitmap.
 * @param iBit The bit to clear.
 * @remark No memory barrier, take care on smp.
 *         NOTE(review): this remark looks stale - the inline variants use a
 *         lock prefix and the GNU one also has a "memory" clobber, which
 *         matches the "ordered" in the brief.  Confirm against the external
 *         assembly implementation before relying on either statement.
 */
4303#if RT_INLINE_ASM_EXTERNAL
4304DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4305#else
4306DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4307{
4308# if RT_INLINE_ASM_GNU_STYLE
4309 __asm__ __volatile__ ("lock; btrl %1, %0"
4310 : "=m" (*(volatile long *)pvBitmap)
4311 : "Ir" (iBit)
4312 : "memory");
4313# else
4314 __asm
4315 {
4316# ifdef RT_ARCH_AMD64
4317 mov rax, [pvBitmap]
4318 mov edx, [iBit]
4319 lock btr [rax], edx
4320# else
4321 mov eax, [pvBitmap]
4322 mov edx, [iBit]
4323 lock btr [eax], edx
4324# endif
4325 }
4326# endif
4327}
4328#endif
4329
4330
4331/**
 * Toggles a bit in a bitmap.
 *
 * Non-atomic: none of the variants uses a lock prefix or an interlocked
 * intrinsic (see ASMAtomicBitToggle for that).  Implemented with
 * _bittestandcomplement, 'btcl' or 'btc' depending on the compiler.
 *
 * @param pvBitmap Pointer to the bitmap.
 * @param iBit The bit to toggle.
 */
4337#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4338DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4339#else
4340DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4341{
4342# if RT_INLINE_ASM_USES_INTRIN
4343 _bittestandcomplement((long *)pvBitmap, iBit);
4344# elif RT_INLINE_ASM_GNU_STYLE
4345 __asm__ __volatile__ ("btcl %1, %0"
4346 : "=m" (*(volatile long *)pvBitmap)
4347 : "Ir" (iBit)
4348 : "memory");
4349# else
4350 __asm
4351 {
4352# ifdef RT_ARCH_AMD64
4353 mov rax, [pvBitmap]
4354 mov edx, [iBit]
4355 btc [rax], edx
4356# else
4357 mov eax, [pvBitmap]
4358 mov edx, [iBit]
4359 btc [eax], edx
4360# endif
4361 }
4362# endif
4363}
4364#endif
4365
4366
4367/**
 * Atomically toggles a bit in a bitmap, ordered.
 *
 * Same operation as ASMBitToggle, but performed with a lock-prefixed 'btc'.
 * There is no intrinsic variant of this one; it is either external
 * assembly or inline assembly.
 *
 * @param pvBitmap Pointer to the bitmap.
 * @param iBit The bit to toggle.
 */
4373#if RT_INLINE_ASM_EXTERNAL
4374DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4375#else
4376DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4377{
4378# if RT_INLINE_ASM_GNU_STYLE
4379 __asm__ __volatile__ ("lock; btcl %1, %0"
4380 : "=m" (*(volatile long *)pvBitmap)
4381 : "Ir" (iBit)
4382 : "memory");
4383# else
4384 __asm
4385 {
4386# ifdef RT_ARCH_AMD64
4387 mov rax, [pvBitmap]
4388 mov edx, [iBit]
4389 lock btc [rax], edx
4390# else
4391 mov eax, [pvBitmap]
4392 mov edx, [iBit]
4393 lock btc [eax], edx
4394# endif
4395 }
4396# endif
4397}
4398#endif
4399
4400
4401/**
 * Tests and sets a bit in a bitmap.
 *
 * Non-atomic (no lock prefix / interlocked intrinsic).  The previous state
 * of the bit is taken from the carry flag ('setc') in the assembly variants
 * and from the intrinsic's return value otherwise.
 *
 * The result is written into one member of a local union (u32 by the
 * assembly, u8 by the intrinsic) and read back through the bool member.
 * NOTE(review): the intrinsic path only writes the u8 member, so this
 * presumably relies on bool being a single byte - confirm.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param pvBitmap Pointer to the bitmap.
 * @param iBit The bit to test and set.
 */
4409#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4410DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4411#else
4412DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4413{
4414 union { bool f; uint32_t u32; uint8_t u8; } rc;
4415# if RT_INLINE_ASM_USES_INTRIN
4416 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4417
4418# elif RT_INLINE_ASM_GNU_STYLE
4419 __asm__ __volatile__ ("btsl %2, %1\n\t"
4420 "setc %b0\n\t"
4421 "andl $1, %0\n\t"
4422 : "=q" (rc.u32),
4423 "=m" (*(volatile long *)pvBitmap)
4424 : "Ir" (iBit)
4425 : "memory");
4426# else
4427 __asm
4428 {
4429 mov edx, [iBit]
4430# ifdef RT_ARCH_AMD64
4431 mov rax, [pvBitmap]
4432 bts [rax], edx
4433# else
4434 mov eax, [pvBitmap]
4435 bts [eax], edx
4436# endif
4437 setc al
4438 and eax, 1
4439 mov [rc.u32], eax
4440 }
4441# endif
4442 return rc.f;
4443}
4444#endif
4445
4446
4447/**
 * Atomically tests and sets a bit in a bitmap, ordered.
 *
 * Same as ASMBitTestAndSet, but using the interlocked intrinsic or a
 * lock-prefixed 'bts'.  The previous state of the bit is taken from the
 * carry flag ('setc') in the assembly variants and from the intrinsic's
 * return value otherwise; it is passed back through a local union.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param pvBitmap Pointer to the bitmap.
 * @param iBit The bit to set.
 */
4455#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4456DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4457#else
4458DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4459{
4460 union { bool f; uint32_t u32; uint8_t u8; } rc;
4461# if RT_INLINE_ASM_USES_INTRIN
4462 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4463# elif RT_INLINE_ASM_GNU_STYLE
4464 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
4465 "setc %b0\n\t"
4466 "andl $1, %0\n\t"
4467 : "=q" (rc.u32),
4468 "=m" (*(volatile long *)pvBitmap)
4469 : "Ir" (iBit)
4470 : "memory");
4471# else
4472 __asm
4473 {
4474 mov edx, [iBit]
4475# ifdef RT_ARCH_AMD64
4476 mov rax, [pvBitmap]
4477 lock bts [rax], edx
4478# else
4479 mov eax, [pvBitmap]
4480 lock bts [eax], edx
4481# endif
4482 setc al
4483 and eax, 1
4484 mov [rc.u32], eax
4485 }
4486# endif
4487 return rc.f;
4488}
4489#endif
4490
4491
4492/**
 * Tests and clears a bit in a bitmap.
 *
 * Non-atomic (no lock prefix / interlocked intrinsic).  The previous state
 * of the bit is taken from the carry flag ('setc') in the assembly variants
 * and from the intrinsic's return value otherwise; it is passed back
 * through a local union.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param pvBitmap Pointer to the bitmap.
 * @param iBit The bit to test and clear.
 */
4500#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4501DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4502#else
4503DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4504{
4505 union { bool f; uint32_t u32; uint8_t u8; } rc;
4506# if RT_INLINE_ASM_USES_INTRIN
4507 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4508
4509# elif RT_INLINE_ASM_GNU_STYLE
4510 __asm__ __volatile__ ("btrl %2, %1\n\t"
4511 "setc %b0\n\t"
4512 "andl $1, %0\n\t"
4513 : "=q" (rc.u32),
4514 "=m" (*(volatile long *)pvBitmap)
4515 : "Ir" (iBit)
4516 : "memory");
4517# else
4518 __asm
4519 {
4520 mov edx, [iBit]
4521# ifdef RT_ARCH_AMD64
4522 mov rax, [pvBitmap]
4523 btr [rax], edx
4524# else
4525 mov eax, [pvBitmap]
4526 btr [eax], edx
4527# endif
4528 setc al
4529 and eax, 1
4530 mov [rc.u32], eax
4531 }
4532# endif
4533 return rc.f;
4534}
4535#endif
4536
4537
4538/**
 * Atomically tests and clears a bit in a bitmap, ordered.
 *
 * Same as ASMBitTestAndClear, but using the interlocked intrinsic or a
 * lock-prefixed 'btr'.  The previous state of the bit is taken from the
 * carry flag ('setc') in the assembly variants and from the intrinsic's
 * return value otherwise; it is passed back through a local union.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param pvBitmap Pointer to the bitmap.
 * @param iBit The bit to test and clear.
 * @remark No memory barrier, take care on smp.
 *         NOTE(review): this remark looks stale - the inline variants use a
 *         lock prefix (or the interlocked intrinsic) and the GNU one also
 *         has a "memory" clobber, which matches the "ordered" in the brief.
 *         Confirm against the external assembly implementation.
 */
4547#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4548DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4549#else
4550DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4551{
4552 union { bool f; uint32_t u32; uint8_t u8; } rc;
4553# if RT_INLINE_ASM_USES_INTRIN
4554 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4555
4556# elif RT_INLINE_ASM_GNU_STYLE
4557 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
4558 "setc %b0\n\t"
4559 "andl $1, %0\n\t"
4560 : "=q" (rc.u32),
4561 "=m" (*(volatile long *)pvBitmap)
4562 : "Ir" (iBit)
4563 : "memory");
4564# else
4565 __asm
4566 {
4567 mov edx, [iBit]
4568# ifdef RT_ARCH_AMD64
4569 mov rax, [pvBitmap]
4570 lock btr [rax], edx
4571# else
4572 mov eax, [pvBitmap]
4573 lock btr [eax], edx
4574# endif
4575 setc al
4576 and eax, 1
4577 mov [rc.u32], eax
4578 }
4579# endif
4580 return rc.f;
4581}
4582#endif
4583
4584
4585/**
4586 * Tests and toggles a bit in a bitmap.
4587 *
4588 * @returns true if the bit was set.
4589 * @returns false if the bit was clear.
4590 * @param pvBitmap Pointer to the bitmap.
4591 * @param iBit The bit to test and toggle.
4592 */
4593#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4594DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4595#else
4596DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4597{
4598 union { bool f; uint32_t u32; uint8_t u8; } rc;
4599# if RT_INLINE_ASM_USES_INTRIN
4600 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4601
4602# elif RT_INLINE_ASM_GNU_STYLE
4603 __asm__ __volatile__ ("btcl %2, %1\n\t"
4604 "setc %b0\n\t"
4605 "andl $1, %0\n\t"
4606 : "=q" (rc.u32),
4607 "=m" (*(volatile long *)pvBitmap)
4608 : "Ir" (iBit)
4609 : "memory");
4610# else
4611 __asm
4612 {
4613 mov edx, [iBit]
4614# ifdef RT_ARCH_AMD64
4615 mov rax, [pvBitmap]
4616 btc [rax], edx
4617# else
4618 mov eax, [pvBitmap]
4619 btc [eax], edx
4620# endif
4621 setc al
4622 and eax, 1
4623 mov [rc.u32], eax
4624 }
4625# endif
4626 return rc.f;
4627}
4628#endif
4629
4630
4631/**
 * Atomically tests and toggles a bit in a bitmap, ordered.
 *
 * Same as ASMBitTestAndToggle, but using a lock-prefixed 'btc'.  There is
 * no intrinsic variant of this one; it is either external assembly or
 * inline assembly.  The previous state of the bit is taken from the carry
 * flag ('setc') and passed back through a local union.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param pvBitmap Pointer to the bitmap.
 * @param iBit The bit to test and toggle.
 */
4639#if RT_INLINE_ASM_EXTERNAL
4640DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4641#else
4642DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4643{
4644 union { bool f; uint32_t u32; uint8_t u8; } rc;
4645# if RT_INLINE_ASM_GNU_STYLE
4646 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
4647 "setc %b0\n\t"
4648 "andl $1, %0\n\t"
4649 : "=q" (rc.u32),
4650 "=m" (*(volatile long *)pvBitmap)
4651 : "Ir" (iBit)
4652 : "memory");
4653# else
4654 __asm
4655 {
4656 mov edx, [iBit]
4657# ifdef RT_ARCH_AMD64
4658 mov rax, [pvBitmap]
4659 lock btc [rax], edx
4660# else
4661 mov eax, [pvBitmap]
4662 lock btc [eax], edx
4663# endif
4664 setc al
4665 and eax, 1
4666 mov [rc.u32], eax
4667 }
4668# endif
4669 return rc.f;
4670}
4671#endif
4672
4673
4674/**
4675 * Tests if a bit in a bitmap is set.
4676 *
4677 * @returns true if the bit is set.
4678 * @returns false if the bit is clear.
4679 * @param pvBitmap Pointer to the bitmap.
4680 * @param iBit The bit to test.
4681 */
4682#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4683DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
4684#else
4685DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
4686{
4687 union { bool f; uint32_t u32; uint8_t u8; } rc;
4688# if RT_INLINE_ASM_USES_INTRIN
4689 rc.u32 = _bittest((long *)pvBitmap, iBit);
4690# elif RT_INLINE_ASM_GNU_STYLE
4691
4692 __asm__ __volatile__ ("btl %2, %1\n\t"
4693 "setc %b0\n\t"
4694 "andl $1, %0\n\t"
4695 : "=q" (rc.u32),
4696 "=m" (*(volatile long *)pvBitmap)
4697 : "Ir" (iBit)
4698 : "memory");
4699# else
4700 __asm
4701 {
4702 mov edx, [iBit]
4703# ifdef RT_ARCH_AMD64
4704 mov rax, [pvBitmap]
4705 bt [rax], edx
4706# else
4707 mov eax, [pvBitmap]
4708 bt [eax], edx
4709# endif
4710 setc al
4711 and eax, 1
4712 mov [rc.u32], eax
4713 }
4714# endif
4715 return rc.f;
4716}
4717#endif
4718
4719
4720/**
4721 * Clears a bit range within a bitmap.
4722 *
4723 * @param pvBitmap Pointer to the bitmap.
4724 * @param iBitStart The First bit to clear.
4725 * @param iBitEnd The first bit not to clear.
4726 */
4727DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4728{
4729 if (iBitStart < iBitEnd)
4730 {
4731 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4732 int iStart = iBitStart & ~31;
4733 int iEnd = iBitEnd & ~31;
4734 if (iStart == iEnd)
4735 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4736 else
4737 {
4738 /* bits in first dword. */
4739 if (iBitStart & 31)
4740 {
4741 *pu32 &= (1 << (iBitStart & 31)) - 1;
4742 pu32++;
4743 iBitStart = iStart + 32;
4744 }
4745
4746 /* whole dword. */
4747 if (iBitStart != iEnd)
4748 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4749
4750 /* bits in last dword. */
4751 if (iBitEnd & 31)
4752 {
4753 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4754 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4755 }
4756 }
4757 }
4758}
4759
4760
4761/**
 * Finds the first clear bit in a bitmap.
 *
 * How the inline variants work:
 *   - cBits is passed through RT_ALIGN_32(cBits, 32) and then divided by 32
 *     to get the number of dwords to scan.
 *   - 'repe scas' compares the dwords against 0xffffffff and stops at the
 *     first one that differs, i.e. the first dword with a clear bit.
 *   - If one was found, that dword is xor'ed with 0xffffffff (leaving a
 *     1 for every clear bit), 'bsf' picks the lowest one, and the byte
 *     offset of the dword times 8 is added to get the bit index.
 *   - If none was found, edx still holds the 0xffffffff it was preloaded
 *     with, which is returned as -1.
 *
 * NOTE(review): the GNU variant reads the bitmap but has neither a "memory"
 * clobber nor a memory input operand covering it - confirm this is safe
 * with respect to compiler reordering of preceding stores.
 *
 * @returns Index of the first zero bit.
 * @returns -1 if no clear bit was found (also when cBits is 0).
 * @param pvBitmap Pointer to the bitmap.
 * @param cBits The number of bits in the bitmap. Multiple of 32.
 */
4769#if RT_INLINE_ASM_EXTERNAL
4770DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
4771#else
4772DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
4773{
4774 if (cBits)
4775 {
4776 int32_t iBit;
4777# if RT_INLINE_ASM_GNU_STYLE
4778 RTCCUINTREG uEAX, uECX, uEDI;
4779 cBits = RT_ALIGN_32(cBits, 32);
4780 __asm__ __volatile__("repe; scasl\n\t"
4781 "je 1f\n\t"
4782# ifdef RT_ARCH_AMD64
4783 "lea -4(%%rdi), %%rdi\n\t"
4784 "xorl (%%rdi), %%eax\n\t"
4785 "subq %5, %%rdi\n\t"
4786# else
4787 "lea -4(%%edi), %%edi\n\t"
4788 "xorl (%%edi), %%eax\n\t"
4789 "subl %5, %%edi\n\t"
4790# endif
4791 "shll $3, %%edi\n\t"
4792 "bsfl %%eax, %%edx\n\t"
4793 "addl %%edi, %%edx\n\t"
4794 "1:\t\n"
4795 : "=d" (iBit),
4796 "=&c" (uECX),
4797 "=&D" (uEDI),
4798 "=&a" (uEAX)
4799 : "0" (0xffffffff),
4800 "mr" (pvBitmap),
4801 "1" (cBits >> 5),
4802 "2" (pvBitmap),
4803 "3" (0xffffffff));
4804# else
4805 cBits = RT_ALIGN_32(cBits, 32);
4806 __asm
4807 {
4808# ifdef RT_ARCH_AMD64
4809 mov rdi, [pvBitmap]
4810 mov rbx, rdi
4811# else
4812 mov edi, [pvBitmap]
4813 mov ebx, edi
4814# endif
4815 mov edx, 0ffffffffh
4816 mov eax, edx
4817 mov ecx, [cBits]
4818 shr ecx, 5
4819 repe scasd
4820 je done
4821
4822# ifdef RT_ARCH_AMD64
4823 lea rdi, [rdi - 4]
4824 xor eax, [rdi]
4825 sub rdi, rbx
4826# else
4827 lea edi, [edi - 4]
4828 xor eax, [edi]
4829 sub edi, ebx
4830# endif
4831 shl edi, 3
4832 bsf edx, eax
4833 add edx, edi
4834 done:
4835 mov [iBit], edx
4836 }
4837# endif
4838 return iBit;
4839 }
4840 return -1;
4841}
4842#endif
4843
4844
4845/**
 * Finds the next clear bit in a bitmap.
 *
 * The search starts at iBitPrev + 1:
 *   - pvBitmap and cBits are first rebased to the dword containing that
 *     start bit.
 *   - If the start bit is not on a dword boundary, the remainder of that
 *     dword is inspected on its own: it is inverted and shifted right so
 *     that the start bit ends up at position 0, then scanned with
 *     bsf / _BitScanForward.  Only if nothing is found there and more than
 *     32 bits remain, the following dwords are handed to ASMBitFirstClear.
 *   - If the start bit is on a dword boundary, everything is handed to
 *     ASMBitFirstClear directly.
 *
 * @returns Index of the next zero bit.
 * @returns -1 if no clear bit was found.
 * @param pvBitmap Pointer to the bitmap.
 * @param cBits The number of bits in the bitmap. Multiple of 32.
 * @param iBitPrev The bit returned from the last search.
 * The search will start at iBitPrev + 1.
 */
4855#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4856DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4857#else
4858DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4859{
4860 int iBit = ++iBitPrev & 31;
4861 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4862 cBits -= iBitPrev & ~31;
4863 if (iBit)
4864 {
4865 /* inspect the first dword. */
4866 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
4867# if RT_INLINE_ASM_USES_INTRIN
4868 unsigned long ulBit = 0;
4869 if (_BitScanForward(&ulBit, u32))
4870 return ulBit + iBitPrev;
4871 iBit = -1;
4872# else
4873# if RT_INLINE_ASM_GNU_STYLE
4874 __asm__ __volatile__("bsf %1, %0\n\t"
4875 "jnz 1f\n\t"
4876 "movl $-1, %0\n\t"
4877 "1:\n\t"
4878 : "=r" (iBit)
4879 : "r" (u32));
4880# else
4881 __asm
4882 {
4883 mov edx, [u32]
4884 bsf eax, edx
4885 jnz done
4886 mov eax, 0ffffffffh
4887 done:
4888 mov [iBit], eax
4889 }
4890# endif
4891 if (iBit >= 0)
4892 return iBit + iBitPrev;
4893# endif
4894 /* Search the rest of the bitmap, if there is anything. */
4895 if (cBits > 32)
4896 {
4897 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4898 if (iBit >= 0)
4899 return iBit + (iBitPrev & ~31) + 32;
4900 }
4901 }
4902 else
4903 {
4904 /* Search the rest of the bitmap. */
4905 iBit = ASMBitFirstClear(pvBitmap, cBits);
4906 if (iBit >= 0)
4907 return iBit + (iBitPrev & ~31);
4908 }
4909 return iBit;
4910}
4911#endif
4912
4913
4914/**
 * Finds the first set bit in a bitmap.
 *
 * How the inline variants work:
 *   - cBits is passed through RT_ALIGN_32(cBits, 32) and then divided by 32
 *     to get the number of dwords to scan.
 *   - 'repe scas' compares the dwords against 0 and stops at the first one
 *     that differs, i.e. the first dword with a set bit.
 *   - If one was found, that dword is loaded, 'bsf' picks its lowest set
 *     bit, and the byte offset of the dword times 8 is added to get the
 *     bit index.
 *   - If none was found, edx still holds the 0xffffffff it was preloaded
 *     with, which is returned as -1.
 *
 * NOTE(review): the GNU variant reads the bitmap but has neither a "memory"
 * clobber nor a memory input operand covering it - confirm this is safe
 * with respect to compiler reordering of preceding stores.
 *
 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found (also when cBits is 0).
 * @param pvBitmap Pointer to the bitmap.
 * @param cBits The number of bits in the bitmap. Multiple of 32.
 */
4922#if RT_INLINE_ASM_EXTERNAL
4923DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
4924#else
4925DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
4926{
4927 if (cBits)
4928 {
4929 int32_t iBit;
4930# if RT_INLINE_ASM_GNU_STYLE
4931 RTCCUINTREG uEAX, uECX, uEDI;
4932 cBits = RT_ALIGN_32(cBits, 32);
4933 __asm__ __volatile__("repe; scasl\n\t"
4934 "je 1f\n\t"
4935# ifdef RT_ARCH_AMD64
4936 "lea -4(%%rdi), %%rdi\n\t"
4937 "movl (%%rdi), %%eax\n\t"
4938 "subq %5, %%rdi\n\t"
4939# else
4940 "lea -4(%%edi), %%edi\n\t"
4941 "movl (%%edi), %%eax\n\t"
4942 "subl %5, %%edi\n\t"
4943# endif
4944 "shll $3, %%edi\n\t"
4945 "bsfl %%eax, %%edx\n\t"
4946 "addl %%edi, %%edx\n\t"
4947 "1:\t\n"
4948 : "=d" (iBit),
4949 "=&c" (uECX),
4950 "=&D" (uEDI),
4951 "=&a" (uEAX)
4952 : "0" (0xffffffff),
4953 "mr" (pvBitmap),
4954 "1" (cBits >> 5),
4955 "2" (pvBitmap),
4956 "3" (0));
4957# else
4958 cBits = RT_ALIGN_32(cBits, 32);
4959 __asm
4960 {
4961# ifdef RT_ARCH_AMD64
4962 mov rdi, [pvBitmap]
4963 mov rbx, rdi
4964# else
4965 mov edi, [pvBitmap]
4966 mov ebx, edi
4967# endif
4968 mov edx, 0ffffffffh
4969 xor eax, eax
4970 mov ecx, [cBits]
4971 shr ecx, 5
4972 repe scasd
4973 je done
4974# ifdef RT_ARCH_AMD64
4975 lea rdi, [rdi - 4]
4976 mov eax, [rdi]
4977 sub rdi, rbx
4978# else
4979 lea edi, [edi - 4]
4980 mov eax, [edi]
4981 sub edi, ebx
4982# endif
4983 shl edi, 3
4984 bsf edx, eax
4985 add edx, edi
4986 done:
4987 mov [iBit], edx
4988 }
4989# endif
4990 return iBit;
4991 }
4992 return -1;
4993}
4994#endif
4995
4996
4997/**
 * Finds the next set bit in a bitmap.
 *
 * The search starts at iBitPrev + 1:
 *   - pvBitmap and cBits are first rebased to the dword containing that
 *     start bit.
 *   - If the start bit is not on a dword boundary, the remainder of that
 *     dword is inspected on its own: it is shifted right so that the start
 *     bit ends up at position 0, then scanned with bsf / _BitScanForward.
 *     Only if nothing is found there and more than 32 bits remain, the
 *     following dwords are handed to ASMBitFirstSet.
 *   - If the start bit is on a dword boundary, everything is handed to
 *     ASMBitFirstSet directly.
 *
 * @returns Index of the next set bit.
 * @returns -1 if no set bit was found.
 * @param pvBitmap Pointer to the bitmap.
 * @param cBits The number of bits in the bitmap. Multiple of 32.
 * @param iBitPrev The bit returned from the last search.
 * The search will start at iBitPrev + 1.
 */
5007#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5008DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5009#else
5010DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5011{
5012 int iBit = ++iBitPrev & 31;
5013 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5014 cBits -= iBitPrev & ~31;
5015 if (iBit)
5016 {
5017 /* inspect the first dword. */
5018 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
5019# if RT_INLINE_ASM_USES_INTRIN
5020 unsigned long ulBit = 0;
5021 if (_BitScanForward(&ulBit, u32))
5022 return ulBit + iBitPrev;
5023 iBit = -1;
5024# else
5025# if RT_INLINE_ASM_GNU_STYLE
5026 __asm__ __volatile__("bsf %1, %0\n\t"
5027 "jnz 1f\n\t"
5028 "movl $-1, %0\n\t"
5029 "1:\n\t"
5030 : "=r" (iBit)
5031 : "r" (u32));
5032# else
5033 __asm
5034 {
5035 mov edx, u32
5036 bsf eax, edx
5037 jnz done
5038 mov eax, 0ffffffffh
5039 done:
5040 mov [iBit], eax
5041 }
5042# endif
5043 if (iBit >= 0)
5044 return iBit + iBitPrev;
5045# endif
5046 /* Search the rest of the bitmap, if there is anything. */
5047 if (cBits > 32)
5048 {
5049 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5050 if (iBit >= 0)
5051 return iBit + (iBitPrev & ~31) + 32;
5052 }
5053
5054 }
5055 else
5056 {
5057 /* Search the rest of the bitmap. */
5058 iBit = ASMBitFirstSet(pvBitmap, cBits);
5059 if (iBit >= 0)
5060 return iBit + (iBitPrev & ~31);
5061 }
5062 return iBit;
5063}
5064#endif
5065
5066
5067/**
5068 * Finds the first bit which is set in the given 32-bit integer.
5069 * Bits are numbered from 1 (least significant) to 32.
5070 *
5071 * @returns index [1..32] of the first set bit.
5072 * @returns 0 if all bits are cleared.
5073 * @param u32 Integer to search for set bits.
5074 * @remark Similar to ffs() in BSD.
5075 */
5076DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5077{
5078# if RT_INLINE_ASM_USES_INTRIN
5079 unsigned long iBit;
5080 if (_BitScanForward(&iBit, u32))
5081 iBit++;
5082 else
5083 iBit = 0;
5084# elif RT_INLINE_ASM_GNU_STYLE
5085 uint32_t iBit;
5086 __asm__ __volatile__("bsf %1, %0\n\t"
5087 "jnz 1f\n\t"
5088 "xorl %0, %0\n\t"
5089 "jmp 2f\n"
5090 "1:\n\t"
5091 "incl %0\n"
5092 "2:\n\t"
5093 : "=r" (iBit)
5094 : "rm" (u32));
5095# else
5096 uint32_t iBit;
5097 _asm
5098 {
5099 bsf eax, [u32]
5100 jnz found
5101 xor eax, eax
5102 jmp done
5103 found:
5104 inc eax
5105 done:
5106 mov [iBit], eax
5107 }
5108# endif
5109 return iBit;
5110}
5111
5112
5113/**
5114 * Finds the first bit which is set in the given 32-bit integer.
5115 * Bits are numbered from 1 (least significant) to 32.
5116 *
5117 * @returns index [1..32] of the first set bit.
5118 * @returns 0 if all bits are cleared.
5119 * @param i32 Integer to search for set bits.
5120 * @remark Similar to ffs() in BSD.
5121 */
5122DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5123{
5124 return ASMBitFirstSetU32((uint32_t)i32);
5125}
5126
5127
5128/**
5129 * Finds the last bit which is set in the given 32-bit integer.
5130 * Bits are numbered from 1 (least significant) to 32.
5131 *
5132 * @returns index [1..32] of the last set bit.
5133 * @returns 0 if all bits are cleared.
5134 * @param u32 Integer to search for set bits.
5135 * @remark Similar to fls() in BSD.
5136 */
5137DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5138{
5139# if RT_INLINE_ASM_USES_INTRIN
5140 unsigned long iBit;
5141 if (_BitScanReverse(&iBit, u32))
5142 iBit++;
5143 else
5144 iBit = 0;
5145# elif RT_INLINE_ASM_GNU_STYLE
5146 uint32_t iBit;
5147 __asm__ __volatile__("bsrl %1, %0\n\t"
5148 "jnz 1f\n\t"
5149 "xorl %0, %0\n\t"
5150 "jmp 2f\n"
5151 "1:\n\t"
5152 "incl %0\n"
5153 "2:\n\t"
5154 : "=r" (iBit)
5155 : "rm" (u32));
5156# else
5157 uint32_t iBit;
5158 _asm
5159 {
5160 bsr eax, [u32]
5161 jnz found
5162 xor eax, eax
5163 jmp done
5164 found:
5165 inc eax
5166 done:
5167 mov [iBit], eax
5168 }
5169# endif
5170 return iBit;
5171}
5172
5173
5174/**
5175 * Finds the last bit which is set in the given 32-bit integer.
5176 * Bits are numbered from 1 (least significant) to 32.
5177 *
5178 * @returns index [1..32] of the last set bit.
5179 * @returns 0 if all bits are cleared.
5180 * @param i32 Integer to search for set bits.
5181 * @remark Similar to fls() in BSD.
5182 */
5183DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5184{
5185 return ASMBitLastSetS32((uint32_t)i32);
5186}
5187
5188
5189/**
5190 * Reverse the byte order of the given 32-bit integer.
5191 * @param u32 Integer
5192 */
5193DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5194{
5195#if RT_INLINE_ASM_USES_INTRIN
5196 u32 = _byteswap_ulong(u32);
5197#elif RT_INLINE_ASM_GNU_STYLE
5198 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5199#else
5200 _asm
5201 {
5202 mov eax, [u32]
5203 bswap eax
5204 mov [u32], eax
5205 }
5206#endif
5207 return u32;
5208}
5209
5210/** @} */
5211
5212
5213/** @} */
5214#endif
5215
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette