VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 8165

Last change on this file since 8165 was 8163, checked in by vboxsync, 17 years ago

rebrand IPRT

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 132.6 KB
1/** @file
2 * Incredibly Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using a _MSC_VER >= 1400 compiler and thus its intrinsics.
39 * Otherwise defined as 0.
40 */
41
42#ifdef _MSC_VER
43# if _MSC_VER >= 1400
44# define RT_INLINE_ASM_USES_INTRIN 1
45# include <intrin.h>
46 /* Emit the intrinsics at all optimization levels. */
47# pragma intrinsic(_ReadWriteBarrier)
48# pragma intrinsic(__cpuid)
49# pragma intrinsic(_enable)
50# pragma intrinsic(_disable)
51# pragma intrinsic(__rdtsc)
52# pragma intrinsic(__readmsr)
53# pragma intrinsic(__writemsr)
54# pragma intrinsic(__outbyte)
55# pragma intrinsic(__outword)
56# pragma intrinsic(__outdword)
57# pragma intrinsic(__inbyte)
58# pragma intrinsic(__inword)
59# pragma intrinsic(__indword)
60# pragma intrinsic(__invlpg)
61# pragma intrinsic(__stosd)
62# pragma intrinsic(__stosw)
63# pragma intrinsic(__stosb)
64# pragma intrinsic(__readcr0)
65# pragma intrinsic(__readcr2)
66# pragma intrinsic(__readcr3)
67# pragma intrinsic(__readcr4)
68# pragma intrinsic(__writecr0)
69# pragma intrinsic(__writecr3)
70# pragma intrinsic(__writecr4)
71# pragma intrinsic(_BitScanForward)
72# pragma intrinsic(_BitScanReverse)
73# pragma intrinsic(_bittest)
74# pragma intrinsic(_bittestandset)
75# pragma intrinsic(_bittestandreset)
76# pragma intrinsic(_bittestandcomplement)
77# pragma intrinsic(_byteswap_ushort)
78# pragma intrinsic(_byteswap_ulong)
79# pragma intrinsic(_interlockedbittestandset)
80# pragma intrinsic(_interlockedbittestandreset)
81# pragma intrinsic(_InterlockedAnd)
82# pragma intrinsic(_InterlockedOr)
83# pragma intrinsic(_InterlockedIncrement)
84# pragma intrinsic(_InterlockedDecrement)
85# pragma intrinsic(_InterlockedExchange)
86# pragma intrinsic(_InterlockedExchangeAdd)
87# pragma intrinsic(_InterlockedCompareExchange)
88# pragma intrinsic(_InterlockedCompareExchange64)
89# ifdef RT_ARCH_AMD64
90# pragma intrinsic(__stosq)
91# pragma intrinsic(__readcr8)
92# pragma intrinsic(__writecr8)
93# pragma intrinsic(_byteswap_uint64)
94# pragma intrinsic(_InterlockedExchange64)
95# endif
96# endif
97#endif
98#ifndef RT_INLINE_ASM_USES_INTRIN
99# define RT_INLINE_ASM_USES_INTRIN 0
100#endif
101
102
103
104/** @defgroup grp_asm ASM - Assembly Routines
105 * @ingroup grp_rt
106 *
107 * @remarks The difference between ordered and unordered atomic operations is that
108 * the former complete outstanding reads and writes before continuing,
109 * while the latter make no promises about the order. Even ordered
110 * operations do not, it seems, make a 100% promise as to whether the
111 * operation completes before any subsequent memory access.
112 * (Please correct if wrong.)
113 *
114 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
115 * are unordered (note the Uo).
116 *
117 * @{
118 */
119
120/** @def RT_INLINE_ASM_EXTERNAL
121 * Defined as 1 if the compiler does not support inline assembly.
122 * The ASM* functions will then be implemented in an external .asm file.
123 *
124 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
125 * inline assembly in their AMD64 compiler.
126 */
127#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
128# define RT_INLINE_ASM_EXTERNAL 1
129#else
130# define RT_INLINE_ASM_EXTERNAL 0
131#endif
132
133/** @def RT_INLINE_ASM_GNU_STYLE
134 * Defined as 1 if the compiler understands GNU-style inline assembly.
135 */
136#if defined(_MSC_VER)
137# define RT_INLINE_ASM_GNU_STYLE 0
138#else
139# define RT_INLINE_ASM_GNU_STYLE 1
140#endif
141
142
143/** @todo find a more proper place for this structure? */
144#pragma pack(1)
145/** IDTR */
146typedef struct RTIDTR
147{
148 /** Size of the IDT. */
149 uint16_t cbIdt;
150 /** Address of the IDT. */
151 uintptr_t pIdt;
152} RTIDTR, *PRTIDTR;
153#pragma pack()
154
155#pragma pack(1)
156/** GDTR */
157typedef struct RTGDTR
158{
159 /** Size of the GDT. */
160 uint16_t cbGdt;
161 /** Address of the GDT. */
162 uintptr_t pGdt;
163} RTGDTR, *PRTGDTR;
164#pragma pack()
165
166
167/** @def ASMReturnAddress
168 * Gets the return address of the current (or calling if you like) function or method.
169 */
170#ifdef _MSC_VER
171# ifdef __cplusplus
172extern "C"
173# endif
174void * _ReturnAddress(void);
175# pragma intrinsic(_ReturnAddress)
176# define ASMReturnAddress() _ReturnAddress()
177#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
178# define ASMReturnAddress() __builtin_return_address(0)
179#else
180# error "Unsupported compiler."
181#endif
182
183
184/**
185 * Gets the content of the IDTR CPU register.
186 * @param pIdtr Where to store the IDTR contents.
187 */
188#if RT_INLINE_ASM_EXTERNAL
189DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
190#else
191DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
192{
193# if RT_INLINE_ASM_GNU_STYLE
194 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
195# else
196 __asm
197 {
198# ifdef RT_ARCH_AMD64
199 mov rax, [pIdtr]
200 sidt [rax]
201# else
202 mov eax, [pIdtr]
203 sidt [eax]
204# endif
205 }
206# endif
207}
208#endif
209
210
211/**
212 * Sets the content of the IDTR CPU register.
213 * @param pIdtr Where to load the IDTR contents from
214 */
215#if RT_INLINE_ASM_EXTERNAL
216DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
217#else
218DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
219{
220# if RT_INLINE_ASM_GNU_STYLE
221 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
222# else
223 __asm
224 {
225# ifdef RT_ARCH_AMD64
226 mov rax, [pIdtr]
227 lidt [rax]
228# else
229 mov eax, [pIdtr]
230 lidt [eax]
231# endif
232 }
233# endif
234}
235#endif
236
237
238/**
239 * Gets the content of the GDTR CPU register.
240 * @param pGdtr Where to store the GDTR contents.
241 */
242#if RT_INLINE_ASM_EXTERNAL
243DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
244#else
245DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
246{
247# if RT_INLINE_ASM_GNU_STYLE
248 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
249# else
250 __asm
251 {
252# ifdef RT_ARCH_AMD64
253 mov rax, [pGdtr]
254 sgdt [rax]
255# else
256 mov eax, [pGdtr]
257 sgdt [eax]
258# endif
259 }
260# endif
261}
262#endif
263
264/**
265 * Get the CS register.
266 * @returns CS.
267 */
268#if RT_INLINE_ASM_EXTERNAL
269DECLASM(RTSEL) ASMGetCS(void);
270#else
271DECLINLINE(RTSEL) ASMGetCS(void)
272{
273 RTSEL SelCS;
274# if RT_INLINE_ASM_GNU_STYLE
275 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
276# else
277 __asm
278 {
279 mov ax, cs
280 mov [SelCS], ax
281 }
282# endif
283 return SelCS;
284}
285#endif
286
287
288/**
289 * Get the DS register.
290 * @returns DS.
291 */
292#if RT_INLINE_ASM_EXTERNAL
293DECLASM(RTSEL) ASMGetDS(void);
294#else
295DECLINLINE(RTSEL) ASMGetDS(void)
296{
297 RTSEL SelDS;
298# if RT_INLINE_ASM_GNU_STYLE
299 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
300# else
301 __asm
302 {
303 mov ax, ds
304 mov [SelDS], ax
305 }
306# endif
307 return SelDS;
308}
309#endif
310
311
312/**
313 * Get the ES register.
314 * @returns ES.
315 */
316#if RT_INLINE_ASM_EXTERNAL
317DECLASM(RTSEL) ASMGetES(void);
318#else
319DECLINLINE(RTSEL) ASMGetES(void)
320{
321 RTSEL SelES;
322# if RT_INLINE_ASM_GNU_STYLE
323 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
324# else
325 __asm
326 {
327 mov ax, es
328 mov [SelES], ax
329 }
330# endif
331 return SelES;
332}
333#endif
334
335
336/**
337 * Get the FS register.
338 * @returns FS.
339 */
340#if RT_INLINE_ASM_EXTERNAL
341DECLASM(RTSEL) ASMGetFS(void);
342#else
343DECLINLINE(RTSEL) ASMGetFS(void)
344{
345 RTSEL SelFS;
346# if RT_INLINE_ASM_GNU_STYLE
347 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
348# else
349 __asm
350 {
351 mov ax, fs
352 mov [SelFS], ax
353 }
354# endif
355 return SelFS;
356}
357#endif
358
359
360/**
361 * Get the GS register.
362 * @returns GS.
363 */
364#if RT_INLINE_ASM_EXTERNAL
365DECLASM(RTSEL) ASMGetGS(void);
366#else
367DECLINLINE(RTSEL) ASMGetGS(void)
368{
369 RTSEL SelGS;
370# if RT_INLINE_ASM_GNU_STYLE
371 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
372# else
373 __asm
374 {
375 mov ax, gs
376 mov [SelGS], ax
377 }
378# endif
379 return SelGS;
380}
381#endif
382
383
384/**
385 * Get the SS register.
386 * @returns SS.
387 */
388#if RT_INLINE_ASM_EXTERNAL
389DECLASM(RTSEL) ASMGetSS(void);
390#else
391DECLINLINE(RTSEL) ASMGetSS(void)
392{
393 RTSEL SelSS;
394# if RT_INLINE_ASM_GNU_STYLE
395 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
396# else
397 __asm
398 {
399 mov ax, ss
400 mov [SelSS], ax
401 }
402# endif
403 return SelSS;
404}
405#endif
406
407
408/**
409 * Get the TR register.
410 * @returns TR.
411 */
412#if RT_INLINE_ASM_EXTERNAL
413DECLASM(RTSEL) ASMGetTR(void);
414#else
415DECLINLINE(RTSEL) ASMGetTR(void)
416{
417 RTSEL SelTR;
418# if RT_INLINE_ASM_GNU_STYLE
419 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
420# else
421 __asm
422 {
423 str ax
424 mov [SelTR], ax
425 }
426# endif
427 return SelTR;
428}
429#endif
430
431
432/**
433 * Get the [RE]FLAGS register.
434 * @returns [RE]FLAGS.
435 */
436#if RT_INLINE_ASM_EXTERNAL
437DECLASM(RTCCUINTREG) ASMGetFlags(void);
438#else
439DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
440{
441 RTCCUINTREG uFlags;
442# if RT_INLINE_ASM_GNU_STYLE
443# ifdef RT_ARCH_AMD64
444 __asm__ __volatile__("pushfq\n\t"
445 "popq %0\n\t"
446 : "=g" (uFlags));
447# else
448 __asm__ __volatile__("pushfl\n\t"
449 "popl %0\n\t"
450 : "=g" (uFlags));
451# endif
452# else
453 __asm
454 {
455# ifdef RT_ARCH_AMD64
456 pushfq
457 pop [uFlags]
458# else
459 pushfd
460 pop [uFlags]
461# endif
462 }
463# endif
464 return uFlags;
465}
466#endif
467
468
469/**
470 * Set the [RE]FLAGS register.
471 * @param uFlags The new [RE]FLAGS value.
472 */
473#if RT_INLINE_ASM_EXTERNAL
474DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
475#else
476DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
477{
478# if RT_INLINE_ASM_GNU_STYLE
479# ifdef RT_ARCH_AMD64
480 __asm__ __volatile__("pushq %0\n\t"
481 "popfq\n\t"
482 : : "g" (uFlags));
483# else
484 __asm__ __volatile__("pushl %0\n\t"
485 "popfl\n\t"
486 : : "g" (uFlags));
487# endif
488# else
489 __asm
490 {
491# ifdef RT_ARCH_AMD64
492 push [uFlags]
493 popfq
494# else
495 push [uFlags]
496 popfd
497# endif
498 }
499# endif
500}
501#endif
502
503
504/**
505 * Gets the content of the CPU timestamp counter register.
506 *
507 * @returns TSC.
508 */
509#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
510DECLASM(uint64_t) ASMReadTSC(void);
511#else
512DECLINLINE(uint64_t) ASMReadTSC(void)
513{
514 RTUINT64U u;
515# if RT_INLINE_ASM_GNU_STYLE
516 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
517# else
518# if RT_INLINE_ASM_USES_INTRIN
519 u.u = __rdtsc();
520# else
521 __asm
522 {
523 rdtsc
524 mov [u.s.Lo], eax
525 mov [u.s.Hi], edx
526 }
527# endif
528# endif
529 return u.u;
530}
531#endif
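
/* Illustrative usage sketch: rough cycle counting with ASMReadTSC. The function
   and parameter names are hypothetical, and the delta is only indicative on SMP
   or variable-frequency CPUs. */
DECLINLINE(uint64_t) ExampleMeasureTscTicks(void (*pfnWork)(void))
{
    const uint64_t uStart = ASMReadTSC();
    pfnWork();                          /* the code being timed */
    return ASMReadTSC() - uStart;       /* elapsed TSC ticks */
}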
532
533
534/**
535 * Performs the cpuid instruction returning all registers.
536 *
537 * @param uOperator CPUID operation (eax).
538 * @param pvEAX Where to store eax.
539 * @param pvEBX Where to store ebx.
540 * @param pvECX Where to store ecx.
541 * @param pvEDX Where to store edx.
542 * @remark We're using void pointers to ease the use of special bitfield structures and such.
543 */
544#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
545DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
546#else
547DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
548{
549# if RT_INLINE_ASM_GNU_STYLE
550# ifdef RT_ARCH_AMD64
551 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
552 __asm__ ("cpuid\n\t"
553 : "=a" (uRAX),
554 "=b" (uRBX),
555 "=c" (uRCX),
556 "=d" (uRDX)
557 : "0" (uOperator));
558 *(uint32_t *)pvEAX = (uint32_t)uRAX;
559 *(uint32_t *)pvEBX = (uint32_t)uRBX;
560 *(uint32_t *)pvECX = (uint32_t)uRCX;
561 *(uint32_t *)pvEDX = (uint32_t)uRDX;
562# else
563 __asm__ ("xchgl %%ebx, %1\n\t"
564 "cpuid\n\t"
565 "xchgl %%ebx, %1\n\t"
566 : "=a" (*(uint32_t *)pvEAX),
567 "=r" (*(uint32_t *)pvEBX),
568 "=c" (*(uint32_t *)pvECX),
569 "=d" (*(uint32_t *)pvEDX)
570 : "0" (uOperator));
571# endif
572
573# elif RT_INLINE_ASM_USES_INTRIN
574 int aInfo[4];
575 __cpuid(aInfo, uOperator);
576 *(uint32_t *)pvEAX = aInfo[0];
577 *(uint32_t *)pvEBX = aInfo[1];
578 *(uint32_t *)pvECX = aInfo[2];
579 *(uint32_t *)pvEDX = aInfo[3];
580
581# else
582 uint32_t uEAX;
583 uint32_t uEBX;
584 uint32_t uECX;
585 uint32_t uEDX;
586 __asm
587 {
588 push ebx
589 mov eax, [uOperator]
590 cpuid
591 mov [uEAX], eax
592 mov [uEBX], ebx
593 mov [uECX], ecx
594 mov [uEDX], edx
595 pop ebx
596 }
597 *(uint32_t *)pvEAX = uEAX;
598 *(uint32_t *)pvEBX = uEBX;
599 *(uint32_t *)pvECX = uECX;
600 *(uint32_t *)pvEDX = uEDX;
601# endif
602}
603#endif
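
/* Illustrative usage sketch: fetching the CPU vendor string with ASMCpuId.
   Leaf 0 returns the vendor in EBX, EDX, ECX (in that order); the void pointer
   parameters make it easy to write straight into a char buffer. The function
   name is hypothetical and pszVendor must have room for 13 bytes. */
DECLINLINE(void) ExampleGetCpuVendor(char *pszVendor)
{
    uint32_t uEAX;
    ASMCpuId(0, &uEAX, pszVendor, pszVendor + 8, pszVendor + 4);
    pszVendor[12] = '\0';
}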
604
605
606/**
607 * Performs the cpuid instruction returning all registers.
608 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
609 *
610 * @param uOperator CPUID operation (eax).
611 * @param uIdxECX ecx index
612 * @param pvEAX Where to store eax.
613 * @param pvEBX Where to store ebx.
614 * @param pvECX Where to store ecx.
615 * @param pvEDX Where to store edx.
616 * @remark We're using void pointers to ease the use of special bitfield structures and such.
617 */
618#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
619DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
620#else
621DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
622{
623# if RT_INLINE_ASM_GNU_STYLE
624# ifdef RT_ARCH_AMD64
625 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
626 __asm__ ("cpuid\n\t"
627 : "=a" (uRAX),
628 "=b" (uRBX),
629 "=c" (uRCX),
630 "=d" (uRDX)
631 : "0" (uOperator),
632 "2" (uIdxECX));
633 *(uint32_t *)pvEAX = (uint32_t)uRAX;
634 *(uint32_t *)pvEBX = (uint32_t)uRBX;
635 *(uint32_t *)pvECX = (uint32_t)uRCX;
636 *(uint32_t *)pvEDX = (uint32_t)uRDX;
637# else
638 __asm__ ("xchgl %%ebx, %1\n\t"
639 "cpuid\n\t"
640 "xchgl %%ebx, %1\n\t"
641 : "=a" (*(uint32_t *)pvEAX),
642 "=r" (*(uint32_t *)pvEBX),
643 "=c" (*(uint32_t *)pvECX),
644 "=d" (*(uint32_t *)pvEDX)
645 : "0" (uOperator),
646 "2" (uIdxECX));
647# endif
648
649# elif RT_INLINE_ASM_USES_INTRIN
650 int aInfo[4];
651 /** @todo need an intrinsic that also takes the ECX index; __cpuid() here ignores uIdxECX. */
652 __cpuid(aInfo, uOperator);
653 *(uint32_t *)pvEAX = aInfo[0];
654 *(uint32_t *)pvEBX = aInfo[1];
655 *(uint32_t *)pvECX = aInfo[2];
656 *(uint32_t *)pvEDX = aInfo[3];
657
658# else
659 uint32_t uEAX;
660 uint32_t uEBX;
661 uint32_t uECX;
662 uint32_t uEDX;
663 __asm
664 {
665 push ebx
666 mov eax, [uOperator]
667 mov ecx, [uIdxECX]
668 cpuid
669 mov [uEAX], eax
670 mov [uEBX], ebx
671 mov [uECX], ecx
672 mov [uEDX], edx
673 pop ebx
674 }
675 *(uint32_t *)pvEAX = uEAX;
676 *(uint32_t *)pvEBX = uEBX;
677 *(uint32_t *)pvECX = uECX;
678 *(uint32_t *)pvEDX = uEDX;
679# endif
680}
681#endif
682
683
684/**
685 * Performs the cpuid instruction returning ecx and edx.
686 *
687 * @param uOperator CPUID operation (eax).
688 * @param pvECX Where to store ecx.
689 * @param pvEDX Where to store edx.
690 * @remark We're using void pointers to ease the use of special bitfield structures and such.
691 */
692#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
693DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
694#else
695DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
696{
697 uint32_t uEBX;
698 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
699}
700#endif
701
702
703/**
704 * Performs the cpuid instruction returning edx.
705 *
706 * @param uOperator CPUID operation (eax).
707 * @returns EDX after cpuid operation.
708 */
709#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
710DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
711#else
712DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
713{
714 RTCCUINTREG xDX;
715# if RT_INLINE_ASM_GNU_STYLE
716# ifdef RT_ARCH_AMD64
717 RTCCUINTREG uSpill;
718 __asm__ ("cpuid"
719 : "=a" (uSpill),
720 "=d" (xDX)
721 : "0" (uOperator)
722 : "rbx", "rcx");
723# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
724 __asm__ ("push %%ebx\n\t"
725 "cpuid\n\t"
726 "pop %%ebx\n\t"
727 : "=a" (uOperator),
728 "=d" (xDX)
729 : "0" (uOperator)
730 : "ecx");
731# else
732 __asm__ ("cpuid"
733 : "=a" (uOperator),
734 "=d" (xDX)
735 : "0" (uOperator)
736 : "ebx", "ecx");
737# endif
738
739# elif RT_INLINE_ASM_USES_INTRIN
740 int aInfo[4];
741 __cpuid(aInfo, uOperator);
742 xDX = aInfo[3];
743
744# else
745 __asm
746 {
747 push ebx
748 mov eax, [uOperator]
749 cpuid
750 mov [xDX], edx
751 pop ebx
752 }
753# endif
754 return (uint32_t)xDX;
755}
756#endif
757
758
759/**
760 * Performs the cpuid instruction returning ecx.
761 *
762 * @param uOperator CPUID operation (eax).
763 * @returns ECX after cpuid operation.
764 */
765#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
766DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
767#else
768DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
769{
770 RTCCUINTREG xCX;
771# if RT_INLINE_ASM_GNU_STYLE
772# ifdef RT_ARCH_AMD64
773 RTCCUINTREG uSpill;
774 __asm__ ("cpuid"
775 : "=a" (uSpill),
776 "=c" (xCX)
777 : "0" (uOperator)
778 : "rbx", "rdx");
779# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
780 __asm__ ("push %%ebx\n\t"
781 "cpuid\n\t"
782 "pop %%ebx\n\t"
783 : "=a" (uOperator),
784 "=c" (xCX)
785 : "0" (uOperator)
786 : "edx");
787# else
788 __asm__ ("cpuid"
789 : "=a" (uOperator),
790 "=c" (xCX)
791 : "0" (uOperator)
792 : "ebx", "edx");
793
794# endif
795
796# elif RT_INLINE_ASM_USES_INTRIN
797 int aInfo[4];
798 __cpuid(aInfo, uOperator);
799 xCX = aInfo[2];
800
801# else
802 __asm
803 {
804 push ebx
805 mov eax, [uOperator]
806 cpuid
807 mov [xCX], ecx
808 pop ebx
809 }
810# endif
811 return (uint32_t)xCX;
812}
813#endif
814
815
816/**
817 * Checks if the current CPU supports CPUID.
818 *
819 * @returns true if CPUID is supported.
820 */
821DECLINLINE(bool) ASMHasCpuId(void)
822{
823#ifdef RT_ARCH_AMD64
824 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
825#else /* !RT_ARCH_AMD64 */
826 bool fRet = false;
827# if RT_INLINE_ASM_GNU_STYLE
828 uint32_t u1;
829 uint32_t u2;
830 __asm__ ("pushf\n\t"
831 "pop %1\n\t"
832 "mov %1, %2\n\t"
833 "xorl $0x200000, %1\n\t"
834 "push %1\n\t"
835 "popf\n\t"
836 "pushf\n\t"
837 "pop %1\n\t"
838 "cmpl %1, %2\n\t"
839 "setne %0\n\t"
840 "push %2\n\t"
841 "popf\n\t"
842 : "=m" (fRet), "=r" (u1), "=r" (u2));
843# else
844 __asm
845 {
846 pushfd
847 pop eax
848 mov ebx, eax
849 xor eax, 0200000h
850 push eax
851 popfd
852 pushfd
853 pop eax
854 cmp eax, ebx
855 setne fRet
856 push ebx
857 popfd
858 }
859# endif
860 return fRet;
861#endif /* !RT_ARCH_AMD64 */
862}
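
/* Illustrative usage sketch: combining ASMHasCpuId with ASMCpuId_EDX to test a
   standard feature bit, here bit 4 of leaf 1 EDX (TSC). The function name is
   hypothetical; RT_BIT comes from iprt/cdefs.h. */
DECLINLINE(bool) ExampleHasTsc(void)
{
    if (!ASMHasCpuId())
        return false;
    return !!(ASMCpuId_EDX(1) & RT_BIT(4));     /* CPUID.1:EDX[4] = TSC */
}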
863
864
865/**
866 * Gets the APIC ID of the current CPU.
867 *
868 * @returns the APIC ID.
869 */
870#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
871DECLASM(uint8_t) ASMGetApicId(void);
872#else
873DECLINLINE(uint8_t) ASMGetApicId(void)
874{
875 RTCCUINTREG xBX;
876# if RT_INLINE_ASM_GNU_STYLE
877# ifdef RT_ARCH_AMD64
878 RTCCUINTREG uSpill;
879 __asm__ ("cpuid"
880 : "=a" (uSpill),
881 "=b" (xBX)
882 : "0" (1)
883 : "rcx", "rdx");
884# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
885 RTCCUINTREG uSpill;
886 __asm__ ("mov %%ebx,%1\n\t"
887 "cpuid\n\t"
888 "xchgl %%ebx,%1\n\t"
889 : "=a" (uSpill),
890 "=r" (xBX)
891 : "0" (1)
892 : "ecx", "edx");
893# else
894 RTCCUINTREG uSpill;
895 __asm__ ("cpuid"
896 : "=a" (uSpill),
897 "=b" (xBX)
898 : "0" (1)
899 : "ecx", "edx");
900# endif
901
902# elif RT_INLINE_ASM_USES_INTRIN
903 int aInfo[4];
904 __cpuid(aInfo, 1);
905 xBX = aInfo[1];
906
907# else
908 __asm
909 {
910 push ebx
911 mov eax, 1
912 cpuid
913 mov [xBX], ebx
914 pop ebx
915 }
916# endif
917 return (uint8_t)(xBX >> 24);
918}
919#endif
920
921/**
922 * Get cr0.
923 * @returns cr0.
924 */
925#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
926DECLASM(RTCCUINTREG) ASMGetCR0(void);
927#else
928DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
929{
930 RTCCUINTREG uCR0;
931# if RT_INLINE_ASM_USES_INTRIN
932 uCR0 = __readcr0();
933
934# elif RT_INLINE_ASM_GNU_STYLE
935# ifdef RT_ARCH_AMD64
936 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
937# else
938 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
939# endif
940# else
941 __asm
942 {
943# ifdef RT_ARCH_AMD64
944 mov rax, cr0
945 mov [uCR0], rax
946# else
947 mov eax, cr0
948 mov [uCR0], eax
949# endif
950 }
951# endif
952 return uCR0;
953}
954#endif
955
956
957/**
958 * Sets the CR0 register.
959 * @param uCR0 The new CR0 value.
960 */
961#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
962DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
963#else
964DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
965{
966# if RT_INLINE_ASM_USES_INTRIN
967 __writecr0(uCR0);
968
969# elif RT_INLINE_ASM_GNU_STYLE
970# ifdef RT_ARCH_AMD64
971 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
972# else
973 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
974# endif
975# else
976 __asm
977 {
978# ifdef RT_ARCH_AMD64
979 mov rax, [uCR0]
980 mov cr0, rax
981# else
982 mov eax, [uCR0]
983 mov cr0, eax
984# endif
985 }
986# endif
987}
988#endif
989
990
991/**
992 * Get cr2.
993 * @returns cr2.
994 */
995#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
996DECLASM(RTCCUINTREG) ASMGetCR2(void);
997#else
998DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
999{
1000 RTCCUINTREG uCR2;
1001# if RT_INLINE_ASM_USES_INTRIN
1002 uCR2 = __readcr2();
1003
1004# elif RT_INLINE_ASM_GNU_STYLE
1005# ifdef RT_ARCH_AMD64
1006 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
1007# else
1008 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
1009# endif
1010# else
1011 __asm
1012 {
1013# ifdef RT_ARCH_AMD64
1014 mov rax, cr2
1015 mov [uCR2], rax
1016# else
1017 mov eax, cr2
1018 mov [uCR2], eax
1019# endif
1020 }
1021# endif
1022 return uCR2;
1023}
1024#endif
1025
1026
1027/**
1028 * Sets the CR2 register.
1029 * @param uCR2 The new CR2 value.
1030 */
1031#if RT_INLINE_ASM_EXTERNAL
1032DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1033#else
1034DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1035{
1036# if RT_INLINE_ASM_GNU_STYLE
1037# ifdef RT_ARCH_AMD64
1038 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1039# else
1040 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1041# endif
1042# else
1043 __asm
1044 {
1045# ifdef RT_ARCH_AMD64
1046 mov rax, [uCR2]
1047 mov cr2, rax
1048# else
1049 mov eax, [uCR2]
1050 mov cr2, eax
1051# endif
1052 }
1053# endif
1054}
1055#endif
1056
1057
1058/**
1059 * Get cr3.
1060 * @returns cr3.
1061 */
1062#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1063DECLASM(RTCCUINTREG) ASMGetCR3(void);
1064#else
1065DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1066{
1067 RTCCUINTREG uCR3;
1068# if RT_INLINE_ASM_USES_INTRIN
1069 uCR3 = __readcr3();
1070
1071# elif RT_INLINE_ASM_GNU_STYLE
1072# ifdef RT_ARCH_AMD64
1073 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
1074# else
1075 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
1076# endif
1077# else
1078 __asm
1079 {
1080# ifdef RT_ARCH_AMD64
1081 mov rax, cr3
1082 mov [uCR3], rax
1083# else
1084 mov eax, cr3
1085 mov [uCR3], eax
1086# endif
1087 }
1088# endif
1089 return uCR3;
1090}
1091#endif
1092
1093
1094/**
1095 * Sets the CR3 register.
1096 *
1097 * @param uCR3 New CR3 value.
1098 */
1099#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1100DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1101#else
1102DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1103{
1104# if RT_INLINE_ASM_USES_INTRIN
1105 __writecr3(uCR3);
1106
1107# elif RT_INLINE_ASM_GNU_STYLE
1108# ifdef RT_ARCH_AMD64
1109 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1110# else
1111 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1112# endif
1113# else
1114 __asm
1115 {
1116# ifdef RT_ARCH_AMD64
1117 mov rax, [uCR3]
1118 mov cr3, rax
1119# else
1120 mov eax, [uCR3]
1121 mov cr3, eax
1122# endif
1123 }
1124# endif
1125}
1126#endif
1127
1128
1129/**
1130 * Reloads the CR3 register.
1131 */
1132#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1133DECLASM(void) ASMReloadCR3(void);
1134#else
1135DECLINLINE(void) ASMReloadCR3(void)
1136{
1137# if RT_INLINE_ASM_USES_INTRIN
1138 __writecr3(__readcr3());
1139
1140# elif RT_INLINE_ASM_GNU_STYLE
1141 RTCCUINTREG u;
1142# ifdef RT_ARCH_AMD64
1143 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1144 "movq %0, %%cr3\n\t"
1145 : "=r" (u));
1146# else
1147 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1148 "movl %0, %%cr3\n\t"
1149 : "=r" (u));
1150# endif
1151# else
1152 __asm
1153 {
1154# ifdef RT_ARCH_AMD64
1155 mov rax, cr3
1156 mov cr3, rax
1157# else
1158 mov eax, cr3
1159 mov cr3, eax
1160# endif
1161 }
1162# endif
1163}
1164#endif
1165
1166
1167/**
1168 * Get cr4.
1169 * @returns cr4.
1170 */
1171#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1172DECLASM(RTCCUINTREG) ASMGetCR4(void);
1173#else
1174DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1175{
1176 RTCCUINTREG uCR4;
1177# if RT_INLINE_ASM_USES_INTRIN
1178 uCR4 = __readcr4();
1179
1180# elif RT_INLINE_ASM_GNU_STYLE
1181# ifdef RT_ARCH_AMD64
1182 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1183# else
1184 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1185# endif
1186# else
1187 __asm
1188 {
1189# ifdef RT_ARCH_AMD64
1190 mov rax, cr4
1191 mov [uCR4], rax
1192# else
1193 push eax /* just in case */
1194 /*mov eax, cr4*/
1195 _emit 0x0f
1196 _emit 0x20
1197 _emit 0xe0
1198 mov [uCR4], eax
1199 pop eax
1200# endif
1201 }
1202# endif
1203 return uCR4;
1204}
1205#endif
1206
1207
1208/**
1209 * Sets the CR4 register.
1210 *
1211 * @param uCR4 New CR4 value.
1212 */
1213#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1214DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1215#else
1216DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1217{
1218# if RT_INLINE_ASM_USES_INTRIN
1219 __writecr4(uCR4);
1220
1221# elif RT_INLINE_ASM_GNU_STYLE
1222# ifdef RT_ARCH_AMD64
1223 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1224# else
1225 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1226# endif
1227# else
1228 __asm
1229 {
1230# ifdef RT_ARCH_AMD64
1231 mov rax, [uCR4]
1232 mov cr4, rax
1233# else
1234 mov eax, [uCR4]
1235 _emit 0x0F
1236 _emit 0x22
1237 _emit 0xE0 /* mov cr4, eax */
1238# endif
1239 }
1240# endif
1241}
1242#endif
1243
1244
1245/**
1246 * Get cr8.
1247 * @returns cr8.
1248 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1249 */
1250#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1251DECLASM(RTCCUINTREG) ASMGetCR8(void);
1252#else
1253DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1254{
1255# ifdef RT_ARCH_AMD64
1256 RTCCUINTREG uCR8;
1257# if RT_INLINE_ASM_USES_INTRIN
1258 uCR8 = __readcr8();
1259
1260# elif RT_INLINE_ASM_GNU_STYLE
1261 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1262# else
1263 __asm
1264 {
1265 mov rax, cr8
1266 mov [uCR8], rax
1267 }
1268# endif
1269 return uCR8;
1270# else /* !RT_ARCH_AMD64 */
1271 return 0;
1272# endif /* !RT_ARCH_AMD64 */
1273}
1274#endif
1275
1276
1277/**
1278 * Enables interrupts (EFLAGS.IF).
1279 */
1280#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1281DECLASM(void) ASMIntEnable(void);
1282#else
1283DECLINLINE(void) ASMIntEnable(void)
1284{
1285# if RT_INLINE_ASM_GNU_STYLE
1286 __asm("sti\n");
1287# elif RT_INLINE_ASM_USES_INTRIN
1288 _enable();
1289# else
1290 __asm sti
1291# endif
1292}
1293#endif
1294
1295
1296/**
1297 * Disables interrupts (!EFLAGS.IF).
1298 */
1299#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1300DECLASM(void) ASMIntDisable(void);
1301#else
1302DECLINLINE(void) ASMIntDisable(void)
1303{
1304# if RT_INLINE_ASM_GNU_STYLE
1305 __asm("cli\n");
1306# elif RT_INLINE_ASM_USES_INTRIN
1307 _disable();
1308# else
1309 __asm cli
1310# endif
1311}
1312#endif
1313
1314
1315/**
1316 * Disables interrupts and returns previous xFLAGS.
1317 */
1318#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1319DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1320#else
1321DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1322{
1323 RTCCUINTREG xFlags;
1324# if RT_INLINE_ASM_GNU_STYLE
1325# ifdef RT_ARCH_AMD64
1326 __asm__ __volatile__("pushfq\n\t"
1327 "cli\n\t"
1328 "popq %0\n\t"
1329 : "=rm" (xFlags));
1330# else
1331 __asm__ __volatile__("pushfl\n\t"
1332 "cli\n\t"
1333 "popl %0\n\t"
1334 : "=rm" (xFlags));
1335# endif
1336# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1337 xFlags = ASMGetFlags();
1338 _disable();
1339# else
1340 __asm {
1341 pushfd
1342 cli
1343 pop [xFlags]
1344 }
1345# endif
1346 return xFlags;
1347}
1348#endif
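
/* Illustrative usage sketch: the usual pattern for a short stretch of code that
   must not be interrupted, using ASMIntDisableFlags to save + disable and
   ASMSetFlags to restore. The function name is hypothetical and this only makes
   sense in ring-0 / raw-mode context code. */
DECLINLINE(void) ExampleDoUninterruptibleWork(void)
{
    const RTCCUINTREG fSavedFlags = ASMIntDisableFlags();
    /* ... touch per-CPU state that an interrupt handler might also touch ... */
    ASMSetFlags(fSavedFlags);   /* restores the previous EFLAGS.IF */
}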
1349
1350
1351/**
1352 * Reads a machine specific register.
1353 *
1354 * @returns Register content.
1355 * @param uRegister Register to read.
1356 */
1357#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1358DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1359#else
1360DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1361{
1362 RTUINT64U u;
1363# if RT_INLINE_ASM_GNU_STYLE
1364 __asm__ ("rdmsr\n\t"
1365 : "=a" (u.s.Lo),
1366 "=d" (u.s.Hi)
1367 : "c" (uRegister));
1368
1369# elif RT_INLINE_ASM_USES_INTRIN
1370 u.u = __readmsr(uRegister);
1371
1372# else
1373 __asm
1374 {
1375 mov ecx, [uRegister]
1376 rdmsr
1377 mov [u.s.Lo], eax
1378 mov [u.s.Hi], edx
1379 }
1380# endif
1381
1382 return u.u;
1383}
1384#endif
1385
1386
1387/**
1388 * Writes a machine specific register.
1389 *
1390 * @returns Register content.
1391 * @param uRegister Register to write to.
1392 * @param u64Val Value to write.
1393 */
1394#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1395DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1396#else
1397DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1398{
1399 RTUINT64U u;
1400
1401 u.u = u64Val;
1402# if RT_INLINE_ASM_GNU_STYLE
1403 __asm__ __volatile__("wrmsr\n\t"
1404 ::"a" (u.s.Lo),
1405 "d" (u.s.Hi),
1406 "c" (uRegister));
1407
1408# elif RT_INLINE_ASM_USES_INTRIN
1409 __writemsr(uRegister, u.u);
1410
1411# else
1412 __asm
1413 {
1414 mov ecx, [uRegister]
1415 mov edx, [u.s.Hi]
1416 mov eax, [u.s.Lo]
1417 wrmsr
1418 }
1419# endif
1420}
1421#endif
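
/* Illustrative usage sketch: a read-modify-write of a machine specific register
   using ASMRdMsr/ASMWrMsr. The MSR (0x1b, IA32_APIC_BASE) and the bit set (11,
   the APIC global enable bit) are just examples; MSR access requires ring-0. */
DECLINLINE(void) ExampleEnableApicGlobally(void)
{
    uint64_t u64ApicBase = ASMRdMsr(0x1b /* IA32_APIC_BASE */);
    u64ApicBase |= RT_BIT(11);                  /* APIC global enable */
    ASMWrMsr(0x1b, u64ApicBase);
}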
1422
1423
1424/**
1425 * Reads low part of a machine specific register.
1426 *
1427 * @returns Register content.
1428 * @param uRegister Register to read.
1429 */
1430#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1431DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1432#else
1433DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1434{
1435 uint32_t u32;
1436# if RT_INLINE_ASM_GNU_STYLE
1437 __asm__ ("rdmsr\n\t"
1438 : "=a" (u32)
1439 : "c" (uRegister)
1440 : "edx");
1441
1442# elif RT_INLINE_ASM_USES_INTRIN
1443 u32 = (uint32_t)__readmsr(uRegister);
1444
1445# else
1446 __asm
1447 {
1448 mov ecx, [uRegister]
1449 rdmsr
1450 mov [u32], eax
1451 }
1452# endif
1453
1454 return u32;
1455}
1456#endif
1457
1458
1459/**
1460 * Reads high part of a machine specific register.
1461 *
1462 * @returns Register content.
1463 * @param uRegister Register to read.
1464 */
1465#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1466DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1467#else
1468DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1469{
1470 uint32_t u32;
1471# if RT_INLINE_ASM_GNU_STYLE
1472 __asm__ ("rdmsr\n\t"
1473 : "=d" (u32)
1474 : "c" (uRegister)
1475 : "eax");
1476
1477# elif RT_INLINE_ASM_USES_INTRIN
1478 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1479
1480# else
1481 __asm
1482 {
1483 mov ecx, [uRegister]
1484 rdmsr
1485 mov [u32], edx
1486 }
1487# endif
1488
1489 return u32;
1490}
1491#endif
1492
1493
1494/**
1495 * Gets dr7.
1496 *
1497 * @returns dr7.
1498 */
1499#if RT_INLINE_ASM_EXTERNAL
1500DECLASM(RTCCUINTREG) ASMGetDR7(void);
1501#else
1502DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1503{
1504 RTCCUINTREG uDR7;
1505# if RT_INLINE_ASM_GNU_STYLE
1506# ifdef RT_ARCH_AMD64
1507 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1508# else
1509 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1510# endif
1511# else
1512 __asm
1513 {
1514# ifdef RT_ARCH_AMD64
1515 mov rax, dr7
1516 mov [uDR7], rax
1517# else
1518 mov eax, dr7
1519 mov [uDR7], eax
1520# endif
1521 }
1522# endif
1523 return uDR7;
1524}
1525#endif
1526
1527
1528/**
1529 * Gets dr6.
1530 *
1531 * @returns dr6.
1532 */
1533#if RT_INLINE_ASM_EXTERNAL
1534DECLASM(RTCCUINTREG) ASMGetDR6(void);
1535#else
1536DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1537{
1538 RTCCUINTREG uDR6;
1539# if RT_INLINE_ASM_GNU_STYLE
1540# ifdef RT_ARCH_AMD64
1541 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1542# else
1543 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1544# endif
1545# else
1546 __asm
1547 {
1548# ifdef RT_ARCH_AMD64
1549 mov rax, dr6
1550 mov [uDR6], rax
1551# else
1552 mov eax, dr6
1553 mov [uDR6], eax
1554# endif
1555 }
1556# endif
1557 return uDR6;
1558}
1559#endif
1560
1561
1562/**
1563 * Reads and clears DR6.
1564 *
1565 * @returns DR6.
1566 */
1567#if RT_INLINE_ASM_EXTERNAL
1568DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1569#else
1570DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1571{
1572 RTCCUINTREG uDR6;
1573# if RT_INLINE_ASM_GNU_STYLE
1574 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1575# ifdef RT_ARCH_AMD64
1576 __asm__ ("movq %%dr6, %0\n\t"
1577 "movq %1, %%dr6\n\t"
1578 : "=r" (uDR6)
1579 : "r" (uNewValue));
1580# else
1581 __asm__ ("movl %%dr6, %0\n\t"
1582 "movl %1, %%dr6\n\t"
1583 : "=r" (uDR6)
1584 : "r" (uNewValue));
1585# endif
1586# else
1587 __asm
1588 {
1589# ifdef RT_ARCH_AMD64
1590 mov rax, dr6
1591 mov [uDR6], rax
1592 mov rcx, rax
1593 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1594 mov dr6, rcx
1595# else
1596 mov eax, dr6
1597 mov [uDR6], eax
1598 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1599 mov dr6, ecx
1600# endif
1601 }
1602# endif
1603 return uDR6;
1604}
1605#endif
1606
1607
1608/**
1609 * Compiler memory barrier.
1610 *
1611 * Ensures that the compiler does not keep memory values cached in registers or
1612 * temporary stack slots, and that all outstanding writes are emitted, before returning from this function.
1613 *
1614 * This function must be used if non-volatile data is modified by a
1615 * device or the VMM. Typical cases are port access, MMIO access,
1616 * trapping instruction, etc.
1617 */
1618#if RT_INLINE_ASM_GNU_STYLE
1619# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1620#elif RT_INLINE_ASM_USES_INTRIN
1621# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1622#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1623DECLINLINE(void) ASMCompilerBarrier(void)
1624{
1625 __asm
1626 {
1627 }
1628}
1629#endif
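
/* Illustrative usage sketch: using ASMCompilerBarrier when a device or the VMM
   can change memory behind the compiler's back. The doorbell/mailbox layout and
   function name are hypothetical; the barrier only constrains the compiler, not
   the CPU. */
DECLINLINE(uint32_t) ExampleRingDoorbellAndPoll(uint32_t *pu32Doorbell, uint32_t *pu32Mailbox)
{
    *pu32Doorbell = 1;          /* trapped write that makes the VMM update the mailbox */
    ASMCompilerBarrier();       /* force the write out and the mailbox to be re-read */
    return *pu32Mailbox;
}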
1630
1631
1632/**
1633 * Writes an 8-bit unsigned integer to an I/O port, ordered.
1634 *
1635 * @param Port I/O port to write to.
1636 * @param u8 8-bit integer to write.
1637 */
1638#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1639DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1640#else
1641DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1642{
1643# if RT_INLINE_ASM_GNU_STYLE
1644 __asm__ __volatile__("outb %b1, %w0\n\t"
1645 :: "Nd" (Port),
1646 "a" (u8));
1647
1648# elif RT_INLINE_ASM_USES_INTRIN
1649 __outbyte(Port, u8);
1650
1651# else
1652 __asm
1653 {
1654 mov dx, [Port]
1655 mov al, [u8]
1656 out dx, al
1657 }
1658# endif
1659}
1660#endif
1661
1662
1663/**
1664 * Gets an 8-bit unsigned integer from an I/O port, ordered.
1665 *
1666 * @returns 8-bit integer.
1667 * @param Port I/O port to read from.
1668 */
1669#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1670DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1671#else
1672DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1673{
1674 uint8_t u8;
1675# if RT_INLINE_ASM_GNU_STYLE
1676 __asm__ __volatile__("inb %w1, %b0\n\t"
1677 : "=a" (u8)
1678 : "Nd" (Port));
1679
1680# elif RT_INLINE_ASM_USES_INTRIN
1681 u8 = __inbyte(Port);
1682
1683# else
1684 __asm
1685 {
1686 mov dx, [Port]
1687 in al, dx
1688 mov [u8], al
1689 }
1690# endif
1691 return u8;
1692}
1693#endif
1694
1695
1696/**
1697 * Writes a 16-bit unsigned integer to an I/O port, ordered.
1698 *
1699 * @param Port I/O port to write to.
1700 * @param u16 16-bit integer to write.
1701 */
1702#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1703DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1704#else
1705DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1706{
1707# if RT_INLINE_ASM_GNU_STYLE
1708 __asm__ __volatile__("outw %w1, %w0\n\t"
1709 :: "Nd" (Port),
1710 "a" (u16));
1711
1712# elif RT_INLINE_ASM_USES_INTRIN
1713 __outword(Port, u16);
1714
1715# else
1716 __asm
1717 {
1718 mov dx, [Port]
1719 mov ax, [u16]
1720 out dx, ax
1721 }
1722# endif
1723}
1724#endif
1725
1726
1727/**
1728 * Gets a 16-bit unsigned integer from an I/O port, ordered.
1729 *
1730 * @returns 16-bit integer.
1731 * @param Port I/O port to read from.
1732 */
1733#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1734DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1735#else
1736DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1737{
1738 uint16_t u16;
1739# if RT_INLINE_ASM_GNU_STYLE
1740 __asm__ __volatile__("inw %w1, %w0\n\t"
1741 : "=a" (u16)
1742 : "Nd" (Port));
1743
1744# elif RT_INLINE_ASM_USES_INTRIN
1745 u16 = __inword(Port);
1746
1747# else
1748 __asm
1749 {
1750 mov dx, [Port]
1751 in ax, dx
1752 mov [u16], ax
1753 }
1754# endif
1755 return u16;
1756}
1757#endif
1758
1759
1760/**
1761 * Writes a 32-bit unsigned integer to an I/O port, ordered.
1762 *
1763 * @param Port I/O port to write to.
1764 * @param u32 32-bit integer to write.
1765 */
1766#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1767DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1768#else
1769DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1770{
1771# if RT_INLINE_ASM_GNU_STYLE
1772 __asm__ __volatile__("outl %1, %w0\n\t"
1773 :: "Nd" (Port),
1774 "a" (u32));
1775
1776# elif RT_INLINE_ASM_USES_INTRIN
1777 __outdword(Port, u32);
1778
1779# else
1780 __asm
1781 {
1782 mov dx, [Port]
1783 mov eax, [u32]
1784 out dx, eax
1785 }
1786# endif
1787}
1788#endif
1789
1790
1791/**
1792 * Gets a 32-bit unsigned integer from an I/O port, ordered.
1793 *
1794 * @returns 32-bit integer.
1795 * @param Port I/O port to read from.
1796 */
1797#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1798DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1799#else
1800DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1801{
1802 uint32_t u32;
1803# if RT_INLINE_ASM_GNU_STYLE
1804 __asm__ __volatile__("inl %w1, %0\n\t"
1805 : "=a" (u32)
1806 : "Nd" (Port));
1807
1808# elif RT_INLINE_ASM_USES_INTRIN
1809 u32 = __indword(Port);
1810
1811# else
1812 __asm
1813 {
1814 mov dx, [Port]
1815 in eax, dx
1816 mov [u32], eax
1817 }
1818# endif
1819 return u32;
1820}
1821#endif
1822
1823/** @todo string i/o */
1824
1825
1826/**
1827 * Atomically Exchange an unsigned 8-bit value, ordered.
1828 *
1829 * @returns Current *pu8 value
1830 * @param pu8 Pointer to the 8-bit variable to update.
1831 * @param u8 The 8-bit value to assign to *pu8.
1832 */
1833#if RT_INLINE_ASM_EXTERNAL
1834DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1835#else
1836DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1837{
1838# if RT_INLINE_ASM_GNU_STYLE
1839 __asm__ __volatile__("xchgb %0, %1\n\t"
1840 : "=m" (*pu8),
1841 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
1842 : "1" (u8));
1843# else
1844 __asm
1845 {
1846# ifdef RT_ARCH_AMD64
1847 mov rdx, [pu8]
1848 mov al, [u8]
1849 xchg [rdx], al
1850 mov [u8], al
1851# else
1852 mov edx, [pu8]
1853 mov al, [u8]
1854 xchg [edx], al
1855 mov [u8], al
1856# endif
1857 }
1858# endif
1859 return u8;
1860}
1861#endif
1862
1863
1864/**
1865 * Atomically Exchange a signed 8-bit value, ordered.
1866 *
1867 * @returns Current *pi8 value
1868 * @param pi8 Pointer to the 8-bit variable to update.
1869 * @param i8 The 8-bit value to assign to *pi8.
1870 */
1871DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1872{
1873 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1874}
1875
1876
1877/**
1878 * Atomically Exchange a bool value, ordered.
1879 *
1880 * @returns Current *pf value
1881 * @param pf Pointer to the boolean variable to update.
1882 * @param f The boolean value to assign to *pf.
1883 */
1884DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1885{
1886#ifdef _MSC_VER
1887 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1888#else
1889 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1890#endif
1891}
1892
1893
1894/**
1895 * Atomically Exchange an unsigned 16-bit value, ordered.
1896 *
1897 * @returns Current *pu16 value
1898 * @param pu16 Pointer to the 16-bit variable to update.
1899 * @param u16 The 16-bit value to assign to *pu16.
1900 */
1901#if RT_INLINE_ASM_EXTERNAL
1902DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1903#else
1904DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1905{
1906# if RT_INLINE_ASM_GNU_STYLE
1907 __asm__ __volatile__("xchgw %0, %1\n\t"
1908 : "=m" (*pu16),
1909 "=r" (u16)
1910 : "1" (u16));
1911# else
1912 __asm
1913 {
1914# ifdef RT_ARCH_AMD64
1915 mov rdx, [pu16]
1916 mov ax, [u16]
1917 xchg [rdx], ax
1918 mov [u16], ax
1919# else
1920 mov edx, [pu16]
1921 mov ax, [u16]
1922 xchg [edx], ax
1923 mov [u16], ax
1924# endif
1925 }
1926# endif
1927 return u16;
1928}
1929#endif
1930
1931
1932/**
1933 * Atomically Exchange a signed 16-bit value, ordered.
1934 *
1935 * @returns Current *pi16 value
1936 * @param pi16 Pointer to the 16-bit variable to update.
1937 * @param i16 The 16-bit value to assign to *pi16.
1938 */
1939DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1940{
1941 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1942}
1943
1944
1945/**
1946 * Atomically Exchange an unsigned 32-bit value, ordered.
1947 *
1948 * @returns Current *pu32 value
1949 * @param pu32 Pointer to the 32-bit variable to update.
1950 * @param u32 The 32-bit value to assign to *pu32.
1951 */
1952#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1953DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1954#else
1955DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1956{
1957# if RT_INLINE_ASM_GNU_STYLE
1958 __asm__ __volatile__("xchgl %0, %1\n\t"
1959 : "=m" (*pu32),
1960 "=r" (u32)
1961 : "1" (u32));
1962
1963# elif RT_INLINE_ASM_USES_INTRIN
1964 u32 = _InterlockedExchange((long *)pu32, u32);
1965
1966# else
1967 __asm
1968 {
1969# ifdef RT_ARCH_AMD64
1970 mov rdx, [pu32]
1971 mov eax, u32
1972 xchg [rdx], eax
1973 mov [u32], eax
1974# else
1975 mov edx, [pu32]
1976 mov eax, u32
1977 xchg [edx], eax
1978 mov [u32], eax
1979# endif
1980 }
1981# endif
1982 return u32;
1983}
1984#endif
1985
1986
1987/**
1988 * Atomically Exchange a signed 32-bit value, ordered.
1989 *
1990 * @returns Current *pi32 value
1991 * @param pi32 Pointer to the 32-bit variable to update.
1992 * @param i32 The 32-bit value to assign to *pi32.
1993 */
1994DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1995{
1996 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1997}
1998
1999
2000/**
2001 * Atomically Exchange an unsigned 64-bit value, ordered.
2002 *
2003 * @returns Current *pu64 value
2004 * @param pu64 Pointer to the 64-bit variable to update.
2005 * @param u64 The 64-bit value to assign to *pu64.
2006 */
2007#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2008DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2009#else
2010DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2011{
2012# if defined(RT_ARCH_AMD64)
2013# if RT_INLINE_ASM_USES_INTRIN
2014 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2015
2016# elif RT_INLINE_ASM_GNU_STYLE
2017 __asm__ __volatile__("xchgq %0, %1\n\t"
2018 : "=m" (*pu64),
2019 "=r" (u64)
2020 : "1" (u64));
2021# else
2022 __asm
2023 {
2024 mov rdx, [pu64]
2025 mov rax, [u64]
2026 xchg [rdx], rax
2027 mov [u64], rax
2028 }
2029# endif
2030# else /* !RT_ARCH_AMD64 */
2031# if RT_INLINE_ASM_GNU_STYLE
2032# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2033 uint32_t u32 = (uint32_t)u64;
2034 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2035 "xchgl %%ebx, %3\n\t"
2036 "1:\n\t"
2037 "lock; cmpxchg8b (%5)\n\t"
2038 "jnz 1b\n\t"
2039 "xchgl %%ebx, %3\n\t"
2040 /*"xchgl %%esi, %5\n\t"*/
2041 : "=A" (u64),
2042 "=m" (*pu64)
2043 : "0" (*pu64),
2044 "m" ( u32 ),
2045 "c" ( (uint32_t)(u64 >> 32) ),
2046 "S" (pu64) );
2047# else /* !PIC */
2048 __asm__ __volatile__("1:\n\t"
2049 "lock; cmpxchg8b %1\n\t"
2050 "jnz 1b\n\t"
2051 : "=A" (u64),
2052 "=m" (*pu64)
2053 : "0" (*pu64),
2054 "b" ( (uint32_t)u64 ),
2055 "c" ( (uint32_t)(u64 >> 32) ));
2056# endif
2057# else
2058 __asm
2059 {
2060 mov ebx, dword ptr [u64]
2061 mov ecx, dword ptr [u64 + 4]
2062 mov edi, pu64
2063 mov eax, dword ptr [edi]
2064 mov edx, dword ptr [edi + 4]
2065 retry:
2066 lock cmpxchg8b [edi]
2067 jnz retry
2068 mov dword ptr [u64], eax
2069 mov dword ptr [u64 + 4], edx
2070 }
2071# endif
2072# endif /* !RT_ARCH_AMD64 */
2073 return u64;
2074}
2075#endif
2076
2077
2078/**
2079 * Atomically Exchange a signed 64-bit value, ordered.
2080 *
2081 * @returns Current *pi64 value
2082 * @param pi64 Pointer to the 64-bit variable to update.
2083 * @param i64 The 64-bit value to assign to *pi64.
2084 */
2085DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2086{
2087 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2088}
2089
2090
2091#ifdef RT_ARCH_AMD64
2092/**
2093 * Atomically Exchange an unsigned 128-bit value, ordered.
2094 *
2095 * @returns Current *pu128.
2096 * @param pu128 Pointer to the 128-bit variable to update.
2097 * @param u128 The 128-bit value to assign to *pu128.
2098 *
2099 * @remark We cannot really assume that any hardware supports this. Nor do I have
2100 * GAS support for it. So, for the time being we'll BREAK the atomic
2101 * bit of this function and use two 64-bit exchanges instead.
2102 */
2103# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2104DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2105# else
2106DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2107{
2108 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2109 {
2110 /** @todo this is clumsy code */
2111 RTUINT128U u128Ret;
2112 u128Ret.u = u128;
2113 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2114 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2115 return u128Ret.u;
2116 }
2117#if 0 /* later? */
2118 else
2119 {
2120# if RT_INLINE_ASM_GNU_STYLE
2121 __asm__ __volatile__("1:\n\t"
2122 "lock; cmpxchg8b %1\n\t"
2123 "jnz 1b\n\t"
2124 : "=A" (u128),
2125 "=m" (*pu128)
2126 : "0" (*pu128),
2127 "b" ( (uint64_t)u128 ),
2128 "c" ( (uint64_t)(u128 >> 64) ));
2129# else
2130 __asm
2131 {
2132 mov rbx, dword ptr [u128]
2133 mov rcx, dword ptr [u128 + 8]
2134 mov rdi, pu128
2135 mov rax, dword ptr [rdi]
2136 mov rdx, dword ptr [rdi + 8]
2137 retry:
2138 lock cmpxchg16b [rdi]
2139 jnz retry
2140 mov dword ptr [u128], rax
2141 mov dword ptr [u128 + 8], rdx
2142 }
2143# endif
2144 }
2145 return u128;
2146#endif
2147}
2148# endif
2149#endif /* RT_ARCH_AMD64 */
2150
2151
2152/**
2153 * Atomically Exchange a value whose size might differ
2154 * between platforms or compilers, ordered.
2155 *
2156 * @param pu Pointer to the variable to update.
2157 * @param uNew The value to assign to *pu.
2158 */
2159#define ASMAtomicXchgSize(pu, uNew) \
2160 do { \
2161 switch (sizeof(*(pu))) { \
2162 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2163 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2164 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2165 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2166 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2167 } \
2168 } while (0)
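
/* Illustrative usage sketch: ASMAtomicXchgSize picks the 8/16/32/64-bit exchange
   from sizeof(*(pu)), which is handy for types whose width differs between
   platforms. The counter type and function name are hypothetical. */
DECLINLINE(void) ExampleResetCounter(volatile size_t *pcItems)
{
    ASMAtomicXchgSize(pcItems, 0);  /* expands to the 32-bit or 64-bit exchange */
}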
2169
2170
2171/**
2172 * Atomically Exchange a pointer value, ordered.
2173 *
2174 * @returns Current *ppv value
2175 * @param ppv Pointer to the pointer variable to update.
2176 * @param pv The pointer value to assign to *ppv.
2177 */
2178DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2179{
2180#if ARCH_BITS == 32
2181 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2182#elif ARCH_BITS == 64
2183 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2184#else
2185# error "ARCH_BITS is bogus"
2186#endif
2187}
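
/* Illustrative usage sketch: using ASMAtomicXchgPtr to detach a pointer exactly
   once, e.g. so that only one of several racing callers gets to free an object.
   The function name is hypothetical. */
DECLINLINE(void *) ExampleDetachAndTake(void * volatile *ppv)
{
    return ASMAtomicXchgPtr(ppv, NULL);     /* whoever gets a non-NULL value owns it */
}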
2188
2189
2190/**
2191 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2192 *
2193 * @returns true if xchg was done.
2194 * @returns false if xchg wasn't done.
2195 *
2196 * @param pu32 Pointer to the value to update.
2197 * @param u32New The new value to assign to *pu32.
2198 * @param u32Old The old value to compare *pu32 with.
2199 */
2200#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2201DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2202#else
2203DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2204{
2205# if RT_INLINE_ASM_GNU_STYLE
2206 uint8_t u8Ret;
2207 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2208 "setz %1\n\t"
2209 : "=m" (*pu32),
2210 "=qm" (u8Ret),
2211 "=a" (u32Old)
2212 : "r" (u32New),
2213 "2" (u32Old));
2214 return (bool)u8Ret;
2215
2216# elif RT_INLINE_ASM_USES_INTRIN
2217 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2218
2219# else
2220 uint32_t u32Ret;
2221 __asm
2222 {
2223# ifdef RT_ARCH_AMD64
2224 mov rdx, [pu32]
2225# else
2226 mov edx, [pu32]
2227# endif
2228 mov eax, [u32Old]
2229 mov ecx, [u32New]
2230# ifdef RT_ARCH_AMD64
2231 lock cmpxchg [rdx], ecx
2232# else
2233 lock cmpxchg [edx], ecx
2234# endif
2235 setz al
2236 movzx eax, al
2237 mov [u32Ret], eax
2238 }
2239 return !!u32Ret;
2240# endif
2241}
2242#endif
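
/* Illustrative usage sketch: the typical compare-and-exchange retry loop, here
   atomically ORing a mask into a 32-bit flag word with ASMAtomicCmpXchgU32.
   The function name is hypothetical. */
DECLINLINE(void) ExampleAtomicOrU32(volatile uint32_t *pu32, uint32_t fMask)
{
    uint32_t u32Old;
    do
        u32Old = *pu32;
    while (!ASMAtomicCmpXchgU32(pu32, u32Old | fMask, u32Old));
}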
2243
2244
2245/**
2246 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2247 *
2248 * @returns true if xchg was done.
2249 * @returns false if xchg wasn't done.
2250 *
2251 * @param pi32 Pointer to the value to update.
2252 * @param i32New The new value to assign to *pi32.
2253 * @param i32Old The old value to compare *pi32 with.
2254 */
2255DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2256{
2257 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2258}
2259
2260
2261/**
2262 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2263 *
2264 * @returns true if xchg was done.
2265 * @returns false if xchg wasn't done.
2266 *
2267 * @param pu64 Pointer to the 64-bit variable to update.
2268 * @param u64New The 64-bit value to assign to *pu64.
2269 * @param u64Old The value to compare with.
2270 */
2271#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2272DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2273#else
2274DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2275{
2276# if RT_INLINE_ASM_USES_INTRIN
2277 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2278
2279# elif defined(RT_ARCH_AMD64)
2280# if RT_INLINE_ASM_GNU_STYLE
2281 uint8_t u8Ret;
2282 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2283 "setz %1\n\t"
2284 : "=m" (*pu64),
2285 "=qm" (u8Ret),
2286 "=a" (u64Old)
2287 : "r" (u64New),
2288 "2" (u64Old));
2289 return (bool)u8Ret;
2290# else
2291 bool fRet;
2292 __asm
2293 {
2294 mov rdx, [pu64]
2295 mov rax, [u64Old]
2296 mov rcx, [u64New]
2297 lock cmpxchg [rdx], rcx
2298 setz al
2299 mov [fRet], al
2300 }
2301 return fRet;
2302# endif
2303# else /* !RT_ARCH_AMD64 */
2304 uint32_t u32Ret;
2305# if RT_INLINE_ASM_GNU_STYLE
2306# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2307 uint32_t u32 = (uint32_t)u64New;
2308 uint32_t u32Spill;
2309 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2310 "lock; cmpxchg8b (%6)\n\t"
2311 "setz %%al\n\t"
2312 "xchgl %%ebx, %4\n\t"
2313 "movzbl %%al, %%eax\n\t"
2314 : "=a" (u32Ret),
2315 "=d" (u32Spill),
2316 "=m" (*pu64)
2317 : "A" (u64Old),
2318 "m" ( u32 ),
2319 "c" ( (uint32_t)(u64New >> 32) ),
2320 "S" (pu64) );
2321# else /* !PIC */
2322 uint32_t u32Spill;
2323 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2324 "setz %%al\n\t"
2325 "movzbl %%al, %%eax\n\t"
2326 : "=a" (u32Ret),
2327 "=d" (u32Spill),
2328 "=m" (*pu64)
2329 : "A" (u64Old),
2330 "b" ( (uint32_t)u64New ),
2331 "c" ( (uint32_t)(u64New >> 32) ));
2332# endif
2333 return (bool)u32Ret;
2334# else
2335 __asm
2336 {
2337 mov ebx, dword ptr [u64New]
2338 mov ecx, dword ptr [u64New + 4]
2339 mov edi, [pu64]
2340 mov eax, dword ptr [u64Old]
2341 mov edx, dword ptr [u64Old + 4]
2342 lock cmpxchg8b [edi]
2343 setz al
2344 movzx eax, al
2345 mov dword ptr [u32Ret], eax
2346 }
2347 return !!u32Ret;
2348# endif
2349# endif /* !RT_ARCH_AMD64 */
2350}
2351#endif
2352
2353
2354/**
2355 * Atomically Compare and exchange a signed 64-bit value, ordered.
2356 *
2357 * @returns true if xchg was done.
2358 * @returns false if xchg wasn't done.
2359 *
2360 * @param pi64 Pointer to the 64-bit variable to update.
2361 * @param i64 The 64-bit value to assign to *pi64.
2362 * @param i64Old The value to compare with.
2363 */
2364DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2365{
2366 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2367}
2368
2369
2370/** @def ASMAtomicCmpXchgSize
2371 * Atomically Compare and Exchange a value whose size might differ
2372 * between platforms or compilers, ordered.
2373 *
2374 * @param pu Pointer to the value to update.
2375 * @param uNew The new value to assign to *pu.
2376 * @param uOld The old value to compare *pu with.
2377 * @param fRc Where to store the result.
2378 */
2379#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2380 do { \
2381 switch (sizeof(*(pu))) { \
2382 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2383 break; \
2384 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2385 break; \
2386 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2387 (fRc) = false; \
2388 break; \
2389 } \
2390 } while (0)
2391
2392
2393/**
2394 * Atomically Compare and Exchange a pointer value, ordered.
2395 *
2396 * @returns true if xchg was done.
2397 * @returns false if xchg wasn't done.
2398 *
2399 * @param ppv Pointer to the value to update.
2400 * @param pvNew    The new value to assign to *ppv.
2401 * @param pvOld    The old value to compare *ppv with.
2402 */
2403DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2404{
2405#if ARCH_BITS == 32
2406 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2407#elif ARCH_BITS == 64
2408 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2409#else
2410# error "ARCH_BITS is bogus"
2411#endif
2412}
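
/* Usage sketch (illustrative only): race-free lazy initialization of a
 * shared object using ASMAtomicCmpXchgPtr. EXAMPLECACHE, exampleCacheAlloc
 * and exampleCacheFree are hypothetical names.
 *
 *     static void * volatile s_pvCache = NULL;
 *
 *     EXAMPLECACHE *ExampleGetCache(void)
 *     {
 *         void *pv = ASMAtomicReadPtr(&s_pvCache);
 *         if (!pv)
 *         {
 *             pv = exampleCacheAlloc();
 *             if (!ASMAtomicCmpXchgPtr(&s_pvCache, pv, NULL))
 *             {
 *                 // Somebody else won the race; drop ours and use theirs.
 *                 exampleCacheFree(pv);
 *                 pv = ASMAtomicReadPtr(&s_pvCache);
 *             }
 *         }
 *         return (EXAMPLECACHE *)pv;
 *     }
 */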
2413
2414
2415/**
2416 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2417 * passes back old value, ordered.
2418 *
2419 * @returns true if xchg was done.
2420 * @returns false if xchg wasn't done.
2421 *
2422 * @param pu32 Pointer to the value to update.
2423 * @param u32New   The new value to assign to *pu32.
2424 * @param u32Old   The old value to compare *pu32 with.
2425 * @param pu32Old  Pointer to store the old value at.
2426 */
2427#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2428DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2429#else
2430DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2431{
2432# if RT_INLINE_ASM_GNU_STYLE
2433 uint8_t u8Ret;
2434 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2435 "setz %1\n\t"
2436 : "=m" (*pu32),
2437 "=qm" (u8Ret),
2438 "=a" (*pu32Old)
2439 : "r" (u32New),
2440 "a" (u32Old));
2441 return (bool)u8Ret;
2442
2443# elif RT_INLINE_ASM_USES_INTRIN
2444 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2445
2446# else
2447 uint32_t u32Ret;
2448 __asm
2449 {
2450# ifdef RT_ARCH_AMD64
2451 mov rdx, [pu32]
2452# else
2453 mov edx, [pu32]
2454# endif
2455 mov eax, [u32Old]
2456 mov ecx, [u32New]
2457# ifdef RT_ARCH_AMD64
2458 lock cmpxchg [rdx], ecx
2459 mov rdx, [pu32Old]
2460 mov [rdx], eax
2461# else
2462 lock cmpxchg [edx], ecx
2463 mov edx, [pu32Old]
2464 mov [edx], eax
2465# endif
2466 setz al
2467 movzx eax, al
2468 mov [u32Ret], eax
2469 }
2470 return !!u32Ret;
2471# endif
2472}
2473#endif
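
/* Usage sketch (illustrative only): an OR-in-flags loop where the Ex variant
 * hands back the value that was actually found, so a failed attempt does not
 * need a separate re-read. s_u32Flags is made up for the example.
 *
 *     static volatile uint32_t s_u32Flags;
 *
 *     void ExampleSetFlags(uint32_t fFlags)
 *     {
 *         uint32_t u32Old = ASMAtomicUoReadU32(&s_u32Flags);
 *         while (!ASMAtomicCmpXchgExU32(&s_u32Flags, u32Old | fFlags, u32Old, &u32Old))
 *         {
 *             // u32Old now holds the current value; just retry with it.
 *         }
 *     }
 */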
2474
2475
2476/**
2477 * Atomically Compare and Exchange a signed 32-bit value, additionally
2478 * passes back old value, ordered.
2479 *
2480 * @returns true if xchg was done.
2481 * @returns false if xchg wasn't done.
2482 *
2483 * @param pi32 Pointer to the value to update.
2484 * @param i32New   The new value to assign to *pi32.
2485 * @param i32Old   The old value to compare *pi32 with.
2486 * @param pi32Old  Pointer to store the old value at.
2487 */
2488DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2489{
2490 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2491}
2492
2493
2494/**
2495 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2496 * passing back old value, ordered.
2497 *
2498 * @returns true if xchg was done.
2499 * @returns false if xchg wasn't done.
2500 *
2501 * @param pu64 Pointer to the 64-bit variable to update.
2502 * @param u64New The 64-bit value to assign to *pu64.
2503 * @param u64Old The value to compare with.
2504 * @param pu64Old  Pointer to store the old value at.
2505 */
2506#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2507DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2508#else
2509DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2510{
2511# if RT_INLINE_ASM_USES_INTRIN
2512 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2513
2514# elif defined(RT_ARCH_AMD64)
2515# if RT_INLINE_ASM_GNU_STYLE
2516 uint8_t u8Ret;
2517 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2518 "setz %1\n\t"
2519 : "=m" (*pu64),
2520 "=qm" (u8Ret),
2521 "=a" (*pu64Old)
2522 : "r" (u64New),
2523 "a" (u64Old));
2524 return (bool)u8Ret;
2525# else
2526 bool fRet;
2527 __asm
2528 {
2529        mov     rdx, [pu64]
2530 mov rax, [u64Old]
2531 mov rcx, [u64New]
2532 lock cmpxchg [rdx], rcx
2533 mov rdx, [pu64Old]
2534 mov [rdx], rax
2535 setz al
2536 mov [fRet], al
2537 }
2538 return fRet;
2539# endif
2540# else /* !RT_ARCH_AMD64 */
2541# if RT_INLINE_ASM_GNU_STYLE
2542 uint64_t u64Ret;
2543# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2544 /* NB: this code uses a memory clobber description, because the clean
2545 * solution with an output value for *pu64 makes gcc run out of registers.
2546 * This will cause suboptimal code, and anyone with a better solution is
2547 * welcome to improve this. */
2548 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2549 "lock; cmpxchg8b %3\n\t"
2550 "xchgl %%ebx, %1\n\t"
2551 : "=A" (u64Ret)
2552 : "DS" ((uint32_t)u64New),
2553 "c" ((uint32_t)(u64New >> 32)),
2554 "m" (*pu64),
2555 "0" (u64Old)
2556 : "memory" );
2557# else /* !PIC */
2558 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2559 : "=A" (u64Ret),
2560 "=m" (*pu64)
2561 : "b" ((uint32_t)u64New),
2562 "c" ((uint32_t)(u64New >> 32)),
2563 "m" (*pu64),
2564 "0" (u64Old));
2565# endif
2566 *pu64Old = u64Ret;
2567 return u64Ret == u64Old;
2568# else
2569 uint32_t u32Ret;
2570 __asm
2571 {
2572 mov ebx, dword ptr [u64New]
2573 mov ecx, dword ptr [u64New + 4]
2574 mov edi, [pu64]
2575 mov eax, dword ptr [u64Old]
2576 mov edx, dword ptr [u64Old + 4]
2577 lock cmpxchg8b [edi]
2578 mov ebx, [pu64Old]
2579 mov [ebx], eax
2580 setz al
2581 movzx eax, al
2582 add ebx, 4
2583 mov [ebx], edx
2584 mov dword ptr [u32Ret], eax
2585 }
2586 return !!u32Ret;
2587# endif
2588# endif /* !RT_ARCH_AMD64 */
2589}
2590#endif
2591
2592
2593/**
2594 * Atomically Compare and exchange a signed 64-bit value, additionally
2595 * passing back old value, ordered.
2596 *
2597 * @returns true if xchg was done.
2598 * @returns false if xchg wasn't done.
2599 *
2600 * @param pi64 Pointer to the 64-bit variable to update.
2601 * @param i64      The 64-bit value to assign to *pi64.
2602 * @param i64Old The value to compare with.
2603 * @param pi64Old  Pointer to store the old value at.
2604 */
2605DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2606{
2607 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2608}
2609
2610
2611/** @def ASMAtomicCmpXchgExSize
2612 * Atomically Compare and Exchange a value whose size might differ
2613 * between platforms or compilers. Additionally passes back old value.
2614 *
2615 * @param pu Pointer to the value to update.
2616 * @param pu       Pointer to the value to update.
2617 * @param uNew     The new value to assign to *pu.
2618 * @param uOld     The old value to compare *pu with.
2618 * @param fRc Where to store the result.
2619 * @param uOldVal Where to store the old value.
2620 */
2621#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
2622 do { \
2623 switch (sizeof(*(pu))) { \
2624 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
2625 break; \
2626 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
2627 break; \
2628            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2629 (fRc) = false; \
2630 (uOldVal) = 0; \
2631 break; \
2632 } \
2633 } while (0)
2634
2635
2636/**
2637 * Atomically Compare and Exchange a pointer value, additionally
2638 * passing back old value, ordered.
2639 *
2640 * @returns true if xchg was done.
2641 * @returns false if xchg wasn't done.
2642 *
2643 * @param ppv Pointer to the value to update.
2644 * @param pvNew    The new value to assign to *ppv.
2645 * @param pvOld    The old value to compare *ppv with.
2646 * @param ppvOld   Pointer to store the old value at.
2647 */
2648DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2649{
2650#if ARCH_BITS == 32
2651 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2652#elif ARCH_BITS == 64
2653 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2654#else
2655# error "ARCH_BITS is bogus"
2656#endif
2657}
2658
2659
2660/**
2661 * Atomically exchanges and adds to a 32-bit value, ordered.
2662 *
2663 * @returns The old value.
2664 * @param pu32 Pointer to the value.
2665 * @param u32 Number to add.
2666 */
2667#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2668DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2669#else
2670DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2671{
2672# if RT_INLINE_ASM_USES_INTRIN
2673 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2674 return u32;
2675
2676# elif RT_INLINE_ASM_GNU_STYLE
2677 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2678 : "=r" (u32),
2679 "=m" (*pu32)
2680 : "0" (u32)
2681 : "memory");
2682 return u32;
2683# else
2684 __asm
2685 {
2686 mov eax, [u32]
2687# ifdef RT_ARCH_AMD64
2688 mov rdx, [pu32]
2689 lock xadd [rdx], eax
2690# else
2691 mov edx, [pu32]
2692 lock xadd [edx], eax
2693# endif
2694 mov [u32], eax
2695 }
2696 return u32;
2697# endif
2698}
2699#endif
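
/* Usage sketch (illustrative only): handing out consecutive ticket numbers
 * using the pre-add value returned by ASMAtomicAddU32. s_u32NextTicket is
 * made up for the example.
 *
 *     static volatile uint32_t s_u32NextTicket;
 *
 *     uint32_t ExampleTakeTicket(void)
 *     {
 *         // The value *before* the addition is returned, so concurrent
 *         // callers each get a unique, consecutive number.
 *         return ASMAtomicAddU32(&s_u32NextTicket, 1);
 *     }
 */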
2700
2701
2702/**
2703 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2704 *
2705 * @returns The old value.
2706 * @param pi32 Pointer to the value.
2707 * @param i32 Number to add.
2708 */
2709DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2710{
2711 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2712}
2713
2714
2715/**
2716 * Atomically increment a 32-bit value, ordered.
2717 *
2718 * @returns The new value.
2719 * @param pu32 Pointer to the value to increment.
2720 */
2721#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2722DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2723#else
2724DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2725{
2726 uint32_t u32;
2727# if RT_INLINE_ASM_USES_INTRIN
2728 u32 = _InterlockedIncrement((long *)pu32);
2729 return u32;
2730
2731# elif RT_INLINE_ASM_GNU_STYLE
2732 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2733 : "=r" (u32),
2734 "=m" (*pu32)
2735 : "0" (1)
2736 : "memory");
2737 return u32+1;
2738# else
2739 __asm
2740 {
2741 mov eax, 1
2742# ifdef RT_ARCH_AMD64
2743 mov rdx, [pu32]
2744 lock xadd [rdx], eax
2745# else
2746 mov edx, [pu32]
2747 lock xadd [edx], eax
2748# endif
2749 mov u32, eax
2750 }
2751 return u32+1;
2752# endif
2753}
2754#endif
2755
2756
2757/**
2758 * Atomically increment a signed 32-bit value, ordered.
2759 *
2760 * @returns The new value.
2761 * @param pi32 Pointer to the value to increment.
2762 */
2763DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2764{
2765 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2766}
2767
2768
2769/**
2770 * Atomically decrement an unsigned 32-bit value, ordered.
2771 *
2772 * @returns The new value.
2773 * @param pu32 Pointer to the value to decrement.
2774 */
2775#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2776DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2777#else
2778DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2779{
2780 uint32_t u32;
2781# if RT_INLINE_ASM_USES_INTRIN
2782 u32 = _InterlockedDecrement((long *)pu32);
2783 return u32;
2784
2785# elif RT_INLINE_ASM_GNU_STYLE
2786 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2787 : "=r" (u32),
2788 "=m" (*pu32)
2789 : "0" (-1)
2790 : "memory");
2791 return u32-1;
2792# else
2793 __asm
2794 {
2795 mov eax, -1
2796# ifdef RT_ARCH_AMD64
2797 mov rdx, [pu32]
2798 lock xadd [rdx], eax
2799# else
2800 mov edx, [pu32]
2801 lock xadd [edx], eax
2802# endif
2803 mov u32, eax
2804 }
2805 return u32-1;
2806# endif
2807}
2808#endif
2809
2810
2811/**
2812 * Atomically decrement a signed 32-bit value, ordered.
2813 *
2814 * @returns The new value.
2815 * @param pi32 Pointer to the value to decrement.
2816 */
2817DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2818{
2819 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2820}
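
/* Usage sketch (illustrative only): a minimal reference count built on the
 * increment/decrement operations above. EXAMPLEOBJ and exampleObjDestroy
 * are hypothetical names.
 *
 *     typedef struct EXAMPLEOBJ { volatile uint32_t cRefs; } EXAMPLEOBJ;
 *
 *     void ExampleObjRetain(EXAMPLEOBJ *pObj)
 *     {
 *         ASMAtomicIncU32(&pObj->cRefs);
 *     }
 *
 *     void ExampleObjRelease(EXAMPLEOBJ *pObj)
 *     {
 *         // The new (post-decrement) count is returned; the caller that
 *         // drops it to zero destroys the object.
 *         if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *             exampleObjDestroy(pObj);
 *     }
 */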
2821
2822
2823/**
2824 * Atomically Or an unsigned 32-bit value, ordered.
2825 *
2826 * @param pu32   Pointer to the 32-bit variable to OR u32 with.
2827 * @param u32 The value to OR *pu32 with.
2828 */
2829#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2830DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2831#else
2832DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2833{
2834# if RT_INLINE_ASM_USES_INTRIN
2835 _InterlockedOr((long volatile *)pu32, (long)u32);
2836
2837# elif RT_INLINE_ASM_GNU_STYLE
2838 __asm__ __volatile__("lock; orl %1, %0\n\t"
2839 : "=m" (*pu32)
2840 : "ir" (u32));
2841# else
2842 __asm
2843 {
2844 mov eax, [u32]
2845# ifdef RT_ARCH_AMD64
2846 mov rdx, [pu32]
2847 lock or [rdx], eax
2848# else
2849 mov edx, [pu32]
2850 lock or [edx], eax
2851# endif
2852 }
2853# endif
2854}
2855#endif
2856
2857
2858/**
2859 * Atomically Or a signed 32-bit value, ordered.
2860 *
2861 * @param pi32   Pointer to the 32-bit variable to OR i32 with.
2862 * @param i32    The value to OR *pi32 with.
2863 */
2864DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2865{
2866 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2867}
2868
2869
2870/**
2871 * Atomically And an unsigned 32-bit value, ordered.
2872 *
2873 * @param pu32   Pointer to the 32-bit variable to AND u32 with.
2874 * @param u32 The value to AND *pu32 with.
2875 */
2876#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2877DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2878#else
2879DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2880{
2881# if RT_INLINE_ASM_USES_INTRIN
2882 _InterlockedAnd((long volatile *)pu32, u32);
2883
2884# elif RT_INLINE_ASM_GNU_STYLE
2885 __asm__ __volatile__("lock; andl %1, %0\n\t"
2886 : "=m" (*pu32)
2887 : "ir" (u32));
2888# else
2889 __asm
2890 {
2891 mov eax, [u32]
2892# ifdef RT_ARCH_AMD64
2893 mov rdx, [pu32]
2894 lock and [rdx], eax
2895# else
2896 mov edx, [pu32]
2897 lock and [edx], eax
2898# endif
2899 }
2900# endif
2901}
2902#endif
2903
2904
2905/**
2906 * Atomically And a signed 32-bit value, ordered.
2907 *
2908 * @param pi32   Pointer to the 32-bit variable to AND i32 with.
2909 * @param i32 The value to AND *pi32 with.
2910 */
2911DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2912{
2913 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2914}
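
/* Usage sketch (illustrative only): maintaining a shared 32-bit flag word
 * with the OR/AND operations above. The flag values and s_fExampleStatus
 * are made up for the example.
 *
 *     #define EXAMPLE_STS_BUSY      UINT32_C(0x00000001)
 *     #define EXAMPLE_STS_SHUTDOWN  UINT32_C(0x00000002)
 *
 *     static volatile uint32_t s_fExampleStatus;
 *
 *     void ExampleEnterBusy(void) { ASMAtomicOrU32(&s_fExampleStatus, EXAMPLE_STS_BUSY); }
 *     void ExampleLeaveBusy(void) { ASMAtomicAndU32(&s_fExampleStatus, ~EXAMPLE_STS_BUSY); }
 */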
2915
2916
2917/**
2918 * Memory fence, waits for any pending writes and reads to complete.
2919 */
2920DECLINLINE(void) ASMMemoryFence(void)
2921{
2922 /** @todo use mfence? check if all cpus we care for support it. */
2923 uint32_t volatile u32;
2924 ASMAtomicXchgU32(&u32, 0);
2925}
2926
2927
2928/**
2929 * Write fence, waits for any pending writes to complete.
2930 */
2931DECLINLINE(void) ASMWriteFence(void)
2932{
2933 /** @todo use sfence? check if all cpus we care for support it. */
2934 ASMMemoryFence();
2935}
2936
2937
2938/**
2939 * Read fence, waits for any pending reads to complete.
2940 */
2941DECLINLINE(void) ASMReadFence(void)
2942{
2943 /** @todo use lfence? check if all cpus we care for support it. */
2944 ASMMemoryFence();
2945}
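
/* Usage sketch (illustrative only): publishing data with an explicit write
 * fence and consuming it behind a read fence. s_uPayload and s_fReady are
 * made up for the example; the ordered atomic read/write helpers further
 * down are usually the more convenient choice.
 *
 *     static volatile uint32_t s_uPayload;
 *     static volatile bool     s_fReady;
 *
 *     void ExampleProduce(uint32_t uValue)
 *     {
 *         s_uPayload = uValue;
 *         ASMWriteFence();    // the payload is visible before the flag
 *         s_fReady = true;
 *     }
 *
 *     bool ExampleTryConsume(uint32_t *puValue)
 *     {
 *         if (!s_fReady)
 *             return false;
 *         ASMReadFence();     // don't read the payload ahead of the flag
 *         *puValue = s_uPayload;
 *         return true;
 *     }
 */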
2946
2947
2948/**
2949 * Atomically reads an unsigned 8-bit value, ordered.
2950 *
2951 * @returns Current *pu8 value
2952 * @param pu8 Pointer to the 8-bit variable to read.
2953 */
2954DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
2955{
2956 ASMMemoryFence();
2957 return *pu8; /* byte reads are atomic on x86 */
2958}
2959
2960
2961/**
2962 * Atomically reads an unsigned 8-bit value, unordered.
2963 *
2964 * @returns Current *pu8 value
2965 * @param pu8 Pointer to the 8-bit variable to read.
2966 */
2967DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
2968{
2969 return *pu8; /* byte reads are atomic on x86 */
2970}
2971
2972
2973/**
2974 * Atomically reads a signed 8-bit value, ordered.
2975 *
2976 * @returns Current *pi8 value
2977 * @param pi8 Pointer to the 8-bit variable to read.
2978 */
2979DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
2980{
2981 ASMMemoryFence();
2982 return *pi8; /* byte reads are atomic on x86 */
2983}
2984
2985
2986/**
2987 * Atomically reads a signed 8-bit value, unordered.
2988 *
2989 * @returns Current *pi8 value
2990 * @param pi8 Pointer to the 8-bit variable to read.
2991 */
2992DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
2993{
2994 return *pi8; /* byte reads are atomic on x86 */
2995}
2996
2997
2998/**
2999 * Atomically reads an unsigned 16-bit value, ordered.
3000 *
3001 * @returns Current *pu16 value
3002 * @param pu16 Pointer to the 16-bit variable to read.
3003 */
3004DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3005{
3006 ASMMemoryFence();
3007 Assert(!((uintptr_t)pu16 & 1));
3008 return *pu16;
3009}
3010
3011
3012/**
3013 * Atomically reads an unsigned 16-bit value, unordered.
3014 *
3015 * @returns Current *pu16 value
3016 * @param pu16 Pointer to the 16-bit variable to read.
3017 */
3018DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3019{
3020 Assert(!((uintptr_t)pu16 & 1));
3021 return *pu16;
3022}
3023
3024
3025/**
3026 * Atomically reads a signed 16-bit value, ordered.
3027 *
3028 * @returns Current *pi16 value
3029 * @param pi16 Pointer to the 16-bit variable to read.
3030 */
3031DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3032{
3033 ASMMemoryFence();
3034 Assert(!((uintptr_t)pi16 & 1));
3035 return *pi16;
3036}
3037
3038
3039/**
3040 * Atomically reads a signed 16-bit value, unordered.
3041 *
3042 * @returns Current *pi16 value
3043 * @param pi16 Pointer to the 16-bit variable to read.
3044 */
3045DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3046{
3047 Assert(!((uintptr_t)pi16 & 1));
3048 return *pi16;
3049}
3050
3051
3052/**
3053 * Atomically reads an unsigned 32-bit value, ordered.
3054 *
3055 * @returns Current *pu32 value
3056 * @param pu32 Pointer to the 32-bit variable to read.
3057 */
3058DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3059{
3060 ASMMemoryFence();
3061 Assert(!((uintptr_t)pu32 & 3));
3062 return *pu32;
3063}
3064
3065
3066/**
3067 * Atomically reads an unsigned 32-bit value, unordered.
3068 *
3069 * @returns Current *pu32 value
3070 * @param pu32 Pointer to the 32-bit variable to read.
3071 */
3072DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3073{
3074 Assert(!((uintptr_t)pu32 & 3));
3075 return *pu32;
3076}
3077
3078
3079/**
3080 * Atomically reads a signed 32-bit value, ordered.
3081 *
3082 * @returns Current *pi32 value
3083 * @param pi32 Pointer to the 32-bit variable to read.
3084 */
3085DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3086{
3087 ASMMemoryFence();
3088 Assert(!((uintptr_t)pi32 & 3));
3089 return *pi32;
3090}
3091
3092
3093/**
3094 * Atomically reads a signed 32-bit value, unordered.
3095 *
3096 * @returns Current *pi32 value
3097 * @param pi32 Pointer to the 32-bit variable to read.
3098 */
3099DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3100{
3101 Assert(!((uintptr_t)pi32 & 3));
3102 return *pi32;
3103}
3104
3105
3106/**
3107 * Atomically reads an unsigned 64-bit value, ordered.
3108 *
3109 * @returns Current *pu64 value
3110 * @param pu64 Pointer to the 64-bit variable to read.
3111 * The memory pointed to must be writable.
3112 * @remark This will fault if the memory is read-only!
3113 */
3114#if RT_INLINE_ASM_EXTERNAL
3115DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3116#else
3117DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3118{
3119 uint64_t u64;
3120# ifdef RT_ARCH_AMD64
3121# if RT_INLINE_ASM_GNU_STYLE
3122 Assert(!((uintptr_t)pu64 & 7));
3123 __asm__ __volatile__( "mfence\n\t"
3124 "movq %1, %0\n\t"
3125 : "=r" (u64)
3126 : "m" (*pu64));
3127# else
3128 __asm
3129 {
3130 mfence
3131 mov rdx, [pu64]
3132 mov rax, [rdx]
3133 mov [u64], rax
3134 }
3135# endif
3136# else /* !RT_ARCH_AMD64 */
3137# if RT_INLINE_ASM_GNU_STYLE
3138# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3139 uint32_t u32EBX = 0;
3140 Assert(!((uintptr_t)pu64 & 7));
3141 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3142 "lock; cmpxchg8b (%5)\n\t"
3143 "xchgl %%ebx, %3\n\t"
3144 : "=A" (u64),
3145 "=m" (*pu64)
3146 : "0" (0),
3147 "m" (u32EBX),
3148 "c" (0),
3149 "S" (pu64));
3150# else /* !PIC */
3151 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3152 : "=A" (u64),
3153 "=m" (*pu64)
3154 : "0" (0),
3155 "b" (0),
3156 "c" (0));
3157# endif
3158# else
3159 Assert(!((uintptr_t)pu64 & 7));
3160 __asm
3161 {
3162 xor eax, eax
3163 xor edx, edx
3164 mov edi, pu64
3165 xor ecx, ecx
3166 xor ebx, ebx
3167 lock cmpxchg8b [edi]
3168 mov dword ptr [u64], eax
3169 mov dword ptr [u64 + 4], edx
3170 }
3171# endif
3172# endif /* !RT_ARCH_AMD64 */
3173 return u64;
3174}
3175#endif
3176
3177
3178/**
3179 * Atomically reads an unsigned 64-bit value, unordered.
3180 *
3181 * @returns Current *pu64 value
3182 * @param pu64 Pointer to the 64-bit variable to read.
3183 * The memory pointed to must be writable.
3184 * @remark This will fault if the memory is read-only!
3185 */
3186#if RT_INLINE_ASM_EXTERNAL
3187DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3188#else
3189DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3190{
3191 uint64_t u64;
3192# ifdef RT_ARCH_AMD64
3193# if RT_INLINE_ASM_GNU_STYLE
3194 Assert(!((uintptr_t)pu64 & 7));
3195 __asm__ __volatile__("movq %1, %0\n\t"
3196 : "=r" (u64)
3197 : "m" (*pu64));
3198# else
3199 __asm
3200 {
3201 mov rdx, [pu64]
3202 mov rax, [rdx]
3203 mov [u64], rax
3204 }
3205# endif
3206# else /* !RT_ARCH_AMD64 */
3207# if RT_INLINE_ASM_GNU_STYLE
3208# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3209 uint32_t u32EBX = 0;
3210 Assert(!((uintptr_t)pu64 & 7));
3211 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3212 "lock; cmpxchg8b (%5)\n\t"
3213 "xchgl %%ebx, %3\n\t"
3214 : "=A" (u64),
3215 "=m" (*pu64)
3216 : "0" (0),
3217 "m" (u32EBX),
3218 "c" (0),
3219 "S" (pu64));
3220# else /* !PIC */
3221 __asm__ __volatile__("cmpxchg8b %1\n\t"
3222 : "=A" (u64),
3223 "=m" (*pu64)
3224 : "0" (0),
3225 "b" (0),
3226 "c" (0));
3227# endif
3228# else
3229 Assert(!((uintptr_t)pu64 & 7));
3230 __asm
3231 {
3232 xor eax, eax
3233 xor edx, edx
3234 mov edi, pu64
3235 xor ecx, ecx
3236 xor ebx, ebx
3237 lock cmpxchg8b [edi]
3238 mov dword ptr [u64], eax
3239 mov dword ptr [u64 + 4], edx
3240 }
3241# endif
3242# endif /* !RT_ARCH_AMD64 */
3243 return u64;
3244}
3245#endif
3246
3247
3248/**
3249 * Atomically reads a signed 64-bit value, ordered.
3250 *
3251 * @returns Current *pi64 value
3252 * @param pi64 Pointer to the 64-bit variable to read.
3253 * The memory pointed to must be writable.
3254 * @remark This will fault if the memory is read-only!
3255 */
3256DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3257{
3258 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3259}
3260
3261
3262/**
3263 * Atomically reads a signed 64-bit value, unordered.
3264 *
3265 * @returns Current *pi64 value
3266 * @param pi64 Pointer to the 64-bit variable to read.
3267 * The memory pointed to must be writable.
3268 * @remark This will fault if the memory is read-only!
3269 */
3270DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3271{
3272 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3273}
3274
3275
3276/**
3277 * Atomically reads a pointer value, ordered.
3278 *
3279 * @returns Current *pv value
3280 * @param ppv Pointer to the pointer variable to read.
3281 */
3282DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3283{
3284#if ARCH_BITS == 32
3285 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3286#elif ARCH_BITS == 64
3287 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3288#else
3289# error "ARCH_BITS is bogus"
3290#endif
3291}
3292
3293
3294/**
3295 * Atomically reads a pointer value, unordered.
3296 *
3297 * @returns Current *pv value
3298 * @param ppv Pointer to the pointer variable to read.
3299 */
3300DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3301{
3302#if ARCH_BITS == 32
3303 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3304#elif ARCH_BITS == 64
3305 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3306#else
3307# error "ARCH_BITS is bogus"
3308#endif
3309}
3310
3311
3312/**
3313 * Atomically reads a boolean value, ordered.
3314 *
3315 * @returns Current *pf value
3316 * @param pf Pointer to the boolean variable to read.
3317 */
3318DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3319{
3320 ASMMemoryFence();
3321 return *pf; /* byte reads are atomic on x86 */
3322}
3323
3324
3325/**
3326 * Atomically reads a boolean value, unordered.
3327 *
3328 * @returns Current *pf value
3329 * @param pf Pointer to the boolean variable to read.
3330 */
3331DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3332{
3333 return *pf; /* byte reads are atomic on x86 */
3334}
3335
3336
3337/**
3338 * Atomically read a value whose size might differ
3339 * between platforms or compilers, ordered.
3340 *
3341 * @param pu     Pointer to the variable to read.
3342 * @param puRes Where to store the result.
3343 */
3344#define ASMAtomicReadSize(pu, puRes) \
3345 do { \
3346 switch (sizeof(*(pu))) { \
3347 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3348 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
3349 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
3350 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
3351 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3352 } \
3353 } while (0)
3354
3355
3356/**
3357 * Atomically read a value whose size might differ
3358 * between platforms or compilers, unordered.
3359 *
3360 * @param pu     Pointer to the variable to read.
3361 * @param puRes Where to store the result.
3362 */
3363#define ASMAtomicUoReadSize(pu, puRes) \
3364 do { \
3365 switch (sizeof(*(pu))) { \
3366 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3367 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
3368 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
3369 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
3370            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3371 } \
3372 } while (0)
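
/* Usage sketch (illustrative only): ASMAtomicReadSize with a field whose
 * width follows the host architecture. The structure and member names are
 * made up for the example.
 *
 *     typedef struct EXAMPLESTATE { volatile uintptr_t uCookie; } EXAMPLESTATE;
 *
 *     uintptr_t ExampleReadCookie(EXAMPLESTATE *pState)
 *     {
 *         uintptr_t uCookie;
 *         ASMAtomicReadSize(&pState->uCookie, &uCookie);
 *         return uCookie;
 *     }
 */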
3373
3374
3375/**
3376 * Atomically writes an unsigned 8-bit value, ordered.
3377 *
3378 * @param pu8 Pointer to the 8-bit variable.
3379 * @param u8 The 8-bit value to assign to *pu8.
3380 */
3381DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3382{
3383 ASMAtomicXchgU8(pu8, u8);
3384}
3385
3386
3387/**
3388 * Atomically writes an unsigned 8-bit value, unordered.
3389 *
3390 * @param pu8 Pointer to the 8-bit variable.
3391 * @param u8 The 8-bit value to assign to *pu8.
3392 */
3393DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3394{
3395 *pu8 = u8; /* byte writes are atomic on x86 */
3396}
3397
3398
3399/**
3400 * Atomically writes a signed 8-bit value, ordered.
3401 *
3402 * @param pi8    Pointer to the 8-bit variable.
3403 * @param i8 The 8-bit value to assign to *pi8.
3404 */
3405DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3406{
3407 ASMAtomicXchgS8(pi8, i8);
3408}
3409
3410
3411/**
3412 * Atomically writes a signed 8-bit value, unordered.
3413 *
3414 * @param pi8    Pointer to the 8-bit variable.
3415 * @param i8 The 8-bit value to assign to *pi8.
3416 */
3417DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3418{
3419 *pi8 = i8; /* byte writes are atomic on x86 */
3420}
3421
3422
3423/**
3424 * Atomically writes an unsigned 16-bit value, ordered.
3425 *
3426 * @param pu16 Pointer to the 16-bit variable.
3427 * @param u16 The 16-bit value to assign to *pu16.
3428 */
3429DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3430{
3431 ASMAtomicXchgU16(pu16, u16);
3432}
3433
3434
3435/**
3436 * Atomically writes an unsigned 16-bit value, unordered.
3437 *
3438 * @param pu16 Pointer to the 16-bit variable.
3439 * @param u16 The 16-bit value to assign to *pu16.
3440 */
3441DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3442{
3443 Assert(!((uintptr_t)pu16 & 1));
3444 *pu16 = u16;
3445}
3446
3447
3448/**
3449 * Atomically writes a signed 16-bit value, ordered.
3450 *
3451 * @param pi16   Pointer to the 16-bit variable.
3452 * @param i16 The 16-bit value to assign to *pi16.
3453 */
3454DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3455{
3456 ASMAtomicXchgS16(pi16, i16);
3457}
3458
3459
3460/**
3461 * Atomically writes a signed 16-bit value, unordered.
3462 *
3463 * @param pi16   Pointer to the 16-bit variable.
3464 * @param i16 The 16-bit value to assign to *pi16.
3465 */
3466DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
3467{
3468 Assert(!((uintptr_t)pi16 & 1));
3469 *pi16 = i16;
3470}
3471
3472
3473/**
3474 * Atomically writes an unsigned 32-bit value, ordered.
3475 *
3476 * @param pu32 Pointer to the 32-bit variable.
3477 * @param u32 The 32-bit value to assign to *pu32.
3478 */
3479DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3480{
3481 ASMAtomicXchgU32(pu32, u32);
3482}
3483
3484
3485/**
3486 * Atomically writes an unsigned 32-bit value, unordered.
3487 *
3488 * @param pu32 Pointer to the 32-bit variable.
3489 * @param u32 The 32-bit value to assign to *pu32.
3490 */
3491DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
3492{
3493 Assert(!((uintptr_t)pu32 & 3));
3494 *pu32 = u32;
3495}
3496
3497
3498/**
3499 * Atomically writes a signed 32-bit value, ordered.
3500 *
3501 * @param pi32   Pointer to the 32-bit variable.
3502 * @param i32 The 32-bit value to assign to *pi32.
3503 */
3504DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3505{
3506 ASMAtomicXchgS32(pi32, i32);
3507}
3508
3509
3510/**
3511 * Atomically writes a signed 32-bit value, unordered.
3512 *
3513 * @param pi32   Pointer to the 32-bit variable.
3514 * @param i32 The 32-bit value to assign to *pi32.
3515 */
3516DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
3517{
3518 Assert(!((uintptr_t)pi32 & 3));
3519 *pi32 = i32;
3520}
3521
3522
3523/**
3524 * Atomically writes an unsigned 64-bit value, ordered.
3525 *
3526 * @param pu64 Pointer to the 64-bit variable.
3527 * @param u64 The 64-bit value to assign to *pu64.
3528 */
3529DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3530{
3531 ASMAtomicXchgU64(pu64, u64);
3532}
3533
3534
3535/**
3536 * Atomically writes an unsigned 64-bit value, unordered.
3537 *
3538 * @param pu64 Pointer to the 64-bit variable.
3539 * @param u64 The 64-bit value to assign to *pu64.
3540 */
3541DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
3542{
3543 Assert(!((uintptr_t)pu64 & 7));
3544#if ARCH_BITS == 64
3545 *pu64 = u64;
3546#else
3547 ASMAtomicXchgU64(pu64, u64);
3548#endif
3549}
3550
3551
3552/**
3553 * Atomically writes a signed 64-bit value, ordered.
3554 *
3555 * @param pi64 Pointer to the 64-bit variable.
3556 * @param i64 The 64-bit value to assign to *pi64.
3557 */
3558DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3559{
3560 ASMAtomicXchgS64(pi64, i64);
3561}
3562
3563
3564/**
3565 * Atomically writes a signed 64-bit value, unordered.
3566 *
3567 * @param pi64 Pointer to the 64-bit variable.
3568 * @param i64 The 64-bit value to assign to *pi64.
3569 */
3570DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
3571{
3572 Assert(!((uintptr_t)pi64 & 7));
3573#if ARCH_BITS == 64
3574 *pi64 = i64;
3575#else
3576 ASMAtomicXchgS64(pi64, i64);
3577#endif
3578}
3579
3580
3581/**
3582 * Atomically writes a boolean value, ordered.
3583 *
3584 * @param pf Pointer to the boolean variable.
3585 * @param f The boolean value to assign to *pf.
3586 */
3587DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
3588{
3589 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
3590}
3591
3592
3593/**
3594 * Atomically writes a boolean value, unordered.
3595 *
3596 * @param pf Pointer to the boolean variable.
3597 * @param f The boolean value to assign to *pf.
3598 */
3599DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
3600{
3601 *pf = f; /* byte writes are atomic on x86 */
3602}
3603
3604
3605/**
3606 * Atomically writes a pointer value, ordered.
3607 *
3609 * @param ppv    Pointer to the pointer variable.
3610 * @param pv     The pointer value to assign to *ppv.
3611 */
3612DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
3613{
3614#if ARCH_BITS == 32
3615 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3616#elif ARCH_BITS == 64
3617 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3618#else
3619# error "ARCH_BITS is bogus"
3620#endif
3621}
3622
3623
3624/**
3625 * Atomically writes a pointer value, unordered.
3626 *
3628 * @param ppv    Pointer to the pointer variable.
3629 * @param pv     The pointer value to assign to *ppv.
3630 */
3631DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
3632{
3633#if ARCH_BITS == 32
3634 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3635#elif ARCH_BITS == 64
3636 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3637#else
3638# error "ARCH_BITS is bogus"
3639#endif
3640}
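
/* Usage sketch (illustrative only): publishing a fully initialized structure
 * through an ordered pointer write and picking it up again with an ordered
 * pointer read. EXAMPLEDATA is a hypothetical type.
 *
 *     static void * volatile s_pvExampleData;
 *
 *     void ExamplePublish(EXAMPLEDATA *pData)
 *     {
 *         // The ordered write guarantees the stores initializing *pData
 *         // are visible before the pointer itself.
 *         ASMAtomicWritePtr(&s_pvExampleData, pData);
 *     }
 *
 *     EXAMPLEDATA *ExampleRetrieve(void)
 *     {
 *         return (EXAMPLEDATA *)ASMAtomicReadPtr(&s_pvExampleData);
 *     }
 */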
3641
3642
3643/**
3644 * Atomically write a value whose size might differ
3645 * between platforms or compilers, ordered.
3646 *
3647 * @param pu Pointer to the variable to update.
3648 * @param uNew The value to assign to *pu.
3649 */
3650#define ASMAtomicWriteSize(pu, uNew) \
3651 do { \
3652 switch (sizeof(*(pu))) { \
3653 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3654 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3655 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3656 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3657 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3658 } \
3659 } while (0)
3660
3661/**
3662 * Atomically write a value whose size might differ
3663 * between platforms or compilers, unordered.
3664 *
3665 * @param pu Pointer to the variable to update.
3666 * @param uNew The value to assign to *pu.
3667 */
3668#define ASMAtomicUoWriteSize(pu, uNew) \
3669 do { \
3670 switch (sizeof(*(pu))) { \
3671 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3672 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3673 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3674 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3675            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3676 } \
3677 } while (0)
3678
3679
3680
3681
3682/**
3683 * Invalidate page.
3684 *
3685 * @param pv Address of the page to invalidate.
3686 */
3687#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3688DECLASM(void) ASMInvalidatePage(void *pv);
3689#else
3690DECLINLINE(void) ASMInvalidatePage(void *pv)
3691{
3692# if RT_INLINE_ASM_USES_INTRIN
3693 __invlpg(pv);
3694
3695# elif RT_INLINE_ASM_GNU_STYLE
3696 __asm__ __volatile__("invlpg %0\n\t"
3697 : : "m" (*(uint8_t *)pv));
3698# else
3699 __asm
3700 {
3701# ifdef RT_ARCH_AMD64
3702 mov rax, [pv]
3703 invlpg [rax]
3704# else
3705 mov eax, [pv]
3706 invlpg [eax]
3707# endif
3708 }
3709# endif
3710}
3711#endif
3712
3713
3714#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3715# if PAGE_SIZE != 0x1000
3716# error "PAGE_SIZE is not 0x1000!"
3717# endif
3718#endif
3719
3720/**
3721 * Zeros a 4K memory page.
3722 *
3723 * @param pv Pointer to the memory block. This must be page aligned.
3724 */
3725#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3726DECLASM(void) ASMMemZeroPage(volatile void *pv);
3727# else
3728DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3729{
3730# if RT_INLINE_ASM_USES_INTRIN
3731# ifdef RT_ARCH_AMD64
3732 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
3733# else
3734 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
3735# endif
3736
3737# elif RT_INLINE_ASM_GNU_STYLE
3738 RTUINTREG uDummy;
3739# ifdef RT_ARCH_AMD64
3740 __asm__ __volatile__ ("rep stosq"
3741 : "=D" (pv),
3742 "=c" (uDummy)
3743 : "0" (pv),
3744 "c" (0x1000 >> 3),
3745 "a" (0)
3746 : "memory");
3747# else
3748 __asm__ __volatile__ ("rep stosl"
3749 : "=D" (pv),
3750 "=c" (uDummy)
3751 : "0" (pv),
3752 "c" (0x1000 >> 2),
3753 "a" (0)
3754 : "memory");
3755# endif
3756# else
3757 __asm
3758 {
3759# ifdef RT_ARCH_AMD64
3760 xor rax, rax
3761 mov ecx, 0200h
3762 mov rdi, [pv]
3763 rep stosq
3764# else
3765 xor eax, eax
3766 mov ecx, 0400h
3767 mov edi, [pv]
3768 rep stosd
3769# endif
3770 }
3771# endif
3772}
3773# endif
3774
3775
3776/**
3777 * Zeros a memory block with a 32-bit aligned size.
3778 *
3779 * @param pv Pointer to the memory block.
3780 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3781 */
3782#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3783DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3784#else
3785DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3786{
3787# if RT_INLINE_ASM_USES_INTRIN
3788 __stosd((unsigned long *)pv, 0, cb >> 2);
3789
3790# elif RT_INLINE_ASM_GNU_STYLE
3791 __asm__ __volatile__ ("rep stosl"
3792 : "=D" (pv),
3793 "=c" (cb)
3794 : "0" (pv),
3795 "1" (cb >> 2),
3796 "a" (0)
3797 : "memory");
3798# else
3799 __asm
3800 {
3801 xor eax, eax
3802# ifdef RT_ARCH_AMD64
3803 mov rcx, [cb]
3804 shr rcx, 2
3805 mov rdi, [pv]
3806# else
3807 mov ecx, [cb]
3808 shr ecx, 2
3809 mov edi, [pv]
3810# endif
3811 rep stosd
3812 }
3813# endif
3814}
3815#endif
3816
3817
3818/**
3819 * Fills a memory block with a 32-bit aligned size.
3820 *
3821 * @param pv Pointer to the memory block.
3822 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3823 * @param u32 The value to fill with.
3824 */
3825#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3826DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3827#else
3828DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3829{
3830# if RT_INLINE_ASM_USES_INTRIN
3831    __stosd((unsigned long *)pv, u32, cb >> 2);
3832
3833# elif RT_INLINE_ASM_GNU_STYLE
3834 __asm__ __volatile__ ("rep stosl"
3835 : "=D" (pv),
3836 "=c" (cb)
3837 : "0" (pv),
3838 "1" (cb >> 2),
3839 "a" (u32)
3840 : "memory");
3841# else
3842 __asm
3843 {
3844# ifdef RT_ARCH_AMD64
3845 mov rcx, [cb]
3846 shr rcx, 2
3847 mov rdi, [pv]
3848# else
3849 mov ecx, [cb]
3850 shr ecx, 2
3851 mov edi, [pv]
3852# endif
3853 mov eax, [u32]
3854 rep stosd
3855 }
3856# endif
3857}
3858#endif
3859
3860
3861/**
3862 * Checks if a memory block is filled with the specified byte.
3863 *
3864 * This is a sort of inverted memchr.
3865 *
3866 * @returns Pointer to the byte which doesn't equal u8.
3867 * @returns NULL if all equal to u8.
3868 *
3869 * @param pv Pointer to the memory block.
3870 * @param pv     Pointer to the memory block.
3871 * @param cb     Number of bytes in the block.
3871 * @param u8 The value it's supposed to be filled with.
3872 */
3873#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3874DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
3875#else
3876DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3877{
3878/** @todo rewrite this in inline assembly? */
3879 uint8_t const *pb = (uint8_t const *)pv;
3880 for (; cb; cb--, pb++)
3881 if (RT_UNLIKELY(*pb != u8))
3882 return (void *)pb;
3883 return NULL;
3884}
3885#endif
3886
3887
3888/**
3889 * Checks if a memory block is filled with the specified 32-bit value.
3890 *
3891 * This is a sort of inverted memchr.
3892 *
3893 * @returns Pointer to the first value which doesn't equal u32.
3894 * @returns NULL if all equal to u32.
3895 *
3896 * @param pv Pointer to the memory block.
3897 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3898 * @param u32 The value it's supposed to be filled with.
3899 */
3900#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3901DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
3902#else
3903DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3904{
3905/** @todo rewrite this in inline assembly? */
3906 uint32_t const *pu32 = (uint32_t const *)pv;
3907 for (; cb; cb -= 4, pu32++)
3908 if (RT_UNLIKELY(*pu32 != u32))
3909 return (uint32_t *)pu32;
3910 return NULL;
3911}
3912#endif
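
/* Usage sketch (illustrative only): wiping a page and double checking the
 * result in strict builds. pvPage is assumed to point to a page aligned 4K
 * buffer.
 *
 *     void ExampleWipePage(void *pvPage)
 *     {
 *         ASMMemZeroPage(pvPage);
 *         Assert(ASMMemIsAllU32(pvPage, 0x1000, 0) == NULL);
 *     }
 */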
3913
3914
3915/**
3916 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
3917 *
3918 * @returns u32F1 * u32F2.
3919 */
3920#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3921DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
3922#else
3923DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
3924{
3925# ifdef RT_ARCH_AMD64
3926 return (uint64_t)u32F1 * u32F2;
3927# else /* !RT_ARCH_AMD64 */
3928 uint64_t u64;
3929# if RT_INLINE_ASM_GNU_STYLE
3930 __asm__ __volatile__("mull %%edx"
3931 : "=A" (u64)
3932 : "a" (u32F2), "d" (u32F1));
3933# else
3934 __asm
3935 {
3936 mov edx, [u32F1]
3937 mov eax, [u32F2]
3938 mul edx
3939 mov dword ptr [u64], eax
3940 mov dword ptr [u64 + 4], edx
3941 }
3942# endif
3943 return u64;
3944# endif /* !RT_ARCH_AMD64 */
3945}
3946#endif
3947
3948
3949/**
3950 * Multiplies two signed 32-bit values returning a signed 64-bit result.
3951 *
3952 * @returns i32F1 * i32F2.
3953 */
3954#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3955DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
3956#else
3957DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
3958{
3959# ifdef RT_ARCH_AMD64
3960 return (int64_t)i32F1 * i32F2;
3961# else /* !RT_ARCH_AMD64 */
3962 int64_t i64;
3963# if RT_INLINE_ASM_GNU_STYLE
3964 __asm__ __volatile__("imull %%edx"
3965 : "=A" (i64)
3966 : "a" (i32F2), "d" (i32F1));
3967# else
3968 __asm
3969 {
3970 mov edx, [i32F1]
3971 mov eax, [i32F2]
3972 imul edx
3973 mov dword ptr [i64], eax
3974 mov dword ptr [i64 + 4], edx
3975 }
3976# endif
3977 return i64;
3978# endif /* !RT_ARCH_AMD64 */
3979}
3980#endif
3981
3982
3983/**
3984 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
3985 *
3986 * @returns u64 / u32.
3987 */
3988#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3989DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
3990#else
3991DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
3992{
3993# ifdef RT_ARCH_AMD64
3994 return (uint32_t)(u64 / u32);
3995# else /* !RT_ARCH_AMD64 */
3996# if RT_INLINE_ASM_GNU_STYLE
3997 RTUINTREG uDummy;
3998 __asm__ __volatile__("divl %3"
3999 : "=a" (u32), "=d"(uDummy)
4000 : "A" (u64), "r" (u32));
4001# else
4002 __asm
4003 {
4004 mov eax, dword ptr [u64]
4005 mov edx, dword ptr [u64 + 4]
4006 mov ecx, [u32]
4007 div ecx
4008 mov [u32], eax
4009 }
4010# endif
4011 return u32;
4012# endif /* !RT_ARCH_AMD64 */
4013}
4014#endif
4015
4016
4017/**
4018 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4019 *
4020 * @returns i64 / i32.
4021 */
4022#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4023DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4024#else
4025DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4026{
4027# ifdef RT_ARCH_AMD64
4028 return (int32_t)(i64 / i32);
4029# else /* !RT_ARCH_AMD64 */
4030# if RT_INLINE_ASM_GNU_STYLE
4031 RTUINTREG iDummy;
4032 __asm__ __volatile__("idivl %3"
4033 : "=a" (i32), "=d"(iDummy)
4034 : "A" (i64), "r" (i32));
4035# else
4036 __asm
4037 {
4038 mov eax, dword ptr [i64]
4039 mov edx, dword ptr [i64 + 4]
4040 mov ecx, [i32]
4041 idiv ecx
4042 mov [i32], eax
4043 }
4044# endif
4045 return i32;
4046# endif /* !RT_ARCH_AMD64 */
4047}
4048#endif
4049
4050
4051/**
4052 * Multiplies a 64-bit value by a 32-bit value and divides the result by a 32-bit
4053 * value, using a 96-bit intermediate result.
4054 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4055 * __udivdi3 and __umoddi3 even if this inline function is not used.
4056 *
4057 * @returns (u64A * u32B) / u32C.
4058 * @param u64A The 64-bit value.
4059 * @param u32B The 32-bit value to multiply A by.
4060 * @param u32C The 32-bit value to divide A*B by.
4061 */
4062#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4063DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4064#else
4065DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4066{
4067# if RT_INLINE_ASM_GNU_STYLE
4068# ifdef RT_ARCH_AMD64
4069 uint64_t u64Result, u64Spill;
4070 __asm__ __volatile__("mulq %2\n\t"
4071 "divq %3\n\t"
4072 : "=a" (u64Result),
4073 "=d" (u64Spill)
4074 : "r" ((uint64_t)u32B),
4075 "r" ((uint64_t)u32C),
4076 "0" (u64A),
4077 "1" (0));
4078 return u64Result;
4079# else
4080 uint32_t u32Dummy;
4081 uint64_t u64Result;
4082 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4083 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4084 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4085 eax = u64A.hi */
4086 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
4087 edx = u32C */
4088 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4089 edx = u32B */
4090 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4091 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4092 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4093 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4094 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4095 edx = u64Hi % u32C */
4096 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4097 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4098 "divl %%ecx \n\t" /* u64Result.lo */
4099 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4100 : "=A"(u64Result), "=c"(u32Dummy),
4101 "=S"(u32Dummy), "=D"(u32Dummy)
4102 : "a"((uint32_t)u64A),
4103 "S"((uint32_t)(u64A >> 32)),
4104 "c"(u32B),
4105 "D"(u32C));
4106 return u64Result;
4107# endif
4108# else
4109 RTUINT64U u;
4110 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4111 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4112 u64Hi += (u64Lo >> 32);
4113 u.s.Hi = (uint32_t)(u64Hi / u32C);
4114 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4115 return u.u;
4116# endif
4117}
4118#endif
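
/* Usage sketch (illustrative only): rescaling a 64-bit tick count from one
 * frequency to another without overflowing a 64-bit intermediate. The
 * 1193182 Hz source frequency is just an example value.
 *
 *     uint64_t ExampleTicksToNano(uint64_t cTicks)
 *     {
 *         // (cTicks * 10^9) / 1193182, computed with a 96-bit intermediate.
 *         return ASMMultU64ByU32DivByU32(cTicks, 1000000000, 1193182);
 *     }
 */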
4119
4120
4121/**
4122 * Probes a byte pointer for read access.
4123 *
4124 * While the function will fault if the byte is not read accessible,
4125 * the idea is to do this in a safe place like before acquiring locks
4126 * and such like.
4127 *
4128 * Also, this function guarantees that an eager compiler is not going
4129 * to optimize the probing away.
4130 *
4131 * @param pvByte Pointer to the byte.
4132 */
4133#if RT_INLINE_ASM_EXTERNAL
4134DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4135#else
4136DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4137{
4138 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4139 uint8_t u8;
4140# if RT_INLINE_ASM_GNU_STYLE
4141 __asm__ __volatile__("movb (%1), %0\n\t"
4142 : "=r" (u8)
4143 : "r" (pvByte));
4144# else
4145 __asm
4146 {
4147# ifdef RT_ARCH_AMD64
4148 mov rax, [pvByte]
4149 mov al, [rax]
4150# else
4151 mov eax, [pvByte]
4152 mov al, [eax]
4153# endif
4154 mov [u8], al
4155 }
4156# endif
4157 return u8;
4158}
4159#endif
4160
4161/**
4162 * Probes a buffer for read access page by page.
4163 *
4164 * While the function will fault if the buffer is not fully read
4165 * accessible, the idea is to do this in a safe place like before
4166 * acquiring locks and such like.
4167 *
4168 * Also, this function guarantees that an eager compiler is not going
4169 * to optimize the probing away.
4170 *
4171 * @param pvBuf Pointer to the buffer.
4172 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4173 */
4174DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4175{
4176 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4177 /* the first byte */
4178 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4179 ASMProbeReadByte(pu8);
4180
4181    /* the pages in between. */
4182 while (cbBuf > /*PAGE_SIZE*/0x1000)
4183 {
4184 ASMProbeReadByte(pu8);
4185 cbBuf -= /*PAGE_SIZE*/0x1000;
4186 pu8 += /*PAGE_SIZE*/0x1000;
4187 }
4188
4189 /* the last byte */
4190 ASMProbeReadByte(pu8 + cbBuf - 1);
4191}
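
/* Usage sketch (illustrative only): touching a caller supplied buffer before
 * entering a region where faulting would be inconvenient. The lock helpers
 * exampleLockEnter/exampleLockLeave are hypothetical.
 *
 *     void ExampleConsumeBuffer(const void *pvBuf, size_t cbBuf)
 *     {
 *         ASMProbeReadBuffer(pvBuf, cbBuf);  // fault here, not while holding the lock
 *         exampleLockEnter();
 *         // ... read from pvBuf ...
 *         exampleLockLeave();
 *     }
 */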
4192
4193
4194/** @def ASMBreakpoint
4195 * Debugger Breakpoint.
4196 * @remark In the gnu world we add a nop instruction after the int3 to
4197 * force gdb to remain at the int3 source line.
4198 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
4199 * @internal
4200 */
4201#if RT_INLINE_ASM_GNU_STYLE
4202# ifndef __L4ENV__
4203# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4204# else
4205# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4206# endif
4207#else
4208# define ASMBreakpoint() __debugbreak()
4209#endif
4210
4211
4212
4213/** @defgroup grp_inline_bits Bit Operations
4214 * @{
4215 */
4216
4217
4218/**
4219 * Sets a bit in a bitmap.
4220 *
4221 * @param pvBitmap Pointer to the bitmap.
4222 * @param iBit The bit to set.
4223 */
4224#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4225DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4226#else
4227DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4228{
4229# if RT_INLINE_ASM_USES_INTRIN
4230 _bittestandset((long *)pvBitmap, iBit);
4231
4232# elif RT_INLINE_ASM_GNU_STYLE
4233 __asm__ __volatile__ ("btsl %1, %0"
4234 : "=m" (*(volatile long *)pvBitmap)
4235 : "Ir" (iBit)
4236 : "memory");
4237# else
4238 __asm
4239 {
4240# ifdef RT_ARCH_AMD64
4241 mov rax, [pvBitmap]
4242 mov edx, [iBit]
4243 bts [rax], edx
4244# else
4245 mov eax, [pvBitmap]
4246 mov edx, [iBit]
4247 bts [eax], edx
4248# endif
4249 }
4250# endif
4251}
4252#endif
4253
4254
4255/**
4256 * Atomically sets a bit in a bitmap, ordered.
4257 *
4258 * @param pvBitmap Pointer to the bitmap.
4259 * @param iBit The bit to set.
4260 */
4261#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4262DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4263#else
4264DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4265{
4266# if RT_INLINE_ASM_USES_INTRIN
4267 _interlockedbittestandset((long *)pvBitmap, iBit);
4268# elif RT_INLINE_ASM_GNU_STYLE
4269 __asm__ __volatile__ ("lock; btsl %1, %0"
4270 : "=m" (*(volatile long *)pvBitmap)
4271 : "Ir" (iBit)
4272 : "memory");
4273# else
4274 __asm
4275 {
4276# ifdef RT_ARCH_AMD64
4277 mov rax, [pvBitmap]
4278 mov edx, [iBit]
4279 lock bts [rax], edx
4280# else
4281 mov eax, [pvBitmap]
4282 mov edx, [iBit]
4283 lock bts [eax], edx
4284# endif
4285 }
4286# endif
4287}
4288#endif
4289
4290
4291/**
4292 * Clears a bit in a bitmap.
4293 *
4294 * @param pvBitmap Pointer to the bitmap.
4295 * @param iBit The bit to clear.
4296 */
4297#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4298DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4299#else
4300DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4301{
4302# if RT_INLINE_ASM_USES_INTRIN
4303 _bittestandreset((long *)pvBitmap, iBit);
4304
4305# elif RT_INLINE_ASM_GNU_STYLE
4306 __asm__ __volatile__ ("btrl %1, %0"
4307 : "=m" (*(volatile long *)pvBitmap)
4308 : "Ir" (iBit)
4309 : "memory");
4310# else
4311 __asm
4312 {
4313# ifdef RT_ARCH_AMD64
4314 mov rax, [pvBitmap]
4315 mov edx, [iBit]
4316 btr [rax], edx
4317# else
4318 mov eax, [pvBitmap]
4319 mov edx, [iBit]
4320 btr [eax], edx
4321# endif
4322 }
4323# endif
4324}
4325#endif
4326
4327
4328/**
4329 * Atomically clears a bit in a bitmap, ordered.
4330 *
4331 * @param pvBitmap Pointer to the bitmap.
4332 * @param iBit      The bit to clear.
4333 * @remark No memory barrier, take care on smp.
4334 */
4335#if RT_INLINE_ASM_EXTERNAL
4336DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4337#else
4338DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4339{
4340# if RT_INLINE_ASM_GNU_STYLE
4341 __asm__ __volatile__ ("lock; btrl %1, %0"
4342 : "=m" (*(volatile long *)pvBitmap)
4343 : "Ir" (iBit)
4344 : "memory");
4345# else
4346 __asm
4347 {
4348# ifdef RT_ARCH_AMD64
4349 mov rax, [pvBitmap]
4350 mov edx, [iBit]
4351 lock btr [rax], edx
4352# else
4353 mov eax, [pvBitmap]
4354 mov edx, [iBit]
4355 lock btr [eax], edx
4356# endif
4357 }
4358# endif
4359}
4360#endif
4361
4362
4363/**
4364 * Toggles a bit in a bitmap.
4365 *
4366 * @param pvBitmap Pointer to the bitmap.
4367 * @param iBit The bit to toggle.
4368 */
4369#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4370DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4371#else
4372DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4373{
4374# if RT_INLINE_ASM_USES_INTRIN
4375 _bittestandcomplement((long *)pvBitmap, iBit);
4376# elif RT_INLINE_ASM_GNU_STYLE
4377 __asm__ __volatile__ ("btcl %1, %0"
4378 : "=m" (*(volatile long *)pvBitmap)
4379 : "Ir" (iBit)
4380 : "memory");
4381# else
4382 __asm
4383 {
4384# ifdef RT_ARCH_AMD64
4385 mov rax, [pvBitmap]
4386 mov edx, [iBit]
4387 btc [rax], edx
4388# else
4389 mov eax, [pvBitmap]
4390 mov edx, [iBit]
4391 btc [eax], edx
4392# endif
4393 }
4394# endif
4395}
4396#endif
4397
4398
4399/**
4400 * Atomically toggles a bit in a bitmap, ordered.
4401 *
4402 * @param pvBitmap Pointer to the bitmap.
4403 * @param iBit      The bit to toggle.
4404 */
4405#if RT_INLINE_ASM_EXTERNAL
4406DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4407#else
4408DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4409{
4410# if RT_INLINE_ASM_GNU_STYLE
4411 __asm__ __volatile__ ("lock; btcl %1, %0"
4412 : "=m" (*(volatile long *)pvBitmap)
4413 : "Ir" (iBit)
4414 : "memory");
4415# else
4416 __asm
4417 {
4418# ifdef RT_ARCH_AMD64
4419 mov rax, [pvBitmap]
4420 mov edx, [iBit]
4421 lock btc [rax], edx
4422# else
4423 mov eax, [pvBitmap]
4424 mov edx, [iBit]
4425 lock btc [eax], edx
4426# endif
4427 }
4428# endif
4429}
4430#endif
4431
4432
4433/**
4434 * Tests and sets a bit in a bitmap.
4435 *
4436 * @returns true if the bit was set.
4437 * @returns false if the bit was clear.
4438 * @param pvBitmap Pointer to the bitmap.
4439 * @param iBit The bit to test and set.
4440 */
4441#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4442DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4443#else
4444DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4445{
4446 union { bool f; uint32_t u32; uint8_t u8; } rc;
4447# if RT_INLINE_ASM_USES_INTRIN
4448 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4449
4450# elif RT_INLINE_ASM_GNU_STYLE
4451 __asm__ __volatile__ ("btsl %2, %1\n\t"
4452 "setc %b0\n\t"
4453 "andl $1, %0\n\t"
4454 : "=q" (rc.u32),
4455 "=m" (*(volatile long *)pvBitmap)
4456 : "Ir" (iBit)
4457 : "memory");
4458# else
4459 __asm
4460 {
4461 mov edx, [iBit]
4462# ifdef RT_ARCH_AMD64
4463 mov rax, [pvBitmap]
4464 bts [rax], edx
4465# else
4466 mov eax, [pvBitmap]
4467 bts [eax], edx
4468# endif
4469 setc al
4470 and eax, 1
4471 mov [rc.u32], eax
4472 }
4473# endif
4474 return rc.f;
4475}
4476#endif
4477
4478
4479/**
4480 * Atomically tests and sets a bit in a bitmap, ordered.
4481 *
4482 * @returns true if the bit was set.
4483 * @returns false if the bit was clear.
4484 * @param pvBitmap Pointer to the bitmap.
4485 * @param iBit The bit to test and set.
4486 */
4487#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4488DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4489#else
4490DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4491{
4492 union { bool f; uint32_t u32; uint8_t u8; } rc;
4493# if RT_INLINE_ASM_USES_INTRIN
4494 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4495# elif RT_INLINE_ASM_GNU_STYLE
4496 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
4497 "setc %b0\n\t"
4498 "andl $1, %0\n\t"
4499 : "=q" (rc.u32),
4500 "=m" (*(volatile long *)pvBitmap)
4501 : "Ir" (iBit)
4502 : "memory");
4503# else
4504 __asm
4505 {
4506 mov edx, [iBit]
4507# ifdef RT_ARCH_AMD64
4508 mov rax, [pvBitmap]
4509 lock bts [rax], edx
4510# else
4511 mov eax, [pvBitmap]
4512 lock bts [eax], edx
4513# endif
4514 setc al
4515 and eax, 1
4516 mov [rc.u32], eax
4517 }
4518# endif
4519 return rc.f;
4520}
4521#endif
4522
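/* Usage sketch (illustrative, not part of the original header): a one-shot
 * initialization guard built on ASMAtomicBitTestAndSet. The flag word and
 * bit index below are hypothetical; any 32-bit aligned bitmap works the
 * same way.
 *
 *     static volatile uint32_t s_fInitFlags = 0;
 *
 *     void doLazyInit(void)
 *     {
 *         if (!ASMAtomicBitTestAndSet(&s_fInitFlags, 0))
 *         {
 *             // Bit 0 was clear before the call; exactly one caller gets here.
 *             // ... perform the one-time initialization ...
 *         }
 *     }
 */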
4523
4524/**
4525 * Tests and clears a bit in a bitmap.
4526 *
4527 * @returns true if the bit was set.
4528 * @returns false if the bit was clear.
4529 * @param pvBitmap Pointer to the bitmap.
4530 * @param iBit The bit to test and clear.
4531 */
4532#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4533DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4534#else
4535DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4536{
4537 union { bool f; uint32_t u32; uint8_t u8; } rc;
4538# if RT_INLINE_ASM_USES_INTRIN
4539 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4540
4541# elif RT_INLINE_ASM_GNU_STYLE
4542 __asm__ __volatile__ ("btrl %2, %1\n\t"
4543 "setc %b0\n\t"
4544 "andl $1, %0\n\t"
4545 : "=q" (rc.u32),
4546 "=m" (*(volatile long *)pvBitmap)
4547 : "Ir" (iBit)
4548 : "memory");
4549# else
4550 __asm
4551 {
4552 mov edx, [iBit]
4553# ifdef RT_ARCH_AMD64
4554 mov rax, [pvBitmap]
4555 btr [rax], edx
4556# else
4557 mov eax, [pvBitmap]
4558 btr [eax], edx
4559# endif
4560 setc al
4561 and eax, 1
4562 mov [rc.u32], eax
4563 }
4564# endif
4565 return rc.f;
4566}
4567#endif
4568
4569
4570/**
4571 * Atomically tests and clears a bit in a bitmap, ordered.
4572 *
4573 * @returns true if the bit was set.
4574 * @returns false if the bit was clear.
4575 * @param pvBitmap Pointer to the bitmap.
4576 * @param iBit The bit to test and clear.
4577 * @remark No memory barrier, take care on smp.
4578 */
4579#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4580DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4581#else
4582DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4583{
4584 union { bool f; uint32_t u32; uint8_t u8; } rc;
4585# if RT_INLINE_ASM_USES_INTRIN
4586 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4587
4588# elif RT_INLINE_ASM_GNU_STYLE
4589 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
4590 "setc %b0\n\t"
4591 "andl $1, %0\n\t"
4592 : "=q" (rc.u32),
4593 "=m" (*(volatile long *)pvBitmap)
4594 : "Ir" (iBit)
4595 : "memory");
4596# else
4597 __asm
4598 {
4599 mov edx, [iBit]
4600# ifdef RT_ARCH_AMD64
4601 mov rax, [pvBitmap]
4602 lock btr [rax], edx
4603# else
4604 mov eax, [pvBitmap]
4605 lock btr [eax], edx
4606# endif
4607 setc al
4608 and eax, 1
4609 mov [rc.u32], eax
4610 }
4611# endif
4612 return rc.f;
4613}
4614#endif
4615
4616
4617/**
4618 * Tests and toggles a bit in a bitmap.
4619 *
4620 * @returns true if the bit was set.
4621 * @returns false if the bit was clear.
4622 * @param pvBitmap Pointer to the bitmap.
4623 * @param iBit The bit to test and toggle.
4624 */
4625#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4626 DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4627#else
4628DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4629{
4630 union { bool f; uint32_t u32; uint8_t u8; } rc;
4631# if RT_INLINE_ASM_USES_INTRIN
4632 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4633
4634# elif RT_INLINE_ASM_GNU_STYLE
4635 __asm__ __volatile__ ("btcl %2, %1\n\t"
4636 "setc %b0\n\t"
4637 "andl $1, %0\n\t"
4638 : "=q" (rc.u32),
4639 "=m" (*(volatile long *)pvBitmap)
4640 : "Ir" (iBit)
4641 : "memory");
4642# else
4643 __asm
4644 {
4645 mov edx, [iBit]
4646# ifdef RT_ARCH_AMD64
4647 mov rax, [pvBitmap]
4648 btc [rax], edx
4649# else
4650 mov eax, [pvBitmap]
4651 btc [eax], edx
4652# endif
4653 setc al
4654 and eax, 1
4655 mov [rc.u32], eax
4656 }
4657# endif
4658 return rc.f;
4659}
4660#endif
4661
4662
4663/**
4664 * Atomically tests and toggles a bit in a bitmap, ordered.
4665 *
4666 * @returns true if the bit was set.
4667 * @returns false if the bit was clear.
4668 * @param pvBitmap Pointer to the bitmap.
4669 * @param iBit The bit to test and toggle.
4670 */
4671#if RT_INLINE_ASM_EXTERNAL
4672DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4673#else
4674DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4675{
4676 union { bool f; uint32_t u32; uint8_t u8; } rc;
4677# if RT_INLINE_ASM_GNU_STYLE
4678 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
4679 "setc %b0\n\t"
4680 "andl $1, %0\n\t"
4681 : "=q" (rc.u32),
4682 "=m" (*(volatile long *)pvBitmap)
4683 : "Ir" (iBit)
4684 : "memory");
4685# else
4686 __asm
4687 {
4688 mov edx, [iBit]
4689# ifdef RT_ARCH_AMD64
4690 mov rax, [pvBitmap]
4691 lock btc [rax], edx
4692# else
4693 mov eax, [pvBitmap]
4694 lock btc [eax], edx
4695# endif
4696 setc al
4697 and eax, 1
4698 mov [rc.u32], eax
4699 }
4700# endif
4701 return rc.f;
4702}
4703#endif
4704
4705
4706/**
4707 * Tests if a bit in a bitmap is set.
4708 *
4709 * @returns true if the bit is set.
4710 * @returns false if the bit is clear.
4711 * @param pvBitmap Pointer to the bitmap.
4712 * @param iBit The bit to test.
4713 */
4714#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4715DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
4716#else
4717DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
4718{
4719 union { bool f; uint32_t u32; uint8_t u8; } rc;
4720# if RT_INLINE_ASM_USES_INTRIN
4721 rc.u32 = _bittest((long *)pvBitmap, iBit);
4722# elif RT_INLINE_ASM_GNU_STYLE
4723
4724 __asm__ __volatile__ ("btl %2, %1\n\t"
4725 "setc %b0\n\t"
4726 "andl $1, %0\n\t"
4727 : "=q" (rc.u32),
4728 "=m" (*(volatile long *)pvBitmap)
4729 : "Ir" (iBit)
4730 : "memory");
4731# else
4732 __asm
4733 {
4734 mov edx, [iBit]
4735# ifdef RT_ARCH_AMD64
4736 mov rax, [pvBitmap]
4737 bt [rax], edx
4738# else
4739 mov eax, [pvBitmap]
4740 bt [eax], edx
4741# endif
4742 setc al
4743 and eax, 1
4744 mov [rc.u32], eax
4745 }
4746# endif
4747 return rc.f;
4748}
4749#endif
4750
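/* Usage sketch (illustrative): iBit is an index into the whole bitmap, so it
 * may exceed 31; the bt/bts instruction family with a memory operand locates
 * the containing dword itself. The array below is hypothetical.
 *
 *     uint32_t au32Bitmap[4] = { 0 };          // 128 bits, all clear
 *     ASMBitSet(au32Bitmap, 45);               // sets bit 13 of dword 1
 *     bool fSet = ASMBitTest(au32Bitmap, 45);  // true
 */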
4751
4752/**
4753 * Clears a bit range within a bitmap.
4754 *
4755 * @param pvBitmap Pointer to the bitmap.
4756 * @param iBitStart The first bit to clear.
4757 * @param iBitEnd The first bit not to clear.
4758 */
4759DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4760{
4761 if (iBitStart < iBitEnd)
4762 {
4763 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4764 int iStart = iBitStart & ~31;
4765 int iEnd = iBitEnd & ~31;
4766 if (iStart == iEnd)
4767 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4768 else
4769 {
4770 /* bits in first dword. */
4771 if (iBitStart & 31)
4772 {
4773 *pu32 &= (1 << (iBitStart & 31)) - 1;
4774 pu32++;
4775 iBitStart = iStart + 32;
4776 }
4777
4778 /* whole dword. */
4779 if (iBitStart != iEnd)
4780 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4781
4782 /* bits in last dword. */
4783 if (iBitEnd & 31)
4784 {
4785 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4786 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4787 }
4788 }
4789 }
4790}
4791
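/* Usage sketch (illustrative): clearing bits 4..35 of a hypothetical 96-bit
 * bitmap. The range spans a partial first dword (bits 4..31), no full middle
 * dword, and a partial last dword (bits 32..35), exercising all three
 * branches of ASMBitClearRange.
 *
 *     uint32_t au32Bitmap[3] = { 0xffffffff, 0xffffffff, 0xffffffff };
 *     ASMBitClearRange(au32Bitmap, 4, 36);     // iBitEnd is exclusive
 *     // au32Bitmap[0] == 0x0000000f, au32Bitmap[1] == 0xfffffff0,
 *     // au32Bitmap[2] is untouched.
 */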
4792
4793/**
4794 * Finds the first clear bit in a bitmap.
4795 *
4796 * @returns Index of the first zero bit.
4797 * @returns -1 if no clear bit was found.
4798 * @param pvBitmap Pointer to the bitmap.
4799 * @param cBits The number of bits in the bitmap. Multiple of 32.
4800 */
4801#if RT_INLINE_ASM_EXTERNAL
4802DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
4803#else
4804DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
4805{
4806 if (cBits)
4807 {
4808 int32_t iBit;
4809# if RT_INLINE_ASM_GNU_STYLE
4810 RTCCUINTREG uEAX, uECX, uEDI;
4811 cBits = RT_ALIGN_32(cBits, 32);
4812 __asm__ __volatile__("repe; scasl\n\t"
4813 "je 1f\n\t"
4814# ifdef RT_ARCH_AMD64
4815 "lea -4(%%rdi), %%rdi\n\t"
4816 "xorl (%%rdi), %%eax\n\t"
4817 "subq %5, %%rdi\n\t"
4818# else
4819 "lea -4(%%edi), %%edi\n\t"
4820 "xorl (%%edi), %%eax\n\t"
4821 "subl %5, %%edi\n\t"
4822# endif
4823 "shll $3, %%edi\n\t"
4824 "bsfl %%eax, %%edx\n\t"
4825 "addl %%edi, %%edx\n\t"
4826 "1:\t\n"
4827 : "=d" (iBit),
4828 "=&c" (uECX),
4829 "=&D" (uEDI),
4830 "=&a" (uEAX)
4831 : "0" (0xffffffff),
4832 "mr" (pvBitmap),
4833 "1" (cBits >> 5),
4834 "2" (pvBitmap),
4835 "3" (0xffffffff));
4836# else
4837 cBits = RT_ALIGN_32(cBits, 32);
4838 __asm
4839 {
4840# ifdef RT_ARCH_AMD64
4841 mov rdi, [pvBitmap]
4842 mov rbx, rdi
4843# else
4844 mov edi, [pvBitmap]
4845 mov ebx, edi
4846# endif
4847 mov edx, 0ffffffffh
4848 mov eax, edx
4849 mov ecx, [cBits]
4850 shr ecx, 5
4851 repe scasd
4852 je done
4853
4854# ifdef RT_ARCH_AMD64
4855 lea rdi, [rdi - 4]
4856 xor eax, [rdi]
4857 sub rdi, rbx
4858# else
4859 lea edi, [edi - 4]
4860 xor eax, [edi]
4861 sub edi, ebx
4862# endif
4863 shl edi, 3
4864 bsf edx, eax
4865 add edx, edi
4866 done:
4867 mov [iBit], edx
4868 }
4869# endif
4870 return iBit;
4871 }
4872 return -1;
4873}
4874#endif
4875
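/* Usage sketch (illustrative): finding a free slot in a hypothetical
 * allocation bitmap. As documented above, the bit count must be a multiple
 * of 32.
 *
 *     uint32_t au32AllocBitmap[8] = { 0 };          // 256 slots, all free
 *     int iSlot = ASMBitFirstClear(au32AllocBitmap, 256);
 *     if (iSlot >= 0)
 *         ASMBitSet(au32AllocBitmap, iSlot);        // mark the slot as used
 */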
4876
4877/**
4878 * Finds the next clear bit in a bitmap.
4879 *
4880 * @returns Index of the first zero bit.
4881 * @returns -1 if no clear bit was found.
4882 * @param pvBitmap Pointer to the bitmap.
4883 * @param cBits The number of bits in the bitmap. Multiple of 32.
4884 * @param iBitPrev The bit returned from the last search.
4885 * The search will start at iBitPrev + 1.
4886 */
4887#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4888DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4889#else
4890DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4891{
4892 int iBit = ++iBitPrev & 31;
4893 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4894 cBits -= iBitPrev & ~31;
4895 if (iBit)
4896 {
4897 /* inspect the first dword. */
4898 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
4899# if RT_INLINE_ASM_USES_INTRIN
4900 unsigned long ulBit = 0;
4901 if (_BitScanForward(&ulBit, u32))
4902 return ulBit + iBitPrev;
4903 iBit = -1;
4904# else
4905# if RT_INLINE_ASM_GNU_STYLE
4906 __asm__ __volatile__("bsf %1, %0\n\t"
4907 "jnz 1f\n\t"
4908 "movl $-1, %0\n\t"
4909 "1:\n\t"
4910 : "=r" (iBit)
4911 : "r" (u32));
4912# else
4913 __asm
4914 {
4915 mov edx, [u32]
4916 bsf eax, edx
4917 jnz done
4918 mov eax, 0ffffffffh
4919 done:
4920 mov [iBit], eax
4921 }
4922# endif
4923 if (iBit >= 0)
4924 return iBit + iBitPrev;
4925# endif
4926 /* Search the rest of the bitmap, if there is anything. */
4927 if (cBits > 32)
4928 {
4929 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4930 if (iBit >= 0)
4931 return iBit + (iBitPrev & ~31) + 32;
4932 }
4933 }
4934 else
4935 {
4936 /* Search the rest of the bitmap. */
4937 iBit = ASMBitFirstClear(pvBitmap, cBits);
4938 if (iBit >= 0)
4939 return iBit + (iBitPrev & ~31);
4940 }
4941 return iBit;
4942}
4943#endif
4944
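/* Usage sketch (illustrative): walking every clear bit of a bitmap with
 * ASMBitFirstClear / ASMBitNextClear. The bitmap and size are hypothetical.
 *
 *     uint32_t au32Bitmap[8] = { 0 };          // 256 bits
 *     int iBit = ASMBitFirstClear(au32Bitmap, 256);
 *     while (iBit >= 0)
 *     {
 *         // ... bit iBit is clear; process it ...
 *         iBit = ASMBitNextClear(au32Bitmap, 256, iBit);
 *     }
 */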
4945
4946/**
4947 * Finds the first set bit in a bitmap.
4948 *
4949 * @returns Index of the first set bit.
4950 * @returns -1 if no set bit was found.
4951 * @param pvBitmap Pointer to the bitmap.
4952 * @param cBits The number of bits in the bitmap. Multiple of 32.
4953 */
4954#if RT_INLINE_ASM_EXTERNAL
4955DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
4956#else
4957DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
4958{
4959 if (cBits)
4960 {
4961 int32_t iBit;
4962# if RT_INLINE_ASM_GNU_STYLE
4963 RTCCUINTREG uEAX, uECX, uEDI;
4964 cBits = RT_ALIGN_32(cBits, 32);
4965 __asm__ __volatile__("repe; scasl\n\t"
4966 "je 1f\n\t"
4967# ifdef RT_ARCH_AMD64
4968 "lea -4(%%rdi), %%rdi\n\t"
4969 "movl (%%rdi), %%eax\n\t"
4970 "subq %5, %%rdi\n\t"
4971# else
4972 "lea -4(%%edi), %%edi\n\t"
4973 "movl (%%edi), %%eax\n\t"
4974 "subl %5, %%edi\n\t"
4975# endif
4976 "shll $3, %%edi\n\t"
4977 "bsfl %%eax, %%edx\n\t"
4978 "addl %%edi, %%edx\n\t"
4979 "1:\t\n"
4980 : "=d" (iBit),
4981 "=&c" (uECX),
4982 "=&D" (uEDI),
4983 "=&a" (uEAX)
4984 : "0" (0xffffffff),
4985 "mr" (pvBitmap),
4986 "1" (cBits >> 5),
4987 "2" (pvBitmap),
4988 "3" (0));
4989# else
4990 cBits = RT_ALIGN_32(cBits, 32);
4991 __asm
4992 {
4993# ifdef RT_ARCH_AMD64
4994 mov rdi, [pvBitmap]
4995 mov rbx, rdi
4996# else
4997 mov edi, [pvBitmap]
4998 mov ebx, edi
4999# endif
5000 mov edx, 0ffffffffh
5001 xor eax, eax
5002 mov ecx, [cBits]
5003 shr ecx, 5
5004 repe scasd
5005 je done
5006# ifdef RT_ARCH_AMD64
5007 lea rdi, [rdi - 4]
5008 mov eax, [rdi]
5009 sub rdi, rbx
5010# else
5011 lea edi, [edi - 4]
5012 mov eax, [edi]
5013 sub edi, ebx
5014# endif
5015 shl edi, 3
5016 bsf edx, eax
5017 add edx, edi
5018 done:
5019 mov [iBit], edx
5020 }
5021# endif
5022 return iBit;
5023 }
5024 return -1;
5025}
5026#endif
5027
5028
5029/**
5030 * Finds the next set bit in a bitmap.
5031 *
5032 * @returns Index of the next set bit.
5033 * @returns -1 if no set bit was found.
5034 * @param pvBitmap Pointer to the bitmap.
5035 * @param cBits The number of bits in the bitmap. Multiple of 32.
5036 * @param iBitPrev The bit returned from the last search.
5037 * The search will start at iBitPrev + 1.
5038 */
5039#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5040DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5041#else
5042DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5043{
5044 int iBit = ++iBitPrev & 31;
5045 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5046 cBits -= iBitPrev & ~31;
5047 if (iBit)
5048 {
5049 /* inspect the first dword. */
5050 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
5051# if RT_INLINE_ASM_USES_INTRIN
5052 unsigned long ulBit = 0;
5053 if (_BitScanForward(&ulBit, u32))
5054 return ulBit + iBitPrev;
5055 iBit = -1;
5056# else
5057# if RT_INLINE_ASM_GNU_STYLE
5058 __asm__ __volatile__("bsf %1, %0\n\t"
5059 "jnz 1f\n\t"
5060 "movl $-1, %0\n\t"
5061 "1:\n\t"
5062 : "=r" (iBit)
5063 : "r" (u32));
5064# else
5065 __asm
5066 {
5067 mov edx, u32
5068 bsf eax, edx
5069 jnz done
5070 mov eax, 0ffffffffh
5071 done:
5072 mov [iBit], eax
5073 }
5074# endif
5075 if (iBit >= 0)
5076 return iBit + iBitPrev;
5077# endif
5078 /* Search the rest of the bitmap, if there is anything. */
5079 if (cBits > 32)
5080 {
5081 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5082 if (iBit >= 0)
5083 return iBit + (iBitPrev & ~31) + 32;
5084 }
5085
5086 }
5087 else
5088 {
5089 /* Search the rest of the bitmap. */
5090 iBit = ASMBitFirstSet(pvBitmap, cBits);
5091 if (iBit >= 0)
5092 return iBit + (iBitPrev & ~31);
5093 }
5094 return iBit;
5095}
5096#endif
5097
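/* Usage sketch (illustrative): enumerating every set bit, e.g. every pending
 * item in a 64-bit pending-work bitmap. Names and values are hypothetical.
 *
 *     uint32_t au32Pending[2] = { 0x00000011, 0x80000000 };
 *     int iBit = ASMBitFirstSet(au32Pending, 64);
 *     while (iBit >= 0)                        // visits bits 0, 4 and 63
 *     {
 *         // ... handle pending item iBit ...
 *         iBit = ASMBitNextSet(au32Pending, 64, iBit);
 *     }
 */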
5098
5099/**
5100 * Finds the first bit which is set in the given 32-bit integer.
5101 * Bits are numbered from 1 (least significant) to 32.
5102 *
5103 * @returns index [1..32] of the first set bit.
5104 * @returns 0 if all bits are cleared.
5105 * @param u32 Integer to search for set bits.
5106 * @remark Similar to ffs() in BSD.
5107 */
5108DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5109{
5110# if RT_INLINE_ASM_USES_INTRIN
5111 unsigned long iBit;
5112 if (_BitScanForward(&iBit, u32))
5113 iBit++;
5114 else
5115 iBit = 0;
5116# elif RT_INLINE_ASM_GNU_STYLE
5117 uint32_t iBit;
5118 __asm__ __volatile__("bsf %1, %0\n\t"
5119 "jnz 1f\n\t"
5120 "xorl %0, %0\n\t"
5121 "jmp 2f\n"
5122 "1:\n\t"
5123 "incl %0\n"
5124 "2:\n\t"
5125 : "=r" (iBit)
5126 : "rm" (u32));
5127# else
5128 uint32_t iBit;
5129 _asm
5130 {
5131 bsf eax, [u32]
5132 jnz found
5133 xor eax, eax
5134 jmp done
5135 found:
5136 inc eax
5137 done:
5138 mov [iBit], eax
5139 }
5140# endif
5141 return iBit;
5142}
5143
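/* Usage sketch (illustrative): the return value is 1-based, matching ffs().
 *
 *     unsigned i;
 *     i = ASMBitFirstSetU32(0);           // 0  - no bits set
 *     i = ASMBitFirstSetU32(1);           // 1  - bit 0 is the first set bit
 *     i = ASMBitFirstSetU32(0x80000000);  // 32 - only bit 31 is set
 */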
5144
5145/**
5146 * Finds the first bit which is set in the given 32-bit integer.
5147 * Bits are numbered from 1 (least significant) to 32.
5148 *
5149 * @returns index [1..32] of the first set bit.
5150 * @returns 0 if all bits are cleared.
5151 * @param i32 Integer to search for set bits.
5152 * @remark Similar to ffs() in BSD.
5153 */
5154DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5155{
5156 return ASMBitFirstSetU32((uint32_t)i32);
5157}
5158
5159
5160/**
5161 * Finds the last bit which is set in the given 32-bit integer.
5162 * Bits are numbered from 1 (least significant) to 32.
5163 *
5164 * @returns index [1..32] of the last set bit.
5165 * @returns 0 if all bits are cleared.
5166 * @param u32 Integer to search for set bits.
5167 * @remark Similar to fls() in BSD.
5168 */
5169DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5170{
5171# if RT_INLINE_ASM_USES_INTRIN
5172 unsigned long iBit;
5173 if (_BitScanReverse(&iBit, u32))
5174 iBit++;
5175 else
5176 iBit = 0;
5177# elif RT_INLINE_ASM_GNU_STYLE
5178 uint32_t iBit;
5179 __asm__ __volatile__("bsrl %1, %0\n\t"
5180 "jnz 1f\n\t"
5181 "xorl %0, %0\n\t"
5182 "jmp 2f\n"
5183 "1:\n\t"
5184 "incl %0\n"
5185 "2:\n\t"
5186 : "=r" (iBit)
5187 : "rm" (u32));
5188# else
5189 uint32_t iBit;
5190 _asm
5191 {
5192 bsr eax, [u32]
5193 jnz found
5194 xor eax, eax
5195 jmp done
5196 found:
5197 inc eax
5198 done:
5199 mov [iBit], eax
5200 }
5201# endif
5202 return iBit;
5203}
5204
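/* Usage sketch (illustrative): fls()-style semantics; for a non-zero u32 the
 * result minus one is the index of the most significant set bit, i.e. the
 * integer log2.
 *
 *     unsigned i;
 *     i = ASMBitLastSetU32(0);            // 0  - no bits set
 *     i = ASMBitLastSetU32(1);            // 1  - bit 0
 *     i = ASMBitLastSetU32(0x00000300);   // 10 - bit 9 is the highest set bit
 */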
5205
5206/**
5207 * Finds the last bit which is set in the given 32-bit integer.
5208 * Bits are numbered from 1 (least significant) to 32.
5209 *
5210 * @returns index [1..32] of the last set bit.
5211 * @returns 0 if all bits are cleared.
5212 * @param i32 Integer to search for set bits.
5213 * @remark Similar to fls() in BSD.
5214 */
5215DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5216{
5217 return ASMBitLastSetU32((uint32_t)i32);
5218}
5219
5220
5221/**
5222 * Reverse the byte order of the given 32-bit integer.
5223 * @param u32 Integer
5224 */
5225DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5226{
5227#if RT_INLINE_ASM_USES_INTRIN
5228 u32 = _byteswap_ulong(u32);
5229#elif RT_INLINE_ASM_GNU_STYLE
5230 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5231#else
5232 _asm
5233 {
5234 mov eax, [u32]
5235 bswap eax
5236 mov [u32], eax
5237 }
5238#endif
5239 return u32;
5240}
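
/* Usage sketch (illustrative): converting a 32-bit value between big and
 * little endian, e.g. a network-order IPv4 address.
 *
 *     uint32_t u32 = ASMByteSwapU32(0x12345678);  // yields 0x78563412
 */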
5241
5242/** @} */
5243
5244
5245/** @} */
5246#endif
5247