VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 12156

Last change on this file since 12156 was 12156, checked in by vboxsync, 16 years ago: "not necessary"

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using the _MSC_VER >= 1400 compiler intrinsics.
39 * Otherwise defined as 0.
40 */
41
42#ifdef _MSC_VER
43# if _MSC_VER >= 1400
44# define RT_INLINE_ASM_USES_INTRIN 1
45# include <intrin.h>
46 /* Emit the intrinsics at all optimization levels. */
47# pragma intrinsic(_ReadWriteBarrier)
48# pragma intrinsic(__cpuid)
49# pragma intrinsic(_enable)
50# pragma intrinsic(_disable)
51# pragma intrinsic(__rdtsc)
52# pragma intrinsic(__readmsr)
53# pragma intrinsic(__writemsr)
54# pragma intrinsic(__outbyte)
55# pragma intrinsic(__outword)
56# pragma intrinsic(__outdword)
57# pragma intrinsic(__inbyte)
58# pragma intrinsic(__inword)
59# pragma intrinsic(__indword)
60# pragma intrinsic(__invlpg)
61# pragma intrinsic(__stosd)
62# pragma intrinsic(__stosw)
63# pragma intrinsic(__stosb)
64# pragma intrinsic(__readcr0)
65# pragma intrinsic(__readcr2)
66# pragma intrinsic(__readcr3)
67# pragma intrinsic(__readcr4)
68# pragma intrinsic(__writecr0)
69# pragma intrinsic(__writecr3)
70# pragma intrinsic(__writecr4)
71# pragma intrinsic(_BitScanForward)
72# pragma intrinsic(_BitScanReverse)
73# pragma intrinsic(_bittest)
74# pragma intrinsic(_bittestandset)
75# pragma intrinsic(_bittestandreset)
76# pragma intrinsic(_bittestandcomplement)
77# pragma intrinsic(_byteswap_ushort)
78# pragma intrinsic(_byteswap_ulong)
79# pragma intrinsic(_interlockedbittestandset)
80# pragma intrinsic(_interlockedbittestandreset)
81# pragma intrinsic(_InterlockedAnd)
82# pragma intrinsic(_InterlockedOr)
83# pragma intrinsic(_InterlockedIncrement)
84# pragma intrinsic(_InterlockedDecrement)
85# pragma intrinsic(_InterlockedExchange)
86# pragma intrinsic(_InterlockedExchangeAdd)
87# pragma intrinsic(_InterlockedCompareExchange)
88# pragma intrinsic(_InterlockedCompareExchange64)
89# ifdef RT_ARCH_AMD64
90# pragma intrinsic(__stosq)
91# pragma intrinsic(__readcr8)
92# pragma intrinsic(__writecr8)
93# pragma intrinsic(_byteswap_uint64)
94# pragma intrinsic(_InterlockedExchange64)
95# endif
96# endif
97#endif
98#ifndef RT_INLINE_ASM_USES_INTRIN
99# define RT_INLINE_ASM_USES_INTRIN 0
100#endif
101
102
103
104/** @defgroup grp_asm ASM - Assembly Routines
105 * @ingroup grp_rt
106 *
107 * @remarks The difference between ordered and unordered atomic operations is that
108 * the former will complete outstanding reads and writes before continuing
109 * while the latter doesn't make any promises about the order. Ordered
110 * operations don't, it seems, make any 100% promise with respect to whether
111 * the operation will complete before any subsequent memory access.
112 * (please, correct if wrong.)
113 *
114 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
115 * are unordered (note the Uo).
116 *
117 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
118 * or even optimize assembler instructions away. For instance, in the following code
119 * the second rdmsr instruction is optimized away because gcc treats that instruction
120 * as deterministic:
121 *
122 * @code
123 * static inline uint32_t rdmsr_low(int idx)
124 * {
125 * uint32_t low;
126 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
127 * return low;
 * }
128 * ...
129 * uint32_t msr1 = rdmsr_low(1);
130 * foo(msr1);
131 * msr1 = rdmsr_low(1);
132 * bar(msr1);
133 * @endcode
134 *
135 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
136 * use the result of the first call as input parameter for bar() as well. For rdmsr this
137 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
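/* Illustrative sketch (not part of the original header): adding __volatile__ to the
 * rdmsr_low() example above prevents gcc from treating the instruction as
 * deterministic, so both reads in the calling code are actually emitted. The name
 * rdmsr_low_volatile is arbitrary and assumes GNU-style inline assembly.
 *
 * @code
 * static inline uint32_t rdmsr_low_volatile(int idx)
 * {
 *     uint32_t low;
 *     __asm__ __volatile__ ("rdmsr" : "=a" (low) : "c" (idx) : "edx");
 *     return low;
 * }
 * @endcode
 */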
142
143/** @def RT_INLINE_ASM_EXTERNAL
144 * Defined as 1 if the compiler does not support inline assembly.
145 * The ASM* functions will then be implemented in an external .asm file.
146 *
147 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
148 * inline assembly in their AMD64 compiler.
149 */
150#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
151# define RT_INLINE_ASM_EXTERNAL 1
152#else
153# define RT_INLINE_ASM_EXTERNAL 0
154#endif
155
156/** @def RT_INLINE_ASM_GNU_STYLE
157 * Defined as 1 if the compiler understands GNU style inline assembly.
158 */
159#if defined(_MSC_VER)
160# define RT_INLINE_ASM_GNU_STYLE 0
161#else
162# define RT_INLINE_ASM_GNU_STYLE 1
163#endif
164
165
166/** @todo find a more proper place for this structure? */
167#pragma pack(1)
168/** IDTR */
169typedef struct RTIDTR
170{
171 /** Size of the IDT. */
172 uint16_t cbIdt;
173 /** Address of the IDT. */
174 uintptr_t pIdt;
175} RTIDTR, *PRTIDTR;
176#pragma pack()
177
178#pragma pack(1)
179/** GDTR */
180typedef struct RTGDTR
181{
182 /** Size of the GDT. */
183 uint16_t cbGdt;
184 /** Address of the GDT. */
185 uintptr_t pGdt;
186} RTGDTR, *PRTGDTR;
187#pragma pack()
188
189
190/** @def ASMReturnAddress
191 * Gets the return address of the current (or calling if you like) function or method.
192 */
193#ifdef _MSC_VER
194# ifdef __cplusplus
195extern "C"
196# endif
197void * _ReturnAddress(void);
198# pragma intrinsic(_ReturnAddress)
199# define ASMReturnAddress() _ReturnAddress()
200#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
201# define ASMReturnAddress() __builtin_return_address(0)
202#else
203# error "Unsupported compiler."
204#endif
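/* Illustrative usage sketch (not from the original header): recording the caller of
 * a function, e.g. for tracing. mySubsystemAcquire is a placeholder name, and the
 * LogFlow statement assumes iprt/log.h is included; any logging facility works.
 *
 * @code
 * void mySubsystemAcquire(void)
 * {
 *     void *pvCaller = ASMReturnAddress();
 *     LogFlow(("mySubsystemAcquire: caller=%p\n", pvCaller));
 * }
 * @endcode
 */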
205
206
207/**
208 * Gets the content of the IDTR CPU register.
209 * @param pIdtr Where to store the IDTR contents.
210 */
211#if RT_INLINE_ASM_EXTERNAL
212DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
213#else
214DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
215{
216# if RT_INLINE_ASM_GNU_STYLE
217 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
218# else
219 __asm
220 {
221# ifdef RT_ARCH_AMD64
222 mov rax, [pIdtr]
223 sidt [rax]
224# else
225 mov eax, [pIdtr]
226 sidt [eax]
227# endif
228 }
229# endif
230}
231#endif
232
233
234/**
235 * Sets the content of the IDTR CPU register.
236 * @param pIdtr Where to load the IDTR contents from.
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
240#else
241DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 lidt [rax]
251# else
252 mov eax, [pIdtr]
253 lidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Gets the content of the GDTR CPU register.
263 * @param pGdtr Where to store the GDTR contents.
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
267#else
268DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pGdtr]
277 sgdt [rax]
278# else
279 mov eax, [pGdtr]
280 sgdt [eax]
281# endif
282 }
283# endif
284}
285#endif
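/* Illustrative usage sketch (not from the original header): capturing the current
 * IDT and GDT base/limit, e.g. when snapshotting the host descriptor tables.
 * The variable names are arbitrary.
 *
 * @code
 * RTIDTR Idtr;
 * RTGDTR Gdtr;
 * ASMGetIDTR(&Idtr);
 * ASMGetGDTR(&Gdtr);
 * // Idtr.pIdt/Idtr.cbIdt and Gdtr.pGdt/Gdtr.cbGdt now hold the base addresses
 * // and limits as reported by sidt/sgdt.
 * @endcode
 */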
286
287/**
288 * Get the cs register.
289 * @returns cs.
290 */
291#if RT_INLINE_ASM_EXTERNAL
292DECLASM(RTSEL) ASMGetCS(void);
293#else
294DECLINLINE(RTSEL) ASMGetCS(void)
295{
296 RTSEL SelCS;
297# if RT_INLINE_ASM_GNU_STYLE
298 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
299# else
300 __asm
301 {
302 mov ax, cs
303 mov [SelCS], ax
304 }
305# endif
306 return SelCS;
307}
308#endif
309
310
311/**
312 * Get the DS register.
313 * @returns DS.
314 */
315#if RT_INLINE_ASM_EXTERNAL
316DECLASM(RTSEL) ASMGetDS(void);
317#else
318DECLINLINE(RTSEL) ASMGetDS(void)
319{
320 RTSEL SelDS;
321# if RT_INLINE_ASM_GNU_STYLE
322 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
323# else
324 __asm
325 {
326 mov ax, ds
327 mov [SelDS], ax
328 }
329# endif
330 return SelDS;
331}
332#endif
333
334
335/**
336 * Get the ES register.
337 * @returns ES.
338 */
339#if RT_INLINE_ASM_EXTERNAL
340DECLASM(RTSEL) ASMGetES(void);
341#else
342DECLINLINE(RTSEL) ASMGetES(void)
343{
344 RTSEL SelES;
345# if RT_INLINE_ASM_GNU_STYLE
346 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
347# else
348 __asm
349 {
350 mov ax, es
351 mov [SelES], ax
352 }
353# endif
354 return SelES;
355}
356#endif
357
358
359/**
360 * Get the FS register.
361 * @returns FS.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetFS(void);
365#else
366DECLINLINE(RTSEL) ASMGetFS(void)
367{
368 RTSEL SelFS;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
371# else
372 __asm
373 {
374 mov ax, fs
375 mov [SelFS], ax
376 }
377# endif
378 return SelFS;
379}
380#endif
381
382
383/**
384 * Get the GS register.
385 * @returns GS.
386 */
387#if RT_INLINE_ASM_EXTERNAL
388DECLASM(RTSEL) ASMGetGS(void);
389#else
390DECLINLINE(RTSEL) ASMGetGS(void)
391{
392 RTSEL SelGS;
393# if RT_INLINE_ASM_GNU_STYLE
394 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
395# else
396 __asm
397 {
398 mov ax, gs
399 mov [SelGS], ax
400 }
401# endif
402 return SelGS;
403}
404#endif
405
406
407/**
408 * Get the SS register.
409 * @returns SS.
410 */
411#if RT_INLINE_ASM_EXTERNAL
412DECLASM(RTSEL) ASMGetSS(void);
413#else
414DECLINLINE(RTSEL) ASMGetSS(void)
415{
416 RTSEL SelSS;
417# if RT_INLINE_ASM_GNU_STYLE
418 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
419# else
420 __asm
421 {
422 mov ax, ss
423 mov [SelSS], ax
424 }
425# endif
426 return SelSS;
427}
428#endif
429
430
431/**
432 * Get the TR register.
433 * @returns TR.
434 */
435#if RT_INLINE_ASM_EXTERNAL
436DECLASM(RTSEL) ASMGetTR(void);
437#else
438DECLINLINE(RTSEL) ASMGetTR(void)
439{
440 RTSEL SelTR;
441# if RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
443# else
444 __asm
445 {
446 str ax
447 mov [SelTR], ax
448 }
449# endif
450 return SelTR;
451}
452#endif
453
454
455/**
456 * Get the [RE]FLAGS register.
457 * @returns [RE]FLAGS.
458 */
459#if RT_INLINE_ASM_EXTERNAL
460DECLASM(RTCCUINTREG) ASMGetFlags(void);
461#else
462DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
463{
464 RTCCUINTREG uFlags;
465# if RT_INLINE_ASM_GNU_STYLE
466# ifdef RT_ARCH_AMD64
467 __asm__ __volatile__("pushfq\n\t"
468 "popq %0\n\t"
469 : "=g" (uFlags));
470# else
471 __asm__ __volatile__("pushfl\n\t"
472 "popl %0\n\t"
473 : "=g" (uFlags));
474# endif
475# else
476 __asm
477 {
478# ifdef RT_ARCH_AMD64
479 pushfq
480 pop [uFlags]
481# else
482 pushfd
483 pop [uFlags]
484# endif
485 }
486# endif
487 return uFlags;
488}
489#endif
490
491
492/**
493 * Set the [RE]FLAGS register.
494 * @param uFlags The new [RE]FLAGS value.
495 */
496#if RT_INLINE_ASM_EXTERNAL
497DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
498#else
499DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
500{
501# if RT_INLINE_ASM_GNU_STYLE
502# ifdef RT_ARCH_AMD64
503 __asm__ __volatile__("pushq %0\n\t"
504 "popfq\n\t"
505 : : "g" (uFlags));
506# else
507 __asm__ __volatile__("pushl %0\n\t"
508 "popfl\n\t"
509 : : "g" (uFlags));
510# endif
511# else
512 __asm
513 {
514# ifdef RT_ARCH_AMD64
515 push [uFlags]
516 popfq
517# else
518 push [uFlags]
519 popfd
520# endif
521 }
522# endif
523}
524#endif
525
526
527/**
528 * Gets the content of the CPU timestamp counter register.
529 *
530 * @returns TSC.
531 */
532#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
533DECLASM(uint64_t) ASMReadTSC(void);
534#else
535DECLINLINE(uint64_t) ASMReadTSC(void)
536{
537 RTUINT64U u;
538# if RT_INLINE_ASM_GNU_STYLE
539 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
540# else
541# if RT_INLINE_ASM_USES_INTRIN
542 u.u = __rdtsc();
543# else
544 __asm
545 {
546 rdtsc
547 mov [u.s.Lo], eax
548 mov [u.s.Hi], edx
549 }
550# endif
551# endif
552 return u.u;
553}
554#endif
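/* Illustrative usage sketch (not from the original header): measuring an interval in
 * raw TSC ticks. workToMeasure() is a placeholder; the measurement assumes the code
 * stays on one CPU and that the TSC is not stopped or virtualized in between.
 *
 * @code
 * uint64_t const uTscStart = ASMReadTSC();
 * workToMeasure();
 * uint64_t const cTicks = ASMReadTSC() - uTscStart;
 * @endcode
 */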
555
556
557/**
558 * Performs the cpuid instruction returning all registers.
559 *
560 * @param uOperator CPUID operation (eax).
561 * @param pvEAX Where to store eax.
562 * @param pvEBX Where to store ebx.
563 * @param pvECX Where to store ecx.
564 * @param pvEDX Where to store edx.
565 * @remark We're using void pointers to ease the use of special bitfield structures and such.
566 */
567#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
568DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
569#else
570DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
571{
572# if RT_INLINE_ASM_GNU_STYLE
573# ifdef RT_ARCH_AMD64
574 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
575 __asm__ ("cpuid\n\t"
576 : "=a" (uRAX),
577 "=b" (uRBX),
578 "=c" (uRCX),
579 "=d" (uRDX)
580 : "0" (uOperator));
581 *(uint32_t *)pvEAX = (uint32_t)uRAX;
582 *(uint32_t *)pvEBX = (uint32_t)uRBX;
583 *(uint32_t *)pvECX = (uint32_t)uRCX;
584 *(uint32_t *)pvEDX = (uint32_t)uRDX;
585# else
586 __asm__ ("xchgl %%ebx, %1\n\t"
587 "cpuid\n\t"
588 "xchgl %%ebx, %1\n\t"
589 : "=a" (*(uint32_t *)pvEAX),
590 "=r" (*(uint32_t *)pvEBX),
591 "=c" (*(uint32_t *)pvECX),
592 "=d" (*(uint32_t *)pvEDX)
593 : "0" (uOperator));
594# endif
595
596# elif RT_INLINE_ASM_USES_INTRIN
597 int aInfo[4];
598 __cpuid(aInfo, uOperator);
599 *(uint32_t *)pvEAX = aInfo[0];
600 *(uint32_t *)pvEBX = aInfo[1];
601 *(uint32_t *)pvECX = aInfo[2];
602 *(uint32_t *)pvEDX = aInfo[3];
603
604# else
605 uint32_t uEAX;
606 uint32_t uEBX;
607 uint32_t uECX;
608 uint32_t uEDX;
609 __asm
610 {
611 push ebx
612 mov eax, [uOperator]
613 cpuid
614 mov [uEAX], eax
615 mov [uEBX], ebx
616 mov [uECX], ecx
617 mov [uEDX], edx
618 pop ebx
619 }
620 *(uint32_t *)pvEAX = uEAX;
621 *(uint32_t *)pvEBX = uEBX;
622 *(uint32_t *)pvECX = uECX;
623 *(uint32_t *)pvEDX = uEDX;
624# endif
625}
626#endif
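/* Illustrative usage sketch (not from the original header): reading the CPU vendor
 * string from CPUID leaf 0. The vendor string is the concatenation of EBX, EDX and
 * ECX (in that order), which is why the output pointers below are interleaved.
 *
 * @code
 * char     szVendor[13];
 * uint32_t uMaxLeaf;
 * ASMCpuId(0, &uMaxLeaf, &szVendor[0], &szVendor[8], &szVendor[4]);
 * szVendor[12] = '\0';   // e.g. "GenuineIntel" or "AuthenticAMD"
 * @endcode
 */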
627
628
629/**
630 * Performs the cpuid instruction returning all registers.
631 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
632 *
633 * @param uOperator CPUID operation (eax).
634 * @param uIdxECX ecx index
635 * @param pvEAX Where to store eax.
636 * @param pvEBX Where to store ebx.
637 * @param pvECX Where to store ecx.
638 * @param pvEDX Where to store edx.
639 * @remark We're using void pointers to ease the use of special bitfield structures and such.
640 */
641#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
642DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
643#else
644DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
645{
646# if RT_INLINE_ASM_GNU_STYLE
647# ifdef RT_ARCH_AMD64
648 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
649 __asm__ ("cpuid\n\t"
650 : "=a" (uRAX),
651 "=b" (uRBX),
652 "=c" (uRCX),
653 "=d" (uRDX)
654 : "0" (uOperator),
655 "2" (uIdxECX));
656 *(uint32_t *)pvEAX = (uint32_t)uRAX;
657 *(uint32_t *)pvEBX = (uint32_t)uRBX;
658 *(uint32_t *)pvECX = (uint32_t)uRCX;
659 *(uint32_t *)pvEDX = (uint32_t)uRDX;
660# else
661 __asm__ ("xchgl %%ebx, %1\n\t"
662 "cpuid\n\t"
663 "xchgl %%ebx, %1\n\t"
664 : "=a" (*(uint32_t *)pvEAX),
665 "=r" (*(uint32_t *)pvEBX),
666 "=c" (*(uint32_t *)pvECX),
667 "=d" (*(uint32_t *)pvEDX)
668 : "0" (uOperator),
669 "2" (uIdxECX));
670# endif
671
672# elif RT_INLINE_ASM_USES_INTRIN
673 int aInfo[4];
674 /** @todo __cpuid ignores uIdxECX; use __cpuidex (where available) when the subleaf matters. */
675 __cpuid(aInfo, uOperator);
676 *(uint32_t *)pvEAX = aInfo[0];
677 *(uint32_t *)pvEBX = aInfo[1];
678 *(uint32_t *)pvECX = aInfo[2];
679 *(uint32_t *)pvEDX = aInfo[3];
680
681# else
682 uint32_t uEAX;
683 uint32_t uEBX;
684 uint32_t uECX;
685 uint32_t uEDX;
686 __asm
687 {
688 push ebx
689 mov eax, [uOperator]
690 mov ecx, [uIdxECX]
691 cpuid
692 mov [uEAX], eax
693 mov [uEBX], ebx
694 mov [uECX], ecx
695 mov [uEDX], edx
696 pop ebx
697 }
698 *(uint32_t *)pvEAX = uEAX;
699 *(uint32_t *)pvEBX = uEBX;
700 *(uint32_t *)pvECX = uECX;
701 *(uint32_t *)pvEDX = uEDX;
702# endif
703}
704#endif
705
706
707/**
708 * Performs the cpuid instruction returning ecx and edx.
709 *
710 * @param uOperator CPUID operation (eax).
711 * @param pvECX Where to store ecx.
712 * @param pvEDX Where to store edx.
713 * @remark We're using void pointers to ease the use of special bitfield structures and such.
714 */
715#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
716DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
717#else
718DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
719{
720 uint32_t uEBX;
721 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
722}
723#endif
724
725
726/**
727 * Performs the cpuid instruction returning edx.
728 *
729 * @param uOperator CPUID operation (eax).
730 * @returns EDX after cpuid operation.
731 */
732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
733DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
734#else
735DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
736{
737 RTCCUINTREG xDX;
738# if RT_INLINE_ASM_GNU_STYLE
739# ifdef RT_ARCH_AMD64
740 RTCCUINTREG uSpill;
741 __asm__ ("cpuid"
742 : "=a" (uSpill),
743 "=d" (xDX)
744 : "0" (uOperator)
745 : "rbx", "rcx");
746# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
747 __asm__ ("push %%ebx\n\t"
748 "cpuid\n\t"
749 "pop %%ebx\n\t"
750 : "=a" (uOperator),
751 "=d" (xDX)
752 : "0" (uOperator)
753 : "ecx");
754# else
755 __asm__ ("cpuid"
756 : "=a" (uOperator),
757 "=d" (xDX)
758 : "0" (uOperator)
759 : "ebx", "ecx");
760# endif
761
762# elif RT_INLINE_ASM_USES_INTRIN
763 int aInfo[4];
764 __cpuid(aInfo, uOperator);
765 xDX = aInfo[3];
766
767# else
768 __asm
769 {
770 push ebx
771 mov eax, [uOperator]
772 cpuid
773 mov [xDX], edx
774 pop ebx
775 }
776# endif
777 return (uint32_t)xDX;
778}
779#endif
780
781
782/**
783 * Performs the cpuid instruction returning ecx.
784 *
785 * @param uOperator CPUID operation (eax).
786 * @returns ECX after cpuid operation.
787 */
788#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
789DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
790#else
791DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
792{
793 RTCCUINTREG xCX;
794# if RT_INLINE_ASM_GNU_STYLE
795# ifdef RT_ARCH_AMD64
796 RTCCUINTREG uSpill;
797 __asm__ ("cpuid"
798 : "=a" (uSpill),
799 "=c" (xCX)
800 : "0" (uOperator)
801 : "rbx", "rdx");
802# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
803 __asm__ ("push %%ebx\n\t"
804 "cpuid\n\t"
805 "pop %%ebx\n\t"
806 : "=a" (uOperator),
807 "=c" (xCX)
808 : "0" (uOperator)
809 : "edx");
810# else
811 __asm__ ("cpuid"
812 : "=a" (uOperator),
813 "=c" (xCX)
814 : "0" (uOperator)
815 : "ebx", "edx");
816
817# endif
818
819# elif RT_INLINE_ASM_USES_INTRIN
820 int aInfo[4];
821 __cpuid(aInfo, uOperator);
822 xCX = aInfo[2];
823
824# else
825 __asm
826 {
827 push ebx
828 mov eax, [uOperator]
829 cpuid
830 mov [xCX], ecx
831 pop ebx
832 }
833# endif
834 return (uint32_t)xCX;
835}
836#endif
837
838
839/**
840 * Checks if the current CPU supports CPUID.
841 *
842 * @returns true if CPUID is supported.
843 */
844DECLINLINE(bool) ASMHasCpuId(void)
845{
846#ifdef RT_ARCH_AMD64
847 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
848#else /* !RT_ARCH_AMD64 */
849 bool fRet = false;
850# if RT_INLINE_ASM_GNU_STYLE
851 uint32_t u1;
852 uint32_t u2;
853 __asm__ ("pushf\n\t"
854 "pop %1\n\t"
855 "mov %1, %2\n\t"
856 "xorl $0x200000, %1\n\t"
857 "push %1\n\t"
858 "popf\n\t"
859 "pushf\n\t"
860 "pop %1\n\t"
861 "cmpl %1, %2\n\t"
862 "setne %0\n\t"
863 "push %2\n\t"
864 "popf\n\t"
865 : "=m" (fRet), "=r" (u1), "=r" (u2));
866# else
867 __asm
868 {
869 pushfd
870 pop eax
871 mov ebx, eax
872 xor eax, 0200000h
873 push eax
874 popfd
875 pushfd
876 pop eax
877 cmp eax, ebx
878 setne fRet
879 push ebx
880 popfd
881 }
882# endif
883 return fRet;
884#endif /* !RT_ARCH_AMD64 */
885}
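/* Illustrative usage sketch (not from the original header): on 32-bit hosts, guard
 * CPUID use with ASMHasCpuId() and then test a feature bit from leaf 1. Bit 25 of
 * EDX is the architectural SSE flag; the bit number comes from the Intel/AMD
 * manuals, not from this header.
 *
 * @code
 * bool fHasSse = false;
 * if (ASMHasCpuId())
 *     fHasSse = !!(ASMCpuId_EDX(1) & RT_BIT(25));
 * @endcode
 */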
886
887
888/**
889 * Gets the APIC ID of the current CPU.
890 *
891 * @returns the APIC ID.
892 */
893#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
894DECLASM(uint8_t) ASMGetApicId(void);
895#else
896DECLINLINE(uint8_t) ASMGetApicId(void)
897{
898 RTCCUINTREG xBX;
899# if RT_INLINE_ASM_GNU_STYLE
900# ifdef RT_ARCH_AMD64
901 RTCCUINTREG uSpill;
902 __asm__ ("cpuid"
903 : "=a" (uSpill),
904 "=b" (xBX)
905 : "0" (1)
906 : "rcx", "rdx");
907# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
908 RTCCUINTREG uSpill;
909 __asm__ ("mov %%ebx,%1\n\t"
910 "cpuid\n\t"
911 "xchgl %%ebx,%1\n\t"
912 : "=a" (uSpill),
913 "=r" (xBX)
914 : "0" (1)
915 : "ecx", "edx");
916# else
917 RTCCUINTREG uSpill;
918 __asm__ ("cpuid"
919 : "=a" (uSpill),
920 "=b" (xBX)
921 : "0" (1)
922 : "ecx", "edx");
923# endif
924
925# elif RT_INLINE_ASM_USES_INTRIN
926 int aInfo[4];
927 __cpuid(aInfo, 1);
928 xBX = aInfo[1];
929
930# else
931 __asm
932 {
933 push ebx
934 mov eax, 1
935 cpuid
936 mov [xBX], ebx
937 pop ebx
938 }
939# endif
940 return (uint8_t)(xBX >> 24);
941}
942#endif
943
944
945/**
946 * Tests if this is a genuine Intel CPU based on the ASMCpuId(0) output.
947 *
948 * @returns true/false.
949 * @param uEBX EBX return from ASMCpuId(0)
950 * @param uECX ECX return from ASMCpuId(0)
951 * @param uEDX EDX return from ASMCpuId(0)
952 */
953DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
954{
955 return uEBX == 0x756e6547 /* 'Genu' */
956 && uECX == 0x6c65746e /* 'ntel' */
957 && uEDX == 0x49656e69; /* 'ineI' */
958}
959
960
961/**
962 * Tests if this is a genuine Intel CPU.
963 *
964 * @returns true/false.
965 */
966DECLINLINE(bool) ASMIsIntelCpu(void)
967{
968 uint32_t uEAX, uEBX, uECX, uEDX;
969 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
970 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
971}
972
973
974/**
975 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
976 *
977 * @returns Family.
978 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
979 */
980DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
981{
982 return ((uEAX >> 8) & 0xf) == 0xf
983 ? ((uEAX >> 20) & 0x7f) + 0xf
984 : ((uEAX >> 8) & 0xf);
985}
986
987
988/**
989 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
990 *
991 * @returns Model.
992 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
993 *
994 */
995DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
996{
997 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
998 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
999 : ((uEAX >> 4) & 0xf);
1000}
1001
1002
1003/**
1004 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1005 *
1006 * @returns Model.
1007 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1008 *
1009 */
1010DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1011{
1012 return ((uEAX >> 8) & 0xf) == 0xf
1013 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1014 : ((uEAX >> 4) & 0xf);
1015}
1016
1017
1018/**
1019 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1020 *
1021 * @returns Model.
1022 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1023 * @param fIntel Whether it's an Intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1024 */
1025DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1026{
1027 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1028 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1029 : ((uEAX >> 4) & 0xf);
1030}
1031
1032
1033/**
1034 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1035 *
1036 * @returns Stepping.
1037 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1038 */
1039DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1040{
1041 return uEAX & 0xf;
1042}
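/* Illustrative usage sketch (not from the original header): decoding the family,
 * model and stepping of the host CPU from CPUID leaf 1 using the helpers above.
 *
 * @code
 * uint32_t uEAX, uEBX, uECX, uEDX;
 * ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 * bool const fIntel = ASMIsIntelCpuEx(uEBX, uECX, uEDX);
 * ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 * uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 * uint32_t const uModel    = ASMGetCpuModel(uEAX, fIntel);
 * uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 * @endcode
 */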
1043
1044
1045/**
1046 * Get cr0.
1047 * @returns cr0.
1048 */
1049#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1050DECLASM(RTCCUINTREG) ASMGetCR0(void);
1051#else
1052DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1053{
1054 RTCCUINTREG uCR0;
1055# if RT_INLINE_ASM_USES_INTRIN
1056 uCR0 = __readcr0();
1057
1058# elif RT_INLINE_ASM_GNU_STYLE
1059# ifdef RT_ARCH_AMD64
1060 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1061# else
1062 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1063# endif
1064# else
1065 __asm
1066 {
1067# ifdef RT_ARCH_AMD64
1068 mov rax, cr0
1069 mov [uCR0], rax
1070# else
1071 mov eax, cr0
1072 mov [uCR0], eax
1073# endif
1074 }
1075# endif
1076 return uCR0;
1077}
1078#endif
1079
1080
1081/**
1082 * Sets the CR0 register.
1083 * @param uCR0 The new CR0 value.
1084 */
1085#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1086DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1087#else
1088DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1089{
1090# if RT_INLINE_ASM_USES_INTRIN
1091 __writecr0(uCR0);
1092
1093# elif RT_INLINE_ASM_GNU_STYLE
1094# ifdef RT_ARCH_AMD64
1095 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1096# else
1097 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1098# endif
1099# else
1100 __asm
1101 {
1102# ifdef RT_ARCH_AMD64
1103 mov rax, [uCR0]
1104 mov cr0, rax
1105# else
1106 mov eax, [uCR0]
1107 mov cr0, eax
1108# endif
1109 }
1110# endif
1111}
1112#endif
1113
1114
1115/**
1116 * Get cr2.
1117 * @returns cr2.
1118 */
1119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1120DECLASM(RTCCUINTREG) ASMGetCR2(void);
1121#else
1122DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1123{
1124 RTCCUINTREG uCR2;
1125# if RT_INLINE_ASM_USES_INTRIN
1126 uCR2 = __readcr2();
1127
1128# elif RT_INLINE_ASM_GNU_STYLE
1129# ifdef RT_ARCH_AMD64
1130 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1131# else
1132 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1133# endif
1134# else
1135 __asm
1136 {
1137# ifdef RT_ARCH_AMD64
1138 mov rax, cr2
1139 mov [uCR2], rax
1140# else
1141 mov eax, cr2
1142 mov [uCR2], eax
1143# endif
1144 }
1145# endif
1146 return uCR2;
1147}
1148#endif
1149
1150
1151/**
1152 * Sets the CR2 register.
1153 * @param uCR2 The new CR2 value.
1154 */
1155#if RT_INLINE_ASM_EXTERNAL
1156DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1157#else
1158DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1159{
1160# if RT_INLINE_ASM_GNU_STYLE
1161# ifdef RT_ARCH_AMD64
1162 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1163# else
1164 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1165# endif
1166# else
1167 __asm
1168 {
1169# ifdef RT_ARCH_AMD64
1170 mov rax, [uCR2]
1171 mov cr2, rax
1172# else
1173 mov eax, [uCR2]
1174 mov cr2, eax
1175# endif
1176 }
1177# endif
1178}
1179#endif
1180
1181
1182/**
1183 * Get cr3.
1184 * @returns cr3.
1185 */
1186#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1187DECLASM(RTCCUINTREG) ASMGetCR3(void);
1188#else
1189DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1190{
1191 RTCCUINTREG uCR3;
1192# if RT_INLINE_ASM_USES_INTRIN
1193 uCR3 = __readcr3();
1194
1195# elif RT_INLINE_ASM_GNU_STYLE
1196# ifdef RT_ARCH_AMD64
1197 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1198# else
1199 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1200# endif
1201# else
1202 __asm
1203 {
1204# ifdef RT_ARCH_AMD64
1205 mov rax, cr3
1206 mov [uCR3], rax
1207# else
1208 mov eax, cr3
1209 mov [uCR3], eax
1210# endif
1211 }
1212# endif
1213 return uCR3;
1214}
1215#endif
1216
1217
1218/**
1219 * Sets the CR3 register.
1220 *
1221 * @param uCR3 New CR3 value.
1222 */
1223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1224DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1225#else
1226DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1227{
1228# if RT_INLINE_ASM_USES_INTRIN
1229 __writecr3(uCR3);
1230
1231# elif RT_INLINE_ASM_GNU_STYLE
1232# ifdef RT_ARCH_AMD64
1233 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1234# else
1235 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1236# endif
1237# else
1238 __asm
1239 {
1240# ifdef RT_ARCH_AMD64
1241 mov rax, [uCR3]
1242 mov cr3, rax
1243# else
1244 mov eax, [uCR3]
1245 mov cr3, eax
1246# endif
1247 }
1248# endif
1249}
1250#endif
1251
1252
1253/**
1254 * Reloads the CR3 register.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(void) ASMReloadCR3(void);
1258#else
1259DECLINLINE(void) ASMReloadCR3(void)
1260{
1261# if RT_INLINE_ASM_USES_INTRIN
1262 __writecr3(__readcr3());
1263
1264# elif RT_INLINE_ASM_GNU_STYLE
1265 RTCCUINTREG u;
1266# ifdef RT_ARCH_AMD64
1267 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1268 "movq %0, %%cr3\n\t"
1269 : "=r" (u));
1270# else
1271 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1272 "movl %0, %%cr3\n\t"
1273 : "=r" (u));
1274# endif
1275# else
1276 __asm
1277 {
1278# ifdef RT_ARCH_AMD64
1279 mov rax, cr3
1280 mov cr3, rax
1281# else
1282 mov eax, cr3
1283 mov cr3, eax
1284# endif
1285 }
1286# endif
1287}
1288#endif
1289
1290
1291/**
1292 * Get cr4.
1293 * @returns cr4.
1294 */
1295#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1296DECLASM(RTCCUINTREG) ASMGetCR4(void);
1297#else
1298DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1299{
1300 RTCCUINTREG uCR4;
1301# if RT_INLINE_ASM_USES_INTRIN
1302 uCR4 = __readcr4();
1303
1304# elif RT_INLINE_ASM_GNU_STYLE
1305# ifdef RT_ARCH_AMD64
1306 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1307# else
1308 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1309# endif
1310# else
1311 __asm
1312 {
1313# ifdef RT_ARCH_AMD64
1314 mov rax, cr4
1315 mov [uCR4], rax
1316# else
1317 push eax /* just in case */
1318 /*mov eax, cr4*/
1319 _emit 0x0f
1320 _emit 0x20
1321 _emit 0xe0
1322 mov [uCR4], eax
1323 pop eax
1324# endif
1325 }
1326# endif
1327 return uCR4;
1328}
1329#endif
1330
1331
1332/**
1333 * Sets the CR4 register.
1334 *
1335 * @param uCR4 New CR4 value.
1336 */
1337#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1338DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1339#else
1340DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1341{
1342# if RT_INLINE_ASM_USES_INTRIN
1343 __writecr4(uCR4);
1344
1345# elif RT_INLINE_ASM_GNU_STYLE
1346# ifdef RT_ARCH_AMD64
1347 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1348# else
1349 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1350# endif
1351# else
1352 __asm
1353 {
1354# ifdef RT_ARCH_AMD64
1355 mov rax, [uCR4]
1356 mov cr4, rax
1357# else
1358 mov eax, [uCR4]
1359 _emit 0x0F
1360 _emit 0x22
1361 _emit 0xE0 /* mov cr4, eax */
1362# endif
1363 }
1364# endif
1365}
1366#endif
1367
1368
1369/**
1370 * Get cr8.
1371 * @returns cr8.
1372 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1373 */
1374#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1375DECLASM(RTCCUINTREG) ASMGetCR8(void);
1376#else
1377DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1378{
1379# ifdef RT_ARCH_AMD64
1380 RTCCUINTREG uCR8;
1381# if RT_INLINE_ASM_USES_INTRIN
1382 uCR8 = __readcr8();
1383
1384# elif RT_INLINE_ASM_GNU_STYLE
1385 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1386# else
1387 __asm
1388 {
1389 mov rax, cr8
1390 mov [uCR8], rax
1391 }
1392# endif
1393 return uCR8;
1394# else /* !RT_ARCH_AMD64 */
1395 return 0;
1396# endif /* !RT_ARCH_AMD64 */
1397}
1398#endif
1399
1400
1401/**
1402 * Enables interrupts (EFLAGS.IF).
1403 */
1404#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1405DECLASM(void) ASMIntEnable(void);
1406#else
1407DECLINLINE(void) ASMIntEnable(void)
1408{
1409# if RT_INLINE_ASM_GNU_STYLE
1410 __asm("sti\n");
1411# elif RT_INLINE_ASM_USES_INTRIN
1412 _enable();
1413# else
1414 __asm sti
1415# endif
1416}
1417#endif
1418
1419
1420/**
1421 * Disables interrupts (!EFLAGS.IF).
1422 */
1423#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1424DECLASM(void) ASMIntDisable(void);
1425#else
1426DECLINLINE(void) ASMIntDisable(void)
1427{
1428# if RT_INLINE_ASM_GNU_STYLE
1429 __asm("cli\n");
1430# elif RT_INLINE_ASM_USES_INTRIN
1431 _disable();
1432# else
1433 __asm cli
1434# endif
1435}
1436#endif
1437
1438
1439/**
1440 * Disables interrupts and returns previous xFLAGS.
1441 */
1442#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1443DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1444#else
1445DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1446{
1447 RTCCUINTREG xFlags;
1448# if RT_INLINE_ASM_GNU_STYLE
1449# ifdef RT_ARCH_AMD64
1450 __asm__ __volatile__("pushfq\n\t"
1451 "cli\n\t"
1452 "popq %0\n\t"
1453 : "=rm" (xFlags));
1454# else
1455 __asm__ __volatile__("pushfl\n\t"
1456 "cli\n\t"
1457 "popl %0\n\t"
1458 : "=rm" (xFlags));
1459# endif
1460# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1461 xFlags = ASMGetFlags();
1462 _disable();
1463# else
1464 __asm {
1465 pushfd
1466 cli
1467 pop [xFlags]
1468 }
1469# endif
1470 return xFlags;
1471}
1472#endif
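/* Illustrative usage sketch (not from the original header): the canonical
 * save-disable-restore pattern for a short critical section that must not be
 * interrupted. doCriticalWork() is a placeholder.
 *
 * @code
 * RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 * doCriticalWork();
 * ASMSetFlags(fSavedFlags);   // restores the previous interrupt flag state
 * @endcode
 */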
1473
1474
1475/**
1476 * Reads a machine specific register.
1477 *
1478 * @returns Register content.
1479 * @param uRegister Register to read.
1480 */
1481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1482DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1483#else
1484DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1485{
1486 RTUINT64U u;
1487# if RT_INLINE_ASM_GNU_STYLE
1488 __asm__ __volatile__("rdmsr\n\t"
1489 : "=a" (u.s.Lo),
1490 "=d" (u.s.Hi)
1491 : "c" (uRegister));
1492
1493# elif RT_INLINE_ASM_USES_INTRIN
1494 u.u = __readmsr(uRegister);
1495
1496# else
1497 __asm
1498 {
1499 mov ecx, [uRegister]
1500 rdmsr
1501 mov [u.s.Lo], eax
1502 mov [u.s.Hi], edx
1503 }
1504# endif
1505
1506 return u.u;
1507}
1508#endif
1509
1510
1511/**
1512 * Writes a machine specific register.
1513 *
1514 *
1515 * @param uRegister Register to write to.
1516 * @param u64Val Value to write.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1519DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1520#else
1521DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1522{
1523 RTUINT64U u;
1524
1525 u.u = u64Val;
1526# if RT_INLINE_ASM_GNU_STYLE
1527 __asm__ __volatile__("wrmsr\n\t"
1528 ::"a" (u.s.Lo),
1529 "d" (u.s.Hi),
1530 "c" (uRegister));
1531
1532# elif RT_INLINE_ASM_USES_INTRIN
1533 __writemsr(uRegister, u.u);
1534
1535# else
1536 __asm
1537 {
1538 mov ecx, [uRegister]
1539 mov edx, [u.s.Hi]
1540 mov eax, [u.s.Lo]
1541 wrmsr
1542 }
1543# endif
1544}
1545#endif
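/* Illustrative usage sketch (not from the original header): a read-modify-write of a
 * machine specific register. MY_MSR_INDEX and MY_MSR_ENABLE_BIT are placeholder
 * names; real MSR numbers and bit layouts come from the CPU vendor manuals.
 *
 * @code
 * uint64_t uValue = ASMRdMsr(MY_MSR_INDEX);
 * uValue |= MY_MSR_ENABLE_BIT;
 * ASMWrMsr(MY_MSR_INDEX, uValue);
 * @endcode
 */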
1546
1547
1548/**
1549 * Reads low part of a machine specific register.
1550 *
1551 * @returns Register content.
1552 * @param uRegister Register to read.
1553 */
1554#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1555DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1556#else
1557DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1558{
1559 uint32_t u32;
1560# if RT_INLINE_ASM_GNU_STYLE
1561 __asm__ __volatile__("rdmsr\n\t"
1562 : "=a" (u32)
1563 : "c" (uRegister)
1564 : "edx");
1565
1566# elif RT_INLINE_ASM_USES_INTRIN
1567 u32 = (uint32_t)__readmsr(uRegister);
1568
1569#else
1570 __asm
1571 {
1572 mov ecx, [uRegister]
1573 rdmsr
1574 mov [u32], eax
1575 }
1576# endif
1577
1578 return u32;
1579}
1580#endif
1581
1582
1583/**
1584 * Reads high part of a machine specific register.
1585 *
1586 * @returns Register content.
1587 * @param uRegister Register to read.
1588 */
1589#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1590DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1591#else
1592DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1593{
1594 uint32_t u32;
1595# if RT_INLINE_ASM_GNU_STYLE
1596 __asm__ __volatile__("rdmsr\n\t"
1597 : "=d" (u32)
1598 : "c" (uRegister)
1599 : "eax");
1600
1601# elif RT_INLINE_ASM_USES_INTRIN
1602 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1603
1604# else
1605 __asm
1606 {
1607 mov ecx, [uRegister]
1608 rdmsr
1609 mov [u32], edx
1610 }
1611# endif
1612
1613 return u32;
1614}
1615#endif
1616
1617
1618/**
1619 * Gets dr7.
1620 *
1621 * @returns dr7.
1622 */
1623#if RT_INLINE_ASM_EXTERNAL
1624DECLASM(RTCCUINTREG) ASMGetDR7(void);
1625#else
1626DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1627{
1628 RTCCUINTREG uDR7;
1629# if RT_INLINE_ASM_GNU_STYLE
1630# ifdef RT_ARCH_AMD64
1631 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1632# else
1633 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1634# endif
1635# else
1636 __asm
1637 {
1638# ifdef RT_ARCH_AMD64
1639 mov rax, dr7
1640 mov [uDR7], rax
1641# else
1642 mov eax, dr7
1643 mov [uDR7], eax
1644# endif
1645 }
1646# endif
1647 return uDR7;
1648}
1649#endif
1650
1651
1652/**
1653 * Gets dr6.
1654 *
1655 * @returns dr6.
1656 */
1657#if RT_INLINE_ASM_EXTERNAL
1658DECLASM(RTCCUINTREG) ASMGetDR6(void);
1659#else
1660DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1661{
1662 RTCCUINTREG uDR6;
1663# if RT_INLINE_ASM_GNU_STYLE
1664# ifdef RT_ARCH_AMD64
1665 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1666# else
1667 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1668# endif
1669# else
1670 __asm
1671 {
1672# ifdef RT_ARCH_AMD64
1673 mov rax, dr6
1674 mov [uDR6], rax
1675# else
1676 mov eax, dr6
1677 mov [uDR6], eax
1678# endif
1679 }
1680# endif
1681 return uDR6;
1682}
1683#endif
1684
1685
1686/**
1687 * Reads and clears DR6.
1688 *
1689 * @returns DR6.
1690 */
1691#if RT_INLINE_ASM_EXTERNAL
1692DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1693#else
1694DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1695{
1696 RTCCUINTREG uDR6;
1697# if RT_INLINE_ASM_GNU_STYLE
1698 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1699# ifdef RT_ARCH_AMD64
1700 __asm__ __volatile__("movq %%dr6, %0\n\t"
1701 "movq %1, %%dr6\n\t"
1702 : "=r" (uDR6)
1703 : "r" (uNewValue));
1704# else
1705 __asm__ __volatile__("movl %%dr6, %0\n\t"
1706 "movl %1, %%dr6\n\t"
1707 : "=r" (uDR6)
1708 : "r" (uNewValue));
1709# endif
1710# else
1711 __asm
1712 {
1713# ifdef RT_ARCH_AMD64
1714 mov rax, dr6
1715 mov [uDR6], rax
1716 mov rcx, rax
1717 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1718 mov dr6, rcx
1719# else
1720 mov eax, dr6
1721 mov [uDR6], eax
1722 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1723 mov dr6, ecx
1724# endif
1725 }
1726# endif
1727 return uDR6;
1728}
1729#endif
1730
1731/**
1732 * Gets dr0.
1733 *
1734 * @returns dr0.
1735 */
1736#if RT_INLINE_ASM_EXTERNAL
1737DECLASM(RTCCUINTREG) ASMGetDR0(void);
1738#else
1739DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1740{
1741 RTCCUINTREG uDR0;
1742# if RT_INLINE_ASM_GNU_STYLE
1743# ifdef RT_ARCH_AMD64
1744 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1745# else
1746 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1747# endif
1748# else
1749 __asm
1750 {
1751# ifdef RT_ARCH_AMD64
1752 mov rax, dr0
1753 mov [uDR0], rax
1754# else
1755 mov eax, dr0
1756 mov [uDR0], eax
1757# endif
1758 }
1759# endif
1760 return uDR0;
1761}
1762#endif
1763
1764
1765/**
1766 * Gets dr1.
1767 *
1768 * @returns dr1.
1769 */
1770#if RT_INLINE_ASM_EXTERNAL
1771DECLASM(RTCCUINTREG) ASMGetDR1(void);
1772#else
1773DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1774{
1775 RTCCUINTREG uDR1;
1776# if RT_INLINE_ASM_GNU_STYLE
1777# ifdef RT_ARCH_AMD64
1778 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1779# else
1780 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1781# endif
1782# else
1783 __asm
1784 {
1785# ifdef RT_ARCH_AMD64
1786 mov rax, dr1
1787 mov [uDR1], rax
1788# else
1789 mov eax, dr1
1790 mov [uDR1], eax
1791# endif
1792 }
1793# endif
1794 return uDR1;
1795}
1796#endif
1797
1798/**
1799 * Gets dr2.
1800 *
1801 * @returns dr2.
1802 */
1803#if RT_INLINE_ASM_EXTERNAL
1804DECLASM(RTCCUINTREG) ASMGetDR2(void);
1805#else
1806DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1807{
1808 RTCCUINTREG uDR2;
1809# if RT_INLINE_ASM_GNU_STYLE
1810# ifdef RT_ARCH_AMD64
1811 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1812# else
1813 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1814# endif
1815# else
1816 __asm
1817 {
1818# ifdef RT_ARCH_AMD64
1819 mov rax, dr2
1820 mov [uDR2], rax
1821# else
1822 mov eax, dr2
1823 mov [uDR2], eax
1824# endif
1825 }
1826# endif
1827 return uDR2;
1828}
1829#endif
1830
1831/**
1832 * Gets dr3.
1833 *
1834 * @returns dr3.
1835 */
1836#if RT_INLINE_ASM_EXTERNAL
1837DECLASM(RTCCUINTREG) ASMGetDR3(void);
1838#else
1839DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1840{
1841 RTCCUINTREG uDR3;
1842# if RT_INLINE_ASM_GNU_STYLE
1843# ifdef RT_ARCH_AMD64
1844 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1845# else
1846 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1847# endif
1848# else
1849 __asm
1850 {
1851# ifdef RT_ARCH_AMD64
1852 mov rax, dr3
1853 mov [uDR3], rax
1854# else
1855 mov eax, dr3
1856 mov [uDR3], eax
1857# endif
1858 }
1859# endif
1860 return uDR3;
1861}
1862#endif
1863
1864/**
1865 * Sets dr0.
1866 *
1867 * @param uDRVal Debug register value to write
1868 */
1869#if RT_INLINE_ASM_EXTERNAL
1870DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1871#else
1872DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1873{
1874# if RT_INLINE_ASM_GNU_STYLE
1875# ifdef RT_ARCH_AMD64
1876 __asm__ __volatile__("movq %0, %%dr0\n\t" : "=r" (uDRVal));
1877# else
1878 __asm__ __volatile__("movl %0, %%dr0\n\t" : "=r" (uDRVal));
1879# endif
1880# else
1881 __asm
1882 {
1883# ifdef RT_ARCH_AMD64
1884 mov dr0, [uDRVal]
1885# else
1886 mov eax, [uDRVal]
1887 mov dr0, eax
1888# endif
1889 }
1890# endif
1891}
1892#endif
1893
1894/**
1895 * Sets dr1.
1896 *
1897 * @param uDRVal Debug register value to write
1898 */
1899#if RT_INLINE_ASM_EXTERNAL
1900DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1901#else
1902DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1903{
1904# if RT_INLINE_ASM_GNU_STYLE
1905# ifdef RT_ARCH_AMD64
1906 __asm__ __volatile__("movq %0, %%dr1\n\t" : "=r" (uDRVal));
1907# else
1908 __asm__ __volatile__("movl %0, %%dr1\n\t" : "=r" (uDRVal));
1909# endif
1910# else
1911 __asm
1912 {
1913# ifdef RT_ARCH_AMD64
1914 mov dr1, [uDRVal]
1915# else
1916 mov eax, [uDRVal]
1917 mov dr1, eax
1918# endif
1919 }
1920# endif
1921}
1922#endif
1923
1924/**
1925 * Sets dr2.
1926 *
1927 * @param uDRVal Debug register value to write
1928 */
1929#if RT_INLINE_ASM_EXTERNAL
1930DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
1931#else
1932DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
1933{
1934# if RT_INLINE_ASM_GNU_STYLE
1935# ifdef RT_ARCH_AMD64
1936 __asm__ __volatile__("movq %0, %%dr2\n\t" : "=r" (uDRVal));
1937# else
1938 __asm__ __volatile__("movl %0, %%dr2\n\t" : "=r" (uDRVal));
1939# endif
1940# else
1941 __asm
1942 {
1943# ifdef RT_ARCH_AMD64
1944 mov dr2, [uDRVal]
1945# else
1946 mov eax, [uDRVal]
1947 mov dr2, eax
1948# endif
1949 }
1950# endif
1951}
1952#endif
1953
1954/**
1955 * Sets dr3.
1956 *
1957 * @param uDRVal Debug register value to write
1958 */
1959#if RT_INLINE_ASM_EXTERNAL
1960DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
1961#else
1962DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
1963{
1964# if RT_INLINE_ASM_GNU_STYLE
1965# ifdef RT_ARCH_AMD64
1966 __asm__ __volatile__("movq %0, %%dr3\n\t" : "=r" (uDRVal));
1967# else
1968 __asm__ __volatile__("movl %0, %%dr3\n\t" : "=r" (uDRVal));
1969# endif
1970# else
1971 __asm
1972 {
1973# ifdef RT_ARCH_AMD64
1974 mov dr3, [uDRVal]
1975# else
1976 mov eax, [uDRVal]
1977 mov dr3, eax
1978# endif
1979 }
1980# endif
1981}
1982#endif
1983
1984/**
1985 * Sets dr6.
1986 *
1987 * @param uDRVal Debug register value to write
1988 */
1989#if RT_INLINE_ASM_EXTERNAL
1990DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
1991#else
1992DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
1993{
1994# if RT_INLINE_ASM_GNU_STYLE
1995# ifdef RT_ARCH_AMD64
1996 __asm__ __volatile__("movq %0, %%dr6\n\t" : "=r" (uDRVal));
1997# else
1998 __asm__ __volatile__("movl %0, %%dr6\n\t" : "=r" (uDRVal));
1999# endif
2000# else
2001 __asm
2002 {
2003# ifdef RT_ARCH_AMD64
2004 mov dr6, [uDRVal]
2005# else
2006 mov eax, [uDRVal]
2007 mov dr6, eax
2008# endif
2009 }
2010# endif
2011}
2012#endif
2013
2014/**
2015 * Sets dr7.
2016 *
2017 * @param uDRVal Debug register value to write
2018 */
2019#if RT_INLINE_ASM_EXTERNAL
2020DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2021#else
2022DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2023{
2024# if RT_INLINE_ASM_GNU_STYLE
2025# ifdef RT_ARCH_AMD64
2026 __asm__ __volatile__("movq %0, %%dr7\n\t" : "=r" (uDRVal));
2027# else
2028 __asm__ __volatile__("movl %0, %%dr7\n\t" : "=r" (uDRVal));
2029# endif
2030# else
2031 __asm
2032 {
2033# ifdef RT_ARCH_AMD64
2034 mov dr7, [uDRVal]
2035# else
2036 mov eax, [uDRVal]
2037 mov dr7, eax
2038# endif
2039 }
2040# endif
2041}
2042#endif
2043
2044/**
2045 * Compiler memory barrier.
2046 *
2047 * Ensures that the compiler does not reuse any cached (register/tmp stack) memory
2048 * values and does not defer any outstanding writes past this point.
2049 *
2050 * This function must be used if non-volatile data is modified by a
2051 * device or the VMM. Typical cases are port access, MMIO access,
2052 * trapping instructions, etc.
2053 */
2054#if RT_INLINE_ASM_GNU_STYLE
2055# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
2056#elif RT_INLINE_ASM_USES_INTRIN
2057# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2058#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2059DECLINLINE(void) ASMCompilerBarrier(void)
2060{
2061 __asm
2062 {
2063 }
2064}
2065#endif
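/* Illustrative usage sketch (not from the original header): forcing the compiler to
 * re-read a flag that is changed behind its back (interrupt handler, device, other
 * agent) without declaring the variable volatile. g_fDataReady is a placeholder.
 *
 * @code
 * extern uint32_t g_fDataReady;   // set elsewhere, e.g. by an interrupt handler
 * ...
 * while (!g_fDataReady)
 *     ASMCompilerBarrier();       // force the next iteration to re-read g_fDataReady
 * @endcode
 */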
2066
2067
2068/**
2069 * Writes an 8-bit unsigned integer to an I/O port, ordered.
2070 *
2071 * @param Port I/O port to write to.
2072 * @param u8 8-bit integer to write.
2073 */
2074#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2075DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2076#else
2077DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2078{
2079# if RT_INLINE_ASM_GNU_STYLE
2080 __asm__ __volatile__("outb %b1, %w0\n\t"
2081 :: "Nd" (Port),
2082 "a" (u8));
2083
2084# elif RT_INLINE_ASM_USES_INTRIN
2085 __outbyte(Port, u8);
2086
2087# else
2088 __asm
2089 {
2090 mov dx, [Port]
2091 mov al, [u8]
2092 out dx, al
2093 }
2094# endif
2095}
2096#endif
2097
2098
2099/**
2100 * Gets an 8-bit unsigned integer from an I/O port, ordered.
2101 *
2102 * @returns 8-bit integer.
2103 * @param Port I/O port to read from.
2104 */
2105#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2106DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2107#else
2108DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2109{
2110 uint8_t u8;
2111# if RT_INLINE_ASM_GNU_STYLE
2112 __asm__ __volatile__("inb %w1, %b0\n\t"
2113 : "=a" (u8)
2114 : "Nd" (Port));
2115
2116# elif RT_INLINE_ASM_USES_INTRIN
2117 u8 = __inbyte(Port);
2118
2119# else
2120 __asm
2121 {
2122 mov dx, [Port]
2123 in al, dx
2124 mov [u8], al
2125 }
2126# endif
2127 return u8;
2128}
2129#endif
2130
2131
2132/**
2133 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2134 *
2135 * @param Port I/O port to write to.
2136 * @param u16 16-bit integer to write.
2137 */
2138#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2139DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2140#else
2141DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2142{
2143# if RT_INLINE_ASM_GNU_STYLE
2144 __asm__ __volatile__("outw %w1, %w0\n\t"
2145 :: "Nd" (Port),
2146 "a" (u16));
2147
2148# elif RT_INLINE_ASM_USES_INTRIN
2149 __outword(Port, u16);
2150
2151# else
2152 __asm
2153 {
2154 mov dx, [Port]
2155 mov ax, [u16]
2156 out dx, ax
2157 }
2158# endif
2159}
2160#endif
2161
2162
2163/**
2164 * Gets a 16-bit unsigned integer from an I/O port, ordered.
2165 *
2166 * @returns 16-bit integer.
2167 * @param Port I/O port to read from.
2168 */
2169#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2170DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2171#else
2172DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2173{
2174 uint16_t u16;
2175# if RT_INLINE_ASM_GNU_STYLE
2176 __asm__ __volatile__("inw %w1, %w0\n\t"
2177 : "=a" (u16)
2178 : "Nd" (Port));
2179
2180# elif RT_INLINE_ASM_USES_INTRIN
2181 u16 = __inword(Port);
2182
2183# else
2184 __asm
2185 {
2186 mov dx, [Port]
2187 in ax, dx
2188 mov [u16], ax
2189 }
2190# endif
2191 return u16;
2192}
2193#endif
2194
2195
2196/**
2197 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2198 *
2199 * @param Port I/O port to write to.
2200 * @param u32 32-bit integer to write.
2201 */
2202#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2203DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2204#else
2205DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2206{
2207# if RT_INLINE_ASM_GNU_STYLE
2208 __asm__ __volatile__("outl %1, %w0\n\t"
2209 :: "Nd" (Port),
2210 "a" (u32));
2211
2212# elif RT_INLINE_ASM_USES_INTRIN
2213 __outdword(Port, u32);
2214
2215# else
2216 __asm
2217 {
2218 mov dx, [Port]
2219 mov eax, [u32]
2220 out dx, eax
2221 }
2222# endif
2223}
2224#endif
2225
2226
2227/**
2228 * Gets a 32-bit unsigned integer from an I/O port, ordered.
2229 *
2230 * @returns 32-bit integer.
2231 * @param Port I/O port to read from.
2232 */
2233#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2234DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2235#else
2236DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2237{
2238 uint32_t u32;
2239# if RT_INLINE_ASM_GNU_STYLE
2240 __asm__ __volatile__("inl %w1, %0\n\t"
2241 : "=a" (u32)
2242 : "Nd" (Port));
2243
2244# elif RT_INLINE_ASM_USES_INTRIN
2245 u32 = __indword(Port);
2246
2247# else
2248 __asm
2249 {
2250 mov dx, [Port]
2251 in eax, dx
2252 mov [u32], eax
2253 }
2254# endif
2255 return u32;
2256}
2257#endif
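/* Illustrative usage sketch (not from the original header): reading a byte from the
 * legacy CMOS/RTC, which is addressed through an index port (0x70) and a data port
 * (0x71). The port numbers and register index are standard PC values, not something
 * defined in this header.
 *
 * @code
 * ASMOutU8(0x70, 0x0A);              // select CMOS register 0x0A (RTC status A)
 * uint8_t bStatusA = ASMInU8(0x71);  // read its current value
 * @endcode
 */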
2258
2259/** @todo string i/o */
2260
2261
2262/**
2263 * Atomically Exchange an unsigned 8-bit value, ordered.
2264 *
2265 * @returns Current *pu8 value
2266 * @param pu8 Pointer to the 8-bit variable to update.
2267 * @param u8 The 8-bit value to assign to *pu8.
2268 */
2269#if RT_INLINE_ASM_EXTERNAL
2270DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2271#else
2272DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2273{
2274# if RT_INLINE_ASM_GNU_STYLE
2275 __asm__ __volatile__("xchgb %0, %1\n\t"
2276 : "=m" (*pu8),
2277 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2278 : "1" (u8),
2279 "m" (*pu8));
2280# else
2281 __asm
2282 {
2283# ifdef RT_ARCH_AMD64
2284 mov rdx, [pu8]
2285 mov al, [u8]
2286 xchg [rdx], al
2287 mov [u8], al
2288# else
2289 mov edx, [pu8]
2290 mov al, [u8]
2291 xchg [edx], al
2292 mov [u8], al
2293# endif
2294 }
2295# endif
2296 return u8;
2297}
2298#endif
2299
2300
2301/**
2302 * Atomically Exchange a signed 8-bit value, ordered.
2303 *
2304 * @returns Current *pi8 value
2305 * @param pi8 Pointer to the 8-bit variable to update.
2306 * @param i8 The 8-bit value to assign to *pi8.
2307 */
2308DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2309{
2310 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2311}
2312
2313
2314/**
2315 * Atomically Exchange a bool value, ordered.
2316 *
2317 * @returns Current *pf value
2318 * @param pf Pointer to the 8-bit variable to update.
2319 * @param f The 8-bit value to assign to *pf.
2320 */
2321DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2322{
2323#ifdef _MSC_VER
2324 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2325#else
2326 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2327#endif
2328}
2329
2330
2331/**
2332 * Atomically Exchange an unsigned 16-bit value, ordered.
2333 *
2334 * @returns Current *pu16 value
2335 * @param pu16 Pointer to the 16-bit variable to update.
2336 * @param u16 The 16-bit value to assign to *pu16.
2337 */
2338#if RT_INLINE_ASM_EXTERNAL
2339DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2340#else
2341DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2342{
2343# if RT_INLINE_ASM_GNU_STYLE
2344 __asm__ __volatile__("xchgw %0, %1\n\t"
2345 : "=m" (*pu16),
2346 "=r" (u16)
2347 : "1" (u16),
2348 "m" (*pu16));
2349# else
2350 __asm
2351 {
2352# ifdef RT_ARCH_AMD64
2353 mov rdx, [pu16]
2354 mov ax, [u16]
2355 xchg [rdx], ax
2356 mov [u16], ax
2357# else
2358 mov edx, [pu16]
2359 mov ax, [u16]
2360 xchg [edx], ax
2361 mov [u16], ax
2362# endif
2363 }
2364# endif
2365 return u16;
2366}
2367#endif
2368
2369
2370/**
2371 * Atomically Exchange a signed 16-bit value, ordered.
2372 *
2373 * @returns Current *pi16 value
2374 * @param pi16 Pointer to the 16-bit variable to update.
2375 * @param i16 The 16-bit value to assign to *pi16.
2376 */
2377DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2378{
2379 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2380}
2381
2382
2383/**
2384 * Atomically Exchange an unsigned 32-bit value, ordered.
2385 *
2386 * @returns Current *pu32 value
2387 * @param pu32 Pointer to the 32-bit variable to update.
2388 * @param u32 The 32-bit value to assign to *pu32.
2389 */
2390#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2391DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2392#else
2393DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2394{
2395# if RT_INLINE_ASM_GNU_STYLE
2396 __asm__ __volatile__("xchgl %0, %1\n\t"
2397 : "=m" (*pu32),
2398 "=r" (u32)
2399 : "1" (u32),
2400 "m" (*pu32));
2401
2402# elif RT_INLINE_ASM_USES_INTRIN
2403 u32 = _InterlockedExchange((long *)pu32, u32);
2404
2405# else
2406 __asm
2407 {
2408# ifdef RT_ARCH_AMD64
2409 mov rdx, [pu32]
2410 mov eax, u32
2411 xchg [rdx], eax
2412 mov [u32], eax
2413# else
2414 mov edx, [pu32]
2415 mov eax, u32
2416 xchg [edx], eax
2417 mov [u32], eax
2418# endif
2419 }
2420# endif
2421 return u32;
2422}
2423#endif
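/* Illustrative usage sketch (not from the original header): using an ordered 32-bit
 * exchange as a one-shot latch so that only the first caller performs some one-time
 * initialization. g_fInitialized is a placeholder variable.
 *
 * @code
 * static volatile uint32_t g_fInitialized = 0;
 * ...
 * if (ASMAtomicXchgU32(&g_fInitialized, 1) == 0)
 * {
 *     // first caller: do the one-time initialization here
 * }
 * @endcode
 */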
2424
2425
2426/**
2427 * Atomically Exchange a signed 32-bit value, ordered.
2428 *
2429 * @returns Current *pi32 value
2430 * @param pi32 Pointer to the 32-bit variable to update.
2431 * @param i32 The 32-bit value to assign to *pi32.
2432 */
2433DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2434{
2435 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2436}
2437
2438
2439/**
2440 * Atomically Exchange an unsigned 64-bit value, ordered.
2441 *
2442 * @returns Current *pu64 value
2443 * @param pu64 Pointer to the 64-bit variable to update.
2444 * @param u64 The 64-bit value to assign to *pu64.
2445 */
2446#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2447DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2448#else
2449DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2450{
2451# if defined(RT_ARCH_AMD64)
2452# if RT_INLINE_ASM_USES_INTRIN
2453 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2454
2455# elif RT_INLINE_ASM_GNU_STYLE
2456 __asm__ __volatile__("xchgq %0, %1\n\t"
2457 : "=m" (*pu64),
2458 "=r" (u64)
2459 : "1" (u64),
2460 "m" (pu64));
2461# else
2462 __asm
2463 {
2464 mov rdx, [pu64]
2465 mov rax, [u64]
2466 xchg [rdx], rax
2467 mov [u64], rax
2468 }
2469# endif
2470# else /* !RT_ARCH_AMD64 */
2471# if RT_INLINE_ASM_GNU_STYLE
2472# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2473 uint32_t u32EBX = (uint32_t)u64;
2474 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2475 "xchgl %%ebx, %3\n\t"
2476 "1:\n\t"
2477 "lock; cmpxchg8b (%5)\n\t"
2478 "jnz 1b\n\t"
2479 "movl %3, %%ebx\n\t"
2480 /*"xchgl %%esi, %5\n\t"*/
2481 : "=A" (u64),
2482 "=m" (*pu64)
2483 : "0" (*pu64),
2484 "m" ( u32EBX ),
2485 "c" ( (uint32_t)(u64 >> 32) ),
2486 "S" (pu64));
2487# else /* !PIC */
2488 __asm__ __volatile__("1:\n\t"
2489 "lock; cmpxchg8b %1\n\t"
2490 "jnz 1b\n\t"
2491 : "=A" (u64),
2492 "=m" (*pu64)
2493 : "0" (*pu64),
2494 "b" ( (uint32_t)u64 ),
2495 "c" ( (uint32_t)(u64 >> 32) ),
2496 "m" (*pu64));
2497# endif
2498# else
2499 __asm
2500 {
2501 mov ebx, dword ptr [u64]
2502 mov ecx, dword ptr [u64 + 4]
2503 mov edi, pu64
2504 mov eax, dword ptr [edi]
2505 mov edx, dword ptr [edi + 4]
2506 retry:
2507 lock cmpxchg8b [edi]
2508 jnz retry
2509 mov dword ptr [u64], eax
2510 mov dword ptr [u64 + 4], edx
2511 }
2512# endif
2513# endif /* !RT_ARCH_AMD64 */
2514 return u64;
2515}
2516#endif
2517
2518
2519/**
2520 * Atomically Exchange a signed 64-bit value, ordered.
2521 *
2522 * @returns Current *pi64 value
2523 * @param pi64 Pointer to the 64-bit variable to update.
2524 * @param i64 The 64-bit value to assign to *pi64.
2525 */
2526DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2527{
2528 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2529}
2530
2531
2532#ifdef RT_ARCH_AMD64
2533/**
2534 * Atomically Exchange an unsigned 128-bit value, ordered.
2535 *
2536 * @returns Current *pu128.
2537 * @param pu128 Pointer to the 128-bit variable to update.
2538 * @param u128 The 128-bit value to assign to *pu128.
2539 *
2540 * @remark We cannot really assume that any hardware supports this. Nor do I have
2541 * GAS support for it. So, for the time being we'll BREAK the atomic
2542 * bit of this function and use two 64-bit exchanges instead.
2543 */
2544# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2545DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2546# else
2547DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2548{
2549 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2550 {
2551 /** @todo this is clumsy code */
2552 RTUINT128U u128Ret;
2553 u128Ret.u = u128;
2554 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2555 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2556 return u128Ret.u;
2557 }
2558#if 0 /* later? */
2559 else
2560 {
2561# if RT_INLINE_ASM_GNU_STYLE
2562 __asm__ __volatile__("1:\n\t"
2563 "lock; cmpxchg8b %1\n\t"
2564 "jnz 1b\n\t"
2565 : "=A" (u128),
2566 "=m" (*pu128)
2567 : "0" (*pu128),
2568 "b" ( (uint64_t)u128 ),
2569 "c" ( (uint64_t)(u128 >> 64) ));
2570# else
2571 __asm
2572 {
2573 mov rbx, dword ptr [u128]
2574 mov rcx, dword ptr [u128 + 8]
2575 mov rdi, pu128
2576 mov rax, dword ptr [rdi]
2577 mov rdx, dword ptr [rdi + 8]
2578 retry:
2579 lock cmpxchg16b [rdi]
2580 jnz retry
2581 mov dword ptr [u128], rax
2582 mov dword ptr [u128 + 8], rdx
2583 }
2584# endif
2585 }
2586 return u128;
2587#endif
2588}
2589# endif
2590#endif /* RT_ARCH_AMD64 */
2591
2592
2593/**
2594 * Atomically Exchange a pointer value, ordered.
2595 *
2596 * @returns Current *ppv value
2597 * @param ppv Pointer to the pointer variable to update.
2598 * @param pv The pointer value to assign to *ppv.
2599 */
2600DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2601{
2602#if ARCH_BITS == 32
2603 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2604#elif ARCH_BITS == 64
2605 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2606#else
2607# error "ARCH_BITS is bogus"
2608#endif
2609}
2610
2611
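/*
 * Illustrative usage sketch (not part of the original header): detaching a
 * single queued buffer so that exactly one thread ends up owning it. The
 * MYBUF type, the g_pQueuedBuf variable and MyBufProcess() are hypothetical.
 *
 * @code
 *     MYBUF *pBuf = (MYBUF *)ASMAtomicXchgPtr((void * volatile *)&g_pQueuedBuf, NULL);
 *     if (pBuf)
 *         MyBufProcess(pBuf);         // we won the race; nobody else sees it now
 * @endcode
 */
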
2612/** @def ASMAtomicXchgHandle
2613 * Atomically Exchange a typical IPRT handle value, ordered.
2614 *
2615 * @param ph Pointer to the value to update.
2616 * @param hNew The new value to assign to *ph.
2617 * @param phRes Where to store the current *ph value.
2618 *
2619 * @remarks This doesn't currently work for all handles (like RTFILE).
2620 */
2621#define ASMAtomicXchgHandle(ph, hNew, phRes) \
2622 do { \
2623 *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (void *)(hNew)); \
2624 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2625 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
2626 } while (0)
2627
2628
2629/**
2630 * Atomically Exchange a value which size might differ
2631 * between platforms or compilers, ordered.
2632 *
2633 * @param pu Pointer to the variable to update.
2634 * @param uNew The value to assign to *pu.
2635 * @todo This is busted as it's missing the result argument.
2636 */
2637#define ASMAtomicXchgSize(pu, uNew) \
2638 do { \
2639 switch (sizeof(*(pu))) { \
2640 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2641 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2642 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2643 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2644 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2645 } \
2646 } while (0)
2647
2648/**
2649 * Atomically Exchange a value which size might differ
2650 * between platforms or compilers, ordered.
2651 *
2652 * @param pu Pointer to the variable to update.
2653 * @param uNew The value to assign to *pu.
2654 * @param puRes Where to store the current *pu value.
2655 */
2656#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
2657 do { \
2658 switch (sizeof(*(pu))) { \
2659 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2660 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2661 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2662 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2663 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
2664 } \
2665 } while (0)
2666
2667
2668/**
2669 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2670 *
2671 * @returns true if xchg was done.
2672 * @returns false if xchg wasn't done.
2673 *
2674 * @param pu32 Pointer to the value to update.
2675 * @param u32New The new value to assign to *pu32.
2676 * @param u32Old The old value to compare *pu32 with.
2677 */
2678#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2679DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2680#else
2681DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2682{
2683# if RT_INLINE_ASM_GNU_STYLE
2684 uint8_t u8Ret;
2685 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2686 "setz %1\n\t"
2687 : "=m" (*pu32),
2688 "=qm" (u8Ret),
2689 "=a" (u32Old)
2690 : "r" (u32New),
2691 "2" (u32Old),
2692 "m" (*pu32));
2693 return (bool)u8Ret;
2694
2695# elif RT_INLINE_ASM_USES_INTRIN
2696 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2697
2698# else
2699 uint32_t u32Ret;
2700 __asm
2701 {
2702# ifdef RT_ARCH_AMD64
2703 mov rdx, [pu32]
2704# else
2705 mov edx, [pu32]
2706# endif
2707 mov eax, [u32Old]
2708 mov ecx, [u32New]
2709# ifdef RT_ARCH_AMD64
2710 lock cmpxchg [rdx], ecx
2711# else
2712 lock cmpxchg [edx], ecx
2713# endif
2714 setz al
2715 movzx eax, al
2716 mov [u32Ret], eax
2717 }
2718 return !!u32Ret;
2719# endif
2720}
2721#endif
2722
2723
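/*
 * Illustrative usage sketch (not part of the original header): the usual
 * compare-and-exchange retry loop, here updating a shared maximum. On
 * failure the loop simply re-reads and tries again. The ExampleUpdateHighWater
 * name is made up; ASMAtomicReadU32 is declared further down in this file.
 *
 * @code
 *     static void ExampleUpdateHighWater(uint32_t volatile *pu32HighWater, uint32_t u32New)
 *     {
 *         uint32_t u32Cur;
 *         do
 *         {
 *             u32Cur = ASMAtomicReadU32(pu32HighWater);
 *             if (u32New <= u32Cur)
 *                 return;             // nothing to do, current value is larger
 *         } while (!ASMAtomicCmpXchgU32(pu32HighWater, u32New, u32Cur));
 *     }
 * @endcode
 */
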
2724/**
2725 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2726 *
2727 * @returns true if xchg was done.
2728 * @returns false if xchg wasn't done.
2729 *
2730 * @param pi32 Pointer to the value to update.
2731 * @param i32New The new value to assign to *pi32.
2732 * @param i32Old The old value to compare *pi32 with.
2733 */
2734DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2735{
2736 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2737}
2738
2739
2740/**
2741 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2742 *
2743 * @returns true if xchg was done.
2744 * @returns false if xchg wasn't done.
2745 *
2746 * @param pu64 Pointer to the 64-bit variable to update.
2747 * @param u64New The 64-bit value to assign to *pu64.
2748 * @param u64Old The value to compare with.
2749 */
2750#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2751DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2752#else
2753DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2754{
2755# if RT_INLINE_ASM_USES_INTRIN
2756 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2757
2758# elif defined(RT_ARCH_AMD64)
2759# if RT_INLINE_ASM_GNU_STYLE
2760 uint8_t u8Ret;
2761 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2762 "setz %1\n\t"
2763 : "=m" (*pu64),
2764 "=qm" (u8Ret),
2765 "=a" (u64Old)
2766 : "r" (u64New),
2767 "2" (u64Old),
2768 "m" (*pu64));
2769 return (bool)u8Ret;
2770# else
2771 bool fRet;
2772 __asm
2773 {
2774 mov rdx, [pu64]
2775 mov rax, [u64Old]
2776 mov rcx, [u64New]
2777 lock cmpxchg [rdx], rcx
2778 setz al
2779 mov [fRet], al
2780 }
2781 return fRet;
2782# endif
2783# else /* !RT_ARCH_AMD64 */
2784 uint32_t u32Ret;
2785# if RT_INLINE_ASM_GNU_STYLE
2786# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2787 uint32_t u32EBX = (uint32_t)u64New;
2788 uint32_t u32Spill;
2789 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2790 "lock; cmpxchg8b (%6)\n\t"
2791 "setz %%al\n\t"
2792 "movl %4, %%ebx\n\t"
2793 "movzbl %%al, %%eax\n\t"
2794 : "=a" (u32Ret),
2795 "=d" (u32Spill),
2796 "=m" (*pu64)
2797 : "A" (u64Old),
2798 "m" ( u32EBX ),
2799 "c" ( (uint32_t)(u64New >> 32) ),
2800 "S" (pu64),
2801 "m" (*pu64));
2802# else /* !PIC */
2803 uint32_t u32Spill;
2804 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2805 "setz %%al\n\t"
2806 "movzbl %%al, %%eax\n\t"
2807 : "=a" (u32Ret),
2808 "=d" (u32Spill),
2809 "=m" (*pu64)
2810 : "A" (u64Old),
2811 "b" ( (uint32_t)u64New ),
2812 "c" ( (uint32_t)(u64New >> 32) ),
2813 "m" (*pu64));
2814# endif
2815 return (bool)u32Ret;
2816# else
2817 __asm
2818 {
2819 mov ebx, dword ptr [u64New]
2820 mov ecx, dword ptr [u64New + 4]
2821 mov edi, [pu64]
2822 mov eax, dword ptr [u64Old]
2823 mov edx, dword ptr [u64Old + 4]
2824 lock cmpxchg8b [edi]
2825 setz al
2826 movzx eax, al
2827 mov dword ptr [u32Ret], eax
2828 }
2829 return !!u32Ret;
2830# endif
2831# endif /* !RT_ARCH_AMD64 */
2832}
2833#endif
2834
2835
2836/**
2837 * Atomically Compare and exchange a signed 64-bit value, ordered.
2838 *
2839 * @returns true if xchg was done.
2840 * @returns false if xchg wasn't done.
2841 *
2842 * @param pi64 Pointer to the 64-bit variable to update.
2843 * @param i64 The 64-bit value to assign to *pi64.
2844 * @param i64Old The value to compare with.
2845 */
2846DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2847{
2848 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2849}
2850
2851
2852/**
2853 * Atomically Compare and Exchange a pointer value, ordered.
2854 *
2855 * @returns true if xchg was done.
2856 * @returns false if xchg wasn't done.
2857 *
2858 * @param ppv Pointer to the value to update.
2859 * @param pvNew The new value to assign to *ppv.
2860 * @param pvOld The old value to compare *ppv with.
2861 */
2862DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2863{
2864#if ARCH_BITS == 32
2865 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2866#elif ARCH_BITS == 64
2867 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2868#else
2869# error "ARCH_BITS is bogus"
2870#endif
2871}
2872
2873
2874/** @def ASMAtomicCmpXchgHandle
2875 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2876 *
2877 * @param ph Pointer to the value to update.
2878 * @param hNew The new value to assign to *ph.
2879 * @param hOld The old value to compare *ph with.
2880 * @param fRc Where to store the result.
2881 *
2882 * @remarks This doesn't currently work for all handles (like RTFILE).
2883 */
2884#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
2885 do { \
2886 (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
2887 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2888 } while (0)
2889
2890
2891/** @def ASMAtomicCmpXchgSize
2892 * Atomically Compare and Exchange a value which size might differ
2893 * between platforms or compilers, ordered.
2894 *
2895 * @param pu Pointer to the value to update.
2896 * @param uNew The new value to assign to *pu.
2897 * @param uOld The old value to compare *pu with.
2898 * @param fRc Where to store the result.
2899 */
2900#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2901 do { \
2902 switch (sizeof(*(pu))) { \
2903 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2904 break; \
2905 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2906 break; \
2907 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2908 (fRc) = false; \
2909 break; \
2910 } \
2911 } while (0)
2912
2913
2914/**
2915 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2916 * passes back old value, ordered.
2917 *
2918 * @returns true if xchg was done.
2919 * @returns false if xchg wasn't done.
2920 *
2921 * @param pu32 Pointer to the value to update.
2922 * @param u32New The new value to assign to *pu32.
2923 * @param u32Old The old value to compare *pu32 with.
2924 * @param pu32Old Pointer to store the old value at.
2925 */
2926#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2927DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2928#else
2929DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2930{
2931# if RT_INLINE_ASM_GNU_STYLE
2932 uint8_t u8Ret;
2933 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2934 "setz %1\n\t"
2935 : "=m" (*pu32),
2936 "=qm" (u8Ret),
2937 "=a" (*pu32Old)
2938 : "r" (u32New),
2939 "a" (u32Old),
2940 "m" (*pu32));
2941 return (bool)u8Ret;
2942
2943# elif RT_INLINE_ASM_USES_INTRIN
2944 return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2945
2946# else
2947 uint32_t u32Ret;
2948 __asm
2949 {
2950# ifdef RT_ARCH_AMD64
2951 mov rdx, [pu32]
2952# else
2953 mov edx, [pu32]
2954# endif
2955 mov eax, [u32Old]
2956 mov ecx, [u32New]
2957# ifdef RT_ARCH_AMD64
2958 lock cmpxchg [rdx], ecx
2959 mov rdx, [pu32Old]
2960 mov [rdx], eax
2961# else
2962 lock cmpxchg [edx], ecx
2963 mov edx, [pu32Old]
2964 mov [edx], eax
2965# endif
2966 setz al
2967 movzx eax, al
2968 mov [u32Ret], eax
2969 }
2970 return !!u32Ret;
2971# endif
2972}
2973#endif
2974
2975
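/*
 * Illustrative usage sketch (not part of the original header): the Ex
 * variant hands back the value cmpxchg actually saw, so a retry loop can
 * reuse it instead of re-reading the variable. The ExampleSetBits name is
 * made up; ASMAtomicUoReadU32 is declared further down in this file. The
 * function atomically ORs bits into *pu32 and returns the previous value.
 *
 * @code
 *     static uint32_t ExampleSetBits(uint32_t volatile *pu32, uint32_t fBits)
 *     {
 *         uint32_t u32Old = ASMAtomicUoReadU32(pu32);
 *         uint32_t u32Seen;
 *         while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fBits, u32Old, &u32Seen))
 *             u32Old = u32Seen;       // retry with the value we just got back
 *         return u32Old;
 *     }
 * @endcode
 */
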
2976/**
2977 * Atomically Compare and Exchange a signed 32-bit value, additionally
2978 * passes back old value, ordered.
2979 *
2980 * @returns true if xchg was done.
2981 * @returns false if xchg wasn't done.
2982 *
2983 * @param pi32 Pointer to the value to update.
2984 * @param i32New The new value to assign to *pi32.
2985 * @param i32Old The old value to compare *pi32 with.
2986 * @param pi32Old Pointer to store the old value at.
2987 */
2988DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2989{
2990 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2991}
2992
2993
2994/**
2995 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2996 * passing back old value, ordered.
2997 *
2998 * @returns true if xchg was done.
2999 * @returns false if xchg wasn't done.
3000 *
3001 * @param pu64 Pointer to the 64-bit variable to update.
3002 * @param u64New The 64-bit value to assign to *pu64.
3003 * @param u64Old The value to compare with.
3004 * @param pu64Old Pointer to store the old value at.
3005 */
3006#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3007DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3008#else
3009DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3010{
3011# if RT_INLINE_ASM_USES_INTRIN
3012 return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3013
3014# elif defined(RT_ARCH_AMD64)
3015# if RT_INLINE_ASM_GNU_STYLE
3016 uint8_t u8Ret;
3017 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3018 "setz %1\n\t"
3019 : "=m" (*pu64),
3020 "=qm" (u8Ret),
3021 "=a" (*pu64Old)
3022 : "r" (u64New),
3023 "a" (u64Old),
3024 "m" (*pu64));
3025 return (bool)u8Ret;
3026# else
3027 bool fRet;
3028 __asm
3029 {
3030 mov rdx, [pu64]
3031 mov rax, [u64Old]
3032 mov rcx, [u64New]
3033 lock cmpxchg [rdx], rcx
3034 mov rdx, [pu64Old]
3035 mov [rdx], rax
3036 setz al
3037 mov [fRet], al
3038 }
3039 return fRet;
3040# endif
3041# else /* !RT_ARCH_AMD64 */
3042# if RT_INLINE_ASM_GNU_STYLE
3043 uint64_t u64Ret;
3044# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3045 /* NB: this code uses a memory clobber description, because the clean
3046 * solution with an output value for *pu64 makes gcc run out of registers.
3047 * This will cause suboptimal code, and anyone with a better solution is
3048 * welcome to improve this. */
3049 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3050 "lock; cmpxchg8b %3\n\t"
3051 "xchgl %%ebx, %1\n\t"
3052 : "=A" (u64Ret)
3053 : "DS" ((uint32_t)u64New),
3054 "c" ((uint32_t)(u64New >> 32)),
3055 "m" (*pu64),
3056 "0" (u64Old)
3057 : "memory" );
3058# else /* !PIC */
3059 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3060 : "=A" (u64Ret),
3061 "=m" (*pu64)
3062 : "b" ((uint32_t)u64New),
3063 "c" ((uint32_t)(u64New >> 32)),
3064 "m" (*pu64),
3065 "0" (u64Old));
3066# endif
3067 *pu64Old = u64Ret;
3068 return u64Ret == u64Old;
3069# else
3070 uint32_t u32Ret;
3071 __asm
3072 {
3073 mov ebx, dword ptr [u64New]
3074 mov ecx, dword ptr [u64New + 4]
3075 mov edi, [pu64]
3076 mov eax, dword ptr [u64Old]
3077 mov edx, dword ptr [u64Old + 4]
3078 lock cmpxchg8b [edi]
3079 mov ebx, [pu64Old]
3080 mov [ebx], eax
3081 setz al
3082 movzx eax, al
3083 add ebx, 4
3084 mov [ebx], edx
3085 mov dword ptr [u32Ret], eax
3086 }
3087 return !!u32Ret;
3088# endif
3089# endif /* !RT_ARCH_AMD64 */
3090}
3091#endif
3092
3093
3094/**
3095 * Atomically Compare and exchange a signed 64-bit value, additionally
3096 * passing back old value, ordered.
3097 *
3098 * @returns true if xchg was done.
3099 * @returns false if xchg wasn't done.
3100 *
3101 * @param pi64 Pointer to the 64-bit variable to update.
3102 * @param i64 The 64-bit value to assign to *pi64.
3103 * @param i64Old The value to compare with.
3104 * @param pi64Old Pointer to store the old value at.
3105 */
3106DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3107{
3108 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3109}
3110
3111/** @def ASMAtomicCmpXchgExHandle
3112 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3113 *
3114 * @param ph Pointer to the value to update.
3115 * @param hNew The new value to assign to *ph.
3116 * @param hOld The old value to compare *ph with.
3117 * @param fRc Where to store the result.
3118 * @param phOldVal Pointer to where to store the old value.
3119 *
3120 * @remarks This doesn't currently work for all handles (like RTFILE).
3121 */
3122#if ARCH_BITS == 32
3123# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3124 do { \
3125 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3126 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3127 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3128 } while (0)
3129#elif ARCH_BITS == 64
3130# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3131 do { \
3132 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3133 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3134 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3135 } while (0)
3136#endif
3137
3138
3139/** @def ASMAtomicCmpXchgExSize
3140 * Atomically Compare and Exchange a value which size might differ
3141 * between platforms or compilers. Additionally passes back old value.
3142 *
3143 * @param pu Pointer to the value to update.
3144 * @param uNew The new value to assign to *pu.
3145 * @param uOld The old value to compare *pu with.
3146 * @param fRc Where to store the result.
3147 * @param puOldVal Pointer to where to store the old value.
3148 */
3149#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3150 do { \
3151 switch (sizeof(*(pu))) { \
3152 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3153 break; \
3154 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3155 break; \
3156 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3157 (fRc) = false; \
3158 *(puOldVal) = 0; \
3159 break; \
3160 } \
3161 } while (0)
3162
3163
3164/**
3165 * Atomically Compare and Exchange a pointer value, additionally
3166 * passing back old value, ordered.
3167 *
3168 * @returns true if xchg was done.
3169 * @returns false if xchg wasn't done.
3170 *
3171 * @param ppv Pointer to the value to update.
3172 * @param pvNew The new value to assigned to *ppv.
3173 * @param pvOld The old value to *ppv compare with.
3174 * @param ppvOld Pointer to store the old value at.
3175 */
3176DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
3177{
3178#if ARCH_BITS == 32
3179 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3180#elif ARCH_BITS == 64
3181 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3182#else
3183# error "ARCH_BITS is bogus"
3184#endif
3185}
3186
3187
3188/**
3189 * Atomically exchanges and adds to a 32-bit value, ordered.
3190 *
3191 * @returns The old value.
3192 * @param pu32 Pointer to the value.
3193 * @param u32 Number to add.
3194 */
3195#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3196DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3197#else
3198DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3199{
3200# if RT_INLINE_ASM_USES_INTRIN
3201 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3202 return u32;
3203
3204# elif RT_INLINE_ASM_GNU_STYLE
3205 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3206 : "=r" (u32),
3207 "=m" (*pu32)
3208 : "0" (u32),
3209 "m" (*pu32)
3210 : "memory");
3211 return u32;
3212# else
3213 __asm
3214 {
3215 mov eax, [u32]
3216# ifdef RT_ARCH_AMD64
3217 mov rdx, [pu32]
3218 lock xadd [rdx], eax
3219# else
3220 mov edx, [pu32]
3221 lock xadd [edx], eax
3222# endif
3223 mov [u32], eax
3224 }
3225 return u32;
3226# endif
3227}
3228#endif
3229
3230
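/*
 * Illustrative usage sketch (not part of the original header): since the
 * old value is returned, the xadd-based add works nicely as a ticket
 * dispenser; each caller gets a unique, monotonically increasing number.
 * The g_uNextTicket variable is a made-up name.
 *
 * @code
 *     uint32_t uMyTicket = ASMAtomicAddU32(&g_uNextTicket, 1);   // old value == our ticket
 * @endcode
 */
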
3231/**
3232 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3233 *
3234 * @returns The old value.
3235 * @param pi32 Pointer to the value.
3236 * @param i32 Number to add.
3237 */
3238DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3239{
3240 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3241}
3242
3243
3244/**
3245 * Atomically increment a 32-bit value, ordered.
3246 *
3247 * @returns The new value.
3248 * @param pu32 Pointer to the value to increment.
3249 */
3250#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3251DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3252#else
3253DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3254{
3255 uint32_t u32;
3256# if RT_INLINE_ASM_USES_INTRIN
3257 u32 = _InterlockedIncrement((long *)pu32);
3258 return u32;
3259
3260# elif RT_INLINE_ASM_GNU_STYLE
3261 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3262 : "=r" (u32),
3263 "=m" (*pu32)
3264 : "0" (1),
3265 "m" (*pu32)
3266 : "memory");
3267 return u32+1;
3268# else
3269 __asm
3270 {
3271 mov eax, 1
3272# ifdef RT_ARCH_AMD64
3273 mov rdx, [pu32]
3274 lock xadd [rdx], eax
3275# else
3276 mov edx, [pu32]
3277 lock xadd [edx], eax
3278# endif
3279 mov u32, eax
3280 }
3281 return u32+1;
3282# endif
3283}
3284#endif
3285
3286
3287/**
3288 * Atomically increment a signed 32-bit value, ordered.
3289 *
3290 * @returns The new value.
3291 * @param pi32 Pointer to the value to increment.
3292 */
3293DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3294{
3295 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3296}
3297
3298
3299/**
3300 * Atomically decrement an unsigned 32-bit value, ordered.
3301 *
3302 * @returns The new value.
3303 * @param pu32 Pointer to the value to decrement.
3304 */
3305#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3306DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3307#else
3308DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3309{
3310 uint32_t u32;
3311# if RT_INLINE_ASM_USES_INTRIN
3312 u32 = _InterlockedDecrement((long *)pu32);
3313 return u32;
3314
3315# elif RT_INLINE_ASM_GNU_STYLE
3316 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3317 : "=r" (u32),
3318 "=m" (*pu32)
3319 : "0" (-1),
3320 "m" (*pu32)
3321 : "memory");
3322 return u32-1;
3323# else
3324 __asm
3325 {
3326 mov eax, -1
3327# ifdef RT_ARCH_AMD64
3328 mov rdx, [pu32]
3329 lock xadd [rdx], eax
3330# else
3331 mov edx, [pu32]
3332 lock xadd [edx], eax
3333# endif
3334 mov u32, eax
3335 }
3336 return u32-1;
3337# endif
3338}
3339#endif
3340
3341
3342/**
3343 * Atomically decrement a signed 32-bit value, ordered.
3344 *
3345 * @returns The new value.
3346 * @param pi32 Pointer to the value to decrement.
3347 */
3348DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3349{
3350 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3351}
3352
3353
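/*
 * Illustrative usage sketch (not part of the original header): reference
 * counting with the increment/decrement pairs above; both return the *new*
 * value, so hitting zero on decrement tells the caller the object can be
 * freed. The Example* function names are made up.
 *
 * @code
 *     static void ExampleRetain(uint32_t volatile *pcRefs)
 *     {
 *         ASMAtomicIncU32(pcRefs);
 *     }
 *
 *     static bool ExampleRelease(uint32_t volatile *pcRefs)
 *     {
 *         return ASMAtomicDecU32(pcRefs) == 0;    // true when the last reference is gone
 *     }
 * @endcode
 */
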
3354/**
3355 * Atomically Or an unsigned 32-bit value, ordered.
3356 *
3357 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3358 * @param u32 The value to OR *pu32 with.
3359 */
3360#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3361DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3362#else
3363DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3364{
3365# if RT_INLINE_ASM_USES_INTRIN
3366 _InterlockedOr((long volatile *)pu32, (long)u32);
3367
3368# elif RT_INLINE_ASM_GNU_STYLE
3369 __asm__ __volatile__("lock; orl %1, %0\n\t"
3370 : "=m" (*pu32)
3371 : "ir" (u32),
3372 "m" (*pu32));
3373# else
3374 __asm
3375 {
3376 mov eax, [u32]
3377# ifdef RT_ARCH_AMD64
3378 mov rdx, [pu32]
3379 lock or [rdx], eax
3380# else
3381 mov edx, [pu32]
3382 lock or [edx], eax
3383# endif
3384 }
3385# endif
3386}
3387#endif
3388
3389
3390/**
3391 * Atomically Or a signed 32-bit value, ordered.
3392 *
3393 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3394 * @param i32 The value to OR *pi32 with.
3395 */
3396DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3397{
3398 ASMAtomicOrU32((uint32_t volatile *)pi32, (uint32_t)i32);
3399}
3400
3401
3402/**
3403 * Atomically And an unsigned 32-bit value, ordered.
3404 *
3405 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3406 * @param u32 The value to AND *pu32 with.
3407 */
3408#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3409DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3410#else
3411DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3412{
3413# if RT_INLINE_ASM_USES_INTRIN
3414 _InterlockedAnd((long volatile *)pu32, u32);
3415
3416# elif RT_INLINE_ASM_GNU_STYLE
3417 __asm__ __volatile__("lock; andl %1, %0\n\t"
3418 : "=m" (*pu32)
3419 : "ir" (u32),
3420 "m" (*pu32));
3421# else
3422 __asm
3423 {
3424 mov eax, [u32]
3425# ifdef RT_ARCH_AMD64
3426 mov rdx, [pu32]
3427 lock and [rdx], eax
3428# else
3429 mov edx, [pu32]
3430 lock and [edx], eax
3431# endif
3432 }
3433# endif
3434}
3435#endif
3436
3437
3438/**
3439 * Atomically And a signed 32-bit value, ordered.
3440 *
3441 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3442 * @param i32 The value to AND *pi32 with.
3443 */
3444DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3445{
3446 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3447}
3448
3449
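/*
 * Illustrative usage sketch (not part of the original header): OR sets flag
 * bits, AND with the complement clears them, both without disturbing the
 * other bits. The MYDEV_F_BUSY bit and the pDev->fFlags member are
 * hypothetical.
 *
 * @code
 *     ASMAtomicOrU32(&pDev->fFlags, MYDEV_F_BUSY);       // set the busy bit
 *     ASMAtomicAndU32(&pDev->fFlags, ~MYDEV_F_BUSY);     // clear it again
 * @endcode
 */
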
3450/**
3451 * Memory fence, waits for any pending writes and reads to complete.
3452 */
3453DECLINLINE(void) ASMMemoryFence(void)
3454{
3455 /** @todo use mfence? check if all cpus we care for support it. */
3456 uint32_t volatile u32;
3457 ASMAtomicXchgU32(&u32, 0);
3458}
3459
3460
3461/**
3462 * Write fence, waits for any pending writes to complete.
3463 */
3464DECLINLINE(void) ASMWriteFence(void)
3465{
3466 /** @todo use sfence? check if all cpus we care for support it. */
3467 ASMMemoryFence();
3468}
3469
3470
3471/**
3472 * Read fence, waits for any pending reads to complete.
3473 */
3474DECLINLINE(void) ASMReadFence(void)
3475{
3476 /** @todo use lfence? check if all cpus we care for support it. */
3477 ASMMemoryFence();
3478}
3479
3480
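/*
 * Illustrative usage sketch (not part of the original header): a minimal
 * producer/consumer hand-off using the fences above together with the
 * unordered accessors declared below. The g_Payload and g_fReady globals
 * and the CalcPayload()/UsePayload() helpers are made-up names; the payload
 * itself needs no atomic access because the ready flag guards it.
 *
 * @code
 *     // producer
 *     g_Payload = CalcPayload();          // ordinary stores
 *     ASMWriteFence();                    // the payload must be visible first
 *     ASMAtomicUoWriteU32(&g_fReady, 1);
 *
 *     // consumer
 *     if (ASMAtomicUoReadU32(&g_fReady))
 *     {
 *         ASMReadFence();                 // don't read the payload too early
 *         UsePayload(g_Payload);
 *     }
 * @endcode
 */
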
3481/**
3482 * Atomically reads an unsigned 8-bit value, ordered.
3483 *
3484 * @returns Current *pu8 value
3485 * @param pu8 Pointer to the 8-bit variable to read.
3486 */
3487DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3488{
3489 ASMMemoryFence();
3490 return *pu8; /* byte reads are atomic on x86 */
3491}
3492
3493
3494/**
3495 * Atomically reads an unsigned 8-bit value, unordered.
3496 *
3497 * @returns Current *pu8 value
3498 * @param pu8 Pointer to the 8-bit variable to read.
3499 */
3500DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3501{
3502 return *pu8; /* byte reads are atomic on x86 */
3503}
3504
3505
3506/**
3507 * Atomically reads a signed 8-bit value, ordered.
3508 *
3509 * @returns Current *pi8 value
3510 * @param pi8 Pointer to the 8-bit variable to read.
3511 */
3512DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3513{
3514 ASMMemoryFence();
3515 return *pi8; /* byte reads are atomic on x86 */
3516}
3517
3518
3519/**
3520 * Atomically reads a signed 8-bit value, unordered.
3521 *
3522 * @returns Current *pi8 value
3523 * @param pi8 Pointer to the 8-bit variable to read.
3524 */
3525DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3526{
3527 return *pi8; /* byte reads are atomic on x86 */
3528}
3529
3530
3531/**
3532 * Atomically reads an unsigned 16-bit value, ordered.
3533 *
3534 * @returns Current *pu16 value
3535 * @param pu16 Pointer to the 16-bit variable to read.
3536 */
3537DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3538{
3539 ASMMemoryFence();
3540 Assert(!((uintptr_t)pu16 & 1));
3541 return *pu16;
3542}
3543
3544
3545/**
3546 * Atomically reads an unsigned 16-bit value, unordered.
3547 *
3548 * @returns Current *pu16 value
3549 * @param pu16 Pointer to the 16-bit variable to read.
3550 */
3551DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3552{
3553 Assert(!((uintptr_t)pu16 & 1));
3554 return *pu16;
3555}
3556
3557
3558/**
3559 * Atomically reads a signed 16-bit value, ordered.
3560 *
3561 * @returns Current *pi16 value
3562 * @param pi16 Pointer to the 16-bit variable to read.
3563 */
3564DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3565{
3566 ASMMemoryFence();
3567 Assert(!((uintptr_t)pi16 & 1));
3568 return *pi16;
3569}
3570
3571
3572/**
3573 * Atomically reads a signed 16-bit value, unordered.
3574 *
3575 * @returns Current *pi16 value
3576 * @param pi16 Pointer to the 16-bit variable to read.
3577 */
3578DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3579{
3580 Assert(!((uintptr_t)pi16 & 1));
3581 return *pi16;
3582}
3583
3584
3585/**
3586 * Atomically reads an unsigned 32-bit value, ordered.
3587 *
3588 * @returns Current *pu32 value
3589 * @param pu32 Pointer to the 32-bit variable to read.
3590 */
3591DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3592{
3593 ASMMemoryFence();
3594 Assert(!((uintptr_t)pu32 & 3));
3595 return *pu32;
3596}
3597
3598
3599/**
3600 * Atomically reads an unsigned 32-bit value, unordered.
3601 *
3602 * @returns Current *pu32 value
3603 * @param pu32 Pointer to the 32-bit variable to read.
3604 */
3605DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3606{
3607 Assert(!((uintptr_t)pu32 & 3));
3608 return *pu32;
3609}
3610
3611
3612/**
3613 * Atomically reads a signed 32-bit value, ordered.
3614 *
3615 * @returns Current *pi32 value
3616 * @param pi32 Pointer to the 32-bit variable to read.
3617 */
3618DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3619{
3620 ASMMemoryFence();
3621 Assert(!((uintptr_t)pi32 & 3));
3622 return *pi32;
3623}
3624
3625
3626/**
3627 * Atomically reads a signed 32-bit value, unordered.
3628 *
3629 * @returns Current *pi32 value
3630 * @param pi32 Pointer to the 32-bit variable to read.
3631 */
3632DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3633{
3634 Assert(!((uintptr_t)pi32 & 3));
3635 return *pi32;
3636}
3637
3638
3639/**
3640 * Atomically reads an unsigned 64-bit value, ordered.
3641 *
3642 * @returns Current *pu64 value
3643 * @param pu64 Pointer to the 64-bit variable to read.
3644 * The memory pointed to must be writable.
3645 * @remark This will fault if the memory is read-only!
3646 */
3647#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3648DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3649#else
3650DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3651{
3652 uint64_t u64;
3653# ifdef RT_ARCH_AMD64
3654 Assert(!((uintptr_t)pu64 & 7));
3655/*# if RT_INLINE_ASM_GNU_STYLE
3656 __asm__ __volatile__( "mfence\n\t"
3657 "movq %1, %0\n\t"
3658 : "=r" (u64)
3659 : "m" (*pu64));
3660# else
3661 __asm
3662 {
3663 mfence
3664 mov rdx, [pu64]
3665 mov rax, [rdx]
3666 mov [u64], rax
3667 }
3668# endif*/
3669 ASMMemoryFence();
3670 u64 = *pu64;
3671# else /* !RT_ARCH_AMD64 */
3672# if RT_INLINE_ASM_GNU_STYLE
3673# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3674 uint32_t u32EBX = 0;
3675 Assert(!((uintptr_t)pu64 & 7));
3676 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3677 "lock; cmpxchg8b (%5)\n\t"
3678 "movl %3, %%ebx\n\t"
3679 : "=A" (u64),
3680 "=m" (*pu64)
3681 : "0" (0),
3682 "m" (u32EBX),
3683 "c" (0),
3684 "S" (pu64),
3685 "m" (*pu64));
3686# else /* !PIC */
3687 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3688 : "=A" (u64),
3689 "=m" (*pu64)
3690 : "0" (0),
3691 "b" (0),
3692 "c" (0),
3693 "m" (*pu64));
3694# endif
3695# else
3696 Assert(!((uintptr_t)pu64 & 7));
3697 __asm
3698 {
3699 xor eax, eax
3700 xor edx, edx
3701 mov edi, pu64
3702 xor ecx, ecx
3703 xor ebx, ebx
3704 lock cmpxchg8b [edi]
3705 mov dword ptr [u64], eax
3706 mov dword ptr [u64 + 4], edx
3707 }
3708# endif
3709# endif /* !RT_ARCH_AMD64 */
3710 return u64;
3711}
3712#endif
3713
3714
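/*
 * Note on the 32-bit implementation above (not part of the original text):
 * with eax=edx=ebx=ecx=0 the cmpxchg8b either leaves a zero *pu64 unchanged
 * or fails the comparison and loads the current value into edx:eax, so in
 * both cases the 64-bit value is read atomically. The potential write is
 * also why the memory must be writable. A typical use is snapshotting a
 * 64-bit statistics counter; the pStats->cbTransferred field is a made-up
 * name.
 *
 * @code
 *     uint64_t cbSnapshot = ASMAtomicReadU64(&pStats->cbTransferred);
 * @endcode
 */
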
3715/**
3716 * Atomically reads an unsigned 64-bit value, unordered.
3717 *
3718 * @returns Current *pu64 value
3719 * @param pu64 Pointer to the 64-bit variable to read.
3720 * The memory pointed to must be writable.
3721 * @remark This will fault if the memory is read-only!
3722 */
3723#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3724DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3725#else
3726DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3727{
3728 uint64_t u64;
3729# ifdef RT_ARCH_AMD64
3730 Assert(!((uintptr_t)pu64 & 7));
3731/*# if RT_INLINE_ASM_GNU_STYLE
3732 Assert(!((uintptr_t)pu64 & 7));
3733 __asm__ __volatile__("movq %1, %0\n\t"
3734 : "=r" (u64)
3735 : "m" (*pu64));
3736# else
3737 __asm
3738 {
3739 mov rdx, [pu64]
3740 mov rax, [rdx]
3741 mov [u64], rax
3742 }
3743# endif */
3744 u64 = *pu64;
3745# else /* !RT_ARCH_AMD64 */
3746# if RT_INLINE_ASM_GNU_STYLE
3747# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3748 uint32_t u32EBX = 0;
3749 Assert(!((uintptr_t)pu64 & 7));
3750 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3751 "lock; cmpxchg8b (%5)\n\t"
3752 "movl %3, %%ebx\n\t"
3753 : "=A" (u64),
3754 "=m" (*pu64)
3755 : "0" (0),
3756 "m" (u32EBX),
3757 "c" (0),
3758 "S" (pu64),
3759 "m" (*pu64));
3760# else /* !PIC */
3761 __asm__ __volatile__("cmpxchg8b %1\n\t"
3762 : "=A" (u64),
3763 "=m" (*pu64)
3764 : "0" (0),
3765 "b" (0),
3766 "c" (0),
3767 "m" (*pu64));
3768# endif
3769# else
3770 Assert(!((uintptr_t)pu64 & 7));
3771 __asm
3772 {
3773 xor eax, eax
3774 xor edx, edx
3775 mov edi, pu64
3776 xor ecx, ecx
3777 xor ebx, ebx
3778 lock cmpxchg8b [edi]
3779 mov dword ptr [u64], eax
3780 mov dword ptr [u64 + 4], edx
3781 }
3782# endif
3783# endif /* !RT_ARCH_AMD64 */
3784 return u64;
3785}
3786#endif
3787
3788
3789/**
3790 * Atomically reads a signed 64-bit value, ordered.
3791 *
3792 * @returns Current *pi64 value
3793 * @param pi64 Pointer to the 64-bit variable to read.
3794 * The memory pointed to must be writable.
3795 * @remark This will fault if the memory is read-only!
3796 */
3797DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3798{
3799 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3800}
3801
3802
3803/**
3804 * Atomically reads a signed 64-bit value, unordered.
3805 *
3806 * @returns Current *pi64 value
3807 * @param pi64 Pointer to the 64-bit variable to read.
3808 * The memory pointed to must be writable.
3809 * @remark This will fault if the memory is read-only!
3810 */
3811DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3812{
3813 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3814}
3815
3816
3817/**
3818 * Atomically reads a pointer value, ordered.
3819 *
3820 * @returns Current *pv value
3821 * @param ppv Pointer to the pointer variable to read.
3822 */
3823DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3824{
3825#if ARCH_BITS == 32
3826 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3827#elif ARCH_BITS == 64
3828 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3829#else
3830# error "ARCH_BITS is bogus"
3831#endif
3832}
3833
3834
3835/**
3836 * Atomically reads a pointer value, unordered.
3837 *
3838 * @returns Current *pv value
3839 * @param ppv Pointer to the pointer variable to read.
3840 */
3841DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3842{
3843#if ARCH_BITS == 32
3844 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3845#elif ARCH_BITS == 64
3846 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3847#else
3848# error "ARCH_BITS is bogus"
3849#endif
3850}
3851
3852
3853/**
3854 * Atomically reads a boolean value, ordered.
3855 *
3856 * @returns Current *pf value
3857 * @param pf Pointer to the boolean variable to read.
3858 */
3859DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3860{
3861 ASMMemoryFence();
3862 return *pf; /* byte reads are atomic on x86 */
3863}
3864
3865
3866/**
3867 * Atomically reads a boolean value, unordered.
3868 *
3869 * @returns Current *pf value
3870 * @param pf Pointer to the boolean variable to read.
3871 */
3872DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3873{
3874 return *pf; /* byte reads are atomic on x86 */
3875}
3876
3877
3878/**
3879 * Atomically read a typical IPRT handle value, ordered.
3880 *
3881 * @param ph Pointer to the handle variable to read.
3882 * @param phRes Where to store the result.
3883 *
3884 * @remarks This doesn't currently work for all handles (like RTFILE).
3885 */
3886#define ASMAtomicReadHandle(ph, phRes) \
3887 do { \
3888 *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
3889 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3890 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
3891 } while (0)
3892
3893
3894/**
3895 * Atomically read a typical IPRT handle value, unordered.
3896 *
3897 * @param ph Pointer to the handle variable to read.
3898 * @param phRes Where to store the result.
3899 *
3900 * @remarks This doesn't currently work for all handles (like RTFILE).
3901 */
3902#define ASMAtomicUoReadHandle(ph, phRes) \
3903 do { \
3904 *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
3905 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3906 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
3907 } while (0)
3908
3909
3910/**
3911 * Atomically read a value which size might differ
3912 * between platforms or compilers, ordered.
3913 *
3914 * @param pu Pointer to the variable to read.
3915 * @param puRes Where to store the result.
3916 */
3917#define ASMAtomicReadSize(pu, puRes) \
3918 do { \
3919 switch (sizeof(*(pu))) { \
3920 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3921 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
3922 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
3923 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
3924 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3925 } \
3926 } while (0)
3927
3928
3929/**
3930 * Atomically read a value which size might differ
3931 * between platforms or compilers, unordered.
3932 *
3933 * @param pu Pointer to the variable to read.
3934 * @param puRes Where to store the result.
3935 */
3936#define ASMAtomicUoReadSize(pu, puRes) \
3937 do { \
3938 switch (sizeof(*(pu))) { \
3939 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3940 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
3941 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
3942 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
3943 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3944 } \
3945 } while (0)
3946
3947
3948/**
3949 * Atomically writes an unsigned 8-bit value, ordered.
3950 *
3951 * @param pu8 Pointer to the 8-bit variable.
3952 * @param u8 The 8-bit value to assign to *pu8.
3953 */
3954DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3955{
3956 ASMAtomicXchgU8(pu8, u8);
3957}
3958
3959
3960/**
3961 * Atomically writes an unsigned 8-bit value, unordered.
3962 *
3963 * @param pu8 Pointer to the 8-bit variable.
3964 * @param u8 The 8-bit value to assign to *pu8.
3965 */
3966DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3967{
3968 *pu8 = u8; /* byte writes are atomic on x86 */
3969}
3970
3971
3972/**
3973 * Atomically writes a signed 8-bit value, ordered.
3974 *
3975 * @param pi8 Pointer to the 8-bit variable to read.
3976 * @param i8 The 8-bit value to assign to *pi8.
3977 */
3978DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3979{
3980 ASMAtomicXchgS8(pi8, i8);
3981}
3982
3983
3984/**
3985 * Atomically writes a signed 8-bit value, unordered.
3986 *
3987 * @param pi8 Pointer to the 8-bit variable to read.
3988 * @param i8 The 8-bit value to assign to *pi8.
3989 */
3990DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3991{
3992 *pi8 = i8; /* byte writes are atomic on x86 */
3993}
3994
3995
3996/**
3997 * Atomically writes an unsigned 16-bit value, ordered.
3998 *
3999 * @param pu16 Pointer to the 16-bit variable.
4000 * @param u16 The 16-bit value to assign to *pu16.
4001 */
4002DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4003{
4004 ASMAtomicXchgU16(pu16, u16);
4005}
4006
4007
4008/**
4009 * Atomically writes an unsigned 16-bit value, unordered.
4010 *
4011 * @param pu16 Pointer to the 16-bit variable.
4012 * @param u16 The 16-bit value to assign to *pu16.
4013 */
4014DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4015{
4016 Assert(!((uintptr_t)pu16 & 1));
4017 *pu16 = u16;
4018}
4019
4020
4021/**
4022 * Atomically writes a signed 16-bit value, ordered.
4023 *
4024 * @param pi16 Pointer to the 16-bit variable to read.
4025 * @param i16 The 16-bit value to assign to *pi16.
4026 */
4027DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4028{
4029 ASMAtomicXchgS16(pi16, i16);
4030}
4031
4032
4033/**
4034 * Atomically writes a signed 16-bit value, unordered.
4035 *
4036 * @param pi16 Pointer to the 16-bit variable to read.
4037 * @param i16 The 16-bit value to assign to *pi16.
4038 */
4039DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4040{
4041 Assert(!((uintptr_t)pi16 & 1));
4042 *pi16 = i16;
4043}
4044
4045
4046/**
4047 * Atomically writes an unsigned 32-bit value, ordered.
4048 *
4049 * @param pu32 Pointer to the 32-bit variable.
4050 * @param u32 The 32-bit value to assign to *pu32.
4051 */
4052DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4053{
4054 ASMAtomicXchgU32(pu32, u32);
4055}
4056
4057
4058/**
4059 * Atomically writes an unsigned 32-bit value, unordered.
4060 *
4061 * @param pu32 Pointer to the 32-bit variable.
4062 * @param u32 The 32-bit value to assign to *pu32.
4063 */
4064DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4065{
4066 Assert(!((uintptr_t)pu32 & 3));
4067 *pu32 = u32;
4068}
4069
4070
4071/**
4072 * Atomically writes a signed 32-bit value, ordered.
4073 *
4074 * @param pi32 Pointer to the 32-bit variable to read.
4075 * @param i32 The 32-bit value to assign to *pi32.
4076 */
4077DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4078{
4079 ASMAtomicXchgS32(pi32, i32);
4080}
4081
4082
4083/**
4084 * Atomically writes a signed 32-bit value, unordered.
4085 *
4086 * @param pi32 Pointer to the 32-bit variable to read.
4087 * @param i32 The 32-bit value to assign to *pi32.
4088 */
4089DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4090{
4091 Assert(!((uintptr_t)pi32 & 3));
4092 *pi32 = i32;
4093}
4094
4095
4096/**
4097 * Atomically writes an unsigned 64-bit value, ordered.
4098 *
4099 * @param pu64 Pointer to the 64-bit variable.
4100 * @param u64 The 64-bit value to assign to *pu64.
4101 */
4102DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4103{
4104 ASMAtomicXchgU64(pu64, u64);
4105}
4106
4107
4108/**
4109 * Atomically writes an unsigned 64-bit value, unordered.
4110 *
4111 * @param pu64 Pointer to the 64-bit variable.
4112 * @param u64 The 64-bit value to assign to *pu64.
4113 */
4114DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4115{
4116 Assert(!((uintptr_t)pu64 & 7));
4117#if ARCH_BITS == 64
4118 *pu64 = u64;
4119#else
4120 ASMAtomicXchgU64(pu64, u64);
4121#endif
4122}
4123
4124
4125/**
4126 * Atomically writes a signed 64-bit value, ordered.
4127 *
4128 * @param pi64 Pointer to the 64-bit variable.
4129 * @param i64 The 64-bit value to assign to *pi64.
4130 */
4131DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4132{
4133 ASMAtomicXchgS64(pi64, i64);
4134}
4135
4136
4137/**
4138 * Atomically writes a signed 64-bit value, unordered.
4139 *
4140 * @param pi64 Pointer to the 64-bit variable.
4141 * @param i64 The 64-bit value to assign to *pi64.
4142 */
4143DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4144{
4145 Assert(!((uintptr_t)pi64 & 7));
4146#if ARCH_BITS == 64
4147 *pi64 = i64;
4148#else
4149 ASMAtomicXchgS64(pi64, i64);
4150#endif
4151}
4152
4153
4154/**
4155 * Atomically writes a boolean value, ordered.
4156 *
4157 * @param pf Pointer to the boolean variable.
4158 * @param f The boolean value to assign to *pf.
4159 */
4160DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4161{
4162 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4163}
4164
4165
4166/**
4167 * Atomically writes a boolean value, unordered.
4168 *
4169 * @param pf Pointer to the boolean variable.
4170 * @param f The boolean value to assign to *pf.
4171 */
4172DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4173{
4174 *pf = f; /* byte writes are atomic on x86 */
4175}
4176
4177
4178/**
4179 * Atomically writes a pointer value, ordered.
4180 *
4182 * @param ppv Pointer to the pointer variable.
4183 * @param pv The pointer value to assign to *ppv.
4184 */
4185DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
4186{
4187#if ARCH_BITS == 32
4188 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4189#elif ARCH_BITS == 64
4190 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4191#else
4192# error "ARCH_BITS is bogus"
4193#endif
4194}
4195
4196
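/*
 * Illustrative usage sketch (not part of the original header): publishing a
 * fully initialized structure through a shared pointer with the ordered
 * write, and picking it up again with the ordered read above. MYSTRUCT,
 * g_pShared and MyStructUse() are hypothetical names.
 *
 * @code
 *     // publisher
 *     pNew->cRefs = 1;                    // initialize everything first
 *     ASMAtomicWritePtr((void * volatile *)&g_pShared, pNew);
 *
 *     // reader
 *     MYSTRUCT *pCur = (MYSTRUCT *)ASMAtomicReadPtr((void * volatile *)&g_pShared);
 *     if (pCur)
 *         MyStructUse(pCur);
 * @endcode
 */
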
4197/**
4198 * Atomically writes a pointer value, unordered.
4199 *
4201 * @param ppv Pointer to the pointer variable.
4202 * @param pv The pointer value to assign to *ppv.
4203 */
4204DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
4205{
4206#if ARCH_BITS == 32
4207 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4208#elif ARCH_BITS == 64
4209 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4210#else
4211# error "ARCH_BITS is bogus"
4212#endif
4213}
4214
4215
4216/**
4217 * Atomically write a typical IPRT handle value, ordered.
4218 *
4219 * @param ph Pointer to the variable to update.
4220 * @param hNew The value to assign to *ph.
4221 *
4222 * @remarks This doesn't currently work for all handles (like RTFILE).
4223 */
4224#define ASMAtomicWriteHandle(ph, hNew) \
4225 do { \
4226 ASMAtomicWritePtr((void * volatile *)(ph), (void *)(hNew)); \
4227 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4228 } while (0)
4229
4230
4231/**
4232 * Atomically write a typical IPRT handle value, unordered.
4233 *
4234 * @param ph Pointer to the variable to update.
4235 * @param hNew The value to assign to *ph.
4236 *
4237 * @remarks This doesn't currently work for all handles (like RTFILE).
4238 */
4239#define ASMAtomicUoWriteHandle(ph, hNew) \
4240 do { \
4241 ASMAtomicUoWritePtr((void * volatile *)(ph), (void *)(hNew)); \
4242 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4243 } while (0)
4244
4245
4246/**
4247 * Atomically write a value which size might differ
4248 * between platforms or compilers, ordered.
4249 *
4250 * @param pu Pointer to the variable to update.
4251 * @param uNew The value to assign to *pu.
4252 */
4253#define ASMAtomicWriteSize(pu, uNew) \
4254 do { \
4255 switch (sizeof(*(pu))) { \
4256 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4257 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4258 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4259 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4260 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4261 } \
4262 } while (0)
4263
4264/**
4265 * Atomically write a value which size might differ
4266 * between platforms or compilers, unordered.
4267 *
4268 * @param pu Pointer to the variable to update.
4269 * @param uNew The value to assign to *pu.
4270 */
4271#define ASMAtomicUoWriteSize(pu, uNew) \
4272 do { \
4273 switch (sizeof(*(pu))) { \
4274 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4275 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4276 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4277 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4278 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4279 } \
4280 } while (0)
4281
4282
4283
4284
4285/**
4286 * Invalidate page.
4287 *
4288 * @param pv Address of the page to invalidate.
4289 */
4290#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4291DECLASM(void) ASMInvalidatePage(void *pv);
4292#else
4293DECLINLINE(void) ASMInvalidatePage(void *pv)
4294{
4295# if RT_INLINE_ASM_USES_INTRIN
4296 __invlpg(pv);
4297
4298# elif RT_INLINE_ASM_GNU_STYLE
4299 __asm__ __volatile__("invlpg %0\n\t"
4300 : : "m" (*(uint8_t *)pv));
4301# else
4302 __asm
4303 {
4304# ifdef RT_ARCH_AMD64
4305 mov rax, [pv]
4306 invlpg [rax]
4307# else
4308 mov eax, [pv]
4309 invlpg [eax]
4310# endif
4311 }
4312# endif
4313}
4314#endif
4315
4316
4317#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4318# if PAGE_SIZE != 0x1000
4319# error "PAGE_SIZE is not 0x1000!"
4320# endif
4321#endif
4322
4323/**
4324 * Zeros a 4K memory page.
4325 *
4326 * @param pv Pointer to the memory block. This must be page aligned.
4327 */
4328#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4329DECLASM(void) ASMMemZeroPage(volatile void *pv);
4330# else
4331DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4332{
4333# if RT_INLINE_ASM_USES_INTRIN
4334# ifdef RT_ARCH_AMD64
4335 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4336# else
4337 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4338# endif
4339
4340# elif RT_INLINE_ASM_GNU_STYLE
4341 RTCCUINTREG uDummy;
4342# ifdef RT_ARCH_AMD64
4343 __asm__ __volatile__ ("rep stosq"
4344 : "=D" (pv),
4345 "=c" (uDummy)
4346 : "0" (pv),
4347 "c" (0x1000 >> 3),
4348 "a" (0)
4349 : "memory");
4350# else
4351 __asm__ __volatile__ ("rep stosl"
4352 : "=D" (pv),
4353 "=c" (uDummy)
4354 : "0" (pv),
4355 "c" (0x1000 >> 2),
4356 "a" (0)
4357 : "memory");
4358# endif
4359# else
4360 __asm
4361 {
4362# ifdef RT_ARCH_AMD64
4363 xor rax, rax
4364 mov ecx, 0200h
4365 mov rdi, [pv]
4366 rep stosq
4367# else
4368 xor eax, eax
4369 mov ecx, 0400h
4370 mov edi, [pv]
4371 rep stosd
4372# endif
4373 }
4374# endif
4375}
4376# endif
4377
4378
4379/**
4380 * Zeros a memory block with a 32-bit aligned size.
4381 *
4382 * @param pv Pointer to the memory block.
4383 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4384 */
4385#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4386DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4387#else
4388DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4389{
4390# if RT_INLINE_ASM_USES_INTRIN
4391# ifdef RT_ARCH_AMD64
4392 if (!(cb & 7))
4393 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4394 else
4395# endif
4396 __stosd((unsigned long *)pv, 0, cb / 4);
4397
4398# elif RT_INLINE_ASM_GNU_STYLE
4399 __asm__ __volatile__ ("rep stosl"
4400 : "=D" (pv),
4401 "=c" (cb)
4402 : "0" (pv),
4403 "1" (cb >> 2),
4404 "a" (0)
4405 : "memory");
4406# else
4407 __asm
4408 {
4409 xor eax, eax
4410# ifdef RT_ARCH_AMD64
4411 mov rcx, [cb]
4412 shr rcx, 2
4413 mov rdi, [pv]
4414# else
4415 mov ecx, [cb]
4416 shr ecx, 2
4417 mov edi, [pv]
4418# endif
4419 rep stosd
4420 }
4421# endif
4422}
4423#endif
4424
4425
4426/**
4427 * Fills a memory block with a 32-bit aligned size.
4428 *
4429 * @param pv Pointer to the memory block.
4430 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4431 * @param u32 The value to fill with.
4432 */
4433#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4434DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4435#else
4436DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4437{
4438# if RT_INLINE_ASM_USES_INTRIN
4439# ifdef RT_ARCH_AMD64
4440 if (!(cb & 7))
4441 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4442 else
4443# endif
4444 __stosd((unsigned long *)pv, u32, cb / 4);
4445
4446# elif RT_INLINE_ASM_GNU_STYLE
4447 __asm__ __volatile__ ("rep stosl"
4448 : "=D" (pv),
4449 "=c" (cb)
4450 : "0" (pv),
4451 "1" (cb >> 2),
4452 "a" (u32)
4453 : "memory");
4454# else
4455 __asm
4456 {
4457# ifdef RT_ARCH_AMD64
4458 mov rcx, [cb]
4459 shr rcx, 2
4460 mov rdi, [pv]
4461# else
4462 mov ecx, [cb]
4463 shr ecx, 2
4464 mov edi, [pv]
4465# endif
4466 mov eax, [u32]
4467 rep stosd
4468 }
4469# endif
4470}
4471#endif
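
/* Illustrative usage sketch (not part of the API; buffer and fill value below
 * are made up): clear a 32-bit aligned buffer, then fill it with a pattern.
 *
 *      uint32_t au32Buf[16];
 *      ASMMemZero32(au32Buf, sizeof(au32Buf));                        // all zero
 *      ASMMemFill32(au32Buf, sizeof(au32Buf), UINT32_C(0xdeadbeef));  // pattern fill
 */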
4472
4473
4474/**
4475 * Checks if a memory block is filled with the specified byte.
4476 *
4477 * This is a sort of inverted memchr.
4478 *
4479 * @returns Pointer to the first byte which doesn't equal u8.
4480 * @returns NULL if all equal to u8.
4481 *
4482 * @param pv Pointer to the memory block.
4483 * @param cb Number of bytes in the block. This MUST be a multiple of 32 bits (4 bytes)!
4484 * @param u8 The value it's supposed to be filled with.
4485 */
4486#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4487DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4488#else
4489DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4490{
4491/** @todo rewrite this in inline assembly? */
4492 uint8_t const *pb = (uint8_t const *)pv;
4493 for (; cb; cb--, pb++)
4494 if (RT_UNLIKELY(*pb != u8))
4495 return (void *)pb;
4496 return NULL;
4497}
4498#endif
4499
4500
4501/**
4502 * Checks if a memory block is filled with the specified 32-bit value.
4503 *
4504 * This is a sort of inverted memchr.
4505 *
4506 * @returns Pointer to the first value which doesn't equal u32.
4507 * @returns NULL if all equal to u32.
4508 *
4509 * @param pv Pointer to the memory block.
4510 * @param cb Number of bytes in the block. This MUST be a multiple of 32 bits (4 bytes)!
4511 * @param u32 The value it's supposed to be filled with.
4512 */
4513#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4514DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4515#else
4516DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4517{
4518/** @todo rewrite this in inline assembly? */
4519 uint32_t const *pu32 = (uint32_t const *)pv;
4520 for (; cb; cb -= 4, pu32++)
4521 if (RT_UNLIKELY(*pu32 != u32))
4522 return (uint32_t *)pu32;
4523 return NULL;
4524}
4525#endif
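
/* Illustrative usage sketch (not part of the API; the buffer is made up):
 * verifying a fill. Both checks return NULL when every element matches and a
 * pointer to the first mismatch otherwise.
 *
 *      uint32_t au32Buf[16];
 *      ASMMemFill32(au32Buf, sizeof(au32Buf), UINT32_C(0x5a5a5a5a));
 *      Assert(ASMMemIsAllU32(au32Buf, sizeof(au32Buf), UINT32_C(0x5a5a5a5a)) == NULL);
 *      Assert(ASMMemIsAll8(au32Buf, sizeof(au32Buf), 0x5a) == NULL);
 */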
4526
4527
4528/**
4529 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4530 *
4531 * @returns u32F1 * u32F2.
4532 */
4533#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4534DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4535#else
4536DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4537{
4538# ifdef RT_ARCH_AMD64
4539 return (uint64_t)u32F1 * u32F2;
4540# else /* !RT_ARCH_AMD64 */
4541 uint64_t u64;
4542# if RT_INLINE_ASM_GNU_STYLE
4543 __asm__ __volatile__("mull %%edx"
4544 : "=A" (u64)
4545 : "a" (u32F2), "d" (u32F1));
4546# else
4547 __asm
4548 {
4549 mov edx, [u32F1]
4550 mov eax, [u32F2]
4551 mul edx
4552 mov dword ptr [u64], eax
4553 mov dword ptr [u64 + 4], edx
4554 }
4555# endif
4556 return u64;
4557# endif /* !RT_ARCH_AMD64 */
4558}
4559#endif
4560
4561
4562/**
4563 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4564 *
4565 * @returns i32F1 * i32F2.
4566 */
4567#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4568DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4569#else
4570DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
4571{
4572# ifdef RT_ARCH_AMD64
4573 return (int64_t)i32F1 * i32F2;
4574# else /* !RT_ARCH_AMD64 */
4575 int64_t i64;
4576# if RT_INLINE_ASM_GNU_STYLE
4577 __asm__ __volatile__("imull %%edx"
4578 : "=A" (i64)
4579 : "a" (i32F2), "d" (i32F1));
4580# else
4581 __asm
4582 {
4583 mov edx, [i32F1]
4584 mov eax, [i32F2]
4585 imul edx
4586 mov dword ptr [i64], eax
4587 mov dword ptr [i64 + 4], edx
4588 }
4589# endif
4590 return i64;
4591# endif /* !RT_ARCH_AMD64 */
4592}
4593#endif
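
/* Illustrative sketch (not part of the API; values are made up): the widening
 * multiplies return the full 64-bit product, which a plain 32-bit multiply
 * would truncate.
 *
 *      uint64_t u64 = ASMMult2xU32RetU64(UINT32_C(0x80000000), 10);
 *      Assert(u64 == UINT64_C(0x500000000));   // a 32-bit multiply would wrap
 */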
4594
4595
4596/**
4597 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
4598 *
4599 * @returns u64 / u32.
4600 */
4601#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4602DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
4603#else
4604DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
4605{
4606# ifdef RT_ARCH_AMD64
4607 return (uint32_t)(u64 / u32);
4608# else /* !RT_ARCH_AMD64 */
4609# if RT_INLINE_ASM_GNU_STYLE
4610 RTCCUINTREG uDummy;
4611 __asm__ __volatile__("divl %3"
4612 : "=a" (u32), "=d"(uDummy)
4613 : "A" (u64), "r" (u32));
4614# else
4615 __asm
4616 {
4617 mov eax, dword ptr [u64]
4618 mov edx, dword ptr [u64 + 4]
4619 mov ecx, [u32]
4620 div ecx
4621 mov [u32], eax
4622 }
4623# endif
4624 return u32;
4625# endif /* !RT_ARCH_AMD64 */
4626}
4627#endif
4628
4629
4630/**
4631 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4632 *
4633 * @returns i64 / i32.
4634 */
4635#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4636DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4637#else
4638DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4639{
4640# ifdef RT_ARCH_AMD64
4641 return (int32_t)(i64 / i32);
4642# else /* !RT_ARCH_AMD64 */
4643# if RT_INLINE_ASM_GNU_STYLE
4644 RTCCUINTREG iDummy;
4645 __asm__ __volatile__("idivl %3"
4646 : "=a" (i32), "=d"(iDummy)
4647 : "A" (i64), "r" (i32));
4648# else
4649 __asm
4650 {
4651 mov eax, dword ptr [i64]
4652 mov edx, dword ptr [i64 + 4]
4653 mov ecx, [i32]
4654 idiv ecx
4655 mov [i32], eax
4656 }
4657# endif
4658 return i32;
4659# endif /* !RT_ARCH_AMD64 */
4660}
4661#endif
4662
4663
4664/**
4665 * Performs a 64-bit unsigned by 32-bit unsigned division,
4666 * returning the 32-bit unsigned remainder.
4667 *
4668 * @returns u64 % u32.
4669 *
4670 * @remarks It is important that the quotient u64 / u32 is <= UINT32_MAX or the division will overflow and crash.
4671 */
4672#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4673DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
4674#else
4675DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
4676{
4677# ifdef RT_ARCH_AMD64
4678 return (uint32_t)(u64 % u32);
4679# else /* !RT_ARCH_AMD64 */
4680# if RT_INLINE_ASM_GNU_STYLE
4681 RTCCUINTREG uDummy;
4682 __asm__ __volatile__("divl %3"
4683 : "=a" (uDummy), "=d"(u32)
4684 : "A" (u64), "r" (u32));
4685# else
4686 __asm
4687 {
4688 mov eax, dword ptr [u64]
4689 mov edx, dword ptr [u64 + 4]
4690 mov ecx, [u32]
4691 div ecx
4692 mov [u32], edx
4693 }
4694# endif
4695 return u32;
4696# endif /* !RT_ARCH_AMD64 */
4697}
4698#endif
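
/* Illustrative sketch (not part of the API; values are made up): splitting a
 * byte count into whole 4K pages and a byte remainder. The quotient must fit
 * in 32 bits for these helpers to be safe.
 *
 *      uint64_t cb     = UINT64_C(0x12345678);
 *      uint32_t cPages = ASMDivU64ByU32RetU32(cb, 0x1000);   // 0x12345
 *      uint32_t cbTail = ASMModU64ByU32RetU32(cb, 0x1000);   // 0x678
 */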
4699
4700
4701/**
4702 * Performs a 64-bit signed by 32-bit signed division,
4703 * returning the 32-bit signed remainder.
4704 *
4705 * @returns i64 % i32.
4706 *
4707 * @remarks It is important that the quotient i64 / i32 fits in a signed 32-bit value or the division will overflow and crash.
4708 */
4709#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4710DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
4711#else
4712DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
4713{
4714# ifdef RT_ARCH_AMD64
4715 return (int32_t)(i64 % i32);
4716# else /* !RT_ARCH_AMD64 */
4717# if RT_INLINE_ASM_GNU_STYLE
4718 RTCCUINTREG iDummy;
4719 __asm__ __volatile__("idivl %3"
4720 : "=a" (iDummy), "=d"(i32)
4721 : "A" (i64), "r" (i32));
4722# else
4723 __asm
4724 {
4725 mov eax, dword ptr [i64]
4726 mov edx, dword ptr [i64 + 4]
4727 mov ecx, [i32]
4728 idiv ecx
4729 mov [i32], edx
4730 }
4731# endif
4732 return i32;
4733# endif /* !RT_ARCH_AMD64 */
4734}
4735#endif
4736
4737
4738/**
4739 * Multiplies a 64-bit value by a 32-bit value and divides the result by another 32-bit value,
4740 * using a 96-bit intermediate result.
4741 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4742 * __udivdi3 and __umoddi3 even if this inline function is not used.
4743 *
4744 * @returns (u64A * u32B) / u32C.
4745 * @param u64A The 64-bit value.
4746 * @param u32B The 32-bit value to multiply A by.
4747 * @param u32C The 32-bit value to divide A*B by.
4748 */
4749#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4750DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4751#else
4752DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4753{
4754# if RT_INLINE_ASM_GNU_STYLE
4755# ifdef RT_ARCH_AMD64
4756 uint64_t u64Result, u64Spill;
4757 __asm__ __volatile__("mulq %2\n\t"
4758 "divq %3\n\t"
4759 : "=a" (u64Result),
4760 "=d" (u64Spill)
4761 : "r" ((uint64_t)u32B),
4762 "r" ((uint64_t)u32C),
4763 "0" (u64A),
4764 "1" (0));
4765 return u64Result;
4766# else
4767 uint32_t u32Dummy;
4768 uint64_t u64Result;
4769 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4770 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4771 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4772 eax = u64A.hi */
4773 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
4774 edx = u32C */
4775 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4776 edx = u32B */
4777 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4778 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4779 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4780 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4781 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4782 edx = u64Hi % u32C */
4783 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4784 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4785 "divl %%ecx \n\t" /* u64Result.lo */
4786 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4787 : "=A"(u64Result), "=c"(u32Dummy),
4788 "=S"(u32Dummy), "=D"(u32Dummy)
4789 : "a"((uint32_t)u64A),
4790 "S"((uint32_t)(u64A >> 32)),
4791 "c"(u32B),
4792 "D"(u32C));
4793 return u64Result;
4794# endif
4795# else
4796 RTUINT64U u;
4797 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4798 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4799 u64Hi += (u64Lo >> 32);
4800 u.s.Hi = (uint32_t)(u64Hi / u32C);
4801 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4802 return u.u;
4803# endif
4804}
4805#endif
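
/* Illustrative sketch (not part of the API; the frequencies are made-up sample
 * values): a typical use is rescaling a 64-bit tick count from one frequency
 * to another without losing the high bits of the intermediate product.
 *
 *      uint64_t cTicks   = UINT64_C(123456789012);
 *      uint32_t uSrcHz   = UINT32_C(1000000000);   // 1 GHz source clock
 *      uint32_t uDstHz   = UINT32_C(1000);         // want milliseconds
 *      uint64_t cMillies = ASMMultU64ByU32DivByU32(cTicks, uDstHz, uSrcHz);
 *      Assert(cMillies == UINT64_C(123456));       // truncating division
 */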
4806
4807
4808/**
4809 * Probes a byte pointer for read access.
4810 *
4811 * While the function will fault if the byte is not read accessible,
4812 * the idea is to do this in a safe place like before acquiring locks
4813 * and such like.
4814 *
4815 * Also, this function guarantees that an eager compiler is not going
4816 * to optimize the probing away.
4817 *
4818 * @param pvByte Pointer to the byte.
4819 */
4820#if RT_INLINE_ASM_EXTERNAL
4821DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4822#else
4823DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4824{
4825 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4826 uint8_t u8;
4827# if RT_INLINE_ASM_GNU_STYLE
4828 __asm__ __volatile__("movb (%1), %0\n\t"
4829 : "=r" (u8)
4830 : "r" (pvByte));
4831# else
4832 __asm
4833 {
4834# ifdef RT_ARCH_AMD64
4835 mov rax, [pvByte]
4836 mov al, [rax]
4837# else
4838 mov eax, [pvByte]
4839 mov al, [eax]
4840# endif
4841 mov [u8], al
4842 }
4843# endif
4844 return u8;
4845}
4846#endif
4847
4848/**
4849 * Probes a buffer for read access page by page.
4850 *
4851 * While the function will fault if the buffer is not fully read
4852 * accessible, the idea is to do this in a safe place like before
4853 * acquiring locks and such like.
4854 *
4855 * Also, this function guarantees that an eager compiler is not going
4856 * to optimize the probing away.
4857 *
4858 * @param pvBuf Pointer to the buffer.
4859 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4860 */
4861DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4862{
4863 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4864 /* the first byte */
4865 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4866 ASMProbeReadByte(pu8);
4867
4868 /* the pages in between. */
4869 while (cbBuf > /*PAGE_SIZE*/0x1000)
4870 {
4871 ASMProbeReadByte(pu8);
4872 cbBuf -= /*PAGE_SIZE*/0x1000;
4873 pu8 += /*PAGE_SIZE*/0x1000;
4874 }
4875
4876 /* the last byte */
4877 ASMProbeReadByte(pu8 + cbBuf - 1);
4878}
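
/* Illustrative sketch (not part of the API; MYREQ and pReq are hypothetical):
 * probe a caller supplied request structure before taking a spinlock, so any
 * access fault is raised here rather than while the lock is held.
 *
 *      ASMProbeReadBuffer(pReq, sizeof(MYREQ));
 *      // ... acquire the spinlock and process *pReq ...
 */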
4879
4880
4881/** @def ASMBreakpoint
4882 * Debugger Breakpoint.
4883 * @remark In the gnu world we add a nop instruction after the int3 to
4884 * force gdb to remain at the int3 source line.
4885 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
4886 * @internal
4887 */
4888#if RT_INLINE_ASM_GNU_STYLE
4889# ifndef __L4ENV__
4890# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4891# else
4892# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4893# endif
4894#else
4895# define ASMBreakpoint() __debugbreak()
4896#endif
4897
4898
4899
4900/** @defgroup grp_inline_bits Bit Operations
4901 * @{
4902 */
4903
4904
4905/**
4906 * Sets a bit in a bitmap.
4907 *
4908 * @param pvBitmap Pointer to the bitmap.
4909 * @param iBit The bit to set.
4910 */
4911#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4912DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4913#else
4914DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4915{
4916# if RT_INLINE_ASM_USES_INTRIN
4917 _bittestandset((long *)pvBitmap, iBit);
4918
4919# elif RT_INLINE_ASM_GNU_STYLE
4920 __asm__ __volatile__ ("btsl %1, %0"
4921 : "=m" (*(volatile long *)pvBitmap)
4922 : "Ir" (iBit),
4923 "m" (*(volatile long *)pvBitmap)
4924 : "memory");
4925# else
4926 __asm
4927 {
4928# ifdef RT_ARCH_AMD64
4929 mov rax, [pvBitmap]
4930 mov edx, [iBit]
4931 bts [rax], edx
4932# else
4933 mov eax, [pvBitmap]
4934 mov edx, [iBit]
4935 bts [eax], edx
4936# endif
4937 }
4938# endif
4939}
4940#endif
4941
4942
4943/**
4944 * Atomically sets a bit in a bitmap, ordered.
4945 *
4946 * @param pvBitmap Pointer to the bitmap.
4947 * @param iBit The bit to set.
4948 */
4949#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4950DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4951#else
4952DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4953{
4954# if RT_INLINE_ASM_USES_INTRIN
4955 _interlockedbittestandset((long *)pvBitmap, iBit);
4956# elif RT_INLINE_ASM_GNU_STYLE
4957 __asm__ __volatile__ ("lock; btsl %1, %0"
4958 : "=m" (*(volatile long *)pvBitmap)
4959 : "Ir" (iBit),
4960 "m" (*(volatile long *)pvBitmap)
4961 : "memory");
4962# else
4963 __asm
4964 {
4965# ifdef RT_ARCH_AMD64
4966 mov rax, [pvBitmap]
4967 mov edx, [iBit]
4968 lock bts [rax], edx
4969# else
4970 mov eax, [pvBitmap]
4971 mov edx, [iBit]
4972 lock bts [eax], edx
4973# endif
4974 }
4975# endif
4976}
4977#endif
4978
4979
4980/**
4981 * Clears a bit in a bitmap.
4982 *
4983 * @param pvBitmap Pointer to the bitmap.
4984 * @param iBit The bit to clear.
4985 */
4986#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4987DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4988#else
4989DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4990{
4991# if RT_INLINE_ASM_USES_INTRIN
4992 _bittestandreset((long *)pvBitmap, iBit);
4993
4994# elif RT_INLINE_ASM_GNU_STYLE
4995 __asm__ __volatile__ ("btrl %1, %0"
4996 : "=m" (*(volatile long *)pvBitmap)
4997 : "Ir" (iBit),
4998 "m" (*(volatile long *)pvBitmap)
4999 : "memory");
5000# else
5001 __asm
5002 {
5003# ifdef RT_ARCH_AMD64
5004 mov rax, [pvBitmap]
5005 mov edx, [iBit]
5006 btr [rax], edx
5007# else
5008 mov eax, [pvBitmap]
5009 mov edx, [iBit]
5010 btr [eax], edx
5011# endif
5012 }
5013# endif
5014}
5015#endif
5016
5017
5018/**
5019 * Atomically clears a bit in a bitmap, ordered.
5020 *
5021 * @param pvBitmap Pointer to the bitmap.
5022 * @param iBit The bit to clear.
5023 * @remark No memory barrier, take care on smp.
5024 */
5025#if RT_INLINE_ASM_EXTERNAL
5026DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5027#else
5028DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5029{
5030# if RT_INLINE_ASM_GNU_STYLE
5031 __asm__ __volatile__ ("lock; btrl %1, %0"
5032 : "=m" (*(volatile long *)pvBitmap)
5033 : "Ir" (iBit),
5034 "m" (*(volatile long *)pvBitmap)
5035 : "memory");
5036# else
5037 __asm
5038 {
5039# ifdef RT_ARCH_AMD64
5040 mov rax, [pvBitmap]
5041 mov edx, [iBit]
5042 lock btr [rax], edx
5043# else
5044 mov eax, [pvBitmap]
5045 mov edx, [iBit]
5046 lock btr [eax], edx
5047# endif
5048 }
5049# endif
5050}
5051#endif
5052
5053
5054/**
5055 * Toggles a bit in a bitmap.
5056 *
5057 * @param pvBitmap Pointer to the bitmap.
5058 * @param iBit The bit to toggle.
5059 */
5060#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5061DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5062#else
5063DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5064{
5065# if RT_INLINE_ASM_USES_INTRIN
5066 _bittestandcomplement((long *)pvBitmap, iBit);
5067# elif RT_INLINE_ASM_GNU_STYLE
5068 __asm__ __volatile__ ("btcl %1, %0"
5069 : "=m" (*(volatile long *)pvBitmap)
5070 : "Ir" (iBit),
5071 "m" (*(volatile long *)pvBitmap)
5072 : "memory");
5073# else
5074 __asm
5075 {
5076# ifdef RT_ARCH_AMD64
5077 mov rax, [pvBitmap]
5078 mov edx, [iBit]
5079 btc [rax], edx
5080# else
5081 mov eax, [pvBitmap]
5082 mov edx, [iBit]
5083 btc [eax], edx
5084# endif
5085 }
5086# endif
5087}
5088#endif
5089
5090
5091/**
5092 * Atomically toggles a bit in a bitmap, ordered.
5093 *
5094 * @param pvBitmap Pointer to the bitmap.
5095 * @param iBit The bit to toggle.
5096 */
5097#if RT_INLINE_ASM_EXTERNAL
5098DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5099#else
5100DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5101{
5102# if RT_INLINE_ASM_GNU_STYLE
5103 __asm__ __volatile__ ("lock; btcl %1, %0"
5104 : "=m" (*(volatile long *)pvBitmap)
5105 : "Ir" (iBit),
5106 "m" (*(volatile long *)pvBitmap)
5107 : "memory");
5108# else
5109 __asm
5110 {
5111# ifdef RT_ARCH_AMD64
5112 mov rax, [pvBitmap]
5113 mov edx, [iBit]
5114 lock btc [rax], edx
5115# else
5116 mov eax, [pvBitmap]
5117 mov edx, [iBit]
5118 lock btc [eax], edx
5119# endif
5120 }
5121# endif
5122}
5123#endif
5124
5125
5126/**
5127 * Tests and sets a bit in a bitmap.
5128 *
5129 * @returns true if the bit was set.
5130 * @returns false if the bit was clear.
5131 * @param pvBitmap Pointer to the bitmap.
5132 * @param iBit The bit to test and set.
5133 */
5134#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5135DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5136#else
5137DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5138{
5139 union { bool f; uint32_t u32; uint8_t u8; } rc;
5140# if RT_INLINE_ASM_USES_INTRIN
5141 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5142
5143# elif RT_INLINE_ASM_GNU_STYLE
5144 __asm__ __volatile__ ("btsl %2, %1\n\t"
5145 "setc %b0\n\t"
5146 "andl $1, %0\n\t"
5147 : "=q" (rc.u32),
5148 "=m" (*(volatile long *)pvBitmap)
5149 : "Ir" (iBit),
5150 "m" (*(volatile long *)pvBitmap)
5151 : "memory");
5152# else
5153 __asm
5154 {
5155 mov edx, [iBit]
5156# ifdef RT_ARCH_AMD64
5157 mov rax, [pvBitmap]
5158 bts [rax], edx
5159# else
5160 mov eax, [pvBitmap]
5161 bts [eax], edx
5162# endif
5163 setc al
5164 and eax, 1
5165 mov [rc.u32], eax
5166 }
5167# endif
5168 return rc.f;
5169}
5170#endif
5171
5172
5173/**
5174 * Atomically tests and sets a bit in a bitmap, ordered.
5175 *
5176 * @returns true if the bit was set.
5177 * @returns false if the bit was clear.
5178 * @param pvBitmap Pointer to the bitmap.
5179 * @param iBit The bit to test and set.
5180 */
5181#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5182DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5183#else
5184DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5185{
5186 union { bool f; uint32_t u32; uint8_t u8; } rc;
5187# if RT_INLINE_ASM_USES_INTRIN
5188 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5189# elif RT_INLINE_ASM_GNU_STYLE
5190 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
5191 "setc %b0\n\t"
5192 "andl $1, %0\n\t"
5193 : "=q" (rc.u32),
5194 "=m" (*(volatile long *)pvBitmap)
5195 : "Ir" (iBit),
5196 "m" (*(volatile long *)pvBitmap)
5197 : "memory");
5198# else
5199 __asm
5200 {
5201 mov edx, [iBit]
5202# ifdef RT_ARCH_AMD64
5203 mov rax, [pvBitmap]
5204 lock bts [rax], edx
5205# else
5206 mov eax, [pvBitmap]
5207 lock bts [eax], edx
5208# endif
5209 setc al
5210 and eax, 1
5211 mov [rc.u32], eax
5212 }
5213# endif
5214 return rc.f;
5215}
5216#endif
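
/* Illustrative sketch (not part of the API; g_bmSlotsInUse and iSlot are
 * hypothetical): using the atomic test-and-set as a claim flag for a slot in
 * a shared table, where a clear bit means the slot is free.
 *
 *      if (!ASMAtomicBitTestAndSet(&g_bmSlotsInUse, iSlot))
 *      {
 *          // the bit was clear and is now set - we own slot iSlot ...
 *          ASMAtomicBitClear(&g_bmSlotsInUse, iSlot);   // release it again
 *      }
 */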
5217
5218
5219/**
5220 * Tests and clears a bit in a bitmap.
5221 *
5222 * @returns true if the bit was set.
5223 * @returns false if the bit was clear.
5224 * @param pvBitmap Pointer to the bitmap.
5225 * @param iBit The bit to test and clear.
5226 */
5227#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5228DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5229#else
5230DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5231{
5232 union { bool f; uint32_t u32; uint8_t u8; } rc;
5233# if RT_INLINE_ASM_USES_INTRIN
5234 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5235
5236# elif RT_INLINE_ASM_GNU_STYLE
5237 __asm__ __volatile__ ("btrl %2, %1\n\t"
5238 "setc %b0\n\t"
5239 "andl $1, %0\n\t"
5240 : "=q" (rc.u32),
5241 "=m" (*(volatile long *)pvBitmap)
5242 : "Ir" (iBit),
5243 "m" (*(volatile long *)pvBitmap)
5244 : "memory");
5245# else
5246 __asm
5247 {
5248 mov edx, [iBit]
5249# ifdef RT_ARCH_AMD64
5250 mov rax, [pvBitmap]
5251 btr [rax], edx
5252# else
5253 mov eax, [pvBitmap]
5254 btr [eax], edx
5255# endif
5256 setc al
5257 and eax, 1
5258 mov [rc.u32], eax
5259 }
5260# endif
5261 return rc.f;
5262}
5263#endif
5264
5265
5266/**
5267 * Atomically tests and clears a bit in a bitmap, ordered.
5268 *
5269 * @returns true if the bit was set.
5270 * @returns false if the bit was clear.
5271 * @param pvBitmap Pointer to the bitmap.
5272 * @param iBit The bit to test and clear.
5273 * @remark No memory barrier, take care on smp.
5274 */
5275#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5276DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5277#else
5278DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5279{
5280 union { bool f; uint32_t u32; uint8_t u8; } rc;
5281# if RT_INLINE_ASM_USES_INTRIN
5282 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5283
5284# elif RT_INLINE_ASM_GNU_STYLE
5285 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
5286 "setc %b0\n\t"
5287 "andl $1, %0\n\t"
5288 : "=q" (rc.u32),
5289 "=m" (*(volatile long *)pvBitmap)
5290 : "Ir" (iBit),
5291 "m" (*(volatile long *)pvBitmap)
5292 : "memory");
5293# else
5294 __asm
5295 {
5296 mov edx, [iBit]
5297# ifdef RT_ARCH_AMD64
5298 mov rax, [pvBitmap]
5299 lock btr [rax], edx
5300# else
5301 mov eax, [pvBitmap]
5302 lock btr [eax], edx
5303# endif
5304 setc al
5305 and eax, 1
5306 mov [rc.u32], eax
5307 }
5308# endif
5309 return rc.f;
5310}
5311#endif
5312
5313
5314/**
5315 * Tests and toggles a bit in a bitmap.
5316 *
5317 * @returns true if the bit was set.
5318 * @returns false if the bit was clear.
5319 * @param pvBitmap Pointer to the bitmap.
5320 * @param iBit The bit to test and toggle.
5321 */
5322#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5323DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5324#else
5325DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5326{
5327 union { bool f; uint32_t u32; uint8_t u8; } rc;
5328# if RT_INLINE_ASM_USES_INTRIN
5329 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5330
5331# elif RT_INLINE_ASM_GNU_STYLE
5332 __asm__ __volatile__ ("btcl %2, %1\n\t"
5333 "setc %b0\n\t"
5334 "andl $1, %0\n\t"
5335 : "=q" (rc.u32),
5336 "=m" (*(volatile long *)pvBitmap)
5337 : "Ir" (iBit),
5338 "m" (*(volatile long *)pvBitmap)
5339 : "memory");
5340# else
5341 __asm
5342 {
5343 mov edx, [iBit]
5344# ifdef RT_ARCH_AMD64
5345 mov rax, [pvBitmap]
5346 btc [rax], edx
5347# else
5348 mov eax, [pvBitmap]
5349 btc [eax], edx
5350# endif
5351 setc al
5352 and eax, 1
5353 mov [rc.u32], eax
5354 }
5355# endif
5356 return rc.f;
5357}
5358#endif
5359
5360
5361/**
5362 * Atomically tests and toggles a bit in a bitmap, ordered.
5363 *
5364 * @returns true if the bit was set.
5365 * @returns false if the bit was clear.
5366 * @param pvBitmap Pointer to the bitmap.
5367 * @param iBit The bit to test and toggle.
5368 */
5369#if RT_INLINE_ASM_EXTERNAL
5370DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5371#else
5372DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5373{
5374 union { bool f; uint32_t u32; uint8_t u8; } rc;
5375# if RT_INLINE_ASM_GNU_STYLE
5376 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
5377 "setc %b0\n\t"
5378 "andl $1, %0\n\t"
5379 : "=q" (rc.u32),
5380 "=m" (*(volatile long *)pvBitmap)
5381 : "Ir" (iBit),
5382 "m" (*(volatile long *)pvBitmap)
5383 : "memory");
5384# else
5385 __asm
5386 {
5387 mov edx, [iBit]
5388# ifdef RT_ARCH_AMD64
5389 mov rax, [pvBitmap]
5390 lock btc [rax], edx
5391# else
5392 mov eax, [pvBitmap]
5393 lock btc [eax], edx
5394# endif
5395 setc al
5396 and eax, 1
5397 mov [rc.u32], eax
5398 }
5399# endif
5400 return rc.f;
5401}
5402#endif
5403
5404
5405/**
5406 * Tests if a bit in a bitmap is set.
5407 *
5408 * @returns true if the bit is set.
5409 * @returns false if the bit is clear.
5410 * @param pvBitmap Pointer to the bitmap.
5411 * @param iBit The bit to test.
5412 */
5413#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5414DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5415#else
5416DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5417{
5418 union { bool f; uint32_t u32; uint8_t u8; } rc;
5419# if RT_INLINE_ASM_USES_INTRIN
5420 rc.u32 = _bittest((long *)pvBitmap, iBit);
5421# elif RT_INLINE_ASM_GNU_STYLE
5422
5423 __asm__ __volatile__ ("btl %2, %1\n\t"
5424 "setc %b0\n\t"
5425 "andl $1, %0\n\t"
5426 : "=q" (rc.u32)
5427 : "m" (*(const volatile long *)pvBitmap),
5428 "Ir" (iBit)
5429 : "memory");
5430# else
5431 __asm
5432 {
5433 mov edx, [iBit]
5434# ifdef RT_ARCH_AMD64
5435 mov rax, [pvBitmap]
5436 bt [rax], edx
5437# else
5438 mov eax, [pvBitmap]
5439 bt [eax], edx
5440# endif
5441 setc al
5442 and eax, 1
5443 mov [rc.u32], eax
5444 }
5445# endif
5446 return rc.f;
5447}
5448#endif
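
/* Illustrative sketch (not part of the API; the bitmap below is made up): the
 * non-atomic bit operations above on a private, single-writer bitmap. Bit
 * iBit lives in uint32_t element iBit / 32 of the array.
 *
 *      uint32_t au32Bitmap[256 / 32] = { 0 };
 *      ASMBitSet(au32Bitmap, 42);
 *      Assert( ASMBitTest(au32Bitmap, 42));
 *      ASMBitToggle(au32Bitmap, 42);                // back to clear
 *      Assert(!ASMBitTest(au32Bitmap, 42));
 */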
5449
5450
5451/**
5452 * Clears a bit range within a bitmap.
5453 *
5454 * @param pvBitmap Pointer to the bitmap.
5455 * @param iBitStart The first bit to clear.
5456 * @param iBitEnd The first bit not to clear.
5457 */
5458DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5459{
5460 if (iBitStart < iBitEnd)
5461 {
5462 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5463 int iStart = iBitStart & ~31;
5464 int iEnd = iBitEnd & ~31;
5465 if (iStart == iEnd)
5466 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5467 else
5468 {
5469 /* bits in first dword. */
5470 if (iBitStart & 31)
5471 {
5472 *pu32 &= (1 << (iBitStart & 31)) - 1;
5473 pu32++;
5474 iBitStart = iStart + 32;
5475 }
5476
5477 /* whole dword. */
5478 if (iBitStart != iEnd)
5479 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5480
5481 /* bits in last dword. */
5482 if (iBitEnd & 31)
5483 {
5484 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5485 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5486 }
5487 }
5488 }
5489}
5490
5491
5492/**
5493 * Sets a bit range within a bitmap.
5494 *
5495 * @param pvBitmap Pointer to the bitmap.
5496 * @param iBitStart The first bit to set.
5497 * @param iBitEnd The first bit not to set.
5498 */
5499DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5500{
5501 if (iBitStart < iBitEnd)
5502 {
5503 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5504 int iStart = iBitStart & ~31;
5505 int iEnd = iBitEnd & ~31;
5506 if (iStart == iEnd)
5507 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
5508 else
5509 {
5510 /* bits in first dword. */
5511 if (iBitStart & 31)
5512 {
5513 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5514 pu32++;
5515 iBitStart = iStart + 32;
5516 }
5517
5518 /* whole dword. */
5519 if (iBitStart != iEnd)
5520 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5521
5522 /* bits in last dword. */
5523 if (iBitEnd & 31)
5524 {
5525 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5526 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5527 }
5528 }
5529 }
5530}
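
/* Illustrative sketch (not part of the API; the bitmap is assumed to exist):
 * iBitEnd is exclusive, so this marks bits 8 thru 71 and then releases them.
 *
 *      ASMBitSetRange(au32Bitmap, 8, 72);     // set bits 8..71
 *      ASMBitClearRange(au32Bitmap, 8, 72);   // clear them again
 */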
5531
5532
5533/**
5534 * Finds the first clear bit in a bitmap.
5535 *
5536 * @returns Index of the first zero bit.
5537 * @returns -1 if no clear bit was found.
5538 * @param pvBitmap Pointer to the bitmap.
5539 * @param cBits The number of bits in the bitmap. Multiple of 32.
5540 */
5541#if RT_INLINE_ASM_EXTERNAL
5542DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5543#else
5544DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5545{
5546 if (cBits)
5547 {
5548 int32_t iBit;
5549# if RT_INLINE_ASM_GNU_STYLE
5550 RTCCUINTREG uEAX, uECX, uEDI;
5551 cBits = RT_ALIGN_32(cBits, 32);
5552 __asm__ __volatile__("repe; scasl\n\t"
5553 "je 1f\n\t"
5554# ifdef RT_ARCH_AMD64
5555 "lea -4(%%rdi), %%rdi\n\t"
5556 "xorl (%%rdi), %%eax\n\t"
5557 "subq %5, %%rdi\n\t"
5558# else
5559 "lea -4(%%edi), %%edi\n\t"
5560 "xorl (%%edi), %%eax\n\t"
5561 "subl %5, %%edi\n\t"
5562# endif
5563 "shll $3, %%edi\n\t"
5564 "bsfl %%eax, %%edx\n\t"
5565 "addl %%edi, %%edx\n\t"
5566 "1:\t\n"
5567 : "=d" (iBit),
5568 "=&c" (uECX),
5569 "=&D" (uEDI),
5570 "=&a" (uEAX)
5571 : "0" (0xffffffff),
5572 "mr" (pvBitmap),
5573 "1" (cBits >> 5),
5574 "2" (pvBitmap),
5575 "3" (0xffffffff));
5576# else
5577 cBits = RT_ALIGN_32(cBits, 32);
5578 __asm
5579 {
5580# ifdef RT_ARCH_AMD64
5581 mov rdi, [pvBitmap]
5582 mov rbx, rdi
5583# else
5584 mov edi, [pvBitmap]
5585 mov ebx, edi
5586# endif
5587 mov edx, 0ffffffffh
5588 mov eax, edx
5589 mov ecx, [cBits]
5590 shr ecx, 5
5591 repe scasd
5592 je done
5593
5594# ifdef RT_ARCH_AMD64
5595 lea rdi, [rdi - 4]
5596 xor eax, [rdi]
5597 sub rdi, rbx
5598# else
5599 lea edi, [edi - 4]
5600 xor eax, [edi]
5601 sub edi, ebx
5602# endif
5603 shl edi, 3
5604 bsf edx, eax
5605 add edx, edi
5606 done:
5607 mov [iBit], edx
5608 }
5609# endif
5610 return iBit;
5611 }
5612 return -1;
5613}
5614#endif
5615
5616
5617/**
5618 * Finds the next clear bit in a bitmap.
5619 *
5620 * @returns Index of the first zero bit.
5621 * @returns -1 if no clear bit was found.
5622 * @param pvBitmap Pointer to the bitmap.
5623 * @param cBits The number of bits in the bitmap. Multiple of 32.
5624 * @param iBitPrev The bit returned from the last search.
5625 * The search will start at iBitPrev + 1.
5626 */
5627#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5628DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5629#else
5630DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5631{
5632 int iBit = ++iBitPrev & 31;
5633 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5634 cBits -= iBitPrev & ~31;
5635 if (iBit)
5636 {
5637 /* inspect the first dword. */
5638 uint32_t u32 = (~*(const volatile uint32_t *)pvBitmap) >> iBit;
5639# if RT_INLINE_ASM_USES_INTRIN
5640 unsigned long ulBit = 0;
5641 if (_BitScanForward(&ulBit, u32))
5642 return ulBit + iBitPrev;
5643 iBit = -1;
5644# else
5645# if RT_INLINE_ASM_GNU_STYLE
5646 __asm__ __volatile__("bsf %1, %0\n\t"
5647 "jnz 1f\n\t"
5648 "movl $-1, %0\n\t"
5649 "1:\n\t"
5650 : "=r" (iBit)
5651 : "r" (u32));
5652# else
5653 __asm
5654 {
5655 mov edx, [u32]
5656 bsf eax, edx
5657 jnz done
5658 mov eax, 0ffffffffh
5659 done:
5660 mov [iBit], eax
5661 }
5662# endif
5663 if (iBit >= 0)
5664 return iBit + iBitPrev;
5665# endif
5666 /* Search the rest of the bitmap, if there is anything. */
5667 if (cBits > 32)
5668 {
5669 iBit = ASMBitFirstClear((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5670 if (iBit >= 0)
5671 return iBit + (iBitPrev & ~31) + 32;
5672 }
5673 }
5674 else
5675 {
5676 /* Search the rest of the bitmap. */
5677 iBit = ASMBitFirstClear(pvBitmap, cBits);
5678 if (iBit >= 0)
5679 return iBit + (iBitPrev & ~31);
5680 }
5681 return iBit;
5682}
5683#endif
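
/* Illustrative sketch (not part of the API; au32Bitmap and cBits are assumed
 * to exist, cBits being a multiple of 32): scanning an allocation bitmap for
 * free (clear) entries.
 *
 *      int iBit = ASMBitFirstClear(au32Bitmap, cBits);
 *      while (iBit >= 0)
 *      {
 *          // ... iBit is a free slot; claim or inspect it ...
 *          iBit = ASMBitNextClear(au32Bitmap, cBits, iBit);
 *      }
 */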
5684
5685
5686/**
5687 * Finds the first set bit in a bitmap.
5688 *
5689 * @returns Index of the first set bit.
5690 * @returns -1 if no set bit was found.
5691 * @param pvBitmap Pointer to the bitmap.
5692 * @param cBits The number of bits in the bitmap. Multiple of 32.
5693 */
5694#if RT_INLINE_ASM_EXTERNAL
5695DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
5696#else
5697DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
5698{
5699 if (cBits)
5700 {
5701 int32_t iBit;
5702# if RT_INLINE_ASM_GNU_STYLE
5703 RTCCUINTREG uEAX, uECX, uEDI;
5704 cBits = RT_ALIGN_32(cBits, 32);
5705 __asm__ __volatile__("repe; scasl\n\t"
5706 "je 1f\n\t"
5707# ifdef RT_ARCH_AMD64
5708 "lea -4(%%rdi), %%rdi\n\t"
5709 "movl (%%rdi), %%eax\n\t"
5710 "subq %5, %%rdi\n\t"
5711# else
5712 "lea -4(%%edi), %%edi\n\t"
5713 "movl (%%edi), %%eax\n\t"
5714 "subl %5, %%edi\n\t"
5715# endif
5716 "shll $3, %%edi\n\t"
5717 "bsfl %%eax, %%edx\n\t"
5718 "addl %%edi, %%edx\n\t"
5719 "1:\t\n"
5720 : "=d" (iBit),
5721 "=&c" (uECX),
5722 "=&D" (uEDI),
5723 "=&a" (uEAX)
5724 : "0" (0xffffffff),
5725 "mr" (pvBitmap),
5726 "1" (cBits >> 5),
5727 "2" (pvBitmap),
5728 "3" (0));
5729# else
5730 cBits = RT_ALIGN_32(cBits, 32);
5731 __asm
5732 {
5733# ifdef RT_ARCH_AMD64
5734 mov rdi, [pvBitmap]
5735 mov rbx, rdi
5736# else
5737 mov edi, [pvBitmap]
5738 mov ebx, edi
5739# endif
5740 mov edx, 0ffffffffh
5741 xor eax, eax
5742 mov ecx, [cBits]
5743 shr ecx, 5
5744 repe scasd
5745 je done
5746# ifdef RT_ARCH_AMD64
5747 lea rdi, [rdi - 4]
5748 mov eax, [rdi]
5749 sub rdi, rbx
5750# else
5751 lea edi, [edi - 4]
5752 mov eax, [edi]
5753 sub edi, ebx
5754# endif
5755 shl edi, 3
5756 bsf edx, eax
5757 add edx, edi
5758 done:
5759 mov [iBit], edx
5760 }
5761# endif
5762 return iBit;
5763 }
5764 return -1;
5765}
5766#endif
5767
5768
5769/**
5770 * Finds the next set bit in a bitmap.
5771 *
5772 * @returns Index of the next set bit.
5773 * @returns -1 if no set bit was found.
5774 * @param pvBitmap Pointer to the bitmap.
5775 * @param cBits The number of bits in the bitmap. Multiple of 32.
5776 * @param iBitPrev The bit returned from the last search.
5777 * The search will start at iBitPrev + 1.
5778 */
5779#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5780DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5781#else
5782DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5783{
5784 int iBit = ++iBitPrev & 31;
5785 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5786 cBits -= iBitPrev & ~31;
5787 if (iBit)
5788 {
5789 /* inspect the first dword. */
5790 uint32_t u32 = *(const volatile uint32_t *)pvBitmap >> iBit;
5791# if RT_INLINE_ASM_USES_INTRIN
5792 unsigned long ulBit = 0;
5793 if (_BitScanForward(&ulBit, u32))
5794 return ulBit + iBitPrev;
5795 iBit = -1;
5796# else
5797# if RT_INLINE_ASM_GNU_STYLE
5798 __asm__ __volatile__("bsf %1, %0\n\t"
5799 "jnz 1f\n\t"
5800 "movl $-1, %0\n\t"
5801 "1:\n\t"
5802 : "=r" (iBit)
5803 : "r" (u32));
5804# else
5805 __asm
5806 {
5807 mov edx, u32
5808 bsf eax, edx
5809 jnz done
5810 mov eax, 0ffffffffh
5811 done:
5812 mov [iBit], eax
5813 }
5814# endif
5815 if (iBit >= 0)
5816 return iBit + iBitPrev;
5817# endif
5818 /* Search the rest of the bitmap, if there is anything. */
5819 if (cBits > 32)
5820 {
5821 iBit = ASMBitFirstSet((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5822 if (iBit >= 0)
5823 return iBit + (iBitPrev & ~31) + 32;
5824 }
5825
5826 }
5827 else
5828 {
5829 /* Search the rest of the bitmap. */
5830 iBit = ASMBitFirstSet(pvBitmap, cBits);
5831 if (iBit >= 0)
5832 return iBit + (iBitPrev & ~31);
5833 }
5834 return iBit;
5835}
5836#endif
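
/* Illustrative sketch (not part of the API; au32Pending and cBits are assumed
 * to exist): walking every set bit, e.g. the pending sources in an interrupt
 * pending bitmap.
 *
 *      int iBit = ASMBitFirstSet(au32Pending, cBits);
 *      while (iBit >= 0)
 *      {
 *          // ... handle source iBit ...
 *          iBit = ASMBitNextSet(au32Pending, cBits, iBit);
 *      }
 */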
5837
5838
5839/**
5840 * Finds the first bit which is set in the given 32-bit integer.
5841 * Bits are numbered from 1 (least significant) to 32.
5842 *
5843 * @returns index [1..32] of the first set bit.
5844 * @returns 0 if all bits are cleared.
5845 * @param u32 Integer to search for set bits.
5846 * @remark Similar to ffs() in BSD.
5847 */
5848DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5849{
5850# if RT_INLINE_ASM_USES_INTRIN
5851 unsigned long iBit;
5852 if (_BitScanForward(&iBit, u32))
5853 iBit++;
5854 else
5855 iBit = 0;
5856# elif RT_INLINE_ASM_GNU_STYLE
5857 uint32_t iBit;
5858 __asm__ __volatile__("bsf %1, %0\n\t"
5859 "jnz 1f\n\t"
5860 "xorl %0, %0\n\t"
5861 "jmp 2f\n"
5862 "1:\n\t"
5863 "incl %0\n"
5864 "2:\n\t"
5865 : "=r" (iBit)
5866 : "rm" (u32));
5867# else
5868 uint32_t iBit;
5869 _asm
5870 {
5871 bsf eax, [u32]
5872 jnz found
5873 xor eax, eax
5874 jmp done
5875 found:
5876 inc eax
5877 done:
5878 mov [iBit], eax
5879 }
5880# endif
5881 return iBit;
5882}
5883
5884
5885/**
5886 * Finds the first bit which is set in the given 32-bit integer.
5887 * Bits are numbered from 1 (least significant) to 32.
5888 *
5889 * @returns index [1..32] of the first set bit.
5890 * @returns 0 if all bits are cleared.
5891 * @param i32 Integer to search for set bits.
5892 * @remark Similar to ffs() in BSD.
5893 */
5894DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5895{
5896 return ASMBitFirstSetU32((uint32_t)i32);
5897}
5898
5899
5900/**
5901 * Finds the last bit which is set in the given 32-bit integer.
5902 * Bits are numbered from 1 (least significant) to 32.
5903 *
5904 * @returns index [1..32] of the last set bit.
5905 * @returns 0 if all bits are cleared.
5906 * @param u32 Integer to search for set bits.
5907 * @remark Similar to fls() in BSD.
5908 */
5909DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5910{
5911# if RT_INLINE_ASM_USES_INTRIN
5912 unsigned long iBit;
5913 if (_BitScanReverse(&iBit, u32))
5914 iBit++;
5915 else
5916 iBit = 0;
5917# elif RT_INLINE_ASM_GNU_STYLE
5918 uint32_t iBit;
5919 __asm__ __volatile__("bsrl %1, %0\n\t"
5920 "jnz 1f\n\t"
5921 "xorl %0, %0\n\t"
5922 "jmp 2f\n"
5923 "1:\n\t"
5924 "incl %0\n"
5925 "2:\n\t"
5926 : "=r" (iBit)
5927 : "rm" (u32));
5928# else
5929 uint32_t iBit;
5930 _asm
5931 {
5932 bsr eax, [u32]
5933 jnz found
5934 xor eax, eax
5935 jmp done
5936 found:
5937 inc eax
5938 done:
5939 mov [iBit], eax
5940 }
5941# endif
5942 return iBit;
5943}
5944
5945
5946/**
5947 * Finds the last bit which is set in the given 32-bit integer.
5948 * Bits are numbered from 1 (least significant) to 32.
5949 *
5950 * @returns index [1..32] of the last set bit.
5951 * @returns 0 if all bits are cleared.
5952 * @param i32 Integer to search for set bits.
5953 * @remark Similar to fls() in BSD.
5954 */
5955DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5956{
5957 return ASMBitLastSetU32((uint32_t)i32);
5958}
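
/* Illustrative sketch (not part of the API): the ffs/fls style scans use
 * 1-based bit indices and return 0 for a zero input.
 *
 *      Assert(ASMBitFirstSetU32(0) == 0);
 *      Assert(ASMBitFirstSetU32(UINT32_C(0x8010)) == 5);    // lowest set bit is bit 4
 *      Assert(ASMBitLastSetU32(UINT32_C(0x8010)) == 16);    // highest set bit is bit 15
 */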
5959
5960/**
5961 * Reverse the byte order of the given 16-bit integer.
5962 *
5963 * @returns The byte-swapped 16-bit value.
5964 * @param u16 16-bit integer value.
5965 */
5966DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5967{
5968#if RT_INLINE_ASM_USES_INTRIN
5969 u16 = _byteswap_ushort(u16);
5970#elif RT_INLINE_ASM_GNU_STYLE
5971 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5972#else
5973 _asm
5974 {
5975 mov ax, [u16]
5976 ror ax, 8
5977 mov [u16], ax
5978 }
5979#endif
5980 return u16;
5981}
5982
5983/**
5984 * Reverse the byte order of the given 32-bit integer.
5985 *
5986 * @returns The byte-swapped 32-bit value.
5987 * @param u32 32-bit integer value.
5988 */
5989DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5990{
5991#if RT_INLINE_ASM_USES_INTRIN
5992 u32 = _byteswap_ulong(u32);
5993#elif RT_INLINE_ASM_GNU_STYLE
5994 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5995#else
5996 _asm
5997 {
5998 mov eax, [u32]
5999 bswap eax
6000 mov [u32], eax
6001 }
6002#endif
6003 return u32;
6004}
6005
6006
6007/**
6008 * Reverse the byte order of the given 64-bit integer.
6009 *
6010 * @returns The byte-swapped 64-bit value.
6011 * @param u64 64-bit integer value.
6012 */
6013DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6014{
6015#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6016 u64 = _byteswap_uint64(u64);
6017#else
6018 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6019 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6020#endif
6021 return u64;
6022}
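
/* Illustrative sketch (not part of the API; uBigEndianFromWire is assumed to
 * exist): converting a big-endian value read off the wire to host byte order
 * on a little-endian host. The RT_H2BE_* / RT_BE2H_* macros elsewhere in IPRT
 * are the usual endian-aware front end for these helpers.
 *
 *      uint32_t uHost = ASMByteSwapU32(uBigEndianFromWire);
 */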
6023
6024
6025/** @} */
6026
6027
6028/** @} */
6029#endif
6030