VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 11173

Last change on this file was r11173, checked in by vboxsync, 16 years ago:

IPRT: Added ASMModS64ByS32RetS32 and ASMModU64ByU32RetU32.

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're compiling with _MSC_VER >= 1400 and thus can use the
39 * compiler intrinsics. Otherwise defined as 0.
40 */
41
42#ifdef _MSC_VER
43# if _MSC_VER >= 1400
44# define RT_INLINE_ASM_USES_INTRIN 1
45# include <intrin.h>
46 /* Emit the intrinsics at all optimization levels. */
47# pragma intrinsic(_ReadWriteBarrier)
48# pragma intrinsic(__cpuid)
49# pragma intrinsic(_enable)
50# pragma intrinsic(_disable)
51# pragma intrinsic(__rdtsc)
52# pragma intrinsic(__readmsr)
53# pragma intrinsic(__writemsr)
54# pragma intrinsic(__outbyte)
55# pragma intrinsic(__outword)
56# pragma intrinsic(__outdword)
57# pragma intrinsic(__inbyte)
58# pragma intrinsic(__inword)
59# pragma intrinsic(__indword)
60# pragma intrinsic(__invlpg)
61# pragma intrinsic(__stosd)
62# pragma intrinsic(__stosw)
63# pragma intrinsic(__stosb)
64# pragma intrinsic(__readcr0)
65# pragma intrinsic(__readcr2)
66# pragma intrinsic(__readcr3)
67# pragma intrinsic(__readcr4)
68# pragma intrinsic(__writecr0)
69# pragma intrinsic(__writecr3)
70# pragma intrinsic(__writecr4)
71# pragma intrinsic(_BitScanForward)
72# pragma intrinsic(_BitScanReverse)
73# pragma intrinsic(_bittest)
74# pragma intrinsic(_bittestandset)
75# pragma intrinsic(_bittestandreset)
76# pragma intrinsic(_bittestandcomplement)
77# pragma intrinsic(_byteswap_ushort)
78# pragma intrinsic(_byteswap_ulong)
79# pragma intrinsic(_interlockedbittestandset)
80# pragma intrinsic(_interlockedbittestandreset)
81# pragma intrinsic(_InterlockedAnd)
82# pragma intrinsic(_InterlockedOr)
83# pragma intrinsic(_InterlockedIncrement)
84# pragma intrinsic(_InterlockedDecrement)
85# pragma intrinsic(_InterlockedExchange)
86# pragma intrinsic(_InterlockedExchangeAdd)
87# pragma intrinsic(_InterlockedCompareExchange)
88# pragma intrinsic(_InterlockedCompareExchange64)
89# ifdef RT_ARCH_AMD64
90# pragma intrinsic(__stosq)
91# pragma intrinsic(__readcr8)
92# pragma intrinsic(__writecr8)
93# pragma intrinsic(_byteswap_uint64)
94# pragma intrinsic(_InterlockedExchange64)
95# endif
96# endif
97#endif
98#ifndef RT_INLINE_ASM_USES_INTRIN
99# define RT_INLINE_ASM_USES_INTRIN 0
100#endif
101
102
103
104/** @defgroup grp_asm ASM - Assembly Routines
105 * @ingroup grp_rt
106 *
107 * @remarks The difference between ordered and unordered atomic operations is that
108 * the former will complete outstanding reads and writes before continuing,
109 * while the latter makes no promises about the order. Ordered
110 * operations do not, it seems, make any 100% promise as to whether
111 * the operation will complete before any subsequent memory access.
112 * (Please correct this if it is wrong.)
113 *
114 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
115 * are unordered (note the Uo).
116 *
117 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
118 * or even optimize assembler instructions away. For instance, in the following code
119 * the second rdmsr instruction is optimized away because gcc treats that instruction
120 * as deterministic:
121 *
122 * @code
123 * static inline uint32_t rdmsr_low(int idx)
124 * {
125 * uint32_t low;
126 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
127 * return low;
 * }
128 * ...
129 * uint32_t msr1 = rdmsr_low(1);
130 * foo(msr1);
131 * msr1 = rdmsr_low(1);
132 * bar(msr1);
133 * @endcode
134 *
135 * The input parameter of rdmsr_low is the same for both calls, and therefore gcc will
136 * reuse the result of the first call as the argument to bar() as well. For rdmsr this
137 * is not acceptable, as this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
142
143/** @def RT_INLINE_ASM_EXTERNAL
144 * Defined as 1 if the compiler does not support inline assembly.
145 * The ASM* functions will then be implemented in an external .asm file.
146 *
147 * @remark At the present time it's unconfirmed whether or not Microsoft omitted
148 * inline assembly support from their AMD64 compiler.
149 */
150#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
151# define RT_INLINE_ASM_EXTERNAL 1
152#else
153# define RT_INLINE_ASM_EXTERNAL 0
154#endif
155
156/** @def RT_INLINE_ASM_GNU_STYLE
157 * Defined as 1 if the compiler understands GNU-style inline assembly.
158 */
159#if defined(_MSC_VER)
160# define RT_INLINE_ASM_GNU_STYLE 0
161#else
162# define RT_INLINE_ASM_GNU_STYLE 1
163#endif
164
165
166/** @todo find a more proper place for this structure? */
167#pragma pack(1)
168/** IDTR */
169typedef struct RTIDTR
170{
171 /** Size of the IDT. */
172 uint16_t cbIdt;
173 /** Address of the IDT. */
174 uintptr_t pIdt;
175} RTIDTR, *PRTIDTR;
176#pragma pack()
177
178#pragma pack(1)
179/** GDTR */
180typedef struct RTGDTR
181{
182 /** Size of the GDT. */
183 uint16_t cbGdt;
184 /** Address of the GDT. */
185 uintptr_t pGdt;
186} RTGDTR, *PRTGDTR;
187#pragma pack()
188
189
190/** @def ASMReturnAddress
191 * Gets the return address of the current (or calling if you like) function or method.
192 */
193#ifdef _MSC_VER
194# ifdef __cplusplus
195extern "C"
196# endif
197void * _ReturnAddress(void);
198# pragma intrinsic(_ReturnAddress)
199# define ASMReturnAddress() _ReturnAddress()
200#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
201# define ASMReturnAddress() __builtin_return_address(0)
202#else
203# error "Unsupported compiler."
204#endif
205
206
207/**
208 * Gets the content of the IDTR CPU register.
209 * @param pIdtr Where to store the IDTR contents.
210 */
211#if RT_INLINE_ASM_EXTERNAL
212DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
213#else
214DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
215{
216# if RT_INLINE_ASM_GNU_STYLE
217 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
218# else
219 __asm
220 {
221# ifdef RT_ARCH_AMD64
222 mov rax, [pIdtr]
223 sidt [rax]
224# else
225 mov eax, [pIdtr]
226 sidt [eax]
227# endif
228 }
229# endif
230}
231#endif
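/** @par Usage example (added illustration, not part of the original docs)
 *  A minimal sketch of reading the IDT descriptor; the local variable name
 *  is arbitrary.
 *  @code
 *  RTIDTR Idtr;
 *  ASMGetIDTR(&Idtr);
 *  // Idtr.cbIdt holds the IDT limit, Idtr.pIdt the linear base address.
 *  @endcode
 */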
232
233
234/**
235 * Sets the content of the IDTR CPU register.
236 * @param pIdtr Where to load the IDTR contents from
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
240#else
241DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 lidt [rax]
251# else
252 mov eax, [pIdtr]
253 lidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Gets the content of the GDTR CPU register.
263 * @param pGdtr Where to store the GDTR contents.
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
267#else
268DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pGdtr]
277 sgdt [rax]
278# else
279 mov eax, [pGdtr]
280 sgdt [eax]
281# endif
282 }
283# endif
284}
285#endif
286
287/**
288 * Get the cs register.
289 * @returns cs.
290 */
291#if RT_INLINE_ASM_EXTERNAL
292DECLASM(RTSEL) ASMGetCS(void);
293#else
294DECLINLINE(RTSEL) ASMGetCS(void)
295{
296 RTSEL SelCS;
297# if RT_INLINE_ASM_GNU_STYLE
298 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
299# else
300 __asm
301 {
302 mov ax, cs
303 mov [SelCS], ax
304 }
305# endif
306 return SelCS;
307}
308#endif
309
310
311/**
312 * Get the DS register.
313 * @returns DS.
314 */
315#if RT_INLINE_ASM_EXTERNAL
316DECLASM(RTSEL) ASMGetDS(void);
317#else
318DECLINLINE(RTSEL) ASMGetDS(void)
319{
320 RTSEL SelDS;
321# if RT_INLINE_ASM_GNU_STYLE
322 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
323# else
324 __asm
325 {
326 mov ax, ds
327 mov [SelDS], ax
328 }
329# endif
330 return SelDS;
331}
332#endif
333
334
335/**
336 * Get the ES register.
337 * @returns ES.
338 */
339#if RT_INLINE_ASM_EXTERNAL
340DECLASM(RTSEL) ASMGetES(void);
341#else
342DECLINLINE(RTSEL) ASMGetES(void)
343{
344 RTSEL SelES;
345# if RT_INLINE_ASM_GNU_STYLE
346 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
347# else
348 __asm
349 {
350 mov ax, es
351 mov [SelES], ax
352 }
353# endif
354 return SelES;
355}
356#endif
357
358
359/**
360 * Get the FS register.
361 * @returns FS.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetFS(void);
365#else
366DECLINLINE(RTSEL) ASMGetFS(void)
367{
368 RTSEL SelFS;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
371# else
372 __asm
373 {
374 mov ax, fs
375 mov [SelFS], ax
376 }
377# endif
378 return SelFS;
379}
380#endif
381
382
383/**
384 * Get the GS register.
385 * @returns GS.
386 */
387#if RT_INLINE_ASM_EXTERNAL
388DECLASM(RTSEL) ASMGetGS(void);
389#else
390DECLINLINE(RTSEL) ASMGetGS(void)
391{
392 RTSEL SelGS;
393# if RT_INLINE_ASM_GNU_STYLE
394 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
395# else
396 __asm
397 {
398 mov ax, gs
399 mov [SelGS], ax
400 }
401# endif
402 return SelGS;
403}
404#endif
405
406
407/**
408 * Get the SS register.
409 * @returns SS.
410 */
411#if RT_INLINE_ASM_EXTERNAL
412DECLASM(RTSEL) ASMGetSS(void);
413#else
414DECLINLINE(RTSEL) ASMGetSS(void)
415{
416 RTSEL SelSS;
417# if RT_INLINE_ASM_GNU_STYLE
418 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
419# else
420 __asm
421 {
422 mov ax, ss
423 mov [SelSS], ax
424 }
425# endif
426 return SelSS;
427}
428#endif
429
430
431/**
432 * Get the TR register.
433 * @returns TR.
434 */
435#if RT_INLINE_ASM_EXTERNAL
436DECLASM(RTSEL) ASMGetTR(void);
437#else
438DECLINLINE(RTSEL) ASMGetTR(void)
439{
440 RTSEL SelTR;
441# if RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
443# else
444 __asm
445 {
446 str ax
447 mov [SelTR], ax
448 }
449# endif
450 return SelTR;
451}
452#endif
453
454
455/**
456 * Get the [RE]FLAGS register.
457 * @returns [RE]FLAGS.
458 */
459#if RT_INLINE_ASM_EXTERNAL
460DECLASM(RTCCUINTREG) ASMGetFlags(void);
461#else
462DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
463{
464 RTCCUINTREG uFlags;
465# if RT_INLINE_ASM_GNU_STYLE
466# ifdef RT_ARCH_AMD64
467 __asm__ __volatile__("pushfq\n\t"
468 "popq %0\n\t"
469 : "=g" (uFlags));
470# else
471 __asm__ __volatile__("pushfl\n\t"
472 "popl %0\n\t"
473 : "=g" (uFlags));
474# endif
475# else
476 __asm
477 {
478# ifdef RT_ARCH_AMD64
479 pushfq
480 pop [uFlags]
481# else
482 pushfd
483 pop [uFlags]
484# endif
485 }
486# endif
487 return uFlags;
488}
489#endif
490
491
492/**
493 * Set the [RE]FLAGS register.
494 * @param uFlags The new [RE]FLAGS value.
495 */
496#if RT_INLINE_ASM_EXTERNAL
497DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
498#else
499DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
500{
501# if RT_INLINE_ASM_GNU_STYLE
502# ifdef RT_ARCH_AMD64
503 __asm__ __volatile__("pushq %0\n\t"
504 "popfq\n\t"
505 : : "g" (uFlags));
506# else
507 __asm__ __volatile__("pushl %0\n\t"
508 "popfl\n\t"
509 : : "g" (uFlags));
510# endif
511# else
512 __asm
513 {
514# ifdef RT_ARCH_AMD64
515 push [uFlags]
516 popfq
517# else
518 push [uFlags]
519 popfd
520# endif
521 }
522# endif
523}
524#endif
525
526
527/**
528 * Gets the content of the CPU timestamp counter register.
529 *
530 * @returns TSC.
531 */
532#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
533DECLASM(uint64_t) ASMReadTSC(void);
534#else
535DECLINLINE(uint64_t) ASMReadTSC(void)
536{
537 RTUINT64U u;
538# if RT_INLINE_ASM_GNU_STYLE
539 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
540# else
541# if RT_INLINE_ASM_USES_INTRIN
542 u.u = __rdtsc();
543# else
544 __asm
545 {
546 rdtsc
547 mov [u.s.Lo], eax
548 mov [u.s.Hi], edx
549 }
550# endif
551# endif
552 return u.u;
553}
554#endif
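/** @par Usage example (illustrative sketch)
 *  Timing a code block in raw TSC ticks. The TSC is not guaranteed to be
 *  invariant on all CPUs, so treat the delta as approximate; uDoSomething()
 *  is a hypothetical function.
 *  @code
 *  uint64_t const uTscStart = ASMReadTSC();
 *  uDoSomething();
 *  uint64_t const cTicks = ASMReadTSC() - uTscStart; // elapsed TSC ticks
 *  @endcode
 */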
555
556
557/**
558 * Performs the cpuid instruction returning all registers.
559 *
560 * @param uOperator CPUID operation (eax).
561 * @param pvEAX Where to store eax.
562 * @param pvEBX Where to store ebx.
563 * @param pvECX Where to store ecx.
564 * @param pvEDX Where to store edx.
565 * @remark We're using void pointers to ease the use of special bitfield structures and such.
566 */
567#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
568DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
569#else
570DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
571{
572# if RT_INLINE_ASM_GNU_STYLE
573# ifdef RT_ARCH_AMD64
574 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
575 __asm__ ("cpuid\n\t"
576 : "=a" (uRAX),
577 "=b" (uRBX),
578 "=c" (uRCX),
579 "=d" (uRDX)
580 : "0" (uOperator));
581 *(uint32_t *)pvEAX = (uint32_t)uRAX;
582 *(uint32_t *)pvEBX = (uint32_t)uRBX;
583 *(uint32_t *)pvECX = (uint32_t)uRCX;
584 *(uint32_t *)pvEDX = (uint32_t)uRDX;
585# else
586 __asm__ ("xchgl %%ebx, %1\n\t"
587 "cpuid\n\t"
588 "xchgl %%ebx, %1\n\t"
589 : "=a" (*(uint32_t *)pvEAX),
590 "=r" (*(uint32_t *)pvEBX),
591 "=c" (*(uint32_t *)pvECX),
592 "=d" (*(uint32_t *)pvEDX)
593 : "0" (uOperator));
594# endif
595
596# elif RT_INLINE_ASM_USES_INTRIN
597 int aInfo[4];
598 __cpuid(aInfo, uOperator);
599 *(uint32_t *)pvEAX = aInfo[0];
600 *(uint32_t *)pvEBX = aInfo[1];
601 *(uint32_t *)pvECX = aInfo[2];
602 *(uint32_t *)pvEDX = aInfo[3];
603
604# else
605 uint32_t uEAX;
606 uint32_t uEBX;
607 uint32_t uECX;
608 uint32_t uEDX;
609 __asm
610 {
611 push ebx
612 mov eax, [uOperator]
613 cpuid
614 mov [uEAX], eax
615 mov [uEBX], ebx
616 mov [uECX], ecx
617 mov [uEDX], edx
618 pop ebx
619 }
620 *(uint32_t *)pvEAX = uEAX;
621 *(uint32_t *)pvEBX = uEBX;
622 *(uint32_t *)pvECX = uECX;
623 *(uint32_t *)pvEDX = uEDX;
624# endif
625}
626#endif
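/** @par Usage example (illustrative sketch)
 *  Fetching the CPUID vendor string registers via leaf 0; only the register
 *  order (EBX, EDX, ECX) is architectural, the rest is illustration.
 *  @code
 *  uint32_t uEAX, uEBX, uECX, uEDX;
 *  ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *  // uEBX:uEDX:uECX spell out the vendor, e.g. "GenuineIntel" or "AuthenticAMD".
 *  @endcode
 */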
627
628
629/**
630 * Performs the cpuid instruction returning all registers.
631 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
632 *
633 * @param uOperator CPUID operation (eax).
634 * @param uIdxECX The ECX sub-leaf index.
635 * @param pvEAX Where to store eax.
636 * @param pvEBX Where to store ebx.
637 * @param pvECX Where to store ecx.
638 * @param pvEDX Where to store edx.
639 * @remark We're using void pointers to ease the use of special bitfield structures and such.
640 */
641#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
642DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
643#else
644DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
645{
646# if RT_INLINE_ASM_GNU_STYLE
647# ifdef RT_ARCH_AMD64
648 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
649 __asm__ ("cpuid\n\t"
650 : "=a" (uRAX),
651 "=b" (uRBX),
652 "=c" (uRCX),
653 "=d" (uRDX)
654 : "0" (uOperator),
655 "2" (uIdxECX));
656 *(uint32_t *)pvEAX = (uint32_t)uRAX;
657 *(uint32_t *)pvEBX = (uint32_t)uRBX;
658 *(uint32_t *)pvECX = (uint32_t)uRCX;
659 *(uint32_t *)pvEDX = (uint32_t)uRDX;
660# else
661 __asm__ ("xchgl %%ebx, %1\n\t"
662 "cpuid\n\t"
663 "xchgl %%ebx, %1\n\t"
664 : "=a" (*(uint32_t *)pvEAX),
665 "=r" (*(uint32_t *)pvEBX),
666 "=c" (*(uint32_t *)pvECX),
667 "=d" (*(uint32_t *)pvEDX)
668 : "0" (uOperator),
669 "2" (uIdxECX));
670# endif
671
672# elif RT_INLINE_ASM_USES_INTRIN
673 int aInfo[4];
674 /* Note: __cpuid ignores uIdxECX, so the sub-leaf index is not passed here; a dedicated intrinsic is still needed. */
675 __cpuid(aInfo, uOperator);
676 *(uint32_t *)pvEAX = aInfo[0];
677 *(uint32_t *)pvEBX = aInfo[1];
678 *(uint32_t *)pvECX = aInfo[2];
679 *(uint32_t *)pvEDX = aInfo[3];
680
681# else
682 uint32_t uEAX;
683 uint32_t uEBX;
684 uint32_t uECX;
685 uint32_t uEDX;
686 __asm
687 {
688 push ebx
689 mov eax, [uOperator]
690 mov ecx, [uIdxECX]
691 cpuid
692 mov [uEAX], eax
693 mov [uEBX], ebx
694 mov [uECX], ecx
695 mov [uEDX], edx
696 pop ebx
697 }
698 *(uint32_t *)pvEAX = uEAX;
699 *(uint32_t *)pvEBX = uEBX;
700 *(uint32_t *)pvECX = uECX;
701 *(uint32_t *)pvEDX = uEDX;
702# endif
703}
704#endif
705
706
707/**
708 * Performs the cpuid instruction returning ecx and edx.
709 *
710 * @param uOperator CPUID operation (eax).
711 * @param pvECX Where to store ecx.
712 * @param pvEDX Where to store edx.
713 * @remark We're using void pointers to ease the use of special bitfield structures and such.
714 */
715#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
716DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
717#else
718DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
719{
720 uint32_t uEBX;
721 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
722}
723#endif
724
725
726/**
727 * Performs the cpuid instruction returning edx.
728 *
729 * @param uOperator CPUID operation (eax).
730 * @returns EDX after cpuid operation.
731 */
732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
733DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
734#else
735DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
736{
737 RTCCUINTREG xDX;
738# if RT_INLINE_ASM_GNU_STYLE
739# ifdef RT_ARCH_AMD64
740 RTCCUINTREG uSpill;
741 __asm__ ("cpuid"
742 : "=a" (uSpill),
743 "=d" (xDX)
744 : "0" (uOperator)
745 : "rbx", "rcx");
746# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
747 __asm__ ("push %%ebx\n\t"
748 "cpuid\n\t"
749 "pop %%ebx\n\t"
750 : "=a" (uOperator),
751 "=d" (xDX)
752 : "0" (uOperator)
753 : "ecx");
754# else
755 __asm__ ("cpuid"
756 : "=a" (uOperator),
757 "=d" (xDX)
758 : "0" (uOperator)
759 : "ebx", "ecx");
760# endif
761
762# elif RT_INLINE_ASM_USES_INTRIN
763 int aInfo[4];
764 __cpuid(aInfo, uOperator);
765 xDX = aInfo[3];
766
767# else
768 __asm
769 {
770 push ebx
771 mov eax, [uOperator]
772 cpuid
773 mov [xDX], edx
774 pop ebx
775 }
776# endif
777 return (uint32_t)xDX;
778}
779#endif
780
781
782/**
783 * Performs the cpuid instruction returning ecx.
784 *
785 * @param uOperator CPUID operation (eax).
786 * @returns ECX after cpuid operation.
787 */
788#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
789DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
790#else
791DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
792{
793 RTCCUINTREG xCX;
794# if RT_INLINE_ASM_GNU_STYLE
795# ifdef RT_ARCH_AMD64
796 RTCCUINTREG uSpill;
797 __asm__ ("cpuid"
798 : "=a" (uSpill),
799 "=c" (xCX)
800 : "0" (uOperator)
801 : "rbx", "rdx");
802# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
803 __asm__ ("push %%ebx\n\t"
804 "cpuid\n\t"
805 "pop %%ebx\n\t"
806 : "=a" (uOperator),
807 "=c" (xCX)
808 : "0" (uOperator)
809 : "edx");
810# else
811 __asm__ ("cpuid"
812 : "=a" (uOperator),
813 "=c" (xCX)
814 : "0" (uOperator)
815 : "ebx", "edx");
816
817# endif
818
819# elif RT_INLINE_ASM_USES_INTRIN
820 int aInfo[4];
821 __cpuid(aInfo, uOperator);
822 xCX = aInfo[2];
823
824# else
825 __asm
826 {
827 push ebx
828 mov eax, [uOperator]
829 cpuid
830 mov [xCX], ecx
831 pop ebx
832 }
833# endif
834 return (uint32_t)xCX;
835}
836#endif
837
838
839/**
840 * Checks if the current CPU supports CPUID.
841 *
842 * @returns true if CPUID is supported.
843 */
844DECLINLINE(bool) ASMHasCpuId(void)
845{
846#ifdef RT_ARCH_AMD64
847 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
848#else /* !RT_ARCH_AMD64 */
849 bool fRet = false;
850# if RT_INLINE_ASM_GNU_STYLE
851 uint32_t u1;
852 uint32_t u2;
853 __asm__ ("pushf\n\t"
854 "pop %1\n\t"
855 "mov %1, %2\n\t"
856 "xorl $0x200000, %1\n\t"
857 "push %1\n\t"
858 "popf\n\t"
859 "pushf\n\t"
860 "pop %1\n\t"
861 "cmpl %1, %2\n\t"
862 "setne %0\n\t"
863 "push %2\n\t"
864 "popf\n\t"
865 : "=m" (fRet), "=r" (u1), "=r" (u2));
866# else
867 __asm
868 {
869 pushfd
870 pop eax
871 mov ebx, eax
872 xor eax, 0200000h
873 push eax
874 popfd
875 pushfd
876 pop eax
877 cmp eax, ebx
878 setne fRet
879 push ebx
880 popfd
881 }
882# endif
883 return fRet;
884#endif /* !RT_ARCH_AMD64 */
885}
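/** @par Usage example (illustrative sketch)
 *  Only issue CPUID after checking that the CPU actually has it (relevant
 *  for very old 32-bit CPUs). CPUID.1:EDX bit 4 is the architectural TSC
 *  feature bit; the surrounding code is an assumption.
 *  @code
 *  bool fHasTsc = false;
 *  if (ASMHasCpuId())
 *      fHasTsc = !!(ASMCpuId_EDX(1) & RT_BIT(4)); // EDX bit 4 = TSC
 *  @endcode
 */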
886
887
888/**
889 * Gets the APIC ID of the current CPU.
890 *
891 * @returns the APIC ID.
892 */
893#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
894DECLASM(uint8_t) ASMGetApicId(void);
895#else
896DECLINLINE(uint8_t) ASMGetApicId(void)
897{
898 RTCCUINTREG xBX;
899# if RT_INLINE_ASM_GNU_STYLE
900# ifdef RT_ARCH_AMD64
901 RTCCUINTREG uSpill;
902 __asm__ ("cpuid"
903 : "=a" (uSpill),
904 "=b" (xBX)
905 : "0" (1)
906 : "rcx", "rdx");
907# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
908 RTCCUINTREG uSpill;
909 __asm__ ("mov %%ebx,%1\n\t"
910 "cpuid\n\t"
911 "xchgl %%ebx,%1\n\t"
912 : "=a" (uSpill),
913 "=r" (xBX)
914 : "0" (1)
915 : "ecx", "edx");
916# else
917 RTCCUINTREG uSpill;
918 __asm__ ("cpuid"
919 : "=a" (uSpill),
920 "=b" (xBX)
921 : "0" (1)
922 : "ecx", "edx");
923# endif
924
925# elif RT_INLINE_ASM_USES_INTRIN
926 int aInfo[4];
927 __cpuid(aInfo, 1);
928 xBX = aInfo[1];
929
930# else
931 __asm
932 {
933 push ebx
934 mov eax, 1
935 cpuid
936 mov [xBX], ebx
937 pop ebx
938 }
939# endif
940 return (uint8_t)(xBX >> 24);
941}
942#endif
943
944
945/**
946 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
947 *
948 * @returns true/false.
949 * @param uEBX EBX return from ASMCpuId(0)
950 * @param uECX ECX return from ASMCpuId(0)
951 * @param uEDX EDX return from ASMCpuId(0)
952 */
953DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
954{
955 return uEBX == 0x756e6547 // 'Genu'
956 && uECX == 0x6c65746e // 'ntel'
957 && uEDX == 0x49656e69; // 'ineI'
958}
959
960
961/**
962 * Tests if this is a genuine Intel CPU.
963 *
964 * @returns true/false.
965 */
966DECLINLINE(bool) ASMIsIntelCpu(void)
967{
968 uint32_t uEAX, uEBX, uECX, uEDX;
969 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
970 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
971}
972
973
974/**
975 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
976 *
977 * @returns Family.
978 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
979 */
980DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
981{
982 return ((uEAX >> 8) & 0xf) == 0xf
983 ? ((uEAX >> 20) & 0x7f) + 0xf
984 : ((uEAX >> 8) & 0xf);
985}
986
987
988/**
989 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
990 *
991 * @returns Model.
992 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
993 * @param fIntel Whether it's an intel CPU.
994 */
995DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
996{
997 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
998 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
999 : ((uEAX >> 4) & 0xf);
1000}
1001
1002
1003/**
1004 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1005 *
1006 * @returns Model.
1007 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1009 */
1010DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1011{
1012 return ((uEAX >> 8) & 0xf) == 0xf
1013 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1014 : ((uEAX >> 4) & 0xf);
1015}
1016
1017
1018/**
1019 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1020 *
1021 * @returns Model.
1022 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1023 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1024 */
1025DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1026{
1027 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1028 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1029 : ((uEAX >> 4) & 0xf);
1030}
1031
1032
1033/**
1034 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1035 *
1036 * @returns Stepping.
1037 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1038 */
1039DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1040{
1041 return uEAX & 0xf;
1042}
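/** @par Usage example (illustrative sketch)
 *  Putting the CPUID decoding helpers together.
 *  @code
 *  uint32_t uEAX, uEBX, uECX, uEDX;
 *  ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *  bool const fIntel = ASMIsIntelCpuEx(uEBX, uECX, uEDX);
 *  ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *  uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 *  uint32_t const uModel    = ASMGetCpuModel(uEAX, fIntel);
 *  uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 *  @endcode
 */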
1043
1044
1045/**
1046 * Get cr0.
1047 * @returns cr0.
1048 */
1049#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1050DECLASM(RTCCUINTREG) ASMGetCR0(void);
1051#else
1052DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1053{
1054 RTCCUINTREG uCR0;
1055# if RT_INLINE_ASM_USES_INTRIN
1056 uCR0 = __readcr0();
1057
1058# elif RT_INLINE_ASM_GNU_STYLE
1059# ifdef RT_ARCH_AMD64
1060 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1061# else
1062 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1063# endif
1064# else
1065 __asm
1066 {
1067# ifdef RT_ARCH_AMD64
1068 mov rax, cr0
1069 mov [uCR0], rax
1070# else
1071 mov eax, cr0
1072 mov [uCR0], eax
1073# endif
1074 }
1075# endif
1076 return uCR0;
1077}
1078#endif
1079
1080
1081/**
1082 * Sets the CR0 register.
1083 * @param uCR0 The new CR0 value.
1084 */
1085#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1086DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1087#else
1088DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1089{
1090# if RT_INLINE_ASM_USES_INTRIN
1091 __writecr0(uCR0);
1092
1093# elif RT_INLINE_ASM_GNU_STYLE
1094# ifdef RT_ARCH_AMD64
1095 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1096# else
1097 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1098# endif
1099# else
1100 __asm
1101 {
1102# ifdef RT_ARCH_AMD64
1103 mov rax, [uCR0]
1104 mov cr0, rax
1105# else
1106 mov eax, [uCR0]
1107 mov cr0, eax
1108# endif
1109 }
1110# endif
1111}
1112#endif
1113
1114
1115/**
1116 * Get cr2.
1117 * @returns cr2.
1118 */
1119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1120DECLASM(RTCCUINTREG) ASMGetCR2(void);
1121#else
1122DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1123{
1124 RTCCUINTREG uCR2;
1125# if RT_INLINE_ASM_USES_INTRIN
1126 uCR2 = __readcr2();
1127
1128# elif RT_INLINE_ASM_GNU_STYLE
1129# ifdef RT_ARCH_AMD64
1130 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1131# else
1132 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1133# endif
1134# else
1135 __asm
1136 {
1137# ifdef RT_ARCH_AMD64
1138 mov rax, cr2
1139 mov [uCR2], rax
1140# else
1141 mov eax, cr2
1142 mov [uCR2], eax
1143# endif
1144 }
1145# endif
1146 return uCR2;
1147}
1148#endif
1149
1150
1151/**
1152 * Sets the CR2 register.
1153 * @param uCR2 The new CR2 value.
1154 */
1155#if RT_INLINE_ASM_EXTERNAL
1156DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1157#else
1158DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1159{
1160# if RT_INLINE_ASM_GNU_STYLE
1161# ifdef RT_ARCH_AMD64
1162 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1163# else
1164 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1165# endif
1166# else
1167 __asm
1168 {
1169# ifdef RT_ARCH_AMD64
1170 mov rax, [uCR2]
1171 mov cr2, rax
1172# else
1173 mov eax, [uCR2]
1174 mov cr2, eax
1175# endif
1176 }
1177# endif
1178}
1179#endif
1180
1181
1182/**
1183 * Get cr3.
1184 * @returns cr3.
1185 */
1186#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1187DECLASM(RTCCUINTREG) ASMGetCR3(void);
1188#else
1189DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1190{
1191 RTCCUINTREG uCR3;
1192# if RT_INLINE_ASM_USES_INTRIN
1193 uCR3 = __readcr3();
1194
1195# elif RT_INLINE_ASM_GNU_STYLE
1196# ifdef RT_ARCH_AMD64
1197 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1198# else
1199 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1200# endif
1201# else
1202 __asm
1203 {
1204# ifdef RT_ARCH_AMD64
1205 mov rax, cr3
1206 mov [uCR3], rax
1207# else
1208 mov eax, cr3
1209 mov [uCR3], eax
1210# endif
1211 }
1212# endif
1213 return uCR3;
1214}
1215#endif
1216
1217
1218/**
1219 * Sets the CR3 register.
1220 *
1221 * @param uCR3 New CR3 value.
1222 */
1223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1224DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1225#else
1226DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1227{
1228# if RT_INLINE_ASM_USES_INTRIN
1229 __writecr3(uCR3);
1230
1231# elif RT_INLINE_ASM_GNU_STYLE
1232# ifdef RT_ARCH_AMD64
1233 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1234# else
1235 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1236# endif
1237# else
1238 __asm
1239 {
1240# ifdef RT_ARCH_AMD64
1241 mov rax, [uCR3]
1242 mov cr3, rax
1243# else
1244 mov eax, [uCR3]
1245 mov cr3, eax
1246# endif
1247 }
1248# endif
1249}
1250#endif
1251
1252
1253/**
1254 * Reloads the CR3 register.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(void) ASMReloadCR3(void);
1258#else
1259DECLINLINE(void) ASMReloadCR3(void)
1260{
1261# if RT_INLINE_ASM_USES_INTRIN
1262 __writecr3(__readcr3());
1263
1264# elif RT_INLINE_ASM_GNU_STYLE
1265 RTCCUINTREG u;
1266# ifdef RT_ARCH_AMD64
1267 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1268 "movq %0, %%cr3\n\t"
1269 : "=r" (u));
1270# else
1271 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1272 "movl %0, %%cr3\n\t"
1273 : "=r" (u));
1274# endif
1275# else
1276 __asm
1277 {
1278# ifdef RT_ARCH_AMD64
1279 mov rax, cr3
1280 mov cr3, rax
1281# else
1282 mov eax, cr3
1283 mov cr3, eax
1284# endif
1285 }
1286# endif
1287}
1288#endif
1289
1290
1291/**
1292 * Get cr4.
1293 * @returns cr4.
1294 */
1295#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1296DECLASM(RTCCUINTREG) ASMGetCR4(void);
1297#else
1298DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1299{
1300 RTCCUINTREG uCR4;
1301# if RT_INLINE_ASM_USES_INTRIN
1302 uCR4 = __readcr4();
1303
1304# elif RT_INLINE_ASM_GNU_STYLE
1305# ifdef RT_ARCH_AMD64
1306 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1307# else
1308 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1309# endif
1310# else
1311 __asm
1312 {
1313# ifdef RT_ARCH_AMD64
1314 mov rax, cr4
1315 mov [uCR4], rax
1316# else
1317 push eax /* just in case */
1318 /*mov eax, cr4*/
1319 _emit 0x0f
1320 _emit 0x20
1321 _emit 0xe0
1322 mov [uCR4], eax
1323 pop eax
1324# endif
1325 }
1326# endif
1327 return uCR4;
1328}
1329#endif
1330
1331
1332/**
1333 * Sets the CR4 register.
1334 *
1335 * @param uCR4 New CR4 value.
1336 */
1337#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1338DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1339#else
1340DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1341{
1342# if RT_INLINE_ASM_USES_INTRIN
1343 __writecr4(uCR4);
1344
1345# elif RT_INLINE_ASM_GNU_STYLE
1346# ifdef RT_ARCH_AMD64
1347 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1348# else
1349 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1350# endif
1351# else
1352 __asm
1353 {
1354# ifdef RT_ARCH_AMD64
1355 mov rax, [uCR4]
1356 mov cr4, rax
1357# else
1358 mov eax, [uCR4]
1359 _emit 0x0F
1360 _emit 0x22
1361 _emit 0xE0 /* mov cr4, eax */
1362# endif
1363 }
1364# endif
1365}
1366#endif
1367
1368
1369/**
1370 * Get cr8.
1371 * @returns cr8.
1372 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1373 */
1374#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1375DECLASM(RTCCUINTREG) ASMGetCR8(void);
1376#else
1377DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1378{
1379# ifdef RT_ARCH_AMD64
1380 RTCCUINTREG uCR8;
1381# if RT_INLINE_ASM_USES_INTRIN
1382 uCR8 = __readcr8();
1383
1384# elif RT_INLINE_ASM_GNU_STYLE
1385 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1386# else
1387 __asm
1388 {
1389 mov rax, cr8
1390 mov [uCR8], rax
1391 }
1392# endif
1393 return uCR8;
1394# else /* !RT_ARCH_AMD64 */
1395 return 0;
1396# endif /* !RT_ARCH_AMD64 */
1397}
1398#endif
1399
1400
1401/**
1402 * Enables interrupts (EFLAGS.IF).
1403 */
1404#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1405DECLASM(void) ASMIntEnable(void);
1406#else
1407DECLINLINE(void) ASMIntEnable(void)
1408{
1409# if RT_INLINE_ASM_GNU_STYLE
1410 __asm("sti\n");
1411# elif RT_INLINE_ASM_USES_INTRIN
1412 _enable();
1413# else
1414 __asm sti
1415# endif
1416}
1417#endif
1418
1419
1420/**
1421 * Disables interrupts (!EFLAGS.IF).
1422 */
1423#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1424DECLASM(void) ASMIntDisable(void);
1425#else
1426DECLINLINE(void) ASMIntDisable(void)
1427{
1428# if RT_INLINE_ASM_GNU_STYLE
1429 __asm("cli\n");
1430# elif RT_INLINE_ASM_USES_INTRIN
1431 _disable();
1432# else
1433 __asm cli
1434# endif
1435}
1436#endif
1437
1438
1439/**
1440 * Disables interrupts and returns previous xFLAGS.
1441 */
1442#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1443DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1444#else
1445DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1446{
1447 RTCCUINTREG xFlags;
1448# if RT_INLINE_ASM_GNU_STYLE
1449# ifdef RT_ARCH_AMD64
1450 __asm__ __volatile__("pushfq\n\t"
1451 "cli\n\t"
1452 "popq %0\n\t"
1453 : "=rm" (xFlags));
1454# else
1455 __asm__ __volatile__("pushfl\n\t"
1456 "cli\n\t"
1457 "popl %0\n\t"
1458 : "=rm" (xFlags));
1459# endif
1460# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1461 xFlags = ASMGetFlags();
1462 _disable();
1463# else
1464 __asm {
1465 pushfd
1466 cli
1467 pop [xFlags]
1468 }
1469# endif
1470 return xFlags;
1471}
1472#endif
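/** @par Usage example (illustrative sketch)
 *  The usual save / disable / restore pattern around a short section that
 *  must not be interrupted.
 *  @code
 *  RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *  // ... touch per-CPU state here ...
 *  ASMSetFlags(fSavedFlags); // restores the previous EFLAGS.IF state
 *  @endcode
 */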
1473
1474
1475/**
1476 * Reads a machine specific register.
1477 *
1478 * @returns Register content.
1479 * @param uRegister Register to read.
1480 */
1481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1482DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1483#else
1484DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1485{
1486 RTUINT64U u;
1487# if RT_INLINE_ASM_GNU_STYLE
1488 __asm__ __volatile__("rdmsr\n\t"
1489 : "=a" (u.s.Lo),
1490 "=d" (u.s.Hi)
1491 : "c" (uRegister));
1492
1493# elif RT_INLINE_ASM_USES_INTRIN
1494 u.u = __readmsr(uRegister);
1495
1496# else
1497 __asm
1498 {
1499 mov ecx, [uRegister]
1500 rdmsr
1501 mov [u.s.Lo], eax
1502 mov [u.s.Hi], edx
1503 }
1504# endif
1505
1506 return u.u;
1507}
1508#endif
1509
1510
1511/**
1512 * Writes a machine specific register.
1513 *
1515 * @param uRegister Register to write to.
1516 * @param u64Val Value to write.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1519DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1520#else
1521DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1522{
1523 RTUINT64U u;
1524
1525 u.u = u64Val;
1526# if RT_INLINE_ASM_GNU_STYLE
1527 __asm__ __volatile__("wrmsr\n\t"
1528 ::"a" (u.s.Lo),
1529 "d" (u.s.Hi),
1530 "c" (uRegister));
1531
1532# elif RT_INLINE_ASM_USES_INTRIN
1533 __writemsr(uRegister, u.u);
1534
1535# else
1536 __asm
1537 {
1538 mov ecx, [uRegister]
1539 mov edx, [u.s.Hi]
1540 mov eax, [u.s.Lo]
1541 wrmsr
1542 }
1543# endif
1544}
1545#endif
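/** @par Usage example (illustrative sketch)
 *  A read-modify-write of an MSR. MY_MSR_INDEX and MY_MSR_ENABLE_BIT are
 *  hypothetical placeholders, not real register definitions.
 *  @code
 *  uint64_t u64 = ASMRdMsr(MY_MSR_INDEX);
 *  u64 |= MY_MSR_ENABLE_BIT;     // set the desired bit
 *  ASMWrMsr(MY_MSR_INDEX, u64);
 *  @endcode
 */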
1546
1547
1548/**
1549 * Reads low part of a machine specific register.
1550 *
1551 * @returns Register content.
1552 * @param uRegister Register to read.
1553 */
1554#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1555DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1556#else
1557DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1558{
1559 uint32_t u32;
1560# if RT_INLINE_ASM_GNU_STYLE
1561 __asm__ __volatile__("rdmsr\n\t"
1562 : "=a" (u32)
1563 : "c" (uRegister)
1564 : "edx");
1565
1566# elif RT_INLINE_ASM_USES_INTRIN
1567 u32 = (uint32_t)__readmsr(uRegister);
1568
1569#else
1570 __asm
1571 {
1572 mov ecx, [uRegister]
1573 rdmsr
1574 mov [u32], eax
1575 }
1576# endif
1577
1578 return u32;
1579}
1580#endif
1581
1582
1583/**
1584 * Reads high part of a machine specific register.
1585 *
1586 * @returns Register content.
1587 * @param uRegister Register to read.
1588 */
1589#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1590DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1591#else
1592DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1593{
1594 uint32_t u32;
1595# if RT_INLINE_ASM_GNU_STYLE
1596 __asm__ __volatile__("rdmsr\n\t"
1597 : "=d" (u32)
1598 : "c" (uRegister)
1599 : "eax");
1600
1601# elif RT_INLINE_ASM_USES_INTRIN
1602 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1603
1604# else
1605 __asm
1606 {
1607 mov ecx, [uRegister]
1608 rdmsr
1609 mov [u32], edx
1610 }
1611# endif
1612
1613 return u32;
1614}
1615#endif
1616
1617
1618/**
1619 * Gets dr7.
1620 *
1621 * @returns dr7.
1622 */
1623#if RT_INLINE_ASM_EXTERNAL
1624DECLASM(RTCCUINTREG) ASMGetDR7(void);
1625#else
1626DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1627{
1628 RTCCUINTREG uDR7;
1629# if RT_INLINE_ASM_GNU_STYLE
1630# ifdef RT_ARCH_AMD64
1631 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1632# else
1633 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1634# endif
1635# else
1636 __asm
1637 {
1638# ifdef RT_ARCH_AMD64
1639 mov rax, dr7
1640 mov [uDR7], rax
1641# else
1642 mov eax, dr7
1643 mov [uDR7], eax
1644# endif
1645 }
1646# endif
1647 return uDR7;
1648}
1649#endif
1650
1651
1652/**
1653 * Gets dr6.
1654 *
1655 * @returns dr6.
1656 */
1657#if RT_INLINE_ASM_EXTERNAL
1658DECLASM(RTCCUINTREG) ASMGetDR6(void);
1659#else
1660DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1661{
1662 RTCCUINTREG uDR6;
1663# if RT_INLINE_ASM_GNU_STYLE
1664# ifdef RT_ARCH_AMD64
1665 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1666# else
1667 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1668# endif
1669# else
1670 __asm
1671 {
1672# ifdef RT_ARCH_AMD64
1673 mov rax, dr6
1674 mov [uDR6], rax
1675# else
1676 mov eax, dr6
1677 mov [uDR6], eax
1678# endif
1679 }
1680# endif
1681 return uDR6;
1682}
1683#endif
1684
1685
1686/**
1687 * Reads and clears DR6.
1688 *
1689 * @returns DR6.
1690 */
1691#if RT_INLINE_ASM_EXTERNAL
1692DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1693#else
1694DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1695{
1696 RTCCUINTREG uDR6;
1697# if RT_INLINE_ASM_GNU_STYLE
1698 RTCCUINTREG uNewValue = 0xffff0ff0; /* bits 31-16 and 11-4 are set; bits 15-12, 3-0 (and 63-32) are clear. */
1699# ifdef RT_ARCH_AMD64
1700 __asm__ __volatile__("movq %%dr6, %0\n\t"
1701 "movq %1, %%dr6\n\t"
1702 : "=r" (uDR6)
1703 : "r" (uNewValue));
1704# else
1705 __asm__ __volatile__("movl %%dr6, %0\n\t"
1706 "movl %1, %%dr6\n\t"
1707 : "=r" (uDR6)
1708 : "r" (uNewValue));
1709# endif
1710# else
1711 __asm
1712 {
1713# ifdef RT_ARCH_AMD64
1714 mov rax, dr6
1715 mov [uDR6], rax
1716 mov rcx, rax
1717 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1718 mov dr6, rcx
1719# else
1720 mov eax, dr6
1721 mov [uDR6], eax
1722 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1723 mov dr6, ecx
1724# endif
1725 }
1726# endif
1727 return uDR6;
1728}
1729#endif
1730
1731
1732/**
1733 * Compiler memory barrier.
1734 *
1735 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1736 * values or any outstanding writes when returning from this function.
1737 *
1738 * This function must be used if non-volatile data is modified by a
1739 * device or the VMM. Typical cases are port access, MMIO access,
1740 * trapping instruction, etc.
1741 */
1742#if RT_INLINE_ASM_GNU_STYLE
1743# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1744#elif RT_INLINE_ASM_USES_INTRIN
1745# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1746#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1747DECLINLINE(void) ASMCompilerBarrier(void)
1748{
1749 __asm
1750 {
1751 }
1752}
1753#endif
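/** @par Usage example (illustrative sketch)
 *  Forcing the compiler to re-read a flag that a device or another context
 *  updates behind its back; g_fDeviceDone is a hypothetical, non-volatile
 *  variable.
 *  @code
 *  extern uint32_t g_fDeviceDone;
 *  while (!g_fDeviceDone)
 *      ASMCompilerBarrier(); // discard cached values before the next read
 *  @endcode
 */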
1754
1755
1756/**
1757 * Writes an 8-bit unsigned integer to an I/O port, ordered.
1758 *
1759 * @param Port I/O port to write to.
1760 * @param u8 8-bit integer to write.
1761 */
1762#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1763DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1764#else
1765DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1766{
1767# if RT_INLINE_ASM_GNU_STYLE
1768 __asm__ __volatile__("outb %b1, %w0\n\t"
1769 :: "Nd" (Port),
1770 "a" (u8));
1771
1772# elif RT_INLINE_ASM_USES_INTRIN
1773 __outbyte(Port, u8);
1774
1775# else
1776 __asm
1777 {
1778 mov dx, [Port]
1779 mov al, [u8]
1780 out dx, al
1781 }
1782# endif
1783}
1784#endif
1785
1786
1787/**
1788 * Gets an 8-bit unsigned integer from an I/O port, ordered.
1789 *
1790 * @returns 8-bit integer.
1791 * @param Port I/O port to read from.
1792 */
1793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1794DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1795#else
1796DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1797{
1798 uint8_t u8;
1799# if RT_INLINE_ASM_GNU_STYLE
1800 __asm__ __volatile__("inb %w1, %b0\n\t"
1801 : "=a" (u8)
1802 : "Nd" (Port));
1803
1804# elif RT_INLINE_ASM_USES_INTRIN
1805 u8 = __inbyte(Port);
1806
1807# else
1808 __asm
1809 {
1810 mov dx, [Port]
1811 in al, dx
1812 mov [u8], al
1813 }
1814# endif
1815 return u8;
1816}
1817#endif
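/** @par Usage example (illustrative sketch)
 *  The classic index/data register pair pattern; ports 0x70/0x71 (CMOS/RTC
 *  index and data) are used purely for illustration.
 *  @code
 *  ASMOutU8(0x70, 0x0a);             // select CMOS register 0x0a
 *  uint8_t bStatusA = ASMInU8(0x71); // read its current value
 *  @endcode
 */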
1818
1819
1820/**
1821 * Writes a 16-bit unsigned integer to an I/O port, ordered.
1822 *
1823 * @param Port I/O port to write to.
1824 * @param u16 16-bit integer to write.
1825 */
1826#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1827DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1828#else
1829DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1830{
1831# if RT_INLINE_ASM_GNU_STYLE
1832 __asm__ __volatile__("outw %w1, %w0\n\t"
1833 :: "Nd" (Port),
1834 "a" (u16));
1835
1836# elif RT_INLINE_ASM_USES_INTRIN
1837 __outword(Port, u16);
1838
1839# else
1840 __asm
1841 {
1842 mov dx, [Port]
1843 mov ax, [u16]
1844 out dx, ax
1845 }
1846# endif
1847}
1848#endif
1849
1850
1851/**
1852 * Gets a 16-bit unsigned integer from an I/O port, ordered.
1853 *
1854 * @returns 16-bit integer.
1855 * @param Port I/O port to read from.
1856 */
1857#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1858DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1859#else
1860DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1861{
1862 uint16_t u16;
1863# if RT_INLINE_ASM_GNU_STYLE
1864 __asm__ __volatile__("inw %w1, %w0\n\t"
1865 : "=a" (u16)
1866 : "Nd" (Port));
1867
1868# elif RT_INLINE_ASM_USES_INTRIN
1869 u16 = __inword(Port);
1870
1871# else
1872 __asm
1873 {
1874 mov dx, [Port]
1875 in ax, dx
1876 mov [u16], ax
1877 }
1878# endif
1879 return u16;
1880}
1881#endif
1882
1883
1884/**
1885 * Writes a 32-bit unsigned integer to an I/O port, ordered.
1886 *
1887 * @param Port I/O port to write to.
1888 * @param u32 32-bit integer to write.
1889 */
1890#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1891DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1892#else
1893DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1894{
1895# if RT_INLINE_ASM_GNU_STYLE
1896 __asm__ __volatile__("outl %1, %w0\n\t"
1897 :: "Nd" (Port),
1898 "a" (u32));
1899
1900# elif RT_INLINE_ASM_USES_INTRIN
1901 __outdword(Port, u32);
1902
1903# else
1904 __asm
1905 {
1906 mov dx, [Port]
1907 mov eax, [u32]
1908 out dx, eax
1909 }
1910# endif
1911}
1912#endif
1913
1914
1915/**
1916 * Gets a 32-bit unsigned integer from an I/O port, ordered.
1917 *
1918 * @returns 32-bit integer.
1919 * @param Port I/O port to read from.
1920 */
1921#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1922DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1923#else
1924DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1925{
1926 uint32_t u32;
1927# if RT_INLINE_ASM_GNU_STYLE
1928 __asm__ __volatile__("inl %w1, %0\n\t"
1929 : "=a" (u32)
1930 : "Nd" (Port));
1931
1932# elif RT_INLINE_ASM_USES_INTRIN
1933 u32 = __indword(Port);
1934
1935# else
1936 __asm
1937 {
1938 mov dx, [Port]
1939 in eax, dx
1940 mov [u32], eax
1941 }
1942# endif
1943 return u32;
1944}
1945#endif
1946
1947/** @todo string i/o */
1948
1949
1950/**
1951 * Atomically Exchange an unsigned 8-bit value, ordered.
1952 *
1953 * @returns Current *pu8 value
1954 * @param pu8 Pointer to the 8-bit variable to update.
1955 * @param u8 The 8-bit value to assign to *pu8.
1956 */
1957#if RT_INLINE_ASM_EXTERNAL
1958DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1959#else
1960DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1961{
1962# if RT_INLINE_ASM_GNU_STYLE
1963 __asm__ __volatile__("xchgb %0, %1\n\t"
1964 : "=m" (*pu8),
1965 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
1966 : "1" (u8));
1967# else
1968 __asm
1969 {
1970# ifdef RT_ARCH_AMD64
1971 mov rdx, [pu8]
1972 mov al, [u8]
1973 xchg [rdx], al
1974 mov [u8], al
1975# else
1976 mov edx, [pu8]
1977 mov al, [u8]
1978 xchg [edx], al
1979 mov [u8], al
1980# endif
1981 }
1982# endif
1983 return u8;
1984}
1985#endif
1986
1987
1988/**
1989 * Atomically Exchange a signed 8-bit value, ordered.
1990 *
1991 * @returns Current *pi8 value
1992 * @param pi8 Pointer to the 8-bit variable to update.
1993 * @param i8 The 8-bit value to assign to *pi8.
1994 */
1995DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1996{
1997 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1998}
1999
2000
2001/**
2002 * Atomically Exchange a bool value, ordered.
2003 *
2004 * @returns Current *pf value
2005 * @param pf Pointer to the 8-bit boolean variable to update.
2006 * @param f The boolean value to assign to *pf.
2007 */
2008DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2009{
2010#ifdef _MSC_VER
2011 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2012#else
2013 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2014#endif
2015}
2016
2017
2018/**
2019 * Atomically Exchange an unsigned 16-bit value, ordered.
2020 *
2021 * @returns Current *pu16 value
2022 * @param pu16 Pointer to the 16-bit variable to update.
2023 * @param u16 The 16-bit value to assign to *pu16.
2024 */
2025#if RT_INLINE_ASM_EXTERNAL
2026DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2027#else
2028DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2029{
2030# if RT_INLINE_ASM_GNU_STYLE
2031 __asm__ __volatile__("xchgw %0, %1\n\t"
2032 : "=m" (*pu16),
2033 "=r" (u16)
2034 : "1" (u16));
2035# else
2036 __asm
2037 {
2038# ifdef RT_ARCH_AMD64
2039 mov rdx, [pu16]
2040 mov ax, [u16]
2041 xchg [rdx], ax
2042 mov [u16], ax
2043# else
2044 mov edx, [pu16]
2045 mov ax, [u16]
2046 xchg [edx], ax
2047 mov [u16], ax
2048# endif
2049 }
2050# endif
2051 return u16;
2052}
2053#endif
2054
2055
2056/**
2057 * Atomically Exchange a signed 16-bit value, ordered.
2058 *
2059 * @returns Current *pi16 value
2060 * @param pi16 Pointer to the 16-bit variable to update.
2061 * @param i16 The 16-bit value to assign to *pi16.
2062 */
2063DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2064{
2065 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2066}
2067
2068
2069/**
2070 * Atomically Exchange an unsigned 32-bit value, ordered.
2071 *
2072 * @returns Current *pu32 value
2073 * @param pu32 Pointer to the 32-bit variable to update.
2074 * @param u32 The 32-bit value to assign to *pu32.
2075 */
2076#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2077DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2078#else
2079DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2080{
2081# if RT_INLINE_ASM_GNU_STYLE
2082 __asm__ __volatile__("xchgl %0, %1\n\t"
2083 : "=m" (*pu32),
2084 "=r" (u32)
2085 : "1" (u32));
2086
2087# elif RT_INLINE_ASM_USES_INTRIN
2088 u32 = _InterlockedExchange((long *)pu32, u32);
2089
2090# else
2091 __asm
2092 {
2093# ifdef RT_ARCH_AMD64
2094 mov rdx, [pu32]
2095 mov eax, u32
2096 xchg [rdx], eax
2097 mov [u32], eax
2098# else
2099 mov edx, [pu32]
2100 mov eax, u32
2101 xchg [edx], eax
2102 mov [u32], eax
2103# endif
2104 }
2105# endif
2106 return u32;
2107}
2108#endif
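/** @par Usage example (illustrative sketch)
 *  Handing a "work pending" flag to exactly one consumer by atomically
 *  swapping it to zero; the flag and ProcessPendingWork() are hypothetical.
 *  @code
 *  static volatile uint32_t s_fWorkPending;
 *  ...
 *  if (ASMAtomicXchgU32(&s_fWorkPending, 0) != 0)
 *      ProcessPendingWork(); // only the caller that saw the old value 1 gets here
 *  @endcode
 */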
2109
2110
2111/**
2112 * Atomically Exchange a signed 32-bit value, ordered.
2113 *
2114 * @returns Current *pi32 value
2115 * @param pi32 Pointer to the 32-bit variable to update.
2116 * @param i32 The 32-bit value to assign to *pi32.
2117 */
2118DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2119{
2120 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2121}
2122
2123
2124/**
2125 * Atomically Exchange an unsigned 64-bit value, ordered.
2126 *
2127 * @returns Current *pu64 value
2128 * @param pu64 Pointer to the 64-bit variable to update.
2129 * @param u64 The 64-bit value to assign to *pu64.
2130 */
2131#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2132DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2133#else
2134DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2135{
2136# if defined(RT_ARCH_AMD64)
2137# if RT_INLINE_ASM_USES_INTRIN
2138 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2139
2140# elif RT_INLINE_ASM_GNU_STYLE
2141 __asm__ __volatile__("xchgq %0, %1\n\t"
2142 : "=m" (*pu64),
2143 "=r" (u64)
2144 : "1" (u64));
2145# else
2146 __asm
2147 {
2148 mov rdx, [pu64]
2149 mov rax, [u64]
2150 xchg [rdx], rax
2151 mov [u64], rax
2152 }
2153# endif
2154# else /* !RT_ARCH_AMD64 */
2155# if RT_INLINE_ASM_GNU_STYLE
2156# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2157 uint32_t u32EBX = (uint32_t)u64;
2158 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2159 "xchgl %%ebx, %3\n\t"
2160 "1:\n\t"
2161 "lock; cmpxchg8b (%5)\n\t"
2162 "jnz 1b\n\t"
2163 "movl %3, %%ebx\n\t"
2164 /*"xchgl %%esi, %5\n\t"*/
2165 : "=A" (u64),
2166 "=m" (*pu64)
2167 : "0" (*pu64),
2168 "m" ( u32EBX ),
2169 "c" ( (uint32_t)(u64 >> 32) ),
2170 "S" (pu64) );
2171# else /* !PIC */
2172 __asm__ __volatile__("1:\n\t"
2173 "lock; cmpxchg8b %1\n\t"
2174 "jnz 1b\n\t"
2175 : "=A" (u64),
2176 "=m" (*pu64)
2177 : "0" (*pu64),
2178 "b" ( (uint32_t)u64 ),
2179 "c" ( (uint32_t)(u64 >> 32) ));
2180# endif
2181# else
2182 __asm
2183 {
2184 mov ebx, dword ptr [u64]
2185 mov ecx, dword ptr [u64 + 4]
2186 mov edi, pu64
2187 mov eax, dword ptr [edi]
2188 mov edx, dword ptr [edi + 4]
2189 retry:
2190 lock cmpxchg8b [edi]
2191 jnz retry
2192 mov dword ptr [u64], eax
2193 mov dword ptr [u64 + 4], edx
2194 }
2195# endif
2196# endif /* !RT_ARCH_AMD64 */
2197 return u64;
2198}
2199#endif
2200
2201
2202/**
2203 * Atomically Exchange a signed 64-bit value, ordered.
2204 *
2205 * @returns Current *pi64 value
2206 * @param pi64 Pointer to the 64-bit variable to update.
2207 * @param i64 The 64-bit value to assign to *pi64.
2208 */
2209DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2210{
2211 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2212}
2213
2214
2215#ifdef RT_ARCH_AMD64
2216/**
2217 * Atomically Exchange an unsigned 128-bit value, ordered.
2218 *
2219 * @returns Current *pu128.
2220 * @param pu128 Pointer to the 128-bit variable to update.
2221 * @param u128 The 128-bit value to assign to *pu128.
2222 *
2223 * @remark We cannot really assume that any hardware supports this. Nor do I have
2224 * GAS support for it. So, for the time being we'll BREAK the atomic
2225 * bit of this function and use two 64-bit exchanges instead.
2226 */
2227# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2228DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2229# else
2230DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2231{
2232 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2233 {
2234 /** @todo this is clumsy code */
2235 RTUINT128U u128Ret;
2236 u128Ret.u = u128;
2237 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2238 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2239 return u128Ret.u;
2240 }
2241#if 0 /* later? */
2242 else
2243 {
2244# if RT_INLINE_ASM_GNU_STYLE
2245 __asm__ __volatile__("1:\n\t"
2246 "lock; cmpxchg8b %1\n\t"
2247 "jnz 1b\n\t"
2248 : "=A" (u128),
2249 "=m" (*pu128)
2250 : "0" (*pu128),
2251 "b" ( (uint64_t)u128 ),
2252 "c" ( (uint64_t)(u128 >> 64) ));
2253# else
2254 __asm
2255 {
2256 mov rbx, dword ptr [u128]
2257 mov rcx, dword ptr [u128 + 8]
2258 mov rdi, pu128
2259 mov rax, dword ptr [rdi]
2260 mov rdx, dword ptr [rdi + 8]
2261 retry:
2262 lock cmpxchg16b [rdi]
2263 jnz retry
2264 mov dword ptr [u128], rax
2265 mov dword ptr [u128 + 8], rdx
2266 }
2267# endif
2268 }
2269 return u128;
2270#endif
2271}
2272# endif
2273#endif /* RT_ARCH_AMD64 */
2274
2275
2276/**
2277 * Atomically Exchange a pointer value, ordered.
2278 *
2279 * @returns Current *ppv value
2280 * @param ppv Pointer to the pointer variable to update.
2281 * @param pv The pointer value to assign to *ppv.
2282 */
2283DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2284{
2285#if ARCH_BITS == 32
2286 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2287#elif ARCH_BITS == 64
2288 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2289#else
2290# error "ARCH_BITS is bogus"
2291#endif
2292}
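/** @par Usage example (illustrative sketch)
 *  Atomically taking ownership of a published object, leaving NULL behind so
 *  other threads can see it is gone; the slot and ConsumeObject() are
 *  hypothetical.
 *  @code
 *  static void * volatile s_pvPending;
 *  ...
 *  void *pvMine = ASMAtomicXchgPtr(&s_pvPending, NULL);
 *  if (pvMine)
 *      ConsumeObject(pvMine);
 *  @endcode
 */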
2293
2294
2295/** @def ASMAtomicXchgHandle
2296 * Atomically Exchange a typical IPRT handle value, ordered.
2297 *
2298 * @param ph Pointer to the value to update.
2299 * @param hNew The new value to assign to *ph.
2300 * @param phRes Where to store the current *ph value.
2301 *
2302 * @remarks This doesn't currently work for all handles (like RTFILE).
2303 */
2304#define ASMAtomicXchgHandle(ph, hNew, phRes) \
2305 do { \
2306 *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (void *)(hNew)); \
2307 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2308 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
2309 } while (0)
2310
2311
2312/**
2313 * Atomically Exchange a value which size might differ
2314 * between platforms or compilers, ordered.
2315 *
2316 * @param pu Pointer to the variable to update.
2317 * @param uNew The value to assign to *pu.
2318 * @todo This is busted as it's missing the result argument.
2319 */
2320#define ASMAtomicXchgSize(pu, uNew) \
2321 do { \
2322 switch (sizeof(*(pu))) { \
2323 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2324 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2325 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2326 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2327 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2328 } \
2329 } while (0)
2330
2331/**
2332 * Atomically Exchange a value which size might differ
2333 * between platforms or compilers, ordered.
2334 *
2335 * @param pu Pointer to the variable to update.
2336 * @param uNew The value to assign to *pu.
2337 * @param puRes Where to store the current *pu value.
2338 */
2339#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
2340 do { \
2341 switch (sizeof(*(pu))) { \
2342 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2343 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2344 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2345 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2346 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2347 } \
2348 } while (0)
2349
2350
2351/**
2352 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2353 *
2354 * @returns true if xchg was done.
2355 * @returns false if xchg wasn't done.
2356 *
2357 * @param pu32 Pointer to the value to update.
2358 * @param u32New The new value to assign to *pu32.
2359 * @param u32Old The old value to compare *pu32 with.
2360 */
2361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2362DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2363#else
2364DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2365{
2366# if RT_INLINE_ASM_GNU_STYLE
2367 uint8_t u8Ret;
2368 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2369 "setz %1\n\t"
2370 : "=m" (*pu32),
2371 "=qm" (u8Ret),
2372 "=a" (u32Old)
2373 : "r" (u32New),
2374 "2" (u32Old));
2375 return (bool)u8Ret;
2376
2377# elif RT_INLINE_ASM_USES_INTRIN
2378 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2379
2380# else
2381 uint32_t u32Ret;
2382 __asm
2383 {
2384# ifdef RT_ARCH_AMD64
2385 mov rdx, [pu32]
2386# else
2387 mov edx, [pu32]
2388# endif
2389 mov eax, [u32Old]
2390 mov ecx, [u32New]
2391# ifdef RT_ARCH_AMD64
2392 lock cmpxchg [rdx], ecx
2393# else
2394 lock cmpxchg [edx], ecx
2395# endif
2396 setz al
2397 movzx eax, al
2398 mov [u32Ret], eax
2399 }
2400 return !!u32Ret;
2401# endif
2402}
2403#endif
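
/* Illustrative usage sketch (not an IPRT API): a minimal spinlock acquire
 * built on ASMAtomicCmpXchgU32.  The 0 = free / 1 = held encoding and the
 * function name are assumptions made for the example. */
#if 0 /* example only */
static void ExampleSpinAcquire(volatile uint32_t *pu32Lock)
{
    /* Keep retrying until the lock word moves from 0 (free) to 1 (held). */
    while (!ASMAtomicCmpXchgU32(pu32Lock, 1 /* new */, 0 /* old */))
        /* spin */;
}
#endif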
2404
2405
2406/**
2407 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2408 *
2409 * @returns true if xchg was done.
2410 * @returns false if xchg wasn't done.
2411 *
2412 * @param pi32 Pointer to the value to update.
2413 * @param i32New The new value to assign to *pi32.
2414 * @param i32Old The old value to compare *pi32 with.
2415 */
2416DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2417{
2418 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2419}
2420
2421
2422/**
2423 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2424 *
2425 * @returns true if xchg was done.
2426 * @returns false if xchg wasn't done.
2427 *
2428 * @param pu64 Pointer to the 64-bit variable to update.
2429 * @param u64New The 64-bit value to assign to *pu64.
2430 * @param u64Old The value to compare with.
2431 */
2432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2433DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2434#else
2435DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2436{
2437# if RT_INLINE_ASM_USES_INTRIN
2438 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2439
2440# elif defined(RT_ARCH_AMD64)
2441# if RT_INLINE_ASM_GNU_STYLE
2442 uint8_t u8Ret;
2443 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2444 "setz %1\n\t"
2445 : "=m" (*pu64),
2446 "=qm" (u8Ret),
2447 "=a" (u64Old)
2448 : "r" (u64New),
2449 "2" (u64Old));
2450 return (bool)u8Ret;
2451# else
2452 bool fRet;
2453 __asm
2454 {
2455 mov rdx, [pu64]
2456 mov rax, [u64Old]
2457 mov rcx, [u64New]
2458 lock cmpxchg [rdx], rcx
2459 setz al
2460 mov [fRet], al
2461 }
2462 return fRet;
2463# endif
2464# else /* !RT_ARCH_AMD64 */
2465 uint32_t u32Ret;
2466# if RT_INLINE_ASM_GNU_STYLE
2467# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2468 uint32_t u32EBX = (uint32_t)u64New;
2469 uint32_t u32Spill;
2470 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2471 "lock; cmpxchg8b (%6)\n\t"
2472 "setz %%al\n\t"
2473 "movl %4, %%ebx\n\t"
2474 "movzbl %%al, %%eax\n\t"
2475 : "=a" (u32Ret),
2476 "=d" (u32Spill),
2477 "=m" (*pu64)
2478 : "A" (u64Old),
2479 "m" ( u32EBX ),
2480 "c" ( (uint32_t)(u64New >> 32) ),
2481 "S" (pu64) );
2482# else /* !PIC */
2483 uint32_t u32Spill;
2484 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2485 "setz %%al\n\t"
2486 "movzbl %%al, %%eax\n\t"
2487 : "=a" (u32Ret),
2488 "=d" (u32Spill),
2489 "=m" (*pu64)
2490 : "A" (u64Old),
2491 "b" ( (uint32_t)u64New ),
2492 "c" ( (uint32_t)(u64New >> 32) ));
2493# endif
2494 return (bool)u32Ret;
2495# else
2496 __asm
2497 {
2498 mov ebx, dword ptr [u64New]
2499 mov ecx, dword ptr [u64New + 4]
2500 mov edi, [pu64]
2501 mov eax, dword ptr [u64Old]
2502 mov edx, dword ptr [u64Old + 4]
2503 lock cmpxchg8b [edi]
2504 setz al
2505 movzx eax, al
2506 mov dword ptr [u32Ret], eax
2507 }
2508 return !!u32Ret;
2509# endif
2510# endif /* !RT_ARCH_AMD64 */
2511}
2512#endif
2513
2514
2515/**
2516 * Atomically Compare and exchange a signed 64-bit value, ordered.
2517 *
2518 * @returns true if xchg was done.
2519 * @returns false if xchg wasn't done.
2520 *
2521 * @param pi64 Pointer to the 64-bit variable to update.
2522 * @param i64 The 64-bit value to assign to *pi64.
2523 * @param i64Old The value to compare with.
2524 */
2525DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2526{
2527 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2528}
2529
2530
2531/**
2532 * Atomically Compare and Exchange a pointer value, ordered.
2533 *
2534 * @returns true if xchg was done.
2535 * @returns false if xchg wasn't done.
2536 *
2537 * @param ppv Pointer to the value to update.
2538 * @param pvNew The new value to assign to *ppv.
2539 * @param pvOld The old value to compare *ppv with.
2540 */
2541DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2542{
2543#if ARCH_BITS == 32
2544 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2545#elif ARCH_BITS == 64
2546 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2547#else
2548# error "ARCH_BITS is bogus"
2549#endif
2550}
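
/* Illustrative usage sketch (not an IPRT API): pushing a node onto a lock-free
 * LIFO with a compare-and-exchange retry loop.  The node type and function
 * name are hypothetical, and ABA issues are ignored for brevity. */
#if 0 /* example only */
typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;

static void ExampleLifoPush(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode)
{
    void *pvHead;
    do
    {
        /* Snapshot the head, link the new node in front of it, then try to
           publish the node; retry if somebody else changed the head. */
        pvHead = ASMAtomicReadPtr((void * volatile *)ppHead);
        pNode->pNext = (EXAMPLENODE *)pvHead;
    } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pvHead));
}
#endif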
2551
2552
2553/** @def ASMAtomicCmpXchgHandle
2554 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2555 *
2556 * @param ph Pointer to the value to update.
2557 * @param hNew The new value to assign to *ph.
2558 * @param hOld The old value to compare *ph with.
2559 * @param fRc Where to store the result.
2560 *
2561 * @remarks This doesn't currently work for all handles (like RTFILE).
2562 */
2563#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
2564 do { \
2565 (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
2566 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2567 } while (0)
2568
2569
2570/** @def ASMAtomicCmpXchgSize
2571 * Atomically Compare and Exchange a value whose size might differ
2572 * between platforms or compilers, ordered.
2573 *
2574 * @param pu Pointer to the value to update.
2575 * @param uNew The new value to assign to *pu.
2576 * @param uOld The old value to compare *pu with.
2577 * @param fRc Where to store the result.
2578 */
2579#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2580 do { \
2581 switch (sizeof(*(pu))) { \
2582 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2583 break; \
2584 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2585 break; \
2586 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2587 (fRc) = false; \
2588 break; \
2589 } \
2590 } while (0)
2591
2592
2593/**
2594 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2595 * passes back old value, ordered.
2596 *
2597 * @returns true if xchg was done.
2598 * @returns false if xchg wasn't done.
2599 *
2600 * @param pu32 Pointer to the value to update.
2601 * @param u32New The new value to assign to *pu32.
2602 * @param u32Old The old value to compare *pu32 with.
2603 * @param pu32Old Pointer to store the old value at.
2604 */
2605#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2606DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2607#else
2608DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2609{
2610# if RT_INLINE_ASM_GNU_STYLE
2611 uint8_t u8Ret;
2612 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2613 "setz %1\n\t"
2614 : "=m" (*pu32),
2615 "=qm" (u8Ret),
2616 "=a" (*pu32Old)
2617 : "r" (u32New),
2618 "a" (u32Old));
2619 return (bool)u8Ret;
2620
2621# elif RT_INLINE_ASM_USES_INTRIN
2622 return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2623
2624# else
2625 uint32_t u32Ret;
2626 __asm
2627 {
2628# ifdef RT_ARCH_AMD64
2629 mov rdx, [pu32]
2630# else
2631 mov edx, [pu32]
2632# endif
2633 mov eax, [u32Old]
2634 mov ecx, [u32New]
2635# ifdef RT_ARCH_AMD64
2636 lock cmpxchg [rdx], ecx
2637 mov rdx, [pu32Old]
2638 mov [rdx], eax
2639# else
2640 lock cmpxchg [edx], ecx
2641 mov edx, [pu32Old]
2642 mov [edx], eax
2643# endif
2644 setz al
2645 movzx eax, al
2646 mov [u32Ret], eax
2647 }
2648 return !!u32Ret;
2649# endif
2650}
2651#endif
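
/* Illustrative usage sketch (not an IPRT API): an atomic "store maximum" loop.
 * The Ex variant hands back the value that was actually in memory, so a failed
 * attempt does not need a separate re-read.  The function name is
 * hypothetical. */
#if 0 /* example only */
static void ExampleAtomicMaxU32(volatile uint32_t *pu32, uint32_t u32New)
{
    uint32_t u32Cur = ASMAtomicUoReadU32(pu32);
    while (   u32New > u32Cur
           && !ASMAtomicCmpXchgExU32(pu32, u32New, u32Cur, &u32Cur))
        /* u32Cur now holds the value another thread stored; retry. */;
}
#endif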
2652
2653
2654/**
2655 * Atomically Compare and Exchange a signed 32-bit value, additionally
2656 * passes back old value, ordered.
2657 *
2658 * @returns true if xchg was done.
2659 * @returns false if xchg wasn't done.
2660 *
2661 * @param pi32 Pointer to the value to update.
2662 * @param i32New The new value to assign to *pi32.
2663 * @param i32Old The old value to compare *pi32 with.
2664 * @param pi32Old Pointer to store the old value at.
2665 */
2666DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2667{
2668 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2669}
2670
2671
2672/**
2673 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2674 * passing back old value, ordered.
2675 *
2676 * @returns true if xchg was done.
2677 * @returns false if xchg wasn't done.
2678 *
2679 * @param pu64 Pointer to the 64-bit variable to update.
2680 * @param u64New The 64-bit value to assign to *pu64.
2681 * @param u64Old The value to compare with.
2682 * @param pu64Old Pointer to store the old value at.
2683 */
2684#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2685DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2686#else
2687DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2688{
2689# if RT_INLINE_ASM_USES_INTRIN
2690 return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2691
2692# elif defined(RT_ARCH_AMD64)
2693# if RT_INLINE_ASM_GNU_STYLE
2694 uint8_t u8Ret;
2695 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2696 "setz %1\n\t"
2697 : "=m" (*pu64),
2698 "=qm" (u8Ret),
2699 "=a" (*pu64Old)
2700 : "r" (u64New),
2701 "a" (u64Old));
2702 return (bool)u8Ret;
2703# else
2704 bool fRet;
2705 __asm
2706 {
2707 mov rdx, [pu64]
2708 mov rax, [u64Old]
2709 mov rcx, [u64New]
2710 lock cmpxchg [rdx], rcx
2711 mov rdx, [pu64Old]
2712 mov [rdx], rax
2713 setz al
2714 mov [fRet], al
2715 }
2716 return fRet;
2717# endif
2718# else /* !RT_ARCH_AMD64 */
2719# if RT_INLINE_ASM_GNU_STYLE
2720 uint64_t u64Ret;
2721# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2722 /* NB: this code uses a memory clobber description, because the clean
2723 * solution with an output value for *pu64 makes gcc run out of registers.
2724 * This will cause suboptimal code, and anyone with a better solution is
2725 * welcome to improve this. */
2726 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2727 "lock; cmpxchg8b %3\n\t"
2728 "xchgl %%ebx, %1\n\t"
2729 : "=A" (u64Ret)
2730 : "DS" ((uint32_t)u64New),
2731 "c" ((uint32_t)(u64New >> 32)),
2732 "m" (*pu64),
2733 "0" (u64Old)
2734 : "memory" );
2735# else /* !PIC */
2736 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2737 : "=A" (u64Ret),
2738 "=m" (*pu64)
2739 : "b" ((uint32_t)u64New),
2740 "c" ((uint32_t)(u64New >> 32)),
2741 "m" (*pu64),
2742 "0" (u64Old));
2743# endif
2744 *pu64Old = u64Ret;
2745 return u64Ret == u64Old;
2746# else
2747 uint32_t u32Ret;
2748 __asm
2749 {
2750 mov ebx, dword ptr [u64New]
2751 mov ecx, dword ptr [u64New + 4]
2752 mov edi, [pu64]
2753 mov eax, dword ptr [u64Old]
2754 mov edx, dword ptr [u64Old + 4]
2755 lock cmpxchg8b [edi]
2756 mov ebx, [pu64Old]
2757 mov [ebx], eax
2758 setz al
2759 movzx eax, al
2760 add ebx, 4
2761 mov [ebx], edx
2762 mov dword ptr [u32Ret], eax
2763 }
2764 return !!u32Ret;
2765# endif
2766# endif /* !RT_ARCH_AMD64 */
2767}
2768#endif
2769
2770
2771/**
2772 * Atomically Compare and exchange a signed 64-bit value, additionally
2773 * passing back old value, ordered.
2774 *
2775 * @returns true if xchg was done.
2776 * @returns false if xchg wasn't done.
2777 *
2778 * @param pi64 Pointer to the 64-bit variable to update.
2779 * @param i64 The 64-bit value to assign to *pi64.
2780 * @param i64Old The value to compare with.
2781 * @param pi64Old Pointer to store the old value at.
2782 */
2783DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2784{
2785 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2786}
2787
2788/** @def ASMAtomicCmpXchgExHandle
2789 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2790 *
2791 * @param ph Pointer to the value to update.
2792 * @param hNew The new value to assign to *ph.
2793 * @param hOld The old value to compare *ph with.
2794 * @param fRc Where to store the result.
2795 * @param phOldVal Pointer to where to store the old value.
2796 *
2797 * @remarks This doesn't currently work for all handles (like RTFILE).
2798 */
2799#if ARCH_BITS == 32
2800# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2801 do { \
2802 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
2803 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2804 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
2805 } while (0)
2806#elif ARCH_BITS == 64
2807# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2808 do { \
2809 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
2810 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2811 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
2812 } while (0)
2813#endif
2814
2815
2816/** @def ASMAtomicCmpXchgExSize
2817 * Atomically Compare and Exchange a value whose size might differ
2818 * between platforms or compilers. Additionally passes back old value.
2819 *
2820 * @param pu Pointer to the value to update.
2821 * @param uNew The new value to assign to *pu.
2822 * @param uOld The old value to compare *pu with.
2823 * @param fRc Where to store the result.
2824 * @param puOldVal Pointer to where to store the old value.
2825 */
2826#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
2827 do { \
2828 switch (sizeof(*(pu))) { \
2829 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
2830 break; \
2831 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
2832 break; \
2833 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2834 (fRc) = false; \
2835 *(puOldVal) = 0; \
2836 break; \
2837 } \
2838 } while (0)
2839
2840
2841/**
2842 * Atomically Compare and Exchange a pointer value, additionally
2843 * passing back old value, ordered.
2844 *
2845 * @returns true if xchg was done.
2846 * @returns false if xchg wasn't done.
2847 *
2848 * @param ppv Pointer to the value to update.
2849 * @param pvNew The new value to assign to *ppv.
2850 * @param pvOld The old value to compare *ppv with.
2851 * @param ppvOld Pointer to store the old value at.
2852 */
2853DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2854{
2855#if ARCH_BITS == 32
2856 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2857#elif ARCH_BITS == 64
2858 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2859#else
2860# error "ARCH_BITS is bogus"
2861#endif
2862}
2863
2864
2865/**
2866 * Atomically exchanges and adds to a 32-bit value, ordered.
2867 *
2868 * @returns The old value.
2869 * @param pu32 Pointer to the value.
2870 * @param u32 Number to add.
2871 */
2872#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2873DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2874#else
2875DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2876{
2877# if RT_INLINE_ASM_USES_INTRIN
2878 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2879 return u32;
2880
2881# elif RT_INLINE_ASM_GNU_STYLE
2882 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2883 : "=r" (u32),
2884 "=m" (*pu32)
2885 : "0" (u32)
2886 : "memory");
2887 return u32;
2888# else
2889 __asm
2890 {
2891 mov eax, [u32]
2892# ifdef RT_ARCH_AMD64
2893 mov rdx, [pu32]
2894 lock xadd [rdx], eax
2895# else
2896 mov edx, [pu32]
2897 lock xadd [edx], eax
2898# endif
2899 mov [u32], eax
2900 }
2901 return u32;
2902# endif
2903}
2904#endif
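
/* Illustrative usage sketch (not an IPRT API): the returned old value of the
 * exchange-and-add hands out consecutive slots to concurrent producers.  The
 * ring-size handling and names are assumptions. */
#if 0 /* example only */
static uint32_t ExampleGrabSlot(volatile uint32_t *pu32NextIdx, uint32_t cSlots)
{
    /* The old value belongs to this caller; others already see the bumped index. */
    uint32_t iSlot = ASMAtomicAddU32(pu32NextIdx, 1);
    return iSlot % cSlots;
}
#endif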
2905
2906
2907/**
2908 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2909 *
2910 * @returns The old value.
2911 * @param pi32 Pointer to the value.
2912 * @param i32 Number to add.
2913 */
2914DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2915{
2916 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2917}
2918
2919
2920/**
2921 * Atomically increment a 32-bit value, ordered.
2922 *
2923 * @returns The new value.
2924 * @param pu32 Pointer to the value to increment.
2925 */
2926#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2927DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2928#else
2929DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2930{
2931 uint32_t u32;
2932# if RT_INLINE_ASM_USES_INTRIN
2933 u32 = _InterlockedIncrement((long *)pu32);
2934 return u32;
2935
2936# elif RT_INLINE_ASM_GNU_STYLE
2937 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2938 : "=r" (u32),
2939 "=m" (*pu32)
2940 : "0" (1)
2941 : "memory");
2942 return u32+1;
2943# else
2944 __asm
2945 {
2946 mov eax, 1
2947# ifdef RT_ARCH_AMD64
2948 mov rdx, [pu32]
2949 lock xadd [rdx], eax
2950# else
2951 mov edx, [pu32]
2952 lock xadd [edx], eax
2953# endif
2954 mov u32, eax
2955 }
2956 return u32+1;
2957# endif
2958}
2959#endif
2960
2961
2962/**
2963 * Atomically increment a signed 32-bit value, ordered.
2964 *
2965 * @returns The new value.
2966 * @param pi32 Pointer to the value to increment.
2967 */
2968DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2969{
2970 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2971}
2972
2973
2974/**
2975 * Atomically decrement an unsigned 32-bit value, ordered.
2976 *
2977 * @returns The new value.
2978 * @param pu32 Pointer to the value to decrement.
2979 */
2980#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2981DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2982#else
2983DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2984{
2985 uint32_t u32;
2986# if RT_INLINE_ASM_USES_INTRIN
2987 u32 = _InterlockedDecrement((long *)pu32);
2988 return u32;
2989
2990# elif RT_INLINE_ASM_GNU_STYLE
2991 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2992 : "=r" (u32),
2993 "=m" (*pu32)
2994 : "0" (-1)
2995 : "memory");
2996 return u32-1;
2997# else
2998 __asm
2999 {
3000 mov eax, -1
3001# ifdef RT_ARCH_AMD64
3002 mov rdx, [pu32]
3003 lock xadd [rdx], eax
3004# else
3005 mov edx, [pu32]
3006 lock xadd [edx], eax
3007# endif
3008 mov u32, eax
3009 }
3010 return u32-1;
3011# endif
3012}
3013#endif
3014
3015
3016/**
3017 * Atomically decrement a signed 32-bit value, ordered.
3018 *
3019 * @returns The new value.
3020 * @param pi32 Pointer to the value to decrement.
3021 */
3022DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3023{
3024 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3025}
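
/* Illustrative usage sketch (not an IPRT API): releasing a reference count.
 * Because ASMAtomicDecU32 returns the new value, exactly one caller observes
 * the drop to zero and should perform the destruction.  The structure is
 * hypothetical. */
#if 0 /* example only */
typedef struct EXAMPLEOBJ { volatile uint32_t cRefs; /* +more fields */ } EXAMPLEOBJ;

static bool ExampleRelease(EXAMPLEOBJ *pObj)
{
    /* Returns true for the one caller that must free the object. */
    return ASMAtomicDecU32(&pObj->cRefs) == 0;
}
#endif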
3026
3027
3028/**
3029 * Atomically Or an unsigned 32-bit value, ordered.
3030 *
3031 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3032 * @param u32 The value to OR *pu32 with.
3033 */
3034#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3035DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3036#else
3037DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3038{
3039# if RT_INLINE_ASM_USES_INTRIN
3040 _InterlockedOr((long volatile *)pu32, (long)u32);
3041
3042# elif RT_INLINE_ASM_GNU_STYLE
3043 __asm__ __volatile__("lock; orl %1, %0\n\t"
3044 : "=m" (*pu32)
3045 : "ir" (u32));
3046# else
3047 __asm
3048 {
3049 mov eax, [u32]
3050# ifdef RT_ARCH_AMD64
3051 mov rdx, [pu32]
3052 lock or [rdx], eax
3053# else
3054 mov edx, [pu32]
3055 lock or [edx], eax
3056# endif
3057 }
3058# endif
3059}
3060#endif
3061
3062
3063/**
3064 * Atomically Or a signed 32-bit value, ordered.
3065 *
3066 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3067 * @param i32 The value to OR *pi32 with.
3068 */
3069DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3070{
3071 ASMAtomicOrU32((uint32_t volatile *)pi32, (uint32_t)i32);
3072}
3073
3074
3075/**
3076 * Atomically And an unsigned 32-bit value, ordered.
3077 *
3078 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3079 * @param u32 The value to AND *pu32 with.
3080 */
3081#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3082DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3083#else
3084DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3085{
3086# if RT_INLINE_ASM_USES_INTRIN
3087 _InterlockedAnd((long volatile *)pu32, u32);
3088
3089# elif RT_INLINE_ASM_GNU_STYLE
3090 __asm__ __volatile__("lock; andl %1, %0\n\t"
3091 : "=m" (*pu32)
3092 : "ir" (u32));
3093# else
3094 __asm
3095 {
3096 mov eax, [u32]
3097# ifdef RT_ARCH_AMD64
3098 mov rdx, [pu32]
3099 lock and [rdx], eax
3100# else
3101 mov edx, [pu32]
3102 lock and [edx], eax
3103# endif
3104 }
3105# endif
3106}
3107#endif
3108
3109
3110/**
3111 * Atomically And a signed 32-bit value, ordered.
3112 *
3113 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3114 * @param i32 The value to AND *pi32 with.
3115 */
3116DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3117{
3118 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3119}
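
/* Illustrative usage sketch (not an IPRT API): setting and clearing a bit in
 * a shared flag word with the atomic OR/AND helpers.  The flag value is an
 * arbitrary example. */
#if 0 /* example only */
#define EXAMPLE_FLAG_BUSY   UINT32_C(0x00000001)

static void ExampleSetBusy(volatile uint32_t *pfFlags)
{
    ASMAtomicOrU32(pfFlags, EXAMPLE_FLAG_BUSY);
}

static void ExampleClearBusy(volatile uint32_t *pfFlags)
{
    ASMAtomicAndU32(pfFlags, ~EXAMPLE_FLAG_BUSY);
}
#endif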
3120
3121
3122/**
3123 * Memory fence, waits for any pending writes and reads to complete.
3124 */
3125DECLINLINE(void) ASMMemoryFence(void)
3126{
3127 /** @todo use mfence? check if all cpus we care for support it. */
3128 uint32_t volatile u32;
3129 ASMAtomicXchgU32(&u32, 0);
3130}
3131
3132
3133/**
3134 * Write fence, waits for any pending writes to complete.
3135 */
3136DECLINLINE(void) ASMWriteFence(void)
3137{
3138 /** @todo use sfence? check if all cpus we care for support it. */
3139 ASMMemoryFence();
3140}
3141
3142
3143/**
3144 * Read fence, waits for any pending reads to complete.
3145 */
3146DECLINLINE(void) ASMReadFence(void)
3147{
3148 /** @todo use lfence? check if all cpus we care for support it. */
3149 ASMMemoryFence();
3150}
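
/* Illustrative usage sketch (not an IPRT API): the classic publish/consume
 * pairing of the write and read fences.  The data word and ready flag are
 * hypothetical; the ordered ASMAtomicWriteU32 and ASMAtomicReadU32 helpers
 * further down give the same ordering without explicit fences. */
#if 0 /* example only */
static void ExamplePublish(volatile uint32_t *pu32Data, volatile bool *pfReady)
{
    *pu32Data = 42;     /* produce the data first... */
    ASMWriteFence();
    *pfReady = true;    /* ...then make it visible. */
}

static uint32_t ExampleConsume(volatile uint32_t *pu32Data, volatile bool *pfReady)
{
    while (!*pfReady)
        /* poll */;
    ASMReadFence();     /* don't read the data before the flag. */
    return *pu32Data;
}
#endif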
3151
3152
3153/**
3154 * Atomically reads an unsigned 8-bit value, ordered.
3155 *
3156 * @returns Current *pu8 value
3157 * @param pu8 Pointer to the 8-bit variable to read.
3158 */
3159DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3160{
3161 ASMMemoryFence();
3162 return *pu8; /* byte reads are atomic on x86 */
3163}
3164
3165
3166/**
3167 * Atomically reads an unsigned 8-bit value, unordered.
3168 *
3169 * @returns Current *pu8 value
3170 * @param pu8 Pointer to the 8-bit variable to read.
3171 */
3172DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3173{
3174 return *pu8; /* byte reads are atomic on x86 */
3175}
3176
3177
3178/**
3179 * Atomically reads a signed 8-bit value, ordered.
3180 *
3181 * @returns Current *pi8 value
3182 * @param pi8 Pointer to the 8-bit variable to read.
3183 */
3184DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3185{
3186 ASMMemoryFence();
3187 return *pi8; /* byte reads are atomic on x86 */
3188}
3189
3190
3191/**
3192 * Atomically reads a signed 8-bit value, unordered.
3193 *
3194 * @returns Current *pi8 value
3195 * @param pi8 Pointer to the 8-bit variable to read.
3196 */
3197DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3198{
3199 return *pi8; /* byte reads are atomic on x86 */
3200}
3201
3202
3203/**
3204 * Atomically reads an unsigned 16-bit value, ordered.
3205 *
3206 * @returns Current *pu16 value
3207 * @param pu16 Pointer to the 16-bit variable to read.
3208 */
3209DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3210{
3211 ASMMemoryFence();
3212 Assert(!((uintptr_t)pu16 & 1));
3213 return *pu16;
3214}
3215
3216
3217/**
3218 * Atomically reads an unsigned 16-bit value, unordered.
3219 *
3220 * @returns Current *pu16 value
3221 * @param pu16 Pointer to the 16-bit variable to read.
3222 */
3223DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3224{
3225 Assert(!((uintptr_t)pu16 & 1));
3226 return *pu16;
3227}
3228
3229
3230/**
3231 * Atomically reads a signed 16-bit value, ordered.
3232 *
3233 * @returns Current *pi16 value
3234 * @param pi16 Pointer to the 16-bit variable to read.
3235 */
3236DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3237{
3238 ASMMemoryFence();
3239 Assert(!((uintptr_t)pi16 & 1));
3240 return *pi16;
3241}
3242
3243
3244/**
3245 * Atomically reads a signed 16-bit value, unordered.
3246 *
3247 * @returns Current *pi16 value
3248 * @param pi16 Pointer to the 16-bit variable to read.
3249 */
3250DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3251{
3252 Assert(!((uintptr_t)pi16 & 1));
3253 return *pi16;
3254}
3255
3256
3257/**
3258 * Atomically reads an unsigned 32-bit value, ordered.
3259 *
3260 * @returns Current *pu32 value
3261 * @param pu32 Pointer to the 32-bit variable to read.
3262 */
3263DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3264{
3265 ASMMemoryFence();
3266 Assert(!((uintptr_t)pu32 & 3));
3267 return *pu32;
3268}
3269
3270
3271/**
3272 * Atomically reads an unsigned 32-bit value, unordered.
3273 *
3274 * @returns Current *pu32 value
3275 * @param pu32 Pointer to the 32-bit variable to read.
3276 */
3277DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3278{
3279 Assert(!((uintptr_t)pu32 & 3));
3280 return *pu32;
3281}
3282
3283
3284/**
3285 * Atomically reads a signed 32-bit value, ordered.
3286 *
3287 * @returns Current *pi32 value
3288 * @param pi32 Pointer to the 32-bit variable to read.
3289 */
3290DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3291{
3292 ASMMemoryFence();
3293 Assert(!((uintptr_t)pi32 & 3));
3294 return *pi32;
3295}
3296
3297
3298/**
3299 * Atomically reads a signed 32-bit value, unordered.
3300 *
3301 * @returns Current *pi32 value
3302 * @param pi32 Pointer to the 32-bit variable to read.
3303 */
3304DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3305{
3306 Assert(!((uintptr_t)pi32 & 3));
3307 return *pi32;
3308}
3309
3310
3311/**
3312 * Atomically reads an unsigned 64-bit value, ordered.
3313 *
3314 * @returns Current *pu64 value
3315 * @param pu64 Pointer to the 64-bit variable to read.
3316 * The memory pointed to must be writable.
3317 * @remark This will fault if the memory is read-only!
3318 */
3319#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3320DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3321#else
3322DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3323{
3324 uint64_t u64;
3325# ifdef RT_ARCH_AMD64
3326 Assert(!((uintptr_t)pu64 & 7));
3327/*# if RT_INLINE_ASM_GNU_STYLE
3328 __asm__ __volatile__( "mfence\n\t"
3329 "movq %1, %0\n\t"
3330 : "=r" (u64)
3331 : "m" (*pu64));
3332# else
3333 __asm
3334 {
3335 mfence
3336 mov rdx, [pu64]
3337 mov rax, [rdx]
3338 mov [u64], rax
3339 }
3340# endif*/
3341 ASMMemoryFence();
3342 u64 = *pu64;
3343# else /* !RT_ARCH_AMD64 */
3344# if RT_INLINE_ASM_GNU_STYLE
3345# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3346 uint32_t u32EBX = 0;
3347 Assert(!((uintptr_t)pu64 & 7));
3348 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3349 "lock; cmpxchg8b (%5)\n\t"
3350 "movl %3, %%ebx\n\t"
3351 : "=A" (u64),
3352 "=m" (*pu64)
3353 : "0" (0),
3354 "m" (u32EBX),
3355 "c" (0),
3356 "S" (pu64));
3357# else /* !PIC */
3358 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3359 : "=A" (u64),
3360 "=m" (*pu64)
3361 : "0" (0),
3362 "b" (0),
3363 "c" (0));
3364# endif
3365# else
3366 Assert(!((uintptr_t)pu64 & 7));
3367 __asm
3368 {
3369 xor eax, eax
3370 xor edx, edx
3371 mov edi, pu64
3372 xor ecx, ecx
3373 xor ebx, ebx
3374 lock cmpxchg8b [edi]
3375 mov dword ptr [u64], eax
3376 mov dword ptr [u64 + 4], edx
3377 }
3378# endif
3379# endif /* !RT_ARCH_AMD64 */
3380 return u64;
3381}
3382#endif
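
/* Illustrative usage sketch (not an IPRT API): sampling a 64-bit counter that
 * another thread updates.  On 32-bit hosts the read is implemented with
 * cmpxchg8b, which is why the memory must be writable even though nothing is
 * logically modified. */
#if 0 /* example only */
static uint64_t ExampleSampleCounter(volatile uint64_t *pu64Counter)
{
    return ASMAtomicReadU64(pu64Counter); /* never returns a torn value */
}
#endif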
3383
3384
3385/**
3386 * Atomically reads an unsigned 64-bit value, unordered.
3387 *
3388 * @returns Current *pu64 value
3389 * @param pu64 Pointer to the 64-bit variable to read.
3390 * The memory pointed to must be writable.
3391 * @remark This will fault if the memory is read-only!
3392 */
3393#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3394DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3395#else
3396DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3397{
3398 uint64_t u64;
3399# ifdef RT_ARCH_AMD64
3400 Assert(!((uintptr_t)pu64 & 7));
3401/*# if RT_INLINE_ASM_GNU_STYLE
3402 Assert(!((uintptr_t)pu64 & 7));
3403 __asm__ __volatile__("movq %1, %0\n\t"
3404 : "=r" (u64)
3405 : "m" (*pu64));
3406# else
3407 __asm
3408 {
3409 mov rdx, [pu64]
3410 mov rax, [rdx]
3411 mov [u64], rax
3412 }
3413# endif */
3414 u64 = *pu64;
3415# else /* !RT_ARCH_AMD64 */
3416# if RT_INLINE_ASM_GNU_STYLE
3417# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3418 uint32_t u32EBX = 0;
3419 Assert(!((uintptr_t)pu64 & 7));
3420 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3421 "lock; cmpxchg8b (%5)\n\t"
3422 "movl %3, %%ebx\n\t"
3423 : "=A" (u64),
3424 "=m" (*pu64)
3425 : "0" (0),
3426 "m" (u32EBX),
3427 "c" (0),
3428 "S" (pu64));
3429# else /* !PIC */
3430 __asm__ __volatile__("cmpxchg8b %1\n\t"
3431 : "=A" (u64),
3432 "=m" (*pu64)
3433 : "0" (0),
3434 "b" (0),
3435 "c" (0));
3436# endif
3437# else
3438 Assert(!((uintptr_t)pu64 & 7));
3439 __asm
3440 {
3441 xor eax, eax
3442 xor edx, edx
3443 mov edi, pu64
3444 xor ecx, ecx
3445 xor ebx, ebx
3446 lock cmpxchg8b [edi]
3447 mov dword ptr [u64], eax
3448 mov dword ptr [u64 + 4], edx
3449 }
3450# endif
3451# endif /* !RT_ARCH_AMD64 */
3452 return u64;
3453}
3454#endif
3455
3456
3457/**
3458 * Atomically reads a signed 64-bit value, ordered.
3459 *
3460 * @returns Current *pi64 value
3461 * @param pi64 Pointer to the 64-bit variable to read.
3462 * The memory pointed to must be writable.
3463 * @remark This will fault if the memory is read-only!
3464 */
3465DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3466{
3467 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3468}
3469
3470
3471/**
3472 * Atomically reads a signed 64-bit value, unordered.
3473 *
3474 * @returns Current *pi64 value
3475 * @param pi64 Pointer to the 64-bit variable to read.
3476 * The memory pointed to must be writable.
3477 * @remark This will fault if the memory is read-only!
3478 */
3479DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3480{
3481 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3482}
3483
3484
3485/**
3486 * Atomically reads a pointer value, ordered.
3487 *
3488 * @returns Current *pv value
3489 * @param ppv Pointer to the pointer variable to read.
3490 */
3491DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3492{
3493#if ARCH_BITS == 32
3494 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3495#elif ARCH_BITS == 64
3496 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3497#else
3498# error "ARCH_BITS is bogus"
3499#endif
3500}
3501
3502
3503/**
3504 * Atomically reads a pointer value, unordered.
3505 *
3506 * @returns Current *pv value
3507 * @param ppv Pointer to the pointer variable to read.
3508 */
3509DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3510{
3511#if ARCH_BITS == 32
3512 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3513#elif ARCH_BITS == 64
3514 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3515#else
3516# error "ARCH_BITS is bogus"
3517#endif
3518}
3519
3520
3521/**
3522 * Atomically reads a boolean value, ordered.
3523 *
3524 * @returns Current *pf value
3525 * @param pf Pointer to the boolean variable to read.
3526 */
3527DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3528{
3529 ASMMemoryFence();
3530 return *pf; /* byte reads are atomic on x86 */
3531}
3532
3533
3534/**
3535 * Atomically reads a boolean value, unordered.
3536 *
3537 * @returns Current *pf value
3538 * @param pf Pointer to the boolean variable to read.
3539 */
3540DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3541{
3542 return *pf; /* byte reads are atomic on x86 */
3543}
3544
3545
3546/**
3547 * Atomically read a typical IPRT handle value, ordered.
3548 *
3549 * @param ph Pointer to the handle variable to read.
3550 * @param phRes Where to store the result.
3551 *
3552 * @remarks This doesn't currently work for all handles (like RTFILE).
3553 */
3554#define ASMAtomicReadHandle(ph, phRes) \
3555 do { \
3556 *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
3557 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3558 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
3559 } while (0)
3560
3561
3562/**
3563 * Atomically read a typical IPRT handle value, unordered.
3564 *
3565 * @param ph Pointer to the handle variable to read.
3566 * @param phRes Where to store the result.
3567 *
3568 * @remarks This doesn't currently work for all handles (like RTFILE).
3569 */
3570#define ASMAtomicUoReadHandle(ph, phRes) \
3571 do { \
3572 *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
3573 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3574 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
3575 } while (0)
3576
3577
3578/**
3579 * Atomically read a value whose size might differ
3580 * between platforms or compilers, ordered.
3581 *
3582 * @param pu Pointer to the variable to read.
3583 * @param puRes Where to store the result.
3584 */
3585#define ASMAtomicReadSize(pu, puRes) \
3586 do { \
3587 switch (sizeof(*(pu))) { \
3588 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3589 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
3590 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
3591 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
3592 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3593 } \
3594 } while (0)
3595
3596
3597/**
3598 * Atomically read a value whose size might differ
3599 * between platforms or compilers, unordered.
3600 *
3601 * @param pu Pointer to the variable to read.
3602 * @param puRes Where to store the result.
3603 */
3604#define ASMAtomicUoReadSize(pu, puRes) \
3605 do { \
3606 switch (sizeof(*(pu))) { \
3607 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3608 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
3609 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
3610 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
3611 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3612 } \
3613 } while (0)
3614
3615
3616/**
3617 * Atomically writes an unsigned 8-bit value, ordered.
3618 *
3619 * @param pu8 Pointer to the 8-bit variable.
3620 * @param u8 The 8-bit value to assign to *pu8.
3621 */
3622DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3623{
3624 ASMAtomicXchgU8(pu8, u8);
3625}
3626
3627
3628/**
3629 * Atomically writes an unsigned 8-bit value, unordered.
3630 *
3631 * @param pu8 Pointer to the 8-bit variable.
3632 * @param u8 The 8-bit value to assign to *pu8.
3633 */
3634DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3635{
3636 *pu8 = u8; /* byte writes are atomic on x86 */
3637}
3638
3639
3640/**
3641 * Atomically writes a signed 8-bit value, ordered.
3642 *
3643 * @param pi8 Pointer to the 8-bit variable.
3644 * @param i8 The 8-bit value to assign to *pi8.
3645 */
3646DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3647{
3648 ASMAtomicXchgS8(pi8, i8);
3649}
3650
3651
3652/**
3653 * Atomically writes a signed 8-bit value, unordered.
3654 *
3655 * @param pi8 Pointer to the 8-bit variable.
3656 * @param i8 The 8-bit value to assign to *pi8.
3657 */
3658DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3659{
3660 *pi8 = i8; /* byte writes are atomic on x86 */
3661}
3662
3663
3664/**
3665 * Atomically writes an unsigned 16-bit value, ordered.
3666 *
3667 * @param pu16 Pointer to the 16-bit variable.
3668 * @param u16 The 16-bit value to assign to *pu16.
3669 */
3670DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3671{
3672 ASMAtomicXchgU16(pu16, u16);
3673}
3674
3675
3676/**
3677 * Atomically writes an unsigned 16-bit value, unordered.
3678 *
3679 * @param pu16 Pointer to the 16-bit variable.
3680 * @param u16 The 16-bit value to assign to *pu16.
3681 */
3682DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3683{
3684 Assert(!((uintptr_t)pu16 & 1));
3685 *pu16 = u16;
3686}
3687
3688
3689/**
3690 * Atomically writes a signed 16-bit value, ordered.
3691 *
3692 * @param pi16 Pointer to the 16-bit variable.
3693 * @param i16 The 16-bit value to assign to *pi16.
3694 */
3695DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3696{
3697 ASMAtomicXchgS16(pi16, i16);
3698}
3699
3700
3701/**
3702 * Atomically writes a signed 16-bit value, unordered.
3703 *
3704 * @param pi16 Pointer to the 16-bit variable.
3705 * @param i16 The 16-bit value to assign to *pi16.
3706 */
3707DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
3708{
3709 Assert(!((uintptr_t)pi16 & 1));
3710 *pi16 = i16;
3711}
3712
3713
3714/**
3715 * Atomically writes an unsigned 32-bit value, ordered.
3716 *
3717 * @param pu32 Pointer to the 32-bit variable.
3718 * @param u32 The 32-bit value to assign to *pu32.
3719 */
3720DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3721{
3722 ASMAtomicXchgU32(pu32, u32);
3723}
3724
3725
3726/**
3727 * Atomically writes an unsigned 32-bit value, unordered.
3728 *
3729 * @param pu32 Pointer to the 32-bit variable.
3730 * @param u32 The 32-bit value to assign to *pu32.
3731 */
3732DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
3733{
3734 Assert(!((uintptr_t)pu32 & 3));
3735 *pu32 = u32;
3736}
3737
3738
3739/**
3740 * Atomically writes a signed 32-bit value, ordered.
3741 *
3742 * @param pi32 Pointer to the 32-bit variable.
3743 * @param i32 The 32-bit value to assign to *pi32.
3744 */
3745DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3746{
3747 ASMAtomicXchgS32(pi32, i32);
3748}
3749
3750
3751/**
3752 * Atomically writes a signed 32-bit value, unordered.
3753 *
3754 * @param pi32 Pointer to the 32-bit variable.
3755 * @param i32 The 32-bit value to assign to *pi32.
3756 */
3757DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
3758{
3759 Assert(!((uintptr_t)pi32 & 3));
3760 *pi32 = i32;
3761}
3762
3763
3764/**
3765 * Atomically writes an unsigned 64-bit value, ordered.
3766 *
3767 * @param pu64 Pointer to the 64-bit variable.
3768 * @param u64 The 64-bit value to assign to *pu64.
3769 */
3770DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3771{
3772 ASMAtomicXchgU64(pu64, u64);
3773}
3774
3775
3776/**
3777 * Atomically writes an unsigned 64-bit value, unordered.
3778 *
3779 * @param pu64 Pointer to the 64-bit variable.
3780 * @param u64 The 64-bit value to assign to *pu64.
3781 */
3782DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
3783{
3784 Assert(!((uintptr_t)pu64 & 7));
3785#if ARCH_BITS == 64
3786 *pu64 = u64;
3787#else
3788 ASMAtomicXchgU64(pu64, u64);
3789#endif
3790}
3791
3792
3793/**
3794 * Atomically writes a signed 64-bit value, ordered.
3795 *
3796 * @param pi64 Pointer to the 64-bit variable.
3797 * @param i64 The 64-bit value to assign to *pi64.
3798 */
3799DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3800{
3801 ASMAtomicXchgS64(pi64, i64);
3802}
3803
3804
3805/**
3806 * Atomically writes a signed 64-bit value, unordered.
3807 *
3808 * @param pi64 Pointer to the 64-bit variable.
3809 * @param i64 The 64-bit value to assign to *pi64.
3810 */
3811DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
3812{
3813 Assert(!((uintptr_t)pi64 & 7));
3814#if ARCH_BITS == 64
3815 *pi64 = i64;
3816#else
3817 ASMAtomicXchgS64(pi64, i64);
3818#endif
3819}
3820
3821
3822/**
3823 * Atomically writes a boolean value, ordered.
3824 *
3825 * @param pf Pointer to the boolean variable.
3826 * @param f The boolean value to assign to *pf.
3827 */
3828DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
3829{
3830 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
3831}
3832
3833
3834/**
3835 * Atomically writes a boolean value, unordered.
3836 *
3837 * @param pf Pointer to the boolean variable.
3838 * @param f The boolean value to assign to *pf.
3839 */
3840DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
3841{
3842 *pf = f; /* byte writes are atomic on x86 */
3843}
3844
3845
3846/**
3847 * Atomically writes a pointer value, ordered.
3848 *
3850 * @param ppv Pointer to the pointer variable.
3851 * @param pv The pointer value to assign to *ppv.
3852 */
3853DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
3854{
3855#if ARCH_BITS == 32
3856 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3857#elif ARCH_BITS == 64
3858 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3859#else
3860# error "ARCH_BITS is bogus"
3861#endif
3862}
3863
3864
3865/**
3866 * Atomically writes a pointer value, unordered.
3867 *
3869 * @param ppv Pointer to the pointer variable.
3870 * @param pv The pointer value to assign to *ppv.
3871 */
3872DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
3873{
3874#if ARCH_BITS == 32
3875 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3876#elif ARCH_BITS == 64
3877 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3878#else
3879# error "ARCH_BITS is bogus"
3880#endif
3881}
3882
3883
3884/**
3885 * Atomically write a typical IPRT handle value, ordered.
3886 *
3887 * @param ph Pointer to the variable to update.
3888 * @param hNew The value to assign to *ph.
3889 *
3890 * @remarks This doesn't currently work for all handles (like RTFILE).
3891 */
3892#define ASMAtomicWriteHandle(ph, hNew) \
3893 do { \
3894 ASMAtomicWritePtr((void * volatile *)(ph), (void *)hNew); \
3895 AssertCompile(sizeof(*ph) == sizeof(void*)); \
3896 } while (0)
3897
3898
3899/**
3900 * Atomically write a typical IPRT handle value, unordered.
3901 *
3902 * @param ph Pointer to the variable to update.
3903 * @param hNew The value to assign to *ph.
3904 *
3905 * @remarks This doesn't currently work for all handles (like RTFILE).
3906 */
3907#define ASMAtomicUoWriteHandle(ph, hNew) \
3908 do { \
3909 ASMAtomicUoWritePtr((void * volatile *)(ph), (void *)hNew); \
3910 AssertCompile(sizeof(*ph) == sizeof(void*)); \
3911 } while (0)
3912
3913
3914/**
3915 * Atomically write a value whose size might differ
3916 * between platforms or compilers, ordered.
3917 *
3918 * @param pu Pointer to the variable to update.
3919 * @param uNew The value to assign to *pu.
3920 */
3921#define ASMAtomicWriteSize(pu, uNew) \
3922 do { \
3923 switch (sizeof(*(pu))) { \
3924 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3925 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3926 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3927 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3928 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3929 } \
3930 } while (0)
3931
3932/**
3933 * Atomically write a value whose size might differ
3934 * between platforms or compilers, unordered.
3935 *
3936 * @param pu Pointer to the variable to update.
3937 * @param uNew The value to assign to *pu.
3938 */
3939#define ASMAtomicUoWriteSize(pu, uNew) \
3940 do { \
3941 switch (sizeof(*(pu))) { \
3942 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3943 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3944 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3945 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3946 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3947 } \
3948 } while (0)
3949
3950
3951
3952
3953/**
3954 * Invalidate page.
3955 *
3956 * @param pv Address of the page to invalidate.
3957 */
3958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3959DECLASM(void) ASMInvalidatePage(void *pv);
3960#else
3961DECLINLINE(void) ASMInvalidatePage(void *pv)
3962{
3963# if RT_INLINE_ASM_USES_INTRIN
3964 __invlpg(pv);
3965
3966# elif RT_INLINE_ASM_GNU_STYLE
3967 __asm__ __volatile__("invlpg %0\n\t"
3968 : : "m" (*(uint8_t *)pv));
3969# else
3970 __asm
3971 {
3972# ifdef RT_ARCH_AMD64
3973 mov rax, [pv]
3974 invlpg [rax]
3975# else
3976 mov eax, [pv]
3977 invlpg [eax]
3978# endif
3979 }
3980# endif
3981}
3982#endif
3983
3984
3985#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3986# if PAGE_SIZE != 0x1000
3987# error "PAGE_SIZE is not 0x1000!"
3988# endif
3989#endif
3990
3991/**
3992 * Zeros a 4K memory page.
3993 *
3994 * @param pv Pointer to the memory block. This must be page aligned.
3995 */
3996#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3997DECLASM(void) ASMMemZeroPage(volatile void *pv);
3998# else
3999DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4000{
4001# if RT_INLINE_ASM_USES_INTRIN
4002# ifdef RT_ARCH_AMD64
4003 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4004# else
4005 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4006# endif
4007
4008# elif RT_INLINE_ASM_GNU_STYLE
4009 RTCCUINTREG uDummy;
4010# ifdef RT_ARCH_AMD64
4011 __asm__ __volatile__ ("rep stosq"
4012 : "=D" (pv),
4013 "=c" (uDummy)
4014 : "0" (pv),
4015 "c" (0x1000 >> 3),
4016 "a" (0)
4017 : "memory");
4018# else
4019 __asm__ __volatile__ ("rep stosl"
4020 : "=D" (pv),
4021 "=c" (uDummy)
4022 : "0" (pv),
4023 "c" (0x1000 >> 2),
4024 "a" (0)
4025 : "memory");
4026# endif
4027# else
4028 __asm
4029 {
4030# ifdef RT_ARCH_AMD64
4031 xor rax, rax
4032 mov ecx, 0200h
4033 mov rdi, [pv]
4034 rep stosq
4035# else
4036 xor eax, eax
4037 mov ecx, 0400h
4038 mov edi, [pv]
4039 rep stosd
4040# endif
4041 }
4042# endif
4043}
4044# endif
4045
4046
4047/**
4048 * Zeros a memory block with a 32-bit aligned size.
4049 *
4050 * @param pv Pointer to the memory block.
4051 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4052 */
4053#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4054DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4055#else
4056DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4057{
4058# if RT_INLINE_ASM_USES_INTRIN
4059# ifdef RT_ARCH_AMD64
4060 if (!(cb & 7))
4061 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4062 else
4063# endif
4064 __stosd((unsigned long *)pv, 0, cb / 4);
4065
4066# elif RT_INLINE_ASM_GNU_STYLE
4067 __asm__ __volatile__ ("rep stosl"
4068 : "=D" (pv),
4069 "=c" (cb)
4070 : "0" (pv),
4071 "1" (cb >> 2),
4072 "a" (0)
4073 : "memory");
4074# else
4075 __asm
4076 {
4077 xor eax, eax
4078# ifdef RT_ARCH_AMD64
4079 mov rcx, [cb]
4080 shr rcx, 2
4081 mov rdi, [pv]
4082# else
4083 mov ecx, [cb]
4084 shr ecx, 2
4085 mov edi, [pv]
4086# endif
4087 rep stosd
4088 }
4089# endif
4090}
4091#endif
4092
4093
4094/**
4095 * Fills a memory block with a 32-bit aligned size.
4096 *
4097 * @param pv Pointer to the memory block.
4098 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4099 * @param u32 The value to fill with.
4100 */
4101#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4102DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4103#else
4104DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4105{
4106# if RT_INLINE_ASM_USES_INTRIN
4107# ifdef RT_ARCH_AMD64
4108 if (!(cb & 7))
4109 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4110 else
4111# endif
4112 __stosd((unsigned long *)pv, u32, cb / 4);
4113
4114# elif RT_INLINE_ASM_GNU_STYLE
4115 __asm__ __volatile__ ("rep stosl"
4116 : "=D" (pv),
4117 "=c" (cb)
4118 : "0" (pv),
4119 "1" (cb >> 2),
4120 "a" (u32)
4121 : "memory");
4122# else
4123 __asm
4124 {
4125# ifdef RT_ARCH_AMD64
4126 mov rcx, [cb]
4127 shr rcx, 2
4128 mov rdi, [pv]
4129# else
4130 mov ecx, [cb]
4131 shr ecx, 2
4132 mov edi, [pv]
4133# endif
4134 mov eax, [u32]
4135 rep stosd
4136 }
4137# endif
4138}
4139#endif
4140
4141
4142/**
4143 * Checks if a memory block is filled with the specified byte.
4144 *
4145 * This is a sort of inverted memchr.
4146 *
4147 * @returns Pointer to the byte which doesn't equal u8.
4148 * @returns NULL if all equal to u8.
4149 *
4150 * @param pv Pointer to the memory block.
4151 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4152 * @param u8 The value it's supposed to be filled with.
4153 */
4154#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4155DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4156#else
4157DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4158{
4159/** @todo rewrite this in inline assembly? */
4160 uint8_t const *pb = (uint8_t const *)pv;
4161 for (; cb; cb--, pb++)
4162 if (RT_UNLIKELY(*pb != u8))
4163 return (void *)pb;
4164 return NULL;
4165}
4166#endif
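
/* Illustrative usage sketch (not an IPRT API): verifying that a block really
 * was cleared, e.g. in a debug assertion before handing it out. */
#if 0 /* example only */
static bool ExampleIsZeroed(const void *pv, size_t cb)
{
    return ASMMemIsAll8(pv, cb, 0) == NULL;
}
#endif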
4167
4168
4169/**
4170 * Checks if a memory block is filled with the specified 32-bit value.
4171 *
4172 * This is a sort of inverted memchr.
4173 *
4174 * @returns Pointer to the first value which doesn't equal u32.
4175 * @returns NULL if all equal to u32.
4176 *
4177 * @param pv Pointer to the memory block.
4178 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4179 * @param u32 The value it's supposed to be filled with.
4180 */
4181#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4182DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4183#else
4184DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4185{
4186/** @todo rewrite this in inline assembly? */
4187 uint32_t const *pu32 = (uint32_t const *)pv;
4188 for (; cb; cb -= 4, pu32++)
4189 if (RT_UNLIKELY(*pu32 != u32))
4190 return (uint32_t *)pu32;
4191 return NULL;
4192}
4193#endif
4194
4195
4196/**
4197 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4198 *
4199 * @returns u32F1 * u32F2.
4200 */
4201#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4202DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4203#else
4204DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4205{
4206# ifdef RT_ARCH_AMD64
4207 return (uint64_t)u32F1 * u32F2;
4208# else /* !RT_ARCH_AMD64 */
4209 uint64_t u64;
4210# if RT_INLINE_ASM_GNU_STYLE
4211 __asm__ __volatile__("mull %%edx"
4212 : "=A" (u64)
4213 : "a" (u32F2), "d" (u32F1));
4214# else
4215 __asm
4216 {
4217 mov edx, [u32F1]
4218 mov eax, [u32F2]
4219 mul edx
4220 mov dword ptr [u64], eax
4221 mov dword ptr [u64 + 4], edx
4222 }
4223# endif
4224 return u64;
4225# endif /* !RT_ARCH_AMD64 */
4226}
4227#endif
4228
4229
4230/**
4231 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4232 *
4233 * @returns i32F1 * i32F2.
4234 */
4235#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4236DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4237#else
4238DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
4239{
4240# ifdef RT_ARCH_AMD64
4241 return (int64_t)i32F1 * i32F2;
4242# else /* !RT_ARCH_AMD64 */
4243 int64_t i64;
4244# if RT_INLINE_ASM_GNU_STYLE
4245 __asm__ __volatile__("imull %%edx"
4246 : "=A" (i64)
4247 : "a" (i32F2), "d" (i32F1));
4248# else
4249 __asm
4250 {
4251 mov edx, [i32F1]
4252 mov eax, [i32F2]
4253 imul edx
4254 mov dword ptr [i64], eax
4255 mov dword ptr [i64 + 4], edx
4256 }
4257# endif
4258 return i64;
4259# endif /* !RT_ARCH_AMD64 */
4260}
4261#endif
4262
4263
4264/**
4265 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
4266 *
4267 * @returns u64 / u32.
4268 */
4269#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4270DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
4271#else
4272DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
4273{
4274# ifdef RT_ARCH_AMD64
4275 return (uint32_t)(u64 / u32);
4276# else /* !RT_ARCH_AMD64 */
4277# if RT_INLINE_ASM_GNU_STYLE
4278 RTCCUINTREG uDummy;
4279 __asm__ __volatile__("divl %3"
4280 : "=a" (u32), "=d"(uDummy)
4281 : "A" (u64), "r" (u32));
4282# else
4283 __asm
4284 {
4285 mov eax, dword ptr [u64]
4286 mov edx, dword ptr [u64 + 4]
4287 mov ecx, [u32]
4288 div ecx
4289 mov [u32], eax
4290 }
4291# endif
4292 return u32;
4293# endif /* !RT_ARCH_AMD64 */
4294}
4295#endif
4296
4297
4298/**
4299 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4300 *
4301 * @returns i64 / i32.
4302 */
4303#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4304DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4305#else
4306DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4307{
4308# ifdef RT_ARCH_AMD64
4309 return (int32_t)(i64 / i32);
4310# else /* !RT_ARCH_AMD64 */
4311# if RT_INLINE_ASM_GNU_STYLE
4312 RTCCUINTREG iDummy;
4313 __asm__ __volatile__("idivl %3"
4314 : "=a" (i32), "=d"(iDummy)
4315 : "A" (i64), "r" (i32));
4316# else
4317 __asm
4318 {
4319 mov eax, dword ptr [i64]
4320 mov edx, dword ptr [i64 + 4]
4321 mov ecx, [i32]
4322 idiv ecx
4323 mov [i32], eax
4324 }
4325# endif
4326 return i32;
4327# endif /* !RT_ARCH_AMD64 */
4328}
4329#endif
4330
4331
4332/**
4333 * Performs a 64-bit unsigned by 32-bit unsigned division with a 32-bit unsigned result,
4334 * returning the remainder.
4335 *
4336 * @returns u64 % u32.
4337 *
4338 * @remarks It is important that the quotient (u64 / u32) fits in 32 bits, otherwise the division overflows and crashes.
4339 */
4340#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4341DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
4342#else
4343DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
4344{
4345# ifdef RT_ARCH_AMD64
4346 return (uint32_t)(u64 % u32);
4347# else /* !RT_ARCH_AMD64 */
4348# if RT_INLINE_ASM_GNU_STYLE
4349 RTCCUINTREG uDummy;
4350 __asm__ __volatile__("divl %3"
4351 : "=a" (uDummy), "=d"(u32)
4352 : "A" (u64), "r" (u32));
4353# else
4354 __asm
4355 {
4356 mov eax, dword ptr [u64]
4357 mov edx, dword ptr [u64 + 4]
4358 mov ecx, [u32]
4359 div ecx
4360 mov [u32], edx
4361 }
4362# endif
4363 return u32;
4364# endif /* !RT_ARCH_AMD64 */
4365}
4366#endif
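
/* Illustrative usage sketch (not an IPRT API): splitting a 64-bit nanosecond
 * count into whole seconds and the nanosecond remainder.  As the remarks above
 * warn, the quotient must fit in 32 bits or the division will trap. */
#if 0 /* example only */
static void ExampleSplitNanoTS(uint64_t cNanos, uint32_t *pcSecs, uint32_t *pcNanosLeft)
{
    *pcSecs      = ASMDivU64ByU32RetU32(cNanos, UINT32_C(1000000000));
    *pcNanosLeft = ASMModU64ByU32RetU32(cNanos, UINT32_C(1000000000));
}
#endif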
4367
4368
4369/**
4370 * Performs a 64-bit signed by 32-bit signed division with a 32-bit signed result,
4371 * returning the remainder.
4372 *
4373 * @returns i64 % i32.
4374 *
4375 * @remarks It is important that the quotient (i64 / i32) fits in 32 bits, otherwise the division overflows and crashes.
4376 */
4377#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4378DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
4379#else
4380DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
4381{
4382# ifdef RT_ARCH_AMD64
4383 return (int32_t)(i64 % i32);
4384# else /* !RT_ARCH_AMD64 */
4385# if RT_INLINE_ASM_GNU_STYLE
4386 RTCCUINTREG iDummy;
4387 __asm__ __volatile__("idivl %3"
4388 : "=a" (iDummy), "=d"(i32)
4389 : "A" (i64), "r" (i32));
4390# else
4391 __asm
4392 {
4393 mov eax, dword ptr [i64]
4394 mov edx, dword ptr [i64 + 4]
4395 mov ecx, [i32]
4396 idiv ecx
4397 mov [i32], edx
4398 }
4399# endif
4400 return i32;
4401# endif /* !RT_ARCH_AMD64 */
4402}
4403#endif
4404
4405
4406/**
4407 * Multiplies a 64-bit value by a 32-bit value and divides the result by a 32-bit value,
4408 * using a 96-bit intermediate result.
4409 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4410 * __udivdi3 and __umoddi3 even if this inline function is not used.
4411 *
4412 * @returns (u64A * u32B) / u32C.
4413 * @param u64A The 64-bit value.
4414 * @param u32B The 32-bit value to multiply A by.
4415 * @param u32C The 32-bit value to divide A*B by.
4416 */
4417#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4418DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4419#else
4420DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4421{
4422# if RT_INLINE_ASM_GNU_STYLE
4423# ifdef RT_ARCH_AMD64
4424 uint64_t u64Result, u64Spill;
4425 __asm__ __volatile__("mulq %2\n\t"
4426 "divq %3\n\t"
4427 : "=a" (u64Result),
4428 "=d" (u64Spill)
4429 : "r" ((uint64_t)u32B),
4430 "r" ((uint64_t)u32C),
4431 "0" (u64A),
4432 "1" (0));
4433 return u64Result;
4434# else
4435 uint32_t u32Dummy;
4436 uint64_t u64Result;
4437 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4438 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4439 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4440 eax = u64A.hi */
4441 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
4442 edx = u32C */
4443 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4444 edx = u32B */
4445 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4446 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4447 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4448 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4449 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4450 edx = u64Hi % u32C */
4451 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4452 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4453 "divl %%ecx \n\t" /* u64Result.lo */
4454 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4455 : "=A"(u64Result), "=c"(u32Dummy),
4456 "=S"(u32Dummy), "=D"(u32Dummy)
4457 : "a"((uint32_t)u64A),
4458 "S"((uint32_t)(u64A >> 32)),
4459 "c"(u32B),
4460 "D"(u32C));
4461 return u64Result;
4462# endif
4463# else
4464 RTUINT64U u;
4465 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4466 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4467 u64Hi += (u64Lo >> 32);
4468 u.s.Hi = (uint32_t)(u64Hi / u32C);
4469 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4470 return u.u;
4471# endif
4472}
4473#endif
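
/*
 * Usage sketch (editorial addition, not part of the original header): converting a
 * timestamp tick count to milliseconds. ASMMultU64ByU32DivByU32 keeps the
 * cTicks * 1000 product in a 96-bit intermediate, so it does not overflow the way
 * plain 64-bit arithmetic would for large tick counts. The frequency parameter is
 * a hypothetical example.
 */
#if 0 /* illustration only -- not compiled */
DECLINLINE(uint64_t) exampleTicksToMilliSecs(uint64_t cTicks, uint32_t uTicksPerSec)
{
    return ASMMultU64ByU32DivByU32(cTicks, 1000, uTicksPerSec);
}
#endif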
4474
4475
4476/**
4477 * Probes a byte pointer for read access.
4478 *
4479 * While the function will fault if the byte is not read accessible,
4480 * the idea is to do this in a safe place like before acquiring locks
4481 * and such like.
4482 *
4483 * Also, this function guarantees that an eager compiler is not going
4484 * to optimize the probing away.
4485 *
4486 * @param pvByte Pointer to the byte.
4487 */
4488#if RT_INLINE_ASM_EXTERNAL
4489DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4490#else
4491DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4492{
4493 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4494 uint8_t u8;
4495# if RT_INLINE_ASM_GNU_STYLE
4496 __asm__ __volatile__("movb (%1), %0\n\t"
4497 : "=r" (u8)
4498 : "r" (pvByte));
4499# else
4500 __asm
4501 {
4502# ifdef RT_ARCH_AMD64
4503 mov rax, [pvByte]
4504 mov al, [rax]
4505# else
4506 mov eax, [pvByte]
4507 mov al, [eax]
4508# endif
4509 mov [u8], al
4510 }
4511# endif
4512 return u8;
4513}
4514#endif
4515
4516/**
4517 * Probes a buffer for read access page by page.
4518 *
4519 * While the function will fault if the buffer is not fully read
4520 * accessible, the idea is to do this in a safe place like before
4521 * acquiring locks and such like.
4522 *
4523 * Also, this function guarantees that an eager compiler is not going
4524 * to optimize the probing away.
4525 *
4526 * @param pvBuf Pointer to the buffer.
4527 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4528 */
4529DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4530{
4531 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4532 /* the first byte */
4533 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4534 ASMProbeReadByte(pu8);
4535
4536 /* the pages in between pages. */
4537 while (cbBuf > /*PAGE_SIZE*/0x1000)
4538 {
4539 ASMProbeReadByte(pu8);
4540 cbBuf -= /*PAGE_SIZE*/0x1000;
4541 pu8 += /*PAGE_SIZE*/0x1000;
4542 }
4543
4544 /* the last byte */
4545 ASMProbeReadByte(pu8 + cbBuf - 1);
4546}
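
/*
 * Usage sketch (editorial addition, not part of the original header): probing a
 * caller supplied buffer before taking a lock, so that any page fault is taken in
 * a context where it can be serviced. The lock functions are hypothetical
 * placeholders.
 */
#if 0 /* illustration only -- not compiled */
void exampleSpinlockAcquire(void);   /* hypothetical */
void exampleSpinlockRelease(void);   /* hypothetical */

static void exampleCopyUnderLock(void *pvDst, const void *pvSrc, size_t cb)
{
    uint8_t *pbDst = (uint8_t *)pvDst;
    const uint8_t *pbSrc = (const uint8_t *)pvSrc;
    size_t i;

    if (!cb)
        return;
    ASMProbeReadBuffer(pvSrc, cb);   /* fault here, if at all, not while holding the lock */
    exampleSpinlockAcquire();
    for (i = 0; i < cb; i++)
        pbDst[i] = pbSrc[i];
    exampleSpinlockRelease();
}
#endif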
4547
4548
4549/** @def ASMBreakpoint
4550 * Debugger Breakpoint.
4551 * @remark In the gnu world we add a nop instruction after the int3 to
4552 * force gdb to remain at the int3 source line.
4553 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
4554 * @internal
4555 */
4556#if RT_INLINE_ASM_GNU_STYLE
4557# ifndef __L4ENV__
4558# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4559# else
4560# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4561# endif
4562#else
4563# define ASMBreakpoint() __debugbreak()
4564#endif
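
/*
 * Usage sketch (editorial addition, not part of the original header): dropping
 * into the debugger on an impossible condition in debug builds. The DEBUG define
 * and the pointer check are hypothetical; RT_UNLIKELY comes from iprt/cdefs.h.
 */
#if 0 /* illustration only -- not compiled */
DECLINLINE(void) exampleAssertValidPtr(const void *pv)
{
# ifdef DEBUG
    if (RT_UNLIKELY(!pv))
        ASMBreakpoint();
# else
    (void)pv;
# endif
}
#endif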
4565
4566
4567
4568/** @defgroup grp_inline_bits Bit Operations
4569 * @{
4570 */
4571
4572
4573/**
4574 * Sets a bit in a bitmap.
4575 *
4576 * @param pvBitmap Pointer to the bitmap.
4577 * @param iBit The bit to set.
4578 */
4579#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4580DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4581#else
4582DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4583{
4584# if RT_INLINE_ASM_USES_INTRIN
4585 _bittestandset((long *)pvBitmap, iBit);
4586
4587# elif RT_INLINE_ASM_GNU_STYLE
4588 __asm__ __volatile__ ("btsl %1, %0"
4589 : "=m" (*(volatile long *)pvBitmap)
4590 : "Ir" (iBit)
4591 : "memory");
4592# else
4593 __asm
4594 {
4595# ifdef RT_ARCH_AMD64
4596 mov rax, [pvBitmap]
4597 mov edx, [iBit]
4598 bts [rax], edx
4599# else
4600 mov eax, [pvBitmap]
4601 mov edx, [iBit]
4602 bts [eax], edx
4603# endif
4604 }
4605# endif
4606}
4607#endif
4608
4609
4610/**
4611 * Atomically sets a bit in a bitmap, ordered.
4612 *
4613 * @param pvBitmap Pointer to the bitmap.
4614 * @param iBit The bit to set.
4615 */
4616#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4617DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4618#else
4619DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4620{
4621# if RT_INLINE_ASM_USES_INTRIN
4622 _interlockedbittestandset((long *)pvBitmap, iBit);
4623# elif RT_INLINE_ASM_GNU_STYLE
4624 __asm__ __volatile__ ("lock; btsl %1, %0"
4625 : "=m" (*(volatile long *)pvBitmap)
4626 : "Ir" (iBit)
4627 : "memory");
4628# else
4629 __asm
4630 {
4631# ifdef RT_ARCH_AMD64
4632 mov rax, [pvBitmap]
4633 mov edx, [iBit]
4634 lock bts [rax], edx
4635# else
4636 mov eax, [pvBitmap]
4637 mov edx, [iBit]
4638 lock bts [eax], edx
4639# endif
4640 }
4641# endif
4642}
4643#endif
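
/*
 * Usage sketch (editorial addition, not part of the original header): the bitmap
 * is expected to be laid out as 32-bit dwords; this hypothetical snippet keeps a
 * 256-bit presence bitmap and marks an entry from a possibly concurrent context.
 */
#if 0 /* illustration only -- not compiled */
static uint32_t g_bmDevicesPresent[256 / 32];   /* 256-bit bitmap, dword organised */

DECLINLINE(void) exampleMarkDevicePresent(unsigned iDevice)
{
    ASMAtomicBitSet(g_bmDevicesPresent, (int32_t)iDevice);
}
#endif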
4644
4645
4646/**
4647 * Clears a bit in a bitmap.
4648 *
4649 * @param pvBitmap Pointer to the bitmap.
4650 * @param iBit The bit to clear.
4651 */
4652#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4653DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4654#else
4655DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4656{
4657# if RT_INLINE_ASM_USES_INTRIN
4658 _bittestandreset((long *)pvBitmap, iBit);
4659
4660# elif RT_INLINE_ASM_GNU_STYLE
4661 __asm__ __volatile__ ("btrl %1, %0"
4662 : "=m" (*(volatile long *)pvBitmap)
4663 : "Ir" (iBit)
4664 : "memory");
4665# else
4666 __asm
4667 {
4668# ifdef RT_ARCH_AMD64
4669 mov rax, [pvBitmap]
4670 mov edx, [iBit]
4671 btr [rax], edx
4672# else
4673 mov eax, [pvBitmap]
4674 mov edx, [iBit]
4675 btr [eax], edx
4676# endif
4677 }
4678# endif
4679}
4680#endif
4681
4682
4683/**
4684 * Atomically clears a bit in a bitmap, ordered.
4685 *
4686 * @param pvBitmap Pointer to the bitmap.
4687 * @param iBit The bit to clear.
4688 * @remark No memory barrier, take care on smp.
4689 */
4690#if RT_INLINE_ASM_EXTERNAL
4691DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4692#else
4693DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4694{
4695# if RT_INLINE_ASM_GNU_STYLE
4696 __asm__ __volatile__ ("lock; btrl %1, %0"
4697 : "=m" (*(volatile long *)pvBitmap)
4698 : "Ir" (iBit)
4699 : "memory");
4700# else
4701 __asm
4702 {
4703# ifdef RT_ARCH_AMD64
4704 mov rax, [pvBitmap]
4705 mov edx, [iBit]
4706 lock btr [rax], edx
4707# else
4708 mov eax, [pvBitmap]
4709 mov edx, [iBit]
4710 lock btr [eax], edx
4711# endif
4712 }
4713# endif
4714}
4715#endif
4716
4717
4718/**
4719 * Toggles a bit in a bitmap.
4720 *
4721 * @param pvBitmap Pointer to the bitmap.
4722 * @param iBit The bit to toggle.
4723 */
4724#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4725DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4726#else
4727DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4728{
4729# if RT_INLINE_ASM_USES_INTRIN
4730 _bittestandcomplement((long *)pvBitmap, iBit);
4731# elif RT_INLINE_ASM_GNU_STYLE
4732 __asm__ __volatile__ ("btcl %1, %0"
4733 : "=m" (*(volatile long *)pvBitmap)
4734 : "Ir" (iBit)
4735 : "memory");
4736# else
4737 __asm
4738 {
4739# ifdef RT_ARCH_AMD64
4740 mov rax, [pvBitmap]
4741 mov edx, [iBit]
4742 btc [rax], edx
4743# else
4744 mov eax, [pvBitmap]
4745 mov edx, [iBit]
4746 btc [eax], edx
4747# endif
4748 }
4749# endif
4750}
4751#endif
4752
4753
4754/**
4755 * Atomically toggles a bit in a bitmap, ordered.
4756 *
4757 * @param pvBitmap Pointer to the bitmap.
4758 * @param iBit The bit to toggle.
4759 */
4760#if RT_INLINE_ASM_EXTERNAL
4761DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4762#else
4763DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4764{
4765# if RT_INLINE_ASM_GNU_STYLE
4766 __asm__ __volatile__ ("lock; btcl %1, %0"
4767 : "=m" (*(volatile long *)pvBitmap)
4768 : "Ir" (iBit)
4769 : "memory");
4770# else
4771 __asm
4772 {
4773# ifdef RT_ARCH_AMD64
4774 mov rax, [pvBitmap]
4775 mov edx, [iBit]
4776 lock btc [rax], edx
4777# else
4778 mov eax, [pvBitmap]
4779 mov edx, [iBit]
4780 lock btc [eax], edx
4781# endif
4782 }
4783# endif
4784}
4785#endif
4786
4787
4788/**
4789 * Tests and sets a bit in a bitmap.
4790 *
4791 * @returns true if the bit was set.
4792 * @returns false if the bit was clear.
4793 * @param pvBitmap Pointer to the bitmap.
4794 * @param iBit The bit to test and set.
4795 */
4796#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4797DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4798#else
4799DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4800{
4801 union { bool f; uint32_t u32; uint8_t u8; } rc;
4802# if RT_INLINE_ASM_USES_INTRIN
4803 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4804
4805# elif RT_INLINE_ASM_GNU_STYLE
4806 __asm__ __volatile__ ("btsl %2, %1\n\t"
4807 "setc %b0\n\t"
4808 "andl $1, %0\n\t"
4809 : "=q" (rc.u32),
4810 "=m" (*(volatile long *)pvBitmap)
4811 : "Ir" (iBit)
4812 : "memory");
4813# else
4814 __asm
4815 {
4816 mov edx, [iBit]
4817# ifdef RT_ARCH_AMD64
4818 mov rax, [pvBitmap]
4819 bts [rax], edx
4820# else
4821 mov eax, [pvBitmap]
4822 bts [eax], edx
4823# endif
4824 setc al
4825 and eax, 1
4826 mov [rc.u32], eax
4827 }
4828# endif
4829 return rc.f;
4830}
4831#endif
4832
4833
4834/**
4835 * Atomically tests and sets a bit in a bitmap, ordered.
4836 *
4837 * @returns true if the bit was set.
4838 * @returns false if the bit was clear.
4839 * @param pvBitmap Pointer to the bitmap.
4840 * @param iBit The bit to test and set.
4841 */
4842#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4843DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4844#else
4845DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4846{
4847 union { bool f; uint32_t u32; uint8_t u8; } rc;
4848# if RT_INLINE_ASM_USES_INTRIN
4849 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4850# elif RT_INLINE_ASM_GNU_STYLE
4851 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
4852 "setc %b0\n\t"
4853 "andl $1, %0\n\t"
4854 : "=q" (rc.u32),
4855 "=m" (*(volatile long *)pvBitmap)
4856 : "Ir" (iBit)
4857 : "memory");
4858# else
4859 __asm
4860 {
4861 mov edx, [iBit]
4862# ifdef RT_ARCH_AMD64
4863 mov rax, [pvBitmap]
4864 lock bts [rax], edx
4865# else
4866 mov eax, [pvBitmap]
4867 lock bts [eax], edx
4868# endif
4869 setc al
4870 and eax, 1
4871 mov [rc.u32], eax
4872 }
4873# endif
4874 return rc.f;
4875}
4876#endif
4877
4878
4879/**
4880 * Tests and clears a bit in a bitmap.
4881 *
4882 * @returns true if the bit was set.
4883 * @returns false if the bit was clear.
4884 * @param pvBitmap Pointer to the bitmap.
4885 * @param iBit The bit to test and clear.
4886 */
4887#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4888DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4889#else
4890DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4891{
4892 union { bool f; uint32_t u32; uint8_t u8; } rc;
4893# if RT_INLINE_ASM_USES_INTRIN
4894 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4895
4896# elif RT_INLINE_ASM_GNU_STYLE
4897 __asm__ __volatile__ ("btrl %2, %1\n\t"
4898 "setc %b0\n\t"
4899 "andl $1, %0\n\t"
4900 : "=q" (rc.u32),
4901 "=m" (*(volatile long *)pvBitmap)
4902 : "Ir" (iBit)
4903 : "memory");
4904# else
4905 __asm
4906 {
4907 mov edx, [iBit]
4908# ifdef RT_ARCH_AMD64
4909 mov rax, [pvBitmap]
4910 btr [rax], edx
4911# else
4912 mov eax, [pvBitmap]
4913 btr [eax], edx
4914# endif
4915 setc al
4916 and eax, 1
4917 mov [rc.u32], eax
4918 }
4919# endif
4920 return rc.f;
4921}
4922#endif
4923
4924
4925/**
4926 * Atomically tests and clears a bit in a bitmap, ordered.
4927 *
4928 * @returns true if the bit was set.
4929 * @returns false if the bit was clear.
4930 * @param pvBitmap Pointer to the bitmap.
4931 * @param iBit The bit to test and clear.
4932 * @remark No memory barrier, take care on smp.
4933 */
4934#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4935DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4936#else
4937DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4938{
4939 union { bool f; uint32_t u32; uint8_t u8; } rc;
4940# if RT_INLINE_ASM_USES_INTRIN
4941 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4942
4943# elif RT_INLINE_ASM_GNU_STYLE
4944 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
4945 "setc %b0\n\t"
4946 "andl $1, %0\n\t"
4947 : "=q" (rc.u32),
4948 "=m" (*(volatile long *)pvBitmap)
4949 : "Ir" (iBit)
4950 : "memory");
4951# else
4952 __asm
4953 {
4954 mov edx, [iBit]
4955# ifdef RT_ARCH_AMD64
4956 mov rax, [pvBitmap]
4957 lock btr [rax], edx
4958# else
4959 mov eax, [pvBitmap]
4960 lock btr [eax], edx
4961# endif
4962 setc al
4963 and eax, 1
4964 mov [rc.u32], eax
4965 }
4966# endif
4967 return rc.f;
4968}
4969#endif
4970
4971
4972/**
4973 * Tests and toggles a bit in a bitmap.
4974 *
4975 * @returns true if the bit was set.
4976 * @returns false if the bit was clear.
4977 * @param pvBitmap Pointer to the bitmap.
4978 * @param iBit The bit to test and toggle.
4979 */
4980#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4981DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4982#else
4983DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4984{
4985 union { bool f; uint32_t u32; uint8_t u8; } rc;
4986# if RT_INLINE_ASM_USES_INTRIN
4987 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4988
4989# elif RT_INLINE_ASM_GNU_STYLE
4990 __asm__ __volatile__ ("btcl %2, %1\n\t"
4991 "setc %b0\n\t"
4992 "andl $1, %0\n\t"
4993 : "=q" (rc.u32),
4994 "=m" (*(volatile long *)pvBitmap)
4995 : "Ir" (iBit)
4996 : "memory");
4997# else
4998 __asm
4999 {
5000 mov edx, [iBit]
5001# ifdef RT_ARCH_AMD64
5002 mov rax, [pvBitmap]
5003 btc [rax], edx
5004# else
5005 mov eax, [pvBitmap]
5006 btc [eax], edx
5007# endif
5008 setc al
5009 and eax, 1
5010 mov [rc.u32], eax
5011 }
5012# endif
5013 return rc.f;
5014}
5015#endif
5016
5017
5018/**
5019 * Atomically tests and toggles a bit in a bitmap, ordered.
5020 *
5021 * @returns true if the bit was set.
5022 * @returns false if the bit was clear.
5023 * @param pvBitmap Pointer to the bitmap.
5024 * @param iBit The bit to test and toggle.
5025 */
5026#if RT_INLINE_ASM_EXTERNAL
5027DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5028#else
5029DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5030{
5031 union { bool f; uint32_t u32; uint8_t u8; } rc;
5032# if RT_INLINE_ASM_GNU_STYLE
5033 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
5034 "setc %b0\n\t"
5035 "andl $1, %0\n\t"
5036 : "=q" (rc.u32),
5037 "=m" (*(volatile long *)pvBitmap)
5038 : "Ir" (iBit)
5039 : "memory");
5040# else
5041 __asm
5042 {
5043 mov edx, [iBit]
5044# ifdef RT_ARCH_AMD64
5045 mov rax, [pvBitmap]
5046 lock btc [rax], edx
5047# else
5048 mov eax, [pvBitmap]
5049 lock btc [eax], edx
5050# endif
5051 setc al
5052 and eax, 1
5053 mov [rc.u32], eax
5054 }
5055# endif
5056 return rc.f;
5057}
5058#endif
5059
5060
5061/**
5062 * Tests if a bit in a bitmap is set.
5063 *
5064 * @returns true if the bit is set.
5065 * @returns false if the bit is clear.
5066 * @param pvBitmap Pointer to the bitmap.
5067 * @param iBit The bit to test.
5068 */
5069#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5070DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5071#else
5072DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5073{
5074 union { bool f; uint32_t u32; uint8_t u8; } rc;
5075# if RT_INLINE_ASM_USES_INTRIN
5076 rc.u32 = _bittest((long *)pvBitmap, iBit);
5077# elif RT_INLINE_ASM_GNU_STYLE
5078
5079 __asm__ __volatile__ ("btl %2, %1\n\t"
5080 "setc %b0\n\t"
5081 "andl $1, %0\n\t"
5082 : "=q" (rc.u32)
5083 : "m" (*(const volatile long *)pvBitmap),
5084 "Ir" (iBit)
5085 : "memory");
5086# else
5087 __asm
5088 {
5089 mov edx, [iBit]
5090# ifdef RT_ARCH_AMD64
5091 mov rax, [pvBitmap]
5092 bt [rax], edx
5093# else
5094 mov eax, [pvBitmap]
5095 bt [eax], edx
5096# endif
5097 setc al
5098 and eax, 1
5099 mov [rc.u32], eax
5100 }
5101# endif
5102 return rc.f;
5103}
5104#endif
5105
5106
5107/**
5108 * Clears a bit range within a bitmap.
5109 *
5110 * @param pvBitmap Pointer to the bitmap.
5111 * @param iBitStart The first bit to clear.
5112 * @param iBitEnd The first bit not to clear.
5113 */
5114DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5115{
5116 if (iBitStart < iBitEnd)
5117 {
5118 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5119 int iStart = iBitStart & ~31;
5120 int iEnd = iBitEnd & ~31;
5121 if (iStart == iEnd)
5122 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5123 else
5124 {
5125 /* bits in first dword. */
5126 if (iBitStart & 31)
5127 {
5128 *pu32 &= (1 << (iBitStart & 31)) - 1;
5129 pu32++;
5130 iBitStart = iStart + 32;
5131 }
5132
5133 /* whole dword. */
5134 if (iBitStart != iEnd)
5135 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5136
5137 /* bits in last dword. */
5138 if (iBitEnd & 31)
5139 {
5140 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5141 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5142 }
5143 }
5144 }
5145}
5146
5147
5148/**
5149 * Sets a bit range within a bitmap.
5150 *
5151 * @param pvBitmap Pointer to the bitmap.
5152 * @param iBitStart The first bit to set.
5153 * @param iBitEnd The first bit not to set.
5154 */
5155DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5156{
5157 if (iBitStart < iBitEnd)
5158 {
5159 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5160 int iStart = iBitStart & ~31;
5161 int iEnd = iBitEnd & ~31;
5162 if (iStart == iEnd)
5163 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
5164 else
5165 {
5166 /* bits in first dword. */
5167 if (iBitStart & 31)
5168 {
5169 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5170 pu32++;
5171 iBitStart = iStart + 32;
5172 }
5173
5174 /* whole dword. */
5175 if (iBitStart != iEnd)
5176 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5177
5178 /* bits in last dword. */
5179 if (iBitEnd & 31)
5180 {
5181 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5182 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5183 }
5184 }
5185 }
5186}
5187
5188
5189/**
5190 * Finds the first clear bit in a bitmap.
5191 *
5192 * @returns Index of the first zero bit.
5193 * @returns -1 if no clear bit was found.
5194 * @param pvBitmap Pointer to the bitmap.
5195 * @param cBits The number of bits in the bitmap. Multiple of 32.
5196 */
5197#if RT_INLINE_ASM_EXTERNAL
5198DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5199#else
5200DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5201{
5202 if (cBits)
5203 {
5204 int32_t iBit;
5205# if RT_INLINE_ASM_GNU_STYLE
5206 RTCCUINTREG uEAX, uECX, uEDI;
5207 cBits = RT_ALIGN_32(cBits, 32);
5208 __asm__ __volatile__("repe; scasl\n\t"
5209 "je 1f\n\t"
5210# ifdef RT_ARCH_AMD64
5211 "lea -4(%%rdi), %%rdi\n\t"
5212 "xorl (%%rdi), %%eax\n\t"
5213 "subq %5, %%rdi\n\t"
5214# else
5215 "lea -4(%%edi), %%edi\n\t"
5216 "xorl (%%edi), %%eax\n\t"
5217 "subl %5, %%edi\n\t"
5218# endif
5219 "shll $3, %%edi\n\t"
5220 "bsfl %%eax, %%edx\n\t"
5221 "addl %%edi, %%edx\n\t"
5222 "1:\t\n"
5223 : "=d" (iBit),
5224 "=&c" (uECX),
5225 "=&D" (uEDI),
5226 "=&a" (uEAX)
5227 : "0" (0xffffffff),
5228 "mr" (pvBitmap),
5229 "1" (cBits >> 5),
5230 "2" (pvBitmap),
5231 "3" (0xffffffff));
5232# else
5233 cBits = RT_ALIGN_32(cBits, 32);
5234 __asm
5235 {
5236# ifdef RT_ARCH_AMD64
5237 mov rdi, [pvBitmap]
5238 mov rbx, rdi
5239# else
5240 mov edi, [pvBitmap]
5241 mov ebx, edi
5242# endif
5243 mov edx, 0ffffffffh
5244 mov eax, edx
5245 mov ecx, [cBits]
5246 shr ecx, 5
5247 repe scasd
5248 je done
5249
5250# ifdef RT_ARCH_AMD64
5251 lea rdi, [rdi - 4]
5252 xor eax, [rdi]
5253 sub rdi, rbx
5254# else
5255 lea edi, [edi - 4]
5256 xor eax, [edi]
5257 sub edi, ebx
5258# endif
5259 shl edi, 3
5260 bsf edx, eax
5261 add edx, edi
5262 done:
5263 mov [iBit], edx
5264 }
5265# endif
5266 return iBit;
5267 }
5268 return -1;
5269}
5270#endif
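
/*
 * Usage sketch (editorial addition, not part of the original header): allocating a
 * free slot from a bitmap by locating a clear bit and claiming it with an atomic
 * test-and-set, retrying if another thread won the race. The function name is
 * hypothetical; cSlots must be a multiple of 32 as required by ASMBitFirstClear.
 */
#if 0 /* illustration only -- not compiled */
DECLINLINE(int) exampleAllocSlot(volatile void *pvBitmap, uint32_t cSlots)
{
    for (;;)
    {
        int iBit = ASMBitFirstClear(pvBitmap, cSlots);
        if (iBit < 0)
            return -1;                                  /* all slots taken */
        if (!ASMAtomicBitTestAndSet(pvBitmap, iBit))
            return iBit;                                /* the bit was clear; the slot is ours */
        /* Another thread set the bit first - search again. */
    }
}
#endif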
5271
5272
5273/**
5274 * Finds the next clear bit in a bitmap.
5275 *
5276 * @returns Index of the next clear bit.
5277 * @returns -1 if no clear bit was found.
5278 * @param pvBitmap Pointer to the bitmap.
5279 * @param cBits The number of bits in the bitmap. Multiple of 32.
5280 * @param iBitPrev The bit returned from the last search.
5281 * The search will start at iBitPrev + 1.
5282 */
5283#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5284DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5285#else
5286DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5287{
5288 int iBit = ++iBitPrev & 31;
5289 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5290 cBits -= iBitPrev & ~31;
5291 if (iBit)
5292 {
5293 /* inspect the first dword. */
5294 uint32_t u32 = (~*(const volatile uint32_t *)pvBitmap) >> iBit;
5295# if RT_INLINE_ASM_USES_INTRIN
5296 unsigned long ulBit = 0;
5297 if (_BitScanForward(&ulBit, u32))
5298 return ulBit + iBitPrev;
5299 iBit = -1;
5300# else
5301# if RT_INLINE_ASM_GNU_STYLE
5302 __asm__ __volatile__("bsf %1, %0\n\t"
5303 "jnz 1f\n\t"
5304 "movl $-1, %0\n\t"
5305 "1:\n\t"
5306 : "=r" (iBit)
5307 : "r" (u32));
5308# else
5309 __asm
5310 {
5311 mov edx, [u32]
5312 bsf eax, edx
5313 jnz done
5314 mov eax, 0ffffffffh
5315 done:
5316 mov [iBit], eax
5317 }
5318# endif
5319 if (iBit >= 0)
5320 return iBit + iBitPrev;
5321# endif
5322 /* Search the rest of the bitmap, if there is anything. */
5323 if (cBits > 32)
5324 {
5325 iBit = ASMBitFirstClear((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5326 if (iBit >= 0)
5327 return iBit + (iBitPrev & ~31) + 32;
5328 }
5329 }
5330 else
5331 {
5332 /* Search the rest of the bitmap. */
5333 iBit = ASMBitFirstClear(pvBitmap, cBits);
5334 if (iBit >= 0)
5335 return iBit + (iBitPrev & ~31);
5336 }
5337 return iBit;
5338}
5339#endif
5340
5341
5342/**
5343 * Finds the first set bit in a bitmap.
5344 *
5345 * @returns Index of the first set bit.
5346 * @returns -1 if no set bit was found.
5347 * @param pvBitmap Pointer to the bitmap.
5348 * @param cBits The number of bits in the bitmap. Multiple of 32.
5349 */
5350#if RT_INLINE_ASM_EXTERNAL
5351DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
5352#else
5353DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
5354{
5355 if (cBits)
5356 {
5357 int32_t iBit;
5358# if RT_INLINE_ASM_GNU_STYLE
5359 RTCCUINTREG uEAX, uECX, uEDI;
5360 cBits = RT_ALIGN_32(cBits, 32);
5361 __asm__ __volatile__("repe; scasl\n\t"
5362 "je 1f\n\t"
5363# ifdef RT_ARCH_AMD64
5364 "lea -4(%%rdi), %%rdi\n\t"
5365 "movl (%%rdi), %%eax\n\t"
5366 "subq %5, %%rdi\n\t"
5367# else
5368 "lea -4(%%edi), %%edi\n\t"
5369 "movl (%%edi), %%eax\n\t"
5370 "subl %5, %%edi\n\t"
5371# endif
5372 "shll $3, %%edi\n\t"
5373 "bsfl %%eax, %%edx\n\t"
5374 "addl %%edi, %%edx\n\t"
5375 "1:\t\n"
5376 : "=d" (iBit),
5377 "=&c" (uECX),
5378 "=&D" (uEDI),
5379 "=&a" (uEAX)
5380 : "0" (0xffffffff),
5381 "mr" (pvBitmap),
5382 "1" (cBits >> 5),
5383 "2" (pvBitmap),
5384 "3" (0));
5385# else
5386 cBits = RT_ALIGN_32(cBits, 32);
5387 __asm
5388 {
5389# ifdef RT_ARCH_AMD64
5390 mov rdi, [pvBitmap]
5391 mov rbx, rdi
5392# else
5393 mov edi, [pvBitmap]
5394 mov ebx, edi
5395# endif
5396 mov edx, 0ffffffffh
5397 xor eax, eax
5398 mov ecx, [cBits]
5399 shr ecx, 5
5400 repe scasd
5401 je done
5402# ifdef RT_ARCH_AMD64
5403 lea rdi, [rdi - 4]
5404 mov eax, [rdi]
5405 sub rdi, rbx
5406# else
5407 lea edi, [edi - 4]
5408 mov eax, [edi]
5409 sub edi, ebx
5410# endif
5411 shl edi, 3
5412 bsf edx, eax
5413 add edx, edi
5414 done:
5415 mov [iBit], edx
5416 }
5417# endif
5418 return iBit;
5419 }
5420 return -1;
5421}
5422#endif
5423
5424
5425/**
5426 * Finds the next set bit in a bitmap.
5427 *
5428 * @returns Index of the next set bit.
5429 * @returns -1 if no set bit was found.
5430 * @param pvBitmap Pointer to the bitmap.
5431 * @param cBits The number of bits in the bitmap. Multiple of 32.
5432 * @param iBitPrev The bit returned from the last search.
5433 * The search will start at iBitPrev + 1.
5434 */
5435#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5436DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5437#else
5438DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5439{
5440 int iBit = ++iBitPrev & 31;
5441 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5442 cBits -= iBitPrev & ~31;
5443 if (iBit)
5444 {
5445 /* inspect the first dword. */
5446 uint32_t u32 = *(const volatile uint32_t *)pvBitmap >> iBit;
5447# if RT_INLINE_ASM_USES_INTRIN
5448 unsigned long ulBit = 0;
5449 if (_BitScanForward(&ulBit, u32))
5450 return ulBit + iBitPrev;
5451 iBit = -1;
5452# else
5453# if RT_INLINE_ASM_GNU_STYLE
5454 __asm__ __volatile__("bsf %1, %0\n\t"
5455 "jnz 1f\n\t"
5456 "movl $-1, %0\n\t"
5457 "1:\n\t"
5458 : "=r" (iBit)
5459 : "r" (u32));
5460# else
5461 __asm
5462 {
5463 mov edx, u32
5464 bsf eax, edx
5465 jnz done
5466 mov eax, 0ffffffffh
5467 done:
5468 mov [iBit], eax
5469 }
5470# endif
5471 if (iBit >= 0)
5472 return iBit + iBitPrev;
5473# endif
5474 /* Search the rest of the bitmap, if there is anything. */
5475 if (cBits > 32)
5476 {
5477 iBit = ASMBitFirstSet((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5478 if (iBit >= 0)
5479 return iBit + (iBitPrev & ~31) + 32;
5480 }
5481
5482 }
5483 else
5484 {
5485 /* Search the rest of the bitmap. */
5486 iBit = ASMBitFirstSet(pvBitmap, cBits);
5487 if (iBit >= 0)
5488 return iBit + (iBitPrev & ~31);
5489 }
5490 return iBit;
5491}
5492#endif
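
/*
 * Usage sketch (editorial addition, not part of the original header): walking all
 * set bits in a bitmap with ASMBitFirstSet / ASMBitNextSet. The callback is a
 * hypothetical parameter; cBits must be a multiple of 32.
 */
#if 0 /* illustration only -- not compiled */
DECLINLINE(void) exampleForEachSetBit(const volatile void *pvBitmap, uint32_t cBits,
                                      void (*pfnCallback)(int iBit))
{
    int iBit = ASMBitFirstSet(pvBitmap, cBits);
    while (iBit >= 0)
    {
        pfnCallback(iBit);
        iBit = ASMBitNextSet(pvBitmap, cBits, (uint32_t)iBit);
    }
}
#endif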
5493
5494
5495/**
5496 * Finds the first bit which is set in the given 32-bit integer.
5497 * Bits are numbered from 1 (least significant) to 32.
5498 *
5499 * @returns index [1..32] of the first set bit.
5500 * @returns 0 if all bits are cleared.
5501 * @param u32 Integer to search for set bits.
5502 * @remark Similar to ffs() in BSD.
5503 */
5504DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5505{
5506# if RT_INLINE_ASM_USES_INTRIN
5507 unsigned long iBit;
5508 if (_BitScanForward(&iBit, u32))
5509 iBit++;
5510 else
5511 iBit = 0;
5512# elif RT_INLINE_ASM_GNU_STYLE
5513 uint32_t iBit;
5514 __asm__ __volatile__("bsf %1, %0\n\t"
5515 "jnz 1f\n\t"
5516 "xorl %0, %0\n\t"
5517 "jmp 2f\n"
5518 "1:\n\t"
5519 "incl %0\n"
5520 "2:\n\t"
5521 : "=r" (iBit)
5522 : "rm" (u32));
5523# else
5524 uint32_t iBit;
5525 _asm
5526 {
5527 bsf eax, [u32]
5528 jnz found
5529 xor eax, eax
5530 jmp done
5531 found:
5532 inc eax
5533 done:
5534 mov [iBit], eax
5535 }
5536# endif
5537 return iBit;
5538}
5539
5540
5541/**
5542 * Finds the first bit which is set in the given 32-bit integer.
5543 * Bits are numbered from 1 (least significant) to 32.
5544 *
5545 * @returns index [1..32] of the first set bit.
5546 * @returns 0 if all bits are cleared.
5547 * @param i32 Integer to search for set bits.
5548 * @remark Similar to ffs() in BSD.
5549 */
5550DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5551{
5552 return ASMBitFirstSetU32((uint32_t)i32);
5553}
5554
5555
5556/**
5557 * Finds the last bit which is set in the given 32-bit integer.
5558 * Bits are numbered from 1 (least significant) to 32.
5559 *
5560 * @returns index [1..32] of the last set bit.
5561 * @returns 0 if all bits are cleared.
5562 * @param u32 Integer to search for set bits.
5563 * @remark Similar to fls() in BSD.
5564 */
5565DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5566{
5567# if RT_INLINE_ASM_USES_INTRIN
5568 unsigned long iBit;
5569 if (_BitScanReverse(&iBit, u32))
5570 iBit++;
5571 else
5572 iBit = 0;
5573# elif RT_INLINE_ASM_GNU_STYLE
5574 uint32_t iBit;
5575 __asm__ __volatile__("bsrl %1, %0\n\t"
5576 "jnz 1f\n\t"
5577 "xorl %0, %0\n\t"
5578 "jmp 2f\n"
5579 "1:\n\t"
5580 "incl %0\n"
5581 "2:\n\t"
5582 : "=r" (iBit)
5583 : "rm" (u32));
5584# else
5585 uint32_t iBit;
5586 _asm
5587 {
5588 bsr eax, [u32]
5589 jnz found
5590 xor eax, eax
5591 jmp done
5592 found:
5593 inc eax
5594 done:
5595 mov [iBit], eax
5596 }
5597# endif
5598 return iBit;
5599}
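
/*
 * Usage sketch (editorial addition, not part of the original header): using
 * ASMBitLastSetU32 (bits numbered 1..32) to round a value up to the next power of
 * two. The helper name is hypothetical.
 */
#if 0 /* illustration only -- not compiled */
DECLINLINE(uint32_t) exampleRoundUpToPowerOfTwo(uint32_t u32)
{
    unsigned iBit = ASMBitLastSetU32(u32);
    if (!iBit)
        return 1;                                       /* 0 -> 1 */
    if (u32 == (uint32_t)1 << (iBit - 1))
        return u32;                                     /* already a power of two */
    if (iBit >= 32)
        return 0;                                       /* no 32-bit power of two is large enough */
    return (uint32_t)1 << iBit;
}
#endif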
5600
5601
5602/**
5603 * Finds the last bit which is set in the given 32-bit integer.
5604 * Bits are numbered from 1 (least significant) to 32.
5605 *
5606 * @returns index [1..32] of the last set bit.
5607 * @returns 0 if all bits are cleared.
5608 * @param i32 Integer to search for set bits.
5609 * @remark Similar to fls() in BSD.
5610 */
5611DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5612{
5613 return ASMBitLastSetU32((uint32_t)i32);
5614}
5615
5616/**
5617 * Reverse the byte order of the given 16-bit integer.
5618 *
5619 * @returns The value with its byte order reversed.
5620 * @param u16 16-bit integer value.
5621 */
5622DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5623{
5624#if RT_INLINE_ASM_USES_INTRIN
5625 u16 = _byteswap_ushort(u16);
5626#elif RT_INLINE_ASM_GNU_STYLE
5627 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5628#else
5629 _asm
5630 {
5631 mov ax, [u16]
5632 ror ax, 8
5633 mov [u16], ax
5634 }
5635#endif
5636 return u16;
5637}
5638
5639/**
5640 * Reverse the byte order of the given 32-bit integer.
5641 *
5642 * @returns The value with its byte order reversed.
5643 * @param u32 32-bit integer value.
5644 */
5645DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5646{
5647#if RT_INLINE_ASM_USES_INTRIN
5648 u32 = _byteswap_ulong(u32);
5649#elif RT_INLINE_ASM_GNU_STYLE
5650 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5651#else
5652 _asm
5653 {
5654 mov eax, [u32]
5655 bswap eax
5656 mov [u32], eax
5657 }
5658#endif
5659 return u32;
5660}
5661
5662
5663/**
5664 * Reverse the byte order of the given 64-bit integer.
5665 *
5666 * @returns The value with its byte order reversed.
5667 * @param u64 64-bit integer value.
5668 */
5669DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5670{
5671#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5672 u64 = _byteswap_uint64(u64);
5673#else
5674 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5675 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5676#endif
5677 return u64;
5678}
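
/*
 * Usage sketch (editorial addition, not part of the original header): converting a
 * big-endian value read off the wire into host byte order, assuming the
 * RT_BIG_ENDIAN define from iprt/cdefs.h is available for the endianness check.
 */
#if 0 /* illustration only -- not compiled */
DECLINLINE(uint32_t) exampleBigEndianToHostU32(uint32_t u32Wire)
{
# ifdef RT_BIG_ENDIAN
    return u32Wire;                     /* already in host order */
# else
    return ASMByteSwapU32(u32Wire);     /* little-endian host: reverse the bytes */
# endif
}
#endif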
5679
5680
5681/** @} */
5682
5683
5684/** @} */
5685#endif
5686