VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 10995

Last change on this file since 10995 was 10995, checked in by vboxsync, 16 years ago

iprt: Added ASMByteSwapU16/64 and unit test for them + U32.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 144.4 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
/** @def RT_INLINE_ASM_USES_INTRIN
 * Defined as 1 if we're using a Microsoft compiler with _MSC_VER >= 1400 and
 * therefore the <intrin.h> compiler intrinsics.
 * Otherwise defined as 0.
 */
41
42#ifdef _MSC_VER
43# if _MSC_VER >= 1400
44# define RT_INLINE_ASM_USES_INTRIN 1
45# include <intrin.h>
46 /* Emit the intrinsics at all optimization levels. */
47# pragma intrinsic(_ReadWriteBarrier)
48# pragma intrinsic(__cpuid)
49# pragma intrinsic(_enable)
50# pragma intrinsic(_disable)
51# pragma intrinsic(__rdtsc)
52# pragma intrinsic(__readmsr)
53# pragma intrinsic(__writemsr)
54# pragma intrinsic(__outbyte)
55# pragma intrinsic(__outword)
56# pragma intrinsic(__outdword)
57# pragma intrinsic(__inbyte)
58# pragma intrinsic(__inword)
59# pragma intrinsic(__indword)
60# pragma intrinsic(__invlpg)
61# pragma intrinsic(__stosd)
62# pragma intrinsic(__stosw)
63# pragma intrinsic(__stosb)
64# pragma intrinsic(__readcr0)
65# pragma intrinsic(__readcr2)
66# pragma intrinsic(__readcr3)
67# pragma intrinsic(__readcr4)
68# pragma intrinsic(__writecr0)
69# pragma intrinsic(__writecr3)
70# pragma intrinsic(__writecr4)
71# pragma intrinsic(_BitScanForward)
72# pragma intrinsic(_BitScanReverse)
73# pragma intrinsic(_bittest)
74# pragma intrinsic(_bittestandset)
75# pragma intrinsic(_bittestandreset)
76# pragma intrinsic(_bittestandcomplement)
77# pragma intrinsic(_byteswap_ushort)
78# pragma intrinsic(_byteswap_ulong)
79# pragma intrinsic(_interlockedbittestandset)
80# pragma intrinsic(_interlockedbittestandreset)
81# pragma intrinsic(_InterlockedAnd)
82# pragma intrinsic(_InterlockedOr)
83# pragma intrinsic(_InterlockedIncrement)
84# pragma intrinsic(_InterlockedDecrement)
85# pragma intrinsic(_InterlockedExchange)
86# pragma intrinsic(_InterlockedExchangeAdd)
87# pragma intrinsic(_InterlockedCompareExchange)
88# pragma intrinsic(_InterlockedCompareExchange64)
89# ifdef RT_ARCH_AMD64
90# pragma intrinsic(__stosq)
91# pragma intrinsic(__readcr8)
92# pragma intrinsic(__writecr8)
93# pragma intrinsic(_byteswap_uint64)
94# pragma intrinsic(_InterlockedExchange64)
95# endif
96# endif
97#endif
98#ifndef RT_INLINE_ASM_USES_INTRIN
99# define RT_INLINE_ASM_USES_INTRIN 0
100#endif
101
102
103
104/** @defgroup grp_asm ASM - Assembly Routines
105 * @ingroup grp_rt
106 *
 * @remarks The difference between ordered and unordered atomic operations is that
 *          the former will complete outstanding reads and writes before continuing
 *          while the latter doesn't make any promises about the order. Ordered
 *          operations don't, it seems, make any 100% promise with regard to whether
 *          the operation will complete before any subsequent memory access.
 *          (please, correct if wrong.)
113 *
114 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
115 * are unordered (note the Uo).
116 *
117 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
118 * or even optimize assembler instructions away. For instance, in the following code
119 * the second rdmsr instruction is optimized away because gcc treats that instruction
120 * as deterministic:
121 *
122 * @code
 *  static inline uint64_t rdmsr_low(int idx)
 *  {
 *      uint32_t low;
 *      __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *      return low;
 *  }
128 * ...
129 * uint32_t msr1 = rdmsr_low(1);
130 * foo(msr1);
131 * msr1 = rdmsr_low(1);
132 * bar(msr1);
133 * @endcode
134 *
135 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
136 * use the result of the first call as input parameter for bar() as well. For rdmsr this
137 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
142
/** @def RT_INLINE_ASM_EXTERNAL
 * Defined as 1 if the compiler does not support inline assembly.
 * The ASM* functions will then be implemented in an external .asm file.
 *
 * @remark  At the present time it's unconfirmed whether or not Microsoft skipped
 *          inline assembly in their AMD64 compiler.
 */
/* Only the combination of a Microsoft compiler and an AMD64 target selects the external implementations. */
#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
# define RT_INLINE_ASM_EXTERNAL 1
#else
# define RT_INLINE_ASM_EXTERNAL 0
#endif
155
/** @def RT_INLINE_ASM_GNU_STYLE
 * Defined as 1 if the compiler understands GNU style inline assembly.
 * Defined as 0 for Microsoft compilers (_MSC_VER defined); every other
 * compiler is taken to understand the GNU syntax.
 */
#if defined(_MSC_VER)
# define RT_INLINE_ASM_GNU_STYLE 0
#else
# define RT_INLINE_ASM_GNU_STYLE 1
#endif
164
165
/** @todo find a more proper place for this structure? */
#pragma pack(1)
/**
 * IDTR - the operand format passed to ASMGetIDTR() and ASMSetIDTR().
 *
 * Declared under pack(1) so that no padding is inserted between the 16-bit
 * size field and the pointer-sized address field.
 */
typedef struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
} RTIDTR, *PRTIDTR;
#pragma pack()
177
#pragma pack(1)
/**
 * GDTR - the operand format passed to ASMGetGDTR().
 *
 * Declared under pack(1) so that no padding is inserted between the 16-bit
 * size field and the pointer-sized address field.
 */
typedef struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
} RTGDTR, *PRTGDTR;
#pragma pack()
188
189
/** @def ASMReturnAddress
 * Gets the return address of the current (or calling if you like) function or method.
 *
 * Maps to the _ReturnAddress() intrinsic for Microsoft compilers and to
 * __builtin_return_address(0) for GCC (and when Doxygen is running).
 * Any other compiler is rejected with a compile-time error.
 */
#ifdef _MSC_VER
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
205
206
207/**
208 * Gets the content of the IDTR CPU register.
209 * @param pIdtr Where to store the IDTR contents.
210 */
211#if RT_INLINE_ASM_EXTERNAL
212DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
213#else
214DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
215{
216# if RT_INLINE_ASM_GNU_STYLE
217 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
218# else
219 __asm
220 {
221# ifdef RT_ARCH_AMD64
222 mov rax, [pIdtr]
223 sidt [rax]
224# else
225 mov eax, [pIdtr]
226 sidt [eax]
227# endif
228 }
229# endif
230}
231#endif
232
233
234/**
235 * Sets the content of the IDTR CPU register.
236 * @param pIdtr Where to load the IDTR contents from
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
240#else
241DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 lidt [rax]
251# else
252 mov eax, [pIdtr]
253 lidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Gets the content of the GDTR CPU register.
263 * @param pGdtr Where to store the GDTR contents.
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
267#else
268DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pGdtr]
277 sgdt [rax]
278# else
279 mov eax, [pGdtr]
280 sgdt [eax]
281# endif
282 }
283# endif
284}
285#endif
286
287/**
288 * Get the cs register.
289 * @returns cs.
290 */
291#if RT_INLINE_ASM_EXTERNAL
292DECLASM(RTSEL) ASMGetCS(void);
293#else
294DECLINLINE(RTSEL) ASMGetCS(void)
295{
296 RTSEL SelCS;
297# if RT_INLINE_ASM_GNU_STYLE
298 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
299# else
300 __asm
301 {
302 mov ax, cs
303 mov [SelCS], ax
304 }
305# endif
306 return SelCS;
307}
308#endif
309
310
311/**
312 * Get the DS register.
313 * @returns DS.
314 */
315#if RT_INLINE_ASM_EXTERNAL
316DECLASM(RTSEL) ASMGetDS(void);
317#else
318DECLINLINE(RTSEL) ASMGetDS(void)
319{
320 RTSEL SelDS;
321# if RT_INLINE_ASM_GNU_STYLE
322 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
323# else
324 __asm
325 {
326 mov ax, ds
327 mov [SelDS], ax
328 }
329# endif
330 return SelDS;
331}
332#endif
333
334
335/**
336 * Get the ES register.
337 * @returns ES.
338 */
339#if RT_INLINE_ASM_EXTERNAL
340DECLASM(RTSEL) ASMGetES(void);
341#else
342DECLINLINE(RTSEL) ASMGetES(void)
343{
344 RTSEL SelES;
345# if RT_INLINE_ASM_GNU_STYLE
346 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
347# else
348 __asm
349 {
350 mov ax, es
351 mov [SelES], ax
352 }
353# endif
354 return SelES;
355}
356#endif
357
358
359/**
360 * Get the FS register.
361 * @returns FS.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetFS(void);
365#else
366DECLINLINE(RTSEL) ASMGetFS(void)
367{
368 RTSEL SelFS;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
371# else
372 __asm
373 {
374 mov ax, fs
375 mov [SelFS], ax
376 }
377# endif
378 return SelFS;
379}
380# endif
381
382
383/**
384 * Get the GS register.
385 * @returns GS.
386 */
387#if RT_INLINE_ASM_EXTERNAL
388DECLASM(RTSEL) ASMGetGS(void);
389#else
390DECLINLINE(RTSEL) ASMGetGS(void)
391{
392 RTSEL SelGS;
393# if RT_INLINE_ASM_GNU_STYLE
394 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
395# else
396 __asm
397 {
398 mov ax, gs
399 mov [SelGS], ax
400 }
401# endif
402 return SelGS;
403}
404#endif
405
406
407/**
408 * Get the SS register.
409 * @returns SS.
410 */
411#if RT_INLINE_ASM_EXTERNAL
412DECLASM(RTSEL) ASMGetSS(void);
413#else
414DECLINLINE(RTSEL) ASMGetSS(void)
415{
416 RTSEL SelSS;
417# if RT_INLINE_ASM_GNU_STYLE
418 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
419# else
420 __asm
421 {
422 mov ax, ss
423 mov [SelSS], ax
424 }
425# endif
426 return SelSS;
427}
428#endif
429
430
431/**
432 * Get the TR register.
433 * @returns TR.
434 */
435#if RT_INLINE_ASM_EXTERNAL
436DECLASM(RTSEL) ASMGetTR(void);
437#else
438DECLINLINE(RTSEL) ASMGetTR(void)
439{
440 RTSEL SelTR;
441# if RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
443# else
444 __asm
445 {
446 str ax
447 mov [SelTR], ax
448 }
449# endif
450 return SelTR;
451}
452#endif
453
454
455/**
456 * Get the [RE]FLAGS register.
457 * @returns [RE]FLAGS.
458 */
459#if RT_INLINE_ASM_EXTERNAL
460DECLASM(RTCCUINTREG) ASMGetFlags(void);
461#else
462DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
463{
464 RTCCUINTREG uFlags;
465# if RT_INLINE_ASM_GNU_STYLE
466# ifdef RT_ARCH_AMD64
467 __asm__ __volatile__("pushfq\n\t"
468 "popq %0\n\t"
469 : "=g" (uFlags));
470# else
471 __asm__ __volatile__("pushfl\n\t"
472 "popl %0\n\t"
473 : "=g" (uFlags));
474# endif
475# else
476 __asm
477 {
478# ifdef RT_ARCH_AMD64
479 pushfq
480 pop [uFlags]
481# else
482 pushfd
483 pop [uFlags]
484# endif
485 }
486# endif
487 return uFlags;
488}
489#endif
490
491
492/**
493 * Set the [RE]FLAGS register.
494 * @param uFlags The new [RE]FLAGS value.
495 */
496#if RT_INLINE_ASM_EXTERNAL
497DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
498#else
499DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
500{
501# if RT_INLINE_ASM_GNU_STYLE
502# ifdef RT_ARCH_AMD64
503 __asm__ __volatile__("pushq %0\n\t"
504 "popfq\n\t"
505 : : "g" (uFlags));
506# else
507 __asm__ __volatile__("pushl %0\n\t"
508 "popfl\n\t"
509 : : "g" (uFlags));
510# endif
511# else
512 __asm
513 {
514# ifdef RT_ARCH_AMD64
515 push [uFlags]
516 popfq
517# else
518 push [uFlags]
519 popfd
520# endif
521 }
522# endif
523}
524#endif
525
526
527/**
528 * Gets the content of the CPU timestamp counter register.
529 *
530 * @returns TSC.
531 */
532#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
533DECLASM(uint64_t) ASMReadTSC(void);
534#else
535DECLINLINE(uint64_t) ASMReadTSC(void)
536{
537 RTUINT64U u;
538# if RT_INLINE_ASM_GNU_STYLE
539 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
540# else
541# if RT_INLINE_ASM_USES_INTRIN
542 u.u = __rdtsc();
543# else
544 __asm
545 {
546 rdtsc
547 mov [u.s.Lo], eax
548 mov [u.s.Hi], edx
549 }
550# endif
551# endif
552 return u.u;
553}
554#endif
555
556
557/**
558 * Performs the cpuid instruction returning all registers.
559 *
560 * @param uOperator CPUID operation (eax).
561 * @param pvEAX Where to store eax.
562 * @param pvEBX Where to store ebx.
563 * @param pvECX Where to store ecx.
564 * @param pvEDX Where to store edx.
565 * @remark We're using void pointers to ease the use of special bitfield structures and such.
566 */
567#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
568DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
569#else
570DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
571{
572# if RT_INLINE_ASM_GNU_STYLE
573# ifdef RT_ARCH_AMD64
574 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
575 __asm__ ("cpuid\n\t"
576 : "=a" (uRAX),
577 "=b" (uRBX),
578 "=c" (uRCX),
579 "=d" (uRDX)
580 : "0" (uOperator));
581 *(uint32_t *)pvEAX = (uint32_t)uRAX;
582 *(uint32_t *)pvEBX = (uint32_t)uRBX;
583 *(uint32_t *)pvECX = (uint32_t)uRCX;
584 *(uint32_t *)pvEDX = (uint32_t)uRDX;
585# else
586 __asm__ ("xchgl %%ebx, %1\n\t"
587 "cpuid\n\t"
588 "xchgl %%ebx, %1\n\t"
589 : "=a" (*(uint32_t *)pvEAX),
590 "=r" (*(uint32_t *)pvEBX),
591 "=c" (*(uint32_t *)pvECX),
592 "=d" (*(uint32_t *)pvEDX)
593 : "0" (uOperator));
594# endif
595
596# elif RT_INLINE_ASM_USES_INTRIN
597 int aInfo[4];
598 __cpuid(aInfo, uOperator);
599 *(uint32_t *)pvEAX = aInfo[0];
600 *(uint32_t *)pvEBX = aInfo[1];
601 *(uint32_t *)pvECX = aInfo[2];
602 *(uint32_t *)pvEDX = aInfo[3];
603
604# else
605 uint32_t uEAX;
606 uint32_t uEBX;
607 uint32_t uECX;
608 uint32_t uEDX;
609 __asm
610 {
611 push ebx
612 mov eax, [uOperator]
613 cpuid
614 mov [uEAX], eax
615 mov [uEBX], ebx
616 mov [uECX], ecx
617 mov [uEDX], edx
618 pop ebx
619 }
620 *(uint32_t *)pvEAX = uEAX;
621 *(uint32_t *)pvEBX = uEBX;
622 *(uint32_t *)pvECX = uECX;
623 *(uint32_t *)pvEDX = uEDX;
624# endif
625}
626#endif
627
628
629/**
630 * Performs the cpuid instruction returning all registers.
631 * Some subfunctions of cpuid take ECX as additional parameter (currently known for EAX=4)
632 *
633 * @param uOperator CPUID operation (eax).
634 * @param uIdxECX ecx index
635 * @param pvEAX Where to store eax.
636 * @param pvEBX Where to store ebx.
637 * @param pvECX Where to store ecx.
638 * @param pvEDX Where to store edx.
639 * @remark We're using void pointers to ease the use of special bitfield structures and such.
640 */
641#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
642DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
643#else
644DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
645{
646# if RT_INLINE_ASM_GNU_STYLE
647# ifdef RT_ARCH_AMD64
648 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
649 __asm__ ("cpuid\n\t"
650 : "=a" (uRAX),
651 "=b" (uRBX),
652 "=c" (uRCX),
653 "=d" (uRDX)
654 : "0" (uOperator),
655 "2" (uIdxECX));
656 *(uint32_t *)pvEAX = (uint32_t)uRAX;
657 *(uint32_t *)pvEBX = (uint32_t)uRBX;
658 *(uint32_t *)pvECX = (uint32_t)uRCX;
659 *(uint32_t *)pvEDX = (uint32_t)uRDX;
660# else
661 __asm__ ("xchgl %%ebx, %1\n\t"
662 "cpuid\n\t"
663 "xchgl %%ebx, %1\n\t"
664 : "=a" (*(uint32_t *)pvEAX),
665 "=r" (*(uint32_t *)pvEBX),
666 "=c" (*(uint32_t *)pvECX),
667 "=d" (*(uint32_t *)pvEDX)
668 : "0" (uOperator),
669 "2" (uIdxECX));
670# endif
671
672# elif RT_INLINE_ASM_USES_INTRIN
673 int aInfo[4];
674 /* ??? another intrinsic ??? */
675 __cpuid(aInfo, uOperator);
676 *(uint32_t *)pvEAX = aInfo[0];
677 *(uint32_t *)pvEBX = aInfo[1];
678 *(uint32_t *)pvECX = aInfo[2];
679 *(uint32_t *)pvEDX = aInfo[3];
680
681# else
682 uint32_t uEAX;
683 uint32_t uEBX;
684 uint32_t uECX;
685 uint32_t uEDX;
686 __asm
687 {
688 push ebx
689 mov eax, [uOperator]
690 mov ecx, [uIdxECX]
691 cpuid
692 mov [uEAX], eax
693 mov [uEBX], ebx
694 mov [uECX], ecx
695 mov [uEDX], edx
696 pop ebx
697 }
698 *(uint32_t *)pvEAX = uEAX;
699 *(uint32_t *)pvEBX = uEBX;
700 *(uint32_t *)pvECX = uECX;
701 *(uint32_t *)pvEDX = uEDX;
702# endif
703}
704#endif
705
706
707/**
708 * Performs the cpuid instruction returning ecx and edx.
709 *
710 * @param uOperator CPUID operation (eax).
711 * @param pvECX Where to store ecx.
712 * @param pvEDX Where to store edx.
713 * @remark We're using void pointers to ease the use of special bitfield structures and such.
714 */
715#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
716DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
717#else
718DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
719{
720 uint32_t uEBX;
721 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
722}
723#endif
724
725
726/**
727 * Performs the cpuid instruction returning edx.
728 *
729 * @param uOperator CPUID operation (eax).
730 * @returns EDX after cpuid operation.
731 */
732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
733DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
734#else
735DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
736{
737 RTCCUINTREG xDX;
738# if RT_INLINE_ASM_GNU_STYLE
739# ifdef RT_ARCH_AMD64
740 RTCCUINTREG uSpill;
741 __asm__ ("cpuid"
742 : "=a" (uSpill),
743 "=d" (xDX)
744 : "0" (uOperator)
745 : "rbx", "rcx");
746# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
747 __asm__ ("push %%ebx\n\t"
748 "cpuid\n\t"
749 "pop %%ebx\n\t"
750 : "=a" (uOperator),
751 "=d" (xDX)
752 : "0" (uOperator)
753 : "ecx");
754# else
755 __asm__ ("cpuid"
756 : "=a" (uOperator),
757 "=d" (xDX)
758 : "0" (uOperator)
759 : "ebx", "ecx");
760# endif
761
762# elif RT_INLINE_ASM_USES_INTRIN
763 int aInfo[4];
764 __cpuid(aInfo, uOperator);
765 xDX = aInfo[3];
766
767# else
768 __asm
769 {
770 push ebx
771 mov eax, [uOperator]
772 cpuid
773 mov [xDX], edx
774 pop ebx
775 }
776# endif
777 return (uint32_t)xDX;
778}
779#endif
780
781
782/**
783 * Performs the cpuid instruction returning ecx.
784 *
785 * @param uOperator CPUID operation (eax).
786 * @returns ECX after cpuid operation.
787 */
788#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
789DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
790#else
791DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
792{
793 RTCCUINTREG xCX;
794# if RT_INLINE_ASM_GNU_STYLE
795# ifdef RT_ARCH_AMD64
796 RTCCUINTREG uSpill;
797 __asm__ ("cpuid"
798 : "=a" (uSpill),
799 "=c" (xCX)
800 : "0" (uOperator)
801 : "rbx", "rdx");
802# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
803 __asm__ ("push %%ebx\n\t"
804 "cpuid\n\t"
805 "pop %%ebx\n\t"
806 : "=a" (uOperator),
807 "=c" (xCX)
808 : "0" (uOperator)
809 : "edx");
810# else
811 __asm__ ("cpuid"
812 : "=a" (uOperator),
813 "=c" (xCX)
814 : "0" (uOperator)
815 : "ebx", "edx");
816
817# endif
818
819# elif RT_INLINE_ASM_USES_INTRIN
820 int aInfo[4];
821 __cpuid(aInfo, uOperator);
822 xCX = aInfo[2];
823
824# else
825 __asm
826 {
827 push ebx
828 mov eax, [uOperator]
829 cpuid
830 mov [xCX], ecx
831 pop ebx
832 }
833# endif
834 return (uint32_t)xCX;
835}
836#endif
837
838
839/**
840 * Checks if the current CPU supports CPUID.
841 *
842 * @returns true if CPUID is supported.
843 */
844DECLINLINE(bool) ASMHasCpuId(void)
845{
846#ifdef RT_ARCH_AMD64
847 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
848#else /* !RT_ARCH_AMD64 */
849 bool fRet = false;
850# if RT_INLINE_ASM_GNU_STYLE
851 uint32_t u1;
852 uint32_t u2;
853 __asm__ ("pushf\n\t"
854 "pop %1\n\t"
855 "mov %1, %2\n\t"
856 "xorl $0x200000, %1\n\t"
857 "push %1\n\t"
858 "popf\n\t"
859 "pushf\n\t"
860 "pop %1\n\t"
861 "cmpl %1, %2\n\t"
862 "setne %0\n\t"
863 "push %2\n\t"
864 "popf\n\t"
865 : "=m" (fRet), "=r" (u1), "=r" (u2));
866# else
867 __asm
868 {
869 pushfd
870 pop eax
871 mov ebx, eax
872 xor eax, 0200000h
873 push eax
874 popfd
875 pushfd
876 pop eax
877 cmp eax, ebx
878 setne fRet
879 push ebx
880 popfd
881 }
882# endif
883 return fRet;
884#endif /* !RT_ARCH_AMD64 */
885}
886
887
888/**
889 * Gets the APIC ID of the current CPU.
890 *
891 * @returns the APIC ID.
892 */
893#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
894DECLASM(uint8_t) ASMGetApicId(void);
895#else
896DECLINLINE(uint8_t) ASMGetApicId(void)
897{
898 RTCCUINTREG xBX;
899# if RT_INLINE_ASM_GNU_STYLE
900# ifdef RT_ARCH_AMD64
901 RTCCUINTREG uSpill;
902 __asm__ ("cpuid"
903 : "=a" (uSpill),
904 "=b" (xBX)
905 : "0" (1)
906 : "rcx", "rdx");
907# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
908 RTCCUINTREG uSpill;
909 __asm__ ("mov %%ebx,%1\n\t"
910 "cpuid\n\t"
911 "xchgl %%ebx,%1\n\t"
912 : "=a" (uSpill),
913 "=r" (xBX)
914 : "0" (1)
915 : "ecx", "edx");
916# else
917 RTCCUINTREG uSpill;
918 __asm__ ("cpuid"
919 : "=a" (uSpill),
920 "=b" (xBX)
921 : "0" (1)
922 : "ecx", "edx");
923# endif
924
925# elif RT_INLINE_ASM_USES_INTRIN
926 int aInfo[4];
927 __cpuid(aInfo, 1);
928 xBX = aInfo[1];
929
930# else
931 __asm
932 {
933 push ebx
934 mov eax, 1
935 cpuid
936 mov [xBX], ebx
937 pop ebx
938 }
939# endif
940 return (uint8_t)(xBX >> 24);
941}
942#endif
943
944
/**
 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
 *
 * The vendor string "GenuineIntel" is spread over the three registers in
 * ebx, edx, ecx order, so all three must match.
 *
 * @returns true/false.
 * @param   uEBX    EBX return from ASMCpuId(0), "Genu".
 * @param   uECX    ECX return from ASMCpuId(0), "ntel".
 * @param   uEDX    EDX return from ASMCpuId(0), "ineI".
 */
DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
{
    return uEBX == 0x756e6547
        && uECX == 0x6c65746e
        && uEDX == 0x49656e69;
}
959
960
961/**
962 * Tests if this is an genuin Intel CPU.
963 *
964 * @returns true/false.
965 */
966DECLINLINE(bool) ASMIsIntelCpu(void)
967{
968 uint32_t uEAX, uEBX, uECX, uEDX;
969 ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
970 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
971}
972
973
/**
 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The base family is bits 11:8. When it is 0xf, the 8-bit extended family
 * (bits 27:20) is added to it.
 *
 * @returns Family.
 * @param   uEAX    EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    if (uBaseFamily != 0xf)
        return uBaseFamily;
    return ((uEAX >> 20) & 0xff) + 0xf;
}
986
987
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
 *
 * The extended model bits (19:16) are merged in as the high nibble when the
 * family field is 0xf or 0x6.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
{
    uint32_t const uFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel  = (uEAX >> 4) & 0xf;
    if (uFamily == 0xf || uFamily == 0x6)
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1001
1002
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
 *
 * The extended model bits (19:16) are merged in as the high nibble only when
 * the family field is 0xf.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
{
    uint32_t uModel = (uEAX >> 4) & 0xf;
    if (((uEAX >> 8) & 0xf) == 0xf)
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1016
1017
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The extended model bits (19:16) are merged in as the high nibble when the
 * family field is 0xf, or when it is 0x6 on an Intel CPU.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 * @param   fIntel  Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
 */
DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
{
    uint32_t const uFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel  = (uEAX >> 4) & 0xf;
    if (uFamily == 0xf || (uFamily == 0x6 && fIntel))
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1031
1032
/**
 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * @returns Stepping (the low four bits of uEAX).
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
{
    uint32_t const uStepping = uEAX & 0xf;
    return uStepping;
}
1043
1044
1045/**
1046 * Get cr0.
1047 * @returns cr0.
1048 */
1049#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1050DECLASM(RTCCUINTREG) ASMGetCR0(void);
1051#else
1052DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1053{
1054 RTCCUINTREG uCR0;
1055# if RT_INLINE_ASM_USES_INTRIN
1056 uCR0 = __readcr0();
1057
1058# elif RT_INLINE_ASM_GNU_STYLE
1059# ifdef RT_ARCH_AMD64
1060 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1061# else
1062 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1063# endif
1064# else
1065 __asm
1066 {
1067# ifdef RT_ARCH_AMD64
1068 mov rax, cr0
1069 mov [uCR0], rax
1070# else
1071 mov eax, cr0
1072 mov [uCR0], eax
1073# endif
1074 }
1075# endif
1076 return uCR0;
1077}
1078#endif
1079
1080
1081/**
1082 * Sets the CR0 register.
1083 * @param uCR0 The new CR0 value.
1084 */
1085#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1086DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1087#else
1088DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1089{
1090# if RT_INLINE_ASM_USES_INTRIN
1091 __writecr0(uCR0);
1092
1093# elif RT_INLINE_ASM_GNU_STYLE
1094# ifdef RT_ARCH_AMD64
1095 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1096# else
1097 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1098# endif
1099# else
1100 __asm
1101 {
1102# ifdef RT_ARCH_AMD64
1103 mov rax, [uCR0]
1104 mov cr0, rax
1105# else
1106 mov eax, [uCR0]
1107 mov cr0, eax
1108# endif
1109 }
1110# endif
1111}
1112#endif
1113
1114
1115/**
1116 * Get cr2.
1117 * @returns cr2.
1118 */
1119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1120DECLASM(RTCCUINTREG) ASMGetCR2(void);
1121#else
1122DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1123{
1124 RTCCUINTREG uCR2;
1125# if RT_INLINE_ASM_USES_INTRIN
1126 uCR2 = __readcr2();
1127
1128# elif RT_INLINE_ASM_GNU_STYLE
1129# ifdef RT_ARCH_AMD64
1130 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1131# else
1132 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1133# endif
1134# else
1135 __asm
1136 {
1137# ifdef RT_ARCH_AMD64
1138 mov rax, cr2
1139 mov [uCR2], rax
1140# else
1141 mov eax, cr2
1142 mov [uCR2], eax
1143# endif
1144 }
1145# endif
1146 return uCR2;
1147}
1148#endif
1149
1150
1151/**
1152 * Sets the CR2 register.
1153 * @param uCR2 The new CR0 value.
1154 */
1155#if RT_INLINE_ASM_EXTERNAL
1156DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1157#else
1158DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1159{
1160# if RT_INLINE_ASM_GNU_STYLE
1161# ifdef RT_ARCH_AMD64
1162 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1163# else
1164 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1165# endif
1166# else
1167 __asm
1168 {
1169# ifdef RT_ARCH_AMD64
1170 mov rax, [uCR2]
1171 mov cr2, rax
1172# else
1173 mov eax, [uCR2]
1174 mov cr2, eax
1175# endif
1176 }
1177# endif
1178}
1179#endif
1180
1181
1182/**
1183 * Get cr3.
1184 * @returns cr3.
1185 */
1186#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1187DECLASM(RTCCUINTREG) ASMGetCR3(void);
1188#else
1189DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1190{
1191 RTCCUINTREG uCR3;
1192# if RT_INLINE_ASM_USES_INTRIN
1193 uCR3 = __readcr3();
1194
1195# elif RT_INLINE_ASM_GNU_STYLE
1196# ifdef RT_ARCH_AMD64
1197 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1198# else
1199 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1200# endif
1201# else
1202 __asm
1203 {
1204# ifdef RT_ARCH_AMD64
1205 mov rax, cr3
1206 mov [uCR3], rax
1207# else
1208 mov eax, cr3
1209 mov [uCR3], eax
1210# endif
1211 }
1212# endif
1213 return uCR3;
1214}
1215#endif
1216
1217
1218/**
1219 * Sets the CR3 register.
1220 *
1221 * @param uCR3 New CR3 value.
1222 */
1223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1224DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1225#else
1226DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1227{
1228# if RT_INLINE_ASM_USES_INTRIN
1229 __writecr3(uCR3);
1230
1231# elif RT_INLINE_ASM_GNU_STYLE
1232# ifdef RT_ARCH_AMD64
1233 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1234# else
1235 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1236# endif
1237# else
1238 __asm
1239 {
1240# ifdef RT_ARCH_AMD64
1241 mov rax, [uCR3]
1242 mov cr3, rax
1243# else
1244 mov eax, [uCR3]
1245 mov cr3, eax
1246# endif
1247 }
1248# endif
1249}
1250#endif
1251
1252
1253/**
1254 * Reloads the CR3 register.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(void) ASMReloadCR3(void);
1258#else
1259DECLINLINE(void) ASMReloadCR3(void)
1260{
1261# if RT_INLINE_ASM_USES_INTRIN
1262 __writecr3(__readcr3());
1263
1264# elif RT_INLINE_ASM_GNU_STYLE
1265 RTCCUINTREG u;
1266# ifdef RT_ARCH_AMD64
1267 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1268 "movq %0, %%cr3\n\t"
1269 : "=r" (u));
1270# else
1271 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1272 "movl %0, %%cr3\n\t"
1273 : "=r" (u));
1274# endif
1275# else
1276 __asm
1277 {
1278# ifdef RT_ARCH_AMD64
1279 mov rax, cr3
1280 mov cr3, rax
1281# else
1282 mov eax, cr3
1283 mov cr3, eax
1284# endif
1285 }
1286# endif
1287}
1288#endif
1289
1290
1291/**
1292 * Get cr4.
1293 * @returns cr4.
1294 */
1295#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1296DECLASM(RTCCUINTREG) ASMGetCR4(void);
1297#else
1298DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1299{
1300 RTCCUINTREG uCR4;
1301# if RT_INLINE_ASM_USES_INTRIN
1302 uCR4 = __readcr4();
1303
1304# elif RT_INLINE_ASM_GNU_STYLE
1305# ifdef RT_ARCH_AMD64
1306 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1307# else
1308 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1309# endif
1310# else
1311 __asm
1312 {
1313# ifdef RT_ARCH_AMD64
1314 mov rax, cr4
1315 mov [uCR4], rax
1316# else
1317 push eax /* just in case */
1318 /*mov eax, cr4*/
1319 _emit 0x0f
1320 _emit 0x20
1321 _emit 0xe0
1322 mov [uCR4], eax
1323 pop eax
1324# endif
1325 }
1326# endif
1327 return uCR4;
1328}
1329#endif
1330
1331
1332/**
1333 * Sets the CR4 register.
1334 *
1335 * @param uCR4 New CR4 value.
1336 */
1337#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1338DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1339#else
1340DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1341{
1342# if RT_INLINE_ASM_USES_INTRIN
1343 __writecr4(uCR4);
1344
1345# elif RT_INLINE_ASM_GNU_STYLE
1346# ifdef RT_ARCH_AMD64
1347 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1348# else
1349 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1350# endif
1351# else
1352 __asm
1353 {
1354# ifdef RT_ARCH_AMD64
1355 mov rax, [uCR4]
1356 mov cr4, rax
1357# else
1358 mov eax, [uCR4]
1359 _emit 0x0F
1360 _emit 0x22
1361 _emit 0xE0 /* mov cr4, eax */
1362# endif
1363 }
1364# endif
1365}
1366#endif
1367
1368
1369/**
1370 * Get cr8.
1371 * @returns cr8.
1372 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1373 */
1374#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1375DECLASM(RTCCUINTREG) ASMGetCR8(void);
1376#else
1377DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1378{
1379# ifdef RT_ARCH_AMD64
1380 RTCCUINTREG uCR8;
1381# if RT_INLINE_ASM_USES_INTRIN
1382 uCR8 = __readcr8();
1383
1384# elif RT_INLINE_ASM_GNU_STYLE
1385 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1386# else
1387 __asm
1388 {
1389 mov rax, cr8
1390 mov [uCR8], rax
1391 }
1392# endif
1393 return uCR8;
1394# else /* !RT_ARCH_AMD64 */
1395 return 0;
1396# endif /* !RT_ARCH_AMD64 */
1397}
1398#endif
1399
1400
1401/**
1402 * Enables interrupts (EFLAGS.IF).
1403 */
1404#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1405DECLASM(void) ASMIntEnable(void);
1406#else
1407DECLINLINE(void) ASMIntEnable(void)
1408{
1409# if RT_INLINE_ASM_GNU_STYLE
1410 __asm("sti\n");
1411# elif RT_INLINE_ASM_USES_INTRIN
1412 _enable();
1413# else
1414 __asm sti
1415# endif
1416}
1417#endif
1418
1419
1420/**
1421 * Disables interrupts (!EFLAGS.IF).
1422 */
1423#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1424DECLASM(void) ASMIntDisable(void);
1425#else
1426DECLINLINE(void) ASMIntDisable(void)
1427{
1428# if RT_INLINE_ASM_GNU_STYLE
1429 __asm("cli\n");
1430# elif RT_INLINE_ASM_USES_INTRIN
1431 _disable();
1432# else
1433 __asm cli
1434# endif
1435}
1436#endif
1437
1438
1439/**
1440 * Disables interrupts and returns previous xFLAGS.
1441 */
1442#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1443DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1444#else
1445DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1446{
1447 RTCCUINTREG xFlags;
1448# if RT_INLINE_ASM_GNU_STYLE
1449# ifdef RT_ARCH_AMD64
1450 __asm__ __volatile__("pushfq\n\t"
1451 "cli\n\t"
1452 "popq %0\n\t"
1453 : "=rm" (xFlags));
1454# else
1455 __asm__ __volatile__("pushfl\n\t"
1456 "cli\n\t"
1457 "popl %0\n\t"
1458 : "=rm" (xFlags));
1459# endif
1460# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1461 xFlags = ASMGetFlags();
1462 _disable();
1463# else
1464 __asm {
1465 pushfd
1466 cli
1467 pop [xFlags]
1468 }
1469# endif
1470 return xFlags;
1471}
1472#endif
1473
1474
1475/**
1476 * Reads a machine specific register.
1477 *
1478 * @returns Register content.
1479 * @param uRegister Register to read.
1480 */
1481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1482DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1483#else
1484DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1485{
1486 RTUINT64U u;
1487# if RT_INLINE_ASM_GNU_STYLE
1488 __asm__ __volatile__("rdmsr\n\t"
1489 : "=a" (u.s.Lo),
1490 "=d" (u.s.Hi)
1491 : "c" (uRegister));
1492
1493# elif RT_INLINE_ASM_USES_INTRIN
1494 u.u = __readmsr(uRegister);
1495
1496# else
1497 __asm
1498 {
1499 mov ecx, [uRegister]
1500 rdmsr
1501 mov [u.s.Lo], eax
1502 mov [u.s.Hi], edx
1503 }
1504# endif
1505
1506 return u.u;
1507}
1508#endif
1509
1510
1511/**
1512 * Writes a machine specific register.
1513 *
1514 * @returns Register content.
1515 * @param uRegister Register to write to.
1516 * @param u64Val Value to write.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1519DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1520#else
1521DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1522{
1523 RTUINT64U u;
1524
1525 u.u = u64Val;
1526# if RT_INLINE_ASM_GNU_STYLE
1527 __asm__ __volatile__("wrmsr\n\t"
1528 ::"a" (u.s.Lo),
1529 "d" (u.s.Hi),
1530 "c" (uRegister));
1531
1532# elif RT_INLINE_ASM_USES_INTRIN
1533 __writemsr(uRegister, u.u);
1534
1535# else
1536 __asm
1537 {
1538 mov ecx, [uRegister]
1539 mov edx, [u.s.Hi]
1540 mov eax, [u.s.Lo]
1541 wrmsr
1542 }
1543# endif
1544}
1545#endif
1546
1547
1548/**
1549 * Reads low part of a machine specific register.
1550 *
1551 * @returns Register content.
1552 * @param uRegister Register to read.
1553 */
1554#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1555DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1556#else
1557DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1558{
1559 uint32_t u32;
1560# if RT_INLINE_ASM_GNU_STYLE
1561 __asm__ __volatile__("rdmsr\n\t"
1562 : "=a" (u32)
1563 : "c" (uRegister)
1564 : "edx");
1565
1566# elif RT_INLINE_ASM_USES_INTRIN
1567 u32 = (uint32_t)__readmsr(uRegister);
1568
1569#else
1570 __asm
1571 {
1572 mov ecx, [uRegister]
1573 rdmsr
1574 mov [u32], eax
1575 }
1576# endif
1577
1578 return u32;
1579}
1580#endif
1581
1582
1583/**
1584 * Reads high part of a machine specific register.
1585 *
1586 * @returns Register content.
1587 * @param uRegister Register to read.
1588 */
1589#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1590DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1591#else
1592DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1593{
1594 uint32_t u32;
1595# if RT_INLINE_ASM_GNU_STYLE
1596 __asm__ __volatile__("rdmsr\n\t"
1597 : "=d" (u32)
1598 : "c" (uRegister)
1599 : "eax");
1600
1601# elif RT_INLINE_ASM_USES_INTRIN
1602 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1603
1604# else
1605 __asm
1606 {
1607 mov ecx, [uRegister]
1608 rdmsr
1609 mov [u32], edx
1610 }
1611# endif
1612
1613 return u32;
1614}
1615#endif
1616
1617
1618/**
1619 * Gets dr7.
1620 *
1621 * @returns dr7.
1622 */
1623#if RT_INLINE_ASM_EXTERNAL
1624DECLASM(RTCCUINTREG) ASMGetDR7(void);
1625#else
1626DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1627{
1628 RTCCUINTREG uDR7;
1629# if RT_INLINE_ASM_GNU_STYLE
1630# ifdef RT_ARCH_AMD64
1631 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1632# else
1633 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1634# endif
1635# else
1636 __asm
1637 {
1638# ifdef RT_ARCH_AMD64
1639 mov rax, dr7
1640 mov [uDR7], rax
1641# else
1642 mov eax, dr7
1643 mov [uDR7], eax
1644# endif
1645 }
1646# endif
1647 return uDR7;
1648}
1649#endif
1650
1651
1652/**
1653 * Gets dr6.
1654 *
1655 * @returns dr6.
1656 */
1657#if RT_INLINE_ASM_EXTERNAL
1658DECLASM(RTCCUINTREG) ASMGetDR6(void);
1659#else
1660DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1661{
1662 RTCCUINTREG uDR6;
1663# if RT_INLINE_ASM_GNU_STYLE
1664# ifdef RT_ARCH_AMD64
1665 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1666# else
1667 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1668# endif
1669# else
1670 __asm
1671 {
1672# ifdef RT_ARCH_AMD64
1673 mov rax, dr6
1674 mov [uDR6], rax
1675# else
1676 mov eax, dr6
1677 mov [uDR6], eax
1678# endif
1679 }
1680# endif
1681 return uDR6;
1682}
1683#endif
1684
1685
1686/**
1687 * Reads and clears DR6.
1688 *
1689 * @returns DR6.
1690 */
1691#if RT_INLINE_ASM_EXTERNAL
1692DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1693#else
1694DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1695{
1696 RTCCUINTREG uDR6;
1697# if RT_INLINE_ASM_GNU_STYLE
1698 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1699# ifdef RT_ARCH_AMD64
1700 __asm__ __volatile__("movq %%dr6, %0\n\t"
1701 "movq %1, %%dr6\n\t"
1702 : "=r" (uDR6)
1703 : "r" (uNewValue));
1704# else
1705 __asm__ __volatile__("movl %%dr6, %0\n\t"
1706 "movl %1, %%dr6\n\t"
1707 : "=r" (uDR6)
1708 : "r" (uNewValue));
1709# endif
1710# else
1711 __asm
1712 {
1713# ifdef RT_ARCH_AMD64
1714 mov rax, dr6
1715 mov [uDR6], rax
1716 mov rcx, rax
1717 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1718 mov dr6, rcx
1719# else
1720 mov eax, dr6
1721 mov [uDR6], eax
1722 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1723 mov dr6, ecx
1724# endif
1725 }
1726# endif
1727 return uDR6;
1728}
1729#endif
1730
1731
1732/**
1733 * Compiler memory barrier.
1734 *
1735 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1736 * values or any outstanding writes when returning from this function.
1737 *
1738 * This function must be used if non-volatile data is modified by a
1739 * device or the VMM. Typical cases are port access, MMIO access,
1740 * trapping instruction, etc.
1741 */
1742#if RT_INLINE_ASM_GNU_STYLE
1743# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1744#elif RT_INLINE_ASM_USES_INTRIN
1745# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1746#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1747DECLINLINE(void) ASMCompilerBarrier(void)
1748{
1749 __asm
1750 {
1751 }
1752}
1753#endif
1754
1755
1756/**
1757 * Writes a 8-bit unsigned integer to an I/O port, ordered.
1758 *
1759 * @param Port I/O port to read from.
1760 * @param u8 8-bit integer to write.
1761 */
1762#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1763DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1764#else
1765DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1766{
1767# if RT_INLINE_ASM_GNU_STYLE
1768 __asm__ __volatile__("outb %b1, %w0\n\t"
1769 :: "Nd" (Port),
1770 "a" (u8));
1771
1772# elif RT_INLINE_ASM_USES_INTRIN
1773 __outbyte(Port, u8);
1774
1775# else
1776 __asm
1777 {
1778 mov dx, [Port]
1779 mov al, [u8]
1780 out dx, al
1781 }
1782# endif
1783}
1784#endif
1785
1786
1787/**
1788 * Gets a 8-bit unsigned integer from an I/O port, ordered.
1789 *
1790 * @returns 8-bit integer.
1791 * @param Port I/O port to read from.
1792 */
1793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1794DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1795#else
1796DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1797{
1798 uint8_t u8;
1799# if RT_INLINE_ASM_GNU_STYLE
1800 __asm__ __volatile__("inb %w1, %b0\n\t"
1801 : "=a" (u8)
1802 : "Nd" (Port));
1803
1804# elif RT_INLINE_ASM_USES_INTRIN
1805 u8 = __inbyte(Port);
1806
1807# else
1808 __asm
1809 {
1810 mov dx, [Port]
1811 in al, dx
1812 mov [u8], al
1813 }
1814# endif
1815 return u8;
1816}
1817#endif
1818
1819
1820/**
1821 * Writes a 16-bit unsigned integer to an I/O port, ordered.
1822 *
1823 * @param Port I/O port to read from.
1824 * @param u16 16-bit integer to write.
1825 */
1826#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1827DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1828#else
1829DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1830{
1831# if RT_INLINE_ASM_GNU_STYLE
1832 __asm__ __volatile__("outw %w1, %w0\n\t"
1833 :: "Nd" (Port),
1834 "a" (u16));
1835
1836# elif RT_INLINE_ASM_USES_INTRIN
1837 __outword(Port, u16);
1838
1839# else
1840 __asm
1841 {
1842 mov dx, [Port]
1843 mov ax, [u16]
1844 out dx, ax
1845 }
1846# endif
1847}
1848#endif
1849
1850
1851/**
1852 * Gets a 16-bit unsigned integer from an I/O port, ordered.
1853 *
1854 * @returns 16-bit integer.
1855 * @param Port I/O port to read from.
1856 */
1857#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1858DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1859#else
1860DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1861{
1862 uint16_t u16;
1863# if RT_INLINE_ASM_GNU_STYLE
1864 __asm__ __volatile__("inw %w1, %w0\n\t"
1865 : "=a" (u16)
1866 : "Nd" (Port));
1867
1868# elif RT_INLINE_ASM_USES_INTRIN
1869 u16 = __inword(Port);
1870
1871# else
1872 __asm
1873 {
1874 mov dx, [Port]
1875 in ax, dx
1876 mov [u16], ax
1877 }
1878# endif
1879 return u16;
1880}
1881#endif
1882
1883
1884/**
1885 * Writes a 32-bit unsigned integer to an I/O port, ordered.
1886 *
1887 * @param Port I/O port to read from.
1888 * @param u32 32-bit integer to write.
1889 */
1890#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1891DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1892#else
1893DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1894{
1895# if RT_INLINE_ASM_GNU_STYLE
1896 __asm__ __volatile__("outl %1, %w0\n\t"
1897 :: "Nd" (Port),
1898 "a" (u32));
1899
1900# elif RT_INLINE_ASM_USES_INTRIN
1901 __outdword(Port, u32);
1902
1903# else
1904 __asm
1905 {
1906 mov dx, [Port]
1907 mov eax, [u32]
1908 out dx, eax
1909 }
1910# endif
1911}
1912#endif
1913
1914
1915/**
1916 * Gets a 32-bit unsigned integer from an I/O port, ordered.
1917 *
1918 * @returns 32-bit integer.
1919 * @param Port I/O port to read from.
1920 */
1921#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1922DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1923#else
1924DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1925{
1926 uint32_t u32;
1927# if RT_INLINE_ASM_GNU_STYLE
1928 __asm__ __volatile__("inl %w1, %0\n\t"
1929 : "=a" (u32)
1930 : "Nd" (Port));
1931
1932# elif RT_INLINE_ASM_USES_INTRIN
1933 u32 = __indword(Port);
1934
1935# else
1936 __asm
1937 {
1938 mov dx, [Port]
1939 in eax, dx
1940 mov [u32], eax
1941 }
1942# endif
1943 return u32;
1944}
1945#endif
1946
1947/** @todo string i/o */
1948
1949
1950/**
1951 * Atomically Exchange an unsigned 8-bit value, ordered.
1952 *
1953 * @returns Current *pu8 value
1954 * @param pu8 Pointer to the 8-bit variable to update.
1955 * @param u8 The 8-bit value to assign to *pu8.
1956 */
1957#if RT_INLINE_ASM_EXTERNAL
1958DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1959#else
1960DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1961{
1962# if RT_INLINE_ASM_GNU_STYLE
1963 __asm__ __volatile__("xchgb %0, %1\n\t"
1964 : "=m" (*pu8),
1965 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
1966 : "1" (u8));
1967# else
1968 __asm
1969 {
1970# ifdef RT_ARCH_AMD64
1971 mov rdx, [pu8]
1972 mov al, [u8]
1973 xchg [rdx], al
1974 mov [u8], al
1975# else
1976 mov edx, [pu8]
1977 mov al, [u8]
1978 xchg [edx], al
1979 mov [u8], al
1980# endif
1981 }
1982# endif
1983 return u8;
1984}
1985#endif
1986
1987
1988/**
1989 * Atomically Exchange a signed 8-bit value, ordered.
1990 *
1991 * @returns Current *pu8 value
1992 * @param pi8 Pointer to the 8-bit variable to update.
1993 * @param i8 The 8-bit value to assign to *pi8.
1994 */
1995DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1996{
1997 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1998}
1999
2000
2001/**
2002 * Atomically Exchange a bool value, ordered.
2003 *
2004 * @returns Current *pf value
2005 * @param pf Pointer to the 8-bit variable to update.
2006 * @param f The 8-bit value to assign to *pi8.
2007 */
2008DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2009{
2010#ifdef _MSC_VER
2011 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2012#else
2013 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2014#endif
2015}
2016
2017
2018/**
2019 * Atomically Exchange an unsigned 16-bit value, ordered.
2020 *
2021 * @returns Current *pu16 value
2022 * @param pu16 Pointer to the 16-bit variable to update.
2023 * @param u16 The 16-bit value to assign to *pu16.
2024 */
2025#if RT_INLINE_ASM_EXTERNAL
2026DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2027#else
2028DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2029{
2030# if RT_INLINE_ASM_GNU_STYLE
2031 __asm__ __volatile__("xchgw %0, %1\n\t"
2032 : "=m" (*pu16),
2033 "=r" (u16)
2034 : "1" (u16));
2035# else
2036 __asm
2037 {
2038# ifdef RT_ARCH_AMD64
2039 mov rdx, [pu16]
2040 mov ax, [u16]
2041 xchg [rdx], ax
2042 mov [u16], ax
2043# else
2044 mov edx, [pu16]
2045 mov ax, [u16]
2046 xchg [edx], ax
2047 mov [u16], ax
2048# endif
2049 }
2050# endif
2051 return u16;
2052}
2053#endif
2054
2055
2056/**
2057 * Atomically Exchange a signed 16-bit value, ordered.
2058 *
2059 * @returns Current *pu16 value
2060 * @param pi16 Pointer to the 16-bit variable to update.
2061 * @param i16 The 16-bit value to assign to *pi16.
2062 */
2063DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2064{
2065 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2066}
2067
2068
2069/**
2070 * Atomically Exchange an unsigned 32-bit value, ordered.
2071 *
2072 * @returns Current *pu32 value
2073 * @param pu32 Pointer to the 32-bit variable to update.
2074 * @param u32 The 32-bit value to assign to *pu32.
2075 */
2076#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2077DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2078#else
2079DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2080{
2081# if RT_INLINE_ASM_GNU_STYLE
2082 __asm__ __volatile__("xchgl %0, %1\n\t"
2083 : "=m" (*pu32),
2084 "=r" (u32)
2085 : "1" (u32));
2086
2087# elif RT_INLINE_ASM_USES_INTRIN
2088 u32 = _InterlockedExchange((long *)pu32, u32);
2089
2090# else
2091 __asm
2092 {
2093# ifdef RT_ARCH_AMD64
2094 mov rdx, [pu32]
2095 mov eax, u32
2096 xchg [rdx], eax
2097 mov [u32], eax
2098# else
2099 mov edx, [pu32]
2100 mov eax, u32
2101 xchg [edx], eax
2102 mov [u32], eax
2103# endif
2104 }
2105# endif
2106 return u32;
2107}
2108#endif
2109
2110
2111/**
2112 * Atomically Exchange a signed 32-bit value, ordered.
2113 *
2114 * @returns Current *pu32 value
2115 * @param pi32 Pointer to the 32-bit variable to update.
2116 * @param i32 The 32-bit value to assign to *pi32.
2117 */
2118DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2119{
2120 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2121}
2122
2123
2124/**
2125 * Atomically Exchange an unsigned 64-bit value, ordered.
2126 *
2127 * @returns Current *pu64 value
2128 * @param pu64 Pointer to the 64-bit variable to update.
2129 * @param u64 The 64-bit value to assign to *pu64.
2130 */
2131#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2132DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2133#else
2134DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2135{
2136# if defined(RT_ARCH_AMD64)
2137# if RT_INLINE_ASM_USES_INTRIN
2138 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2139
2140# elif RT_INLINE_ASM_GNU_STYLE
2141 __asm__ __volatile__("xchgq %0, %1\n\t"
2142 : "=m" (*pu64),
2143 "=r" (u64)
2144 : "1" (u64));
2145# else
2146 __asm
2147 {
2148 mov rdx, [pu64]
2149 mov rax, [u64]
2150 xchg [rdx], rax
2151 mov [u64], rax
2152 }
2153# endif
2154# else /* !RT_ARCH_AMD64 */
2155# if RT_INLINE_ASM_GNU_STYLE
2156# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2157 uint32_t u32EBX = (uint32_t)u64;
2158 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2159 "xchgl %%ebx, %3\n\t"
2160 "1:\n\t"
2161 "lock; cmpxchg8b (%5)\n\t"
2162 "jnz 1b\n\t"
2163 "movl %3, %%ebx\n\t"
2164 /*"xchgl %%esi, %5\n\t"*/
2165 : "=A" (u64),
2166 "=m" (*pu64)
2167 : "0" (*pu64),
2168 "m" ( u32EBX ),
2169 "c" ( (uint32_t)(u64 >> 32) ),
2170 "S" (pu64) );
2171# else /* !PIC */
2172 __asm__ __volatile__("1:\n\t"
2173 "lock; cmpxchg8b %1\n\t"
2174 "jnz 1b\n\t"
2175 : "=A" (u64),
2176 "=m" (*pu64)
2177 : "0" (*pu64),
2178 "b" ( (uint32_t)u64 ),
2179 "c" ( (uint32_t)(u64 >> 32) ));
2180# endif
2181# else
2182 __asm
2183 {
2184 mov ebx, dword ptr [u64]
2185 mov ecx, dword ptr [u64 + 4]
2186 mov edi, pu64
2187 mov eax, dword ptr [edi]
2188 mov edx, dword ptr [edi + 4]
2189 retry:
2190 lock cmpxchg8b [edi]
2191 jnz retry
2192 mov dword ptr [u64], eax
2193 mov dword ptr [u64 + 4], edx
2194 }
2195# endif
2196# endif /* !RT_ARCH_AMD64 */
2197 return u64;
2198}
2199#endif
2200
2201
2202/**
2203 * Atomically Exchange an signed 64-bit value, ordered.
2204 *
2205 * @returns Current *pi64 value
2206 * @param pi64 Pointer to the 64-bit variable to update.
2207 * @param i64 The 64-bit value to assign to *pi64.
2208 */
2209DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2210{
2211 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2212}
2213
2214
#ifdef RT_ARCH_AMD64
/**
 * Exchanges an unsigned 128-bit value, ordered.
 *
 * @returns The value *pu128 held before the exchange.
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to assign to *pu128.
 *
 * @remark  We cannot really assume that any hardware supports this. Nor do I have
 *          GAS support for it.  So, for the time being we'll BREAK the atomic
 *          bit of this function and use two 64-bit exchanges instead: the low
 *          and the high half are each exchanged atomically, but the pair is not.
 * @todo    A true 128-bit exchange would need a cmpxchg16b loop guarded by a
 *          CPU feature check (the original sketch tested ASMCpuId_ECX(1) & RT_BIT(13)).
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    /** @todo this is clumsy code */
    RTUINT128U u128Ret;
    u128Ret.u = u128;
    /* Swap each 64-bit half in turn; the returned halves form the old value. */
    u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
    u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
    return u128Ret.u;
}
# endif
#endif /* RT_ARCH_AMD64 */
2274
2275
2276/**
2277 * Atomically Exchange a pointer value, ordered.
2278 *
2279 * @returns Current *ppv value
2280 * @param ppv Pointer to the pointer variable to update.
2281 * @param pv The pointer value to assign to *ppv.
2282 */
2283DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2284{
2285#if ARCH_BITS == 32
2286 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2287#elif ARCH_BITS == 64
2288 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2289#else
2290# error "ARCH_BITS is bogus"
2291#endif
2292}
2293
2294
/** @def ASMAtomicXchgHandle
 * Atomically Exchange a typical IPRT handle value, ordered.
 *
 * The handle is exchanged as a pointer, so both *ph and *phRes must be exactly
 * pointer sized; this is checked at compile time.
 *
 * @param   ph          Pointer to the value to update.
 * @param   hNew        The new value to assign to *ph.
 * @param   phRes       Where to store the value *ph held before the exchange.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicXchgHandle(ph, hNew, phRes) \
    do { \
        *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (void *)(hNew)); \
        AssertCompile(sizeof(*(ph)) == sizeof(void *)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(void *)); \
    } while (0)
2310
2311
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange and asserts
 * on any other size.  The previous value is discarded.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2330
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange and asserts
 * on any other size (in which case *puRes is left untouched).
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the value *pu held before the exchange.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2349
2350
2351/**
2352 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2353 *
2354 * @returns true if xchg was done.
2355 * @returns false if xchg wasn't done.
2356 *
2357 * @param pu32 Pointer to the value to update.
2358 * @param u32New The new value to assigned to *pu32.
2359 * @param u32Old The old value to *pu32 compare with.
2360 */
2361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2362DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2363#else
2364DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2365{
2366# if RT_INLINE_ASM_GNU_STYLE
2367 uint8_t u8Ret;
2368 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2369 "setz %1\n\t"
2370 : "=m" (*pu32),
2371 "=qm" (u8Ret),
2372 "=a" (u32Old)
2373 : "r" (u32New),
2374 "2" (u32Old));
2375 return (bool)u8Ret;
2376
2377# elif RT_INLINE_ASM_USES_INTRIN
2378 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2379
2380# else
2381 uint32_t u32Ret;
2382 __asm
2383 {
2384# ifdef RT_ARCH_AMD64
2385 mov rdx, [pu32]
2386# else
2387 mov edx, [pu32]
2388# endif
2389 mov eax, [u32Old]
2390 mov ecx, [u32New]
2391# ifdef RT_ARCH_AMD64
2392 lock cmpxchg [rdx], ecx
2393# else
2394 lock cmpxchg [edx], ecx
2395# endif
2396 setz al
2397 movzx eax, al
2398 mov [u32Ret], eax
2399 }
2400 return !!u32Ret;
2401# endif
2402}
2403#endif
2404
2405
2406/**
2407 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2408 *
2409 * @returns true if xchg was done.
2410 * @returns false if xchg wasn't done.
2411 *
2412 * @param pi32 Pointer to the value to update.
2413 * @param i32New The new value to assigned to *pi32.
2414 * @param i32Old The old value to *pi32 compare with.
2415 */
2416DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2417{
2418 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2419}
2420
2421
2422/**
2423 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2424 *
2425 * @returns true if xchg was done.
2426 * @returns false if xchg wasn't done.
2427 *
2428 * @param pu64 Pointer to the 64-bit variable to update.
2429 * @param u64New The 64-bit value to assign to *pu64.
2430 * @param u64Old The value to compare with.
2431 */
2432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2433DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2434#else
2435DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2436{
2437# if RT_INLINE_ASM_USES_INTRIN
2438 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2439
2440# elif defined(RT_ARCH_AMD64)
2441# if RT_INLINE_ASM_GNU_STYLE
2442 uint8_t u8Ret;
2443 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2444 "setz %1\n\t"
2445 : "=m" (*pu64),
2446 "=qm" (u8Ret),
2447 "=a" (u64Old)
2448 : "r" (u64New),
2449 "2" (u64Old));
2450 return (bool)u8Ret;
2451# else
2452 bool fRet;
2453 __asm
2454 {
2455 mov rdx, [pu32]
2456 mov rax, [u64Old]
2457 mov rcx, [u64New]
2458 lock cmpxchg [rdx], rcx
2459 setz al
2460 mov [fRet], al
2461 }
2462 return fRet;
2463# endif
2464# else /* !RT_ARCH_AMD64 */
2465 uint32_t u32Ret;
2466# if RT_INLINE_ASM_GNU_STYLE
2467# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2468 uint32_t u32EBX = (uint32_t)u64New;
2469 uint32_t u32Spill;
2470 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2471 "lock; cmpxchg8b (%6)\n\t"
2472 "setz %%al\n\t"
2473 "movl %4, %%ebx\n\t"
2474 "movzbl %%al, %%eax\n\t"
2475 : "=a" (u32Ret),
2476 "=d" (u32Spill),
2477 "=m" (*pu64)
2478 : "A" (u64Old),
2479 "m" ( u32EBX ),
2480 "c" ( (uint32_t)(u64New >> 32) ),
2481 "S" (pu64) );
2482# else /* !PIC */
2483 uint32_t u32Spill;
2484 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2485 "setz %%al\n\t"
2486 "movzbl %%al, %%eax\n\t"
2487 : "=a" (u32Ret),
2488 "=d" (u32Spill),
2489 "=m" (*pu64)
2490 : "A" (u64Old),
2491 "b" ( (uint32_t)u64New ),
2492 "c" ( (uint32_t)(u64New >> 32) ));
2493# endif
2494 return (bool)u32Ret;
2495# else
2496 __asm
2497 {
2498 mov ebx, dword ptr [u64New]
2499 mov ecx, dword ptr [u64New + 4]
2500 mov edi, [pu64]
2501 mov eax, dword ptr [u64Old]
2502 mov edx, dword ptr [u64Old + 4]
2503 lock cmpxchg8b [edi]
2504 setz al
2505 movzx eax, al
2506 mov dword ptr [u32Ret], eax
2507 }
2508 return !!u32Ret;
2509# endif
2510# endif /* !RT_ARCH_AMD64 */
2511}
2512#endif
2513
2514
2515/**
2516 * Atomically Compare and exchange a signed 64-bit value, ordered.
2517 *
2518 * @returns true if xchg was done.
2519 * @returns false if xchg wasn't done.
2520 *
2521 * @param pi64 Pointer to the 64-bit variable to update.
2522 * @param i64 The 64-bit value to assign to *pu64.
2523 * @param i64Old The value to compare with.
2524 */
2525DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2526{
2527 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2528}
2529
2530
2531/**
2532 * Atomically Compare and Exchange a pointer value, ordered.
2533 *
2534 * @returns true if xchg was done.
2535 * @returns false if xchg wasn't done.
2536 *
2537 * @param ppv Pointer to the value to update.
2538 * @param pvNew The new value to assigned to *ppv.
2539 * @param pvOld The old value to *ppv compare with.
2540 */
2541DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2542{
2543#if ARCH_BITS == 32
2544 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2545#elif ARCH_BITS == 64
2546 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2547#else
2548# error "ARCH_BITS is bogus"
2549#endif
2550}
2551
2552
/** @def ASMAtomicCmpXchgHandle
 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
 *
 * The handle is exchanged as a pointer, so *ph must be exactly pointer sized;
 * this is checked at compile time.
 *
 * @param   ph          Pointer to the value to update.
 * @param   hNew        The new value to assign to *ph.
 * @param   hOld        The value *ph is compared with.
 * @param   fRc         Where to store the result (true if the exchange was done).
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
    do { \
        (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
        AssertCompile(sizeof(*(ph)) == sizeof(void *)); \
    } while (0)
2568
2569
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Only 32-bit and 64-bit variables are supported; any other size asserts and
 * sets the result to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result (true if the exchange was done).
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
2591
2592
2593/**
2594 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2595 * passes back old value, ordered.
2596 *
2597 * @returns true if xchg was done.
2598 * @returns false if xchg wasn't done.
2599 *
2600 * @param pu32 Pointer to the value to update.
2601 * @param u32New The new value to assigned to *pu32.
2602 * @param u32Old The old value to *pu32 compare with.
2603 * @param pu32Old Pointer store the old value at.
2604 */
2605#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2606DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2607#else
2608DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2609{
2610# if RT_INLINE_ASM_GNU_STYLE
2611 uint8_t u8Ret;
2612 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2613 "setz %1\n\t"
2614 : "=m" (*pu32),
2615 "=qm" (u8Ret),
2616 "=a" (*pu32Old)
2617 : "r" (u32New),
2618 "a" (u32Old));
2619 return (bool)u8Ret;
2620
2621# elif RT_INLINE_ASM_USES_INTRIN
2622 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2623
2624# else
2625 uint32_t u32Ret;
2626 __asm
2627 {
2628# ifdef RT_ARCH_AMD64
2629 mov rdx, [pu32]
2630# else
2631 mov edx, [pu32]
2632# endif
2633 mov eax, [u32Old]
2634 mov ecx, [u32New]
2635# ifdef RT_ARCH_AMD64
2636 lock cmpxchg [rdx], ecx
2637 mov rdx, [pu32Old]
2638 mov [rdx], eax
2639# else
2640 lock cmpxchg [edx], ecx
2641 mov edx, [pu32Old]
2642 mov [edx], eax
2643# endif
2644 setz al
2645 movzx eax, al
2646 mov [u32Ret], eax
2647 }
2648 return !!u32Ret;
2649# endif
2650}
2651#endif
2652
2653
2654/**
2655 * Atomically Compare and Exchange a signed 32-bit value, additionally
2656 * passes back old value, ordered.
2657 *
2658 * @returns true if xchg was done.
2659 * @returns false if xchg wasn't done.
2660 *
2661 * @param pi32 Pointer to the value to update.
2662 * @param i32New The new value to assigned to *pi32.
2663 * @param i32Old The old value to *pi32 compare with.
2664 * @param pi32Old Pointer store the old value at.
2665 */
2666DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2667{
2668 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2669}
2670
2671
2672/**
2673 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2674 * passing back old value, ordered.
2675 *
2676 * @returns true if xchg was done.
2677 * @returns false if xchg wasn't done.
2678 *
2679 * @param pu64 Pointer to the 64-bit variable to update.
2680 * @param u64New The 64-bit value to assign to *pu64.
2681 * @param u64Old The value to compare with.
2682 * @param pu64Old Pointer store the old value at.
2683 */
2684#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2685DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2686#else
2687DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2688{
2689# if RT_INLINE_ASM_USES_INTRIN
2690 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2691
2692# elif defined(RT_ARCH_AMD64)
2693# if RT_INLINE_ASM_GNU_STYLE
2694 uint8_t u8Ret;
2695 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2696 "setz %1\n\t"
2697 : "=m" (*pu64),
2698 "=qm" (u8Ret),
2699 "=a" (*pu64Old)
2700 : "r" (u64New),
2701 "a" (u64Old));
2702 return (bool)u8Ret;
2703# else
2704 bool fRet;
2705 __asm
2706 {
2707 mov rdx, [pu32]
2708 mov rax, [u64Old]
2709 mov rcx, [u64New]
2710 lock cmpxchg [rdx], rcx
2711 mov rdx, [pu64Old]
2712 mov [rdx], rax
2713 setz al
2714 mov [fRet], al
2715 }
2716 return fRet;
2717# endif
2718# else /* !RT_ARCH_AMD64 */
2719# if RT_INLINE_ASM_GNU_STYLE
2720 uint64_t u64Ret;
2721# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2722 /* NB: this code uses a memory clobber description, because the clean
2723 * solution with an output value for *pu64 makes gcc run out of registers.
2724 * This will cause suboptimal code, and anyone with a better solution is
2725 * welcome to improve this. */
2726 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2727 "lock; cmpxchg8b %3\n\t"
2728 "xchgl %%ebx, %1\n\t"
2729 : "=A" (u64Ret)
2730 : "DS" ((uint32_t)u64New),
2731 "c" ((uint32_t)(u64New >> 32)),
2732 "m" (*pu64),
2733 "0" (u64Old)
2734 : "memory" );
2735# else /* !PIC */
2736 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2737 : "=A" (u64Ret),
2738 "=m" (*pu64)
2739 : "b" ((uint32_t)u64New),
2740 "c" ((uint32_t)(u64New >> 32)),
2741 "m" (*pu64),
2742 "0" (u64Old));
2743# endif
2744 *pu64Old = u64Ret;
2745 return u64Ret == u64Old;
2746# else
2747 uint32_t u32Ret;
2748 __asm
2749 {
2750 mov ebx, dword ptr [u64New]
2751 mov ecx, dword ptr [u64New + 4]
2752 mov edi, [pu64]
2753 mov eax, dword ptr [u64Old]
2754 mov edx, dword ptr [u64Old + 4]
2755 lock cmpxchg8b [edi]
2756 mov ebx, [pu64Old]
2757 mov [ebx], eax
2758 setz al
2759 movzx eax, al
2760 add ebx, 4
2761 mov [ebx], edx
2762 mov dword ptr [u32Ret], eax
2763 }
2764 return !!u32Ret;
2765# endif
2766# endif /* !RT_ARCH_AMD64 */
2767}
2768#endif
2769
2770
2771/**
2772 * Atomically Compare and exchange a signed 64-bit value, additionally
2773 * passing back old value, ordered.
2774 *
2775 * @returns true if xchg was done.
2776 * @returns false if xchg wasn't done.
2777 *
2778 * @param pi64 Pointer to the 64-bit variable to update.
2779 * @param i64 The 64-bit value to assign to *pu64.
2780 * @param i64Old The value to compare with.
2781 * @param pi64Old Pointer store the old value at.
2782 */
2783DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2784{
2785 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2786}
2787
/** @def ASMAtomicCmpXchgExHandle
 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
 * Additionally passes back the old value.
 *
 * The handle is treated as an ARCH_BITS wide integer and forwarded to
 * ASMAtomicCmpXchgExU32 or ASMAtomicCmpXchgExU64.  Compile time assertions
 * reject handle types whose size differs from that of a pointer.
 *
 * @param   ph          Pointer to the value to update.
 * @param   hNew        The new value to assign to *ph.
 * @param   hOld        The old value to compare *ph with.
 * @param   fRc         Where to store the result.
 * @param   phOldVal    Pointer to where to store the old value.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if ARCH_BITS == 32
# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
    do { \
        AssertCompile(sizeof(*(ph))       == sizeof(void *)); \
        AssertCompile(sizeof(*(phOldVal)) == sizeof(void *)); \
        (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
    } while (0)
#elif ARCH_BITS == 64
# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
    do { \
        AssertCompile(sizeof(*(ph))       == sizeof(void *)); \
        AssertCompile(sizeof(*(phOldVal)) == sizeof(void *)); \
        (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
    } while (0)
#endif
2814
2815
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Only 4 and 8 byte values are handled; any other size raises an assertion,
 * stores false in fRc and zero in *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
2839
2840
2841/**
2842 * Atomically Compare and Exchange a pointer value, additionally
2843 * passing back old value, ordered.
2844 *
2845 * @returns true if xchg was done.
2846 * @returns false if xchg wasn't done.
2847 *
2848 * @param ppv Pointer to the value to update.
2849 * @param pvNew The new value to assigned to *ppv.
2850 * @param pvOld The old value to *ppv compare with.
2851 * @param ppvOld Pointer store the old value at.
2852 */
2853DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2854{
2855#if ARCH_BITS == 32
2856 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2857#elif ARCH_BITS == 64
2858 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2859#else
2860# error "ARCH_BITS is bogus"
2861#endif
2862}
2863
2864
/**
 * Atomically exchanges and adds to a 32-bit value, ordered.
 *
 * Implemented with the _InterlockedExchangeAdd intrinsic or a locked xadd
 * instruction, depending on the compiler configuration.  When
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not used, only a
 * prototype is declared here.
 *
 * @returns The old value.
 * @param   pu32        Pointer to the value.
 * @param   u32         Number to add.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    u32 = _InterlockedExchangeAdd((long *)pu32, u32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* u32 is both the addend going in ("0" ties it to output %0) and the
       result coming out of xadd. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (u32)
                         : "memory");
    return u32;
# else
    /* Load the addend into eax, xadd it into *pu32 and store eax back into
       u32 for the return statement. */
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        mov     [u32], eax
    }
    return u32;
# endif
}
#endif
2905
2906
2907/**
2908 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2909 *
2910 * @returns The old value.
2911 * @param pi32 Pointer to the value.
2912 * @param i32 Number to add.
2913 */
2914DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2915{
2916 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2917}
2918
2919
/**
 * Atomically increment a 32-bit value, ordered.
 *
 * Implemented with the _InterlockedIncrement intrinsic or a locked xadd of
 * 1, depending on the compiler configuration.  When RT_INLINE_ASM_EXTERNAL
 * is set and intrinsics are not used, only a prototype is declared here.
 *
 * @returns The new value.
 * @param   pu32        Pointer to the value to increment.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    u32 = _InterlockedIncrement((long *)pu32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* The register starts out as 1 (the addend) and is stored into u32
       after the xadd; the function then returns u32 + 1. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (1)
                         : "memory");
    return u32+1;
# else
    /* Same scheme as the GNU variant: xadd 1, then return eax + 1. */
    __asm
    {
        mov     eax, 1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        mov     u32, eax
    }
    return u32+1;
# endif
}
#endif
2960
2961
2962/**
2963 * Atomically increment a signed 32-bit value, ordered.
2964 *
2965 * @returns The new value.
2966 * @param pi32 Pointer to the value to increment.
2967 */
2968DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2969{
2970 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2971}
2972
2973
/**
 * Atomically decrement an unsigned 32-bit value, ordered.
 *
 * Implemented with the _InterlockedDecrement intrinsic or a locked xadd of
 * -1, depending on the compiler configuration.  When RT_INLINE_ASM_EXTERNAL
 * is set and intrinsics are not used, only a prototype is declared here.
 *
 * @returns The new value.
 * @param   pu32        Pointer to the value to decrement.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    u32 = _InterlockedDecrement((long *)pu32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* The register starts out as -1 (the addend) and is stored into u32
       after the xadd; the function then returns u32 - 1. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (-1)
                         : "memory");
    return u32-1;
# else
    /* Same scheme as the GNU variant: xadd -1, then return eax - 1. */
    __asm
    {
        mov     eax, -1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        mov     u32, eax
    }
    return u32-1;
# endif
}
#endif
3014
3015
3016/**
3017 * Atomically decrement a signed 32-bit value, ordered.
3018 *
3019 * @returns The new value.
3020 * @param pi32 Pointer to the value to decrement.
3021 */
3022DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3023{
3024 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3025}
3026
3027
3028/**
3029 * Atomically Or an unsigned 32-bit value, ordered.
3030 *
3031 * @param pu32 Pointer to the pointer variable to OR u32 with.
3032 * @param u32 The value to OR *pu32 with.
3033 */
3034#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3035DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3036#else
3037DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3038{
3039# if RT_INLINE_ASM_USES_INTRIN
3040 _InterlockedOr((long volatile *)pu32, (long)u32);
3041
3042# elif RT_INLINE_ASM_GNU_STYLE
3043 __asm__ __volatile__("lock; orl %1, %0\n\t"
3044 : "=m" (*pu32)
3045 : "ir" (u32));
3046# else
3047 __asm
3048 {
3049 mov eax, [u32]
3050# ifdef RT_ARCH_AMD64
3051 mov rdx, [pu32]
3052 lock or [rdx], eax
3053# else
3054 mov edx, [pu32]
3055 lock or [edx], eax
3056# endif
3057 }
3058# endif
3059}
3060#endif
3061
3062
3063/**
3064 * Atomically Or a signed 32-bit value, ordered.
3065 *
3066 * @param pi32 Pointer to the pointer variable to OR u32 with.
3067 * @param i32 The value to OR *pu32 with.
3068 */
3069DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3070{
3071 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3072}
3073
3074
3075/**
3076 * Atomically And an unsigned 32-bit value, ordered.
3077 *
3078 * @param pu32 Pointer to the pointer variable to AND u32 with.
3079 * @param u32 The value to AND *pu32 with.
3080 */
3081#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3082DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3083#else
3084DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3085{
3086# if RT_INLINE_ASM_USES_INTRIN
3087 _InterlockedAnd((long volatile *)pu32, u32);
3088
3089# elif RT_INLINE_ASM_GNU_STYLE
3090 __asm__ __volatile__("lock; andl %1, %0\n\t"
3091 : "=m" (*pu32)
3092 : "ir" (u32));
3093# else
3094 __asm
3095 {
3096 mov eax, [u32]
3097# ifdef RT_ARCH_AMD64
3098 mov rdx, [pu32]
3099 lock and [rdx], eax
3100# else
3101 mov edx, [pu32]
3102 lock and [edx], eax
3103# endif
3104 }
3105# endif
3106}
3107#endif
3108
3109
3110/**
3111 * Atomically And a signed 32-bit value, ordered.
3112 *
3113 * @param pi32 Pointer to the pointer variable to AND i32 with.
3114 * @param i32 The value to AND *pi32 with.
3115 */
3116DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3117{
3118 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3119}
3120
3121
3122/**
3123 * Memory fence, waits for any pending writes and reads to complete.
3124 */
3125DECLINLINE(void) ASMMemoryFence(void)
3126{
3127 /** @todo use mfence? check if all cpus we care for support it. */
3128 uint32_t volatile u32;
3129 ASMAtomicXchgU32(&u32, 0);
3130}
3131
3132
3133/**
3134 * Write fence, waits for any pending writes to complete.
3135 */
3136DECLINLINE(void) ASMWriteFence(void)
3137{
3138 /** @todo use sfence? check if all cpus we care for support it. */
3139 ASMMemoryFence();
3140}
3141
3142
3143/**
3144 * Read fence, waits for any pending reads to complete.
3145 */
3146DECLINLINE(void) ASMReadFence(void)
3147{
3148 /** @todo use lfence? check if all cpus we care for support it. */
3149 ASMMemoryFence();
3150}
3151
3152
3153/**
3154 * Atomically reads an unsigned 8-bit value, ordered.
3155 *
3156 * @returns Current *pu8 value
3157 * @param pu8 Pointer to the 8-bit variable to read.
3158 */
3159DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3160{
3161 ASMMemoryFence();
3162 return *pu8; /* byte reads are atomic on x86 */
3163}
3164
3165
3166/**
3167 * Atomically reads an unsigned 8-bit value, unordered.
3168 *
3169 * @returns Current *pu8 value
3170 * @param pu8 Pointer to the 8-bit variable to read.
3171 */
3172DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3173{
3174 return *pu8; /* byte reads are atomic on x86 */
3175}
3176
3177
3178/**
3179 * Atomically reads a signed 8-bit value, ordered.
3180 *
3181 * @returns Current *pi8 value
3182 * @param pi8 Pointer to the 8-bit variable to read.
3183 */
3184DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3185{
3186 ASMMemoryFence();
3187 return *pi8; /* byte reads are atomic on x86 */
3188}
3189
3190
3191/**
3192 * Atomically reads a signed 8-bit value, unordered.
3193 *
3194 * @returns Current *pi8 value
3195 * @param pi8 Pointer to the 8-bit variable to read.
3196 */
3197DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3198{
3199 return *pi8; /* byte reads are atomic on x86 */
3200}
3201
3202
3203/**
3204 * Atomically reads an unsigned 16-bit value, ordered.
3205 *
3206 * @returns Current *pu16 value
3207 * @param pu16 Pointer to the 16-bit variable to read.
3208 */
3209DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3210{
3211 ASMMemoryFence();
3212 Assert(!((uintptr_t)pu16 & 1));
3213 return *pu16;
3214}
3215
3216
3217/**
3218 * Atomically reads an unsigned 16-bit value, unordered.
3219 *
3220 * @returns Current *pu16 value
3221 * @param pu16 Pointer to the 16-bit variable to read.
3222 */
3223DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3224{
3225 Assert(!((uintptr_t)pu16 & 1));
3226 return *pu16;
3227}
3228
3229
3230/**
3231 * Atomically reads a signed 16-bit value, ordered.
3232 *
3233 * @returns Current *pi16 value
3234 * @param pi16 Pointer to the 16-bit variable to read.
3235 */
3236DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3237{
3238 ASMMemoryFence();
3239 Assert(!((uintptr_t)pi16 & 1));
3240 return *pi16;
3241}
3242
3243
3244/**
3245 * Atomically reads a signed 16-bit value, unordered.
3246 *
3247 * @returns Current *pi16 value
3248 * @param pi16 Pointer to the 16-bit variable to read.
3249 */
3250DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3251{
3252 Assert(!((uintptr_t)pi16 & 1));
3253 return *pi16;
3254}
3255
3256
3257/**
3258 * Atomically reads an unsigned 32-bit value, ordered.
3259 *
3260 * @returns Current *pu32 value
3261 * @param pu32 Pointer to the 32-bit variable to read.
3262 */
3263DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3264{
3265 ASMMemoryFence();
3266 Assert(!((uintptr_t)pu32 & 3));
3267 return *pu32;
3268}
3269
3270
3271/**
3272 * Atomically reads an unsigned 32-bit value, unordered.
3273 *
3274 * @returns Current *pu32 value
3275 * @param pu32 Pointer to the 32-bit variable to read.
3276 */
3277DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3278{
3279 Assert(!((uintptr_t)pu32 & 3));
3280 return *pu32;
3281}
3282
3283
3284/**
3285 * Atomically reads a signed 32-bit value, ordered.
3286 *
3287 * @returns Current *pi32 value
3288 * @param pi32 Pointer to the 32-bit variable to read.
3289 */
3290DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3291{
3292 ASMMemoryFence();
3293 Assert(!((uintptr_t)pi32 & 3));
3294 return *pi32;
3295}
3296
3297
3298/**
3299 * Atomically reads a signed 32-bit value, unordered.
3300 *
3301 * @returns Current *pi32 value
3302 * @param pi32 Pointer to the 32-bit variable to read.
3303 */
3304DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3305{
3306 Assert(!((uintptr_t)pi32 & 3));
3307 return *pi32;
3308}
3309
3310
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is a memory fence followed by a plain 64-bit read.  On
 * 32-bit targets the value is fetched with a locked cmpxchg8b using zero as
 * both comparand and replacement value, which is why the memory has to be
 * writable.
 *
 * @returns Current *pu64 value
 * @param   pu64        Pointer to the 64-bit variable to read.
 *                      The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__(  "mfence\n\t"
                           "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif*/
    ASMMemoryFence();
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
    /* EBX is swapped with a zeroed scratch variable for the duration of the
       cmpxchg8b and restored afterwards - presumably because EBX cannot be
       handed to the asm statement as an operand in PIC builds. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    /* NOTE(review): unlike the other variants this path does not assert the
       8 byte alignment of pu64. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    /* Comparand (edx:eax) and replacement (ecx:ebx) are both zero, so the
       memory content is never changed; edx:eax ends up holding the current
       value either way. */
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
3383
3384
/**
 * Atomically reads an unsigned 64-bit value, unordered.
 *
 * On AMD64 this is a plain 64-bit read without any fence.  On 32-bit
 * targets the value is fetched with cmpxchg8b using zero as both comparand
 * and replacement value, which is why the memory has to be writable.
 *
 * @returns Current *pu64 value
 * @param   pu64        Pointer to the 64-bit variable to read.
 *                      The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif */
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
    /* EBX is swapped with a zeroed scratch variable for the duration of the
       cmpxchg8b and restored afterwards - presumably because EBX cannot be
       handed to the asm statement as an operand in PIC builds. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    /* NOTE(review): this is the only variant here that omits the lock
       prefix, and it does not assert the alignment of pu64 - confirm that
       both are intended. */
    __asm__ __volatile__("cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    /* Comparand (edx:eax) and replacement (ecx:ebx) are both zero, so the
       memory content is never changed; edx:eax ends up holding the current
       value either way. */
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
3455
3456
3457/**
3458 * Atomically reads a signed 64-bit value, ordered.
3459 *
3460 * @returns Current *pi64 value
3461 * @param pi64 Pointer to the 64-bit variable to read.
3462 * The memory pointed to must be writable.
3463 * @remark This will fault if the memory is read-only!
3464 */
3465DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3466{
3467 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3468}
3469
3470
3471/**
3472 * Atomically reads a signed 64-bit value, unordered.
3473 *
3474 * @returns Current *pi64 value
3475 * @param pi64 Pointer to the 64-bit variable to read.
3476 * The memory pointed to must be writable.
3477 * @remark This will fault if the memory is read-only!
3478 */
3479DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3480{
3481 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3482}
3483
3484
3485/**
3486 * Atomically reads a pointer value, ordered.
3487 *
3488 * @returns Current *pv value
3489 * @param ppv Pointer to the pointer variable to read.
3490 */
3491DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3492{
3493#if ARCH_BITS == 32
3494 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3495#elif ARCH_BITS == 64
3496 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3497#else
3498# error "ARCH_BITS is bogus"
3499#endif
3500}
3501
3502
3503/**
3504 * Atomically reads a pointer value, unordered.
3505 *
3506 * @returns Current *pv value
3507 * @param ppv Pointer to the pointer variable to read.
3508 */
3509DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3510{
3511#if ARCH_BITS == 32
3512 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3513#elif ARCH_BITS == 64
3514 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3515#else
3516# error "ARCH_BITS is bogus"
3517#endif
3518}
3519
3520
3521/**
3522 * Atomically reads a boolean value, ordered.
3523 *
3524 * @returns Current *pf value
3525 * @param pf Pointer to the boolean variable to read.
3526 */
3527DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3528{
3529 ASMMemoryFence();
3530 return *pf; /* byte reads are atomic on x86 */
3531}
3532
3533
3534/**
3535 * Atomically reads a boolean value, unordered.
3536 *
3537 * @returns Current *pf value
3538 * @param pf Pointer to the boolean variable to read.
3539 */
3540DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3541{
3542 return *pf; /* byte reads are atomic on x86 */
3543}
3544
3545
/**
 * Atomically read a typical IPRT handle value, ordered.
 *
 * The handle is treated as a pointer sized value and read via
 * ASMAtomicReadPtr.  Compile time assertions reject source and destination
 * types whose size differs from that of a pointer.
 *
 * @param   ph          Pointer to the handle variable to read.
 * @param   phRes       Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(void *)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(void *)); \
        *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
    } while (0)
3560
3561
/**
 * Atomically read a typical IPRT handle value, unordered.
 *
 * The handle is treated as a pointer sized value and read via
 * ASMAtomicUoReadPtr.  Compile time assertions reject source and
 * destination types whose size differs from that of a pointer.
 *
 * @param   ph          Pointer to the handle variable to read.
 * @param   phRes       Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicUoReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(void *)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(void *)); \
        *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
    } while (0)
3576
3577
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Values of 1, 2, 4 and 8 bytes are handled; any other size raises an
 * assertion and leaves *puRes untouched.
 *
 * @param   pu          Pointer to the variable to read.
 * @param   puRes       Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3595
3596
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Values of 1, 2, 4 and 8 bytes are handled; any other size raises an
 * assertion and leaves *puRes untouched.
 *
 * @param   pu          Pointer to the variable to read.
 * @param   puRes       Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3614
3615
3616/**
3617 * Atomically writes an unsigned 8-bit value, ordered.
3618 *
3619 * @param pu8 Pointer to the 8-bit variable.
3620 * @param u8 The 8-bit value to assign to *pu8.
3621 */
3622DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3623{
3624 ASMAtomicXchgU8(pu8, u8);
3625}
3626
3627
3628/**
3629 * Atomically writes an unsigned 8-bit value, unordered.
3630 *
3631 * @param pu8 Pointer to the 8-bit variable.
3632 * @param u8 The 8-bit value to assign to *pu8.
3633 */
3634DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3635{
3636 *pu8 = u8; /* byte writes are atomic on x86 */
3637}
3638
3639
3640/**
3641 * Atomically writes a signed 8-bit value, ordered.
3642 *
3643 * @param pi8 Pointer to the 8-bit variable to read.
3644 * @param i8 The 8-bit value to assign to *pi8.
3645 */
3646DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3647{
3648 ASMAtomicXchgS8(pi8, i8);
3649}
3650
3651
3652/**
3653 * Atomically writes a signed 8-bit value, unordered.
3654 *
3655 * @param pi8 Pointer to the 8-bit variable to read.
3656 * @param i8 The 8-bit value to assign to *pi8.
3657 */
3658DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3659{
3660 *pi8 = i8; /* byte writes are atomic on x86 */
3661}
3662
3663
3664/**
3665 * Atomically writes an unsigned 16-bit value, ordered.
3666 *
3667 * @param pu16 Pointer to the 16-bit variable.
3668 * @param u16 The 16-bit value to assign to *pu16.
3669 */
3670DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3671{
3672 ASMAtomicXchgU16(pu16, u16);
3673}
3674
3675
3676/**
3677 * Atomically writes an unsigned 16-bit value, unordered.
3678 *
3679 * @param pu16 Pointer to the 16-bit variable.
3680 * @param u16 The 16-bit value to assign to *pu16.
3681 */
3682DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3683{
3684 Assert(!((uintptr_t)pu16 & 1));
3685 *pu16 = u16;
3686}
3687
3688
3689/**
3690 * Atomically writes a signed 16-bit value, ordered.
3691 *
3692 * @param pi16 Pointer to the 16-bit variable to read.
3693 * @param i16 The 16-bit value to assign to *pi16.
3694 */
3695DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3696{
3697 ASMAtomicXchgS16(pi16, i16);
3698}
3699
3700
3701/**
3702 * Atomically writes a signed 16-bit value, unordered.
3703 *
3704 * @param pi16 Pointer to the 16-bit variable to read.
3705 * @param i16 The 16-bit value to assign to *pi16.
3706 */
3707DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
3708{
3709 Assert(!((uintptr_t)pi16 & 1));
3710 *pi16 = i16;
3711}
3712
3713
3714/**
3715 * Atomically writes an unsigned 32-bit value, ordered.
3716 *
3717 * @param pu32 Pointer to the 32-bit variable.
3718 * @param u32 The 32-bit value to assign to *pu32.
3719 */
3720DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3721{
3722 ASMAtomicXchgU32(pu32, u32);
3723}
3724
3725
3726/**
3727 * Atomically writes an unsigned 32-bit value, unordered.
3728 *
3729 * @param pu32 Pointer to the 32-bit variable.
3730 * @param u32 The 32-bit value to assign to *pu32.
3731 */
3732DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
3733{
3734 Assert(!((uintptr_t)pu32 & 3));
3735 *pu32 = u32;
3736}
3737
3738
3739/**
3740 * Atomically writes a signed 32-bit value, ordered.
3741 *
3742 * @param pi32 Pointer to the 32-bit variable to read.
3743 * @param i32 The 32-bit value to assign to *pi32.
3744 */
3745DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3746{
3747 ASMAtomicXchgS32(pi32, i32);
3748}
3749
3750
3751/**
3752 * Atomically writes a signed 32-bit value, unordered.
3753 *
3754 * @param pi32 Pointer to the 32-bit variable to read.
3755 * @param i32 The 32-bit value to assign to *pi32.
3756 */
3757DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
3758{
3759 Assert(!((uintptr_t)pi32 & 3));
3760 *pi32 = i32;
3761}
3762
3763
3764/**
3765 * Atomically writes an unsigned 64-bit value, ordered.
3766 *
3767 * @param pu64 Pointer to the 64-bit variable.
3768 * @param u64 The 64-bit value to assign to *pu64.
3769 */
3770DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3771{
3772 ASMAtomicXchgU64(pu64, u64);
3773}
3774
3775
3776/**
3777 * Atomically writes an unsigned 64-bit value, unordered.
3778 *
3779 * @param pu64 Pointer to the 64-bit variable.
3780 * @param u64 The 64-bit value to assign to *pu64.
3781 */
3782DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
3783{
3784 Assert(!((uintptr_t)pu64 & 7));
3785#if ARCH_BITS == 64
3786 *pu64 = u64;
3787#else
3788 ASMAtomicXchgU64(pu64, u64);
3789#endif
3790}
3791
3792
3793/**
3794 * Atomically writes a signed 64-bit value, ordered.
3795 *
3796 * @param pi64 Pointer to the 64-bit variable.
3797 * @param i64 The 64-bit value to assign to *pi64.
3798 */
3799DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3800{
3801 ASMAtomicXchgS64(pi64, i64);
3802}
3803
3804
3805/**
3806 * Atomically writes a signed 64-bit value, unordered.
3807 *
3808 * @param pi64 Pointer to the 64-bit variable.
3809 * @param i64 The 64-bit value to assign to *pi64.
3810 */
3811DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
3812{
3813 Assert(!((uintptr_t)pi64 & 7));
3814#if ARCH_BITS == 64
3815 *pi64 = i64;
3816#else
3817 ASMAtomicXchgS64(pi64, i64);
3818#endif
3819}
3820
3821
3822/**
3823 * Atomically writes a boolean value, unordered.
3824 *
3825 * @param pf Pointer to the boolean variable.
3826 * @param f The boolean value to assign to *pf.
3827 */
3828DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
3829{
3830 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
3831}
3832
3833
3834/**
3835 * Atomically writes a boolean value, unordered.
3836 *
3837 * @param pf Pointer to the boolean variable.
3838 * @param f The boolean value to assign to *pf.
3839 */
3840DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
3841{
3842 *pf = f; /* byte writes are atomic on x86 */
3843}
3844
3845
3846/**
3847 * Atomically writes a pointer value, ordered.
3848 *
3849 * @returns Current *pv value
3850 * @param ppv Pointer to the pointer variable.
3851 * @param pv The pointer value to assigne to *ppv.
3852 */
3853DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
3854{
3855#if ARCH_BITS == 32
3856 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3857#elif ARCH_BITS == 64
3858 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3859#else
3860# error "ARCH_BITS is bogus"
3861#endif
3862}
3863
3864
3865/**
3866 * Atomically writes a pointer value, unordered.
3867 *
3868 * @returns Current *pv value
3869 * @param ppv Pointer to the pointer variable.
3870 * @param pv The pointer value to assigne to *ppv.
3871 */
3872DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
3873{
3874#if ARCH_BITS == 32
3875 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3876#elif ARCH_BITS == 64
3877 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3878#else
3879# error "ARCH_BITS is bogus"
3880#endif
3881}
3882
3883
/**
 * Atomically write a typical IPRT handle value, ordered.
 *
 * The handle variable must be pointer sized; this is checked at compile time.
 * Both macro arguments are fully parenthesized, so expressions may be passed.
 *
 * @param   ph      Pointer to the variable to update.
 * @param   hNew    The value to assign to *ph.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicWriteHandle(ph, hNew) \
    do { \
        ASMAtomicWritePtr((void * volatile *)(ph), (void *)(hNew)); \
        AssertCompile(sizeof(*(ph)) == sizeof(void *)); \
    } while (0)
3897
3898
/**
 * Atomically write a typical IPRT handle value, unordered.
 *
 * The handle variable must be pointer sized; this is checked at compile time.
 * Both macro arguments are fully parenthesized, so expressions may be passed.
 *
 * @param   ph      Pointer to the variable to update.
 * @param   hNew    The value to assign to *ph.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicUoWriteHandle(ph, hNew) \
    do { \
        ASMAtomicUoWritePtr((void * volatile *)(ph), (void *)(hNew)); \
        AssertCompile(sizeof(*(ph)) == sizeof(void *)); \
    } while (0)
3912
3913
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit writer; any other
 * size triggers an assertion failure and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3931
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered writer; any
 * other size triggers an assertion failure and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* Report this macro's own name; sizeof yields size_t, so cast to match %d. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3949
3950
3951
3952
3953/**
3954 * Invalidate page.
3955 *
3956 * @param pv Address of the page to invalidate.
3957 */
3958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3959DECLASM(void) ASMInvalidatePage(void *pv);
3960#else
3961DECLINLINE(void) ASMInvalidatePage(void *pv)
3962{
3963# if RT_INLINE_ASM_USES_INTRIN
3964 __invlpg(pv);
3965
3966# elif RT_INLINE_ASM_GNU_STYLE
3967 __asm__ __volatile__("invlpg %0\n\t"
3968 : : "m" (*(uint8_t *)pv));
3969# else
3970 __asm
3971 {
3972# ifdef RT_ARCH_AMD64
3973 mov rax, [pv]
3974 invlpg [rax]
3975# else
3976 mov eax, [pv]
3977 invlpg [eax]
3978# endif
3979 }
3980# endif
3981}
3982#endif
3983
3984
3985#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3986# if PAGE_SIZE != 0x1000
3987# error "PAGE_SIZE is not 0x1000!"
3988# endif
3989#endif
3990
3991/**
3992 * Zeros a 4K memory page.
3993 *
3994 * @param pv Pointer to the memory block. This must be page aligned.
3995 */
3996#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3997DECLASM(void) ASMMemZeroPage(volatile void *pv);
3998# else
3999DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4000{
4001# if RT_INLINE_ASM_USES_INTRIN
4002# ifdef RT_ARCH_AMD64
4003 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4004# else
4005 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4006# endif
4007
4008# elif RT_INLINE_ASM_GNU_STYLE
4009 RTCCUINTREG uDummy;
4010# ifdef RT_ARCH_AMD64
4011 __asm__ __volatile__ ("rep stosq"
4012 : "=D" (pv),
4013 "=c" (uDummy)
4014 : "0" (pv),
4015 "c" (0x1000 >> 3),
4016 "a" (0)
4017 : "memory");
4018# else
4019 __asm__ __volatile__ ("rep stosl"
4020 : "=D" (pv),
4021 "=c" (uDummy)
4022 : "0" (pv),
4023 "c" (0x1000 >> 2),
4024 "a" (0)
4025 : "memory");
4026# endif
4027# else
4028 __asm
4029 {
4030# ifdef RT_ARCH_AMD64
4031 xor rax, rax
4032 mov ecx, 0200h
4033 mov rdi, [pv]
4034 rep stosq
4035# else
4036 xor eax, eax
4037 mov ecx, 0400h
4038 mov edi, [pv]
4039 rep stosd
4040# endif
4041 }
4042# endif
4043}
4044# endif
4045
4046
4047/**
4048 * Zeros a memory block with a 32-bit aligned size.
4049 *
4050 * @param pv Pointer to the memory block.
4051 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4052 */
4053#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4054DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4055#else
4056DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4057{
4058# if RT_INLINE_ASM_USES_INTRIN
4059# ifdef RT_ARCH_AMD64
4060 if (!(cb & 7))
4061 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4062 else
4063# endif
4064 __stosd((unsigned long *)pv, 0, cb / 4);
4065
4066# elif RT_INLINE_ASM_GNU_STYLE
4067 __asm__ __volatile__ ("rep stosl"
4068 : "=D" (pv),
4069 "=c" (cb)
4070 : "0" (pv),
4071 "1" (cb >> 2),
4072 "a" (0)
4073 : "memory");
4074# else
4075 __asm
4076 {
4077 xor eax, eax
4078# ifdef RT_ARCH_AMD64
4079 mov rcx, [cb]
4080 shr rcx, 2
4081 mov rdi, [pv]
4082# else
4083 mov ecx, [cb]
4084 shr ecx, 2
4085 mov edi, [pv]
4086# endif
4087 rep stosd
4088 }
4089# endif
4090}
4091#endif
4092
4093
4094/**
4095 * Fills a memory block with a 32-bit aligned size.
4096 *
4097 * @param pv Pointer to the memory block.
4098 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4099 * @param u32 The value to fill with.
4100 */
4101#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4102DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4103#else
4104DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4105{
4106# if RT_INLINE_ASM_USES_INTRIN
4107# ifdef RT_ARCH_AMD64
4108 if (!(cb & 7))
4109 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4110 else
4111# endif
4112 __stosd((unsigned long *)pv, u32, cb / 4);
4113
4114# elif RT_INLINE_ASM_GNU_STYLE
4115 __asm__ __volatile__ ("rep stosl"
4116 : "=D" (pv),
4117 "=c" (cb)
4118 : "0" (pv),
4119 "1" (cb >> 2),
4120 "a" (u32)
4121 : "memory");
4122# else
4123 __asm
4124 {
4125# ifdef RT_ARCH_AMD64
4126 mov rcx, [cb]
4127 shr rcx, 2
4128 mov rdi, [pv]
4129# else
4130 mov ecx, [cb]
4131 shr ecx, 2
4132 mov edi, [pv]
4133# endif
4134 mov eax, [u32]
4135 rep stosd
4136 }
4137# endif
4138}
4139#endif
4140
4141
4142/**
4143 * Checks if a memory block is filled with the specified byte.
4144 *
4145 * This is a sort of inverted memchr.
4146 *
4147 * @returns Pointer to the byte which doesn't equal u8.
4148 * @returns NULL if all equal to u8.
4149 *
4150 * @param pv Pointer to the memory block.
4151 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4152 * @param u8 The value it's supposed to be filled with.
4153 */
4154#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4155DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4156#else
4157DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4158{
4159/** @todo rewrite this in inline assembly? */
4160 uint8_t const *pb = (uint8_t const *)pv;
4161 for (; cb; cb--, pb++)
4162 if (RT_UNLIKELY(*pb != u8))
4163 return (void *)pb;
4164 return NULL;
4165}
4166#endif
4167
4168
4169/**
4170 * Checks if a memory block is filled with the specified 32-bit value.
4171 *
4172 * This is a sort of inverted memchr.
4173 *
4174 * @returns Pointer to the first value which doesn't equal u32.
4175 * @returns NULL if all equal to u32.
4176 *
4177 * @param pv Pointer to the memory block.
4178 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4179 * @param u32 The value it's supposed to be filled with.
4180 */
4181#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4182DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4183#else
4184DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4185{
4186/** @todo rewrite this in inline assembly? */
4187 uint32_t const *pu32 = (uint32_t const *)pv;
4188 for (; cb; cb -= 4, pu32++)
4189 if (RT_UNLIKELY(*pu32 != u32))
4190 return (uint32_t *)pu32;
4191 return NULL;
4192}
4193#endif
4194
4195
4196/**
4197 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4198 *
4199 * @returns u32F1 * u32F2.
4200 */
4201#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4202DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4203#else
4204DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4205{
4206# ifdef RT_ARCH_AMD64
4207 return (uint64_t)u32F1 * u32F2;
4208# else /* !RT_ARCH_AMD64 */
4209 uint64_t u64;
4210# if RT_INLINE_ASM_GNU_STYLE
4211 __asm__ __volatile__("mull %%edx"
4212 : "=A" (u64)
4213 : "a" (u32F2), "d" (u32F1));
4214# else
4215 __asm
4216 {
4217 mov edx, [u32F1]
4218 mov eax, [u32F2]
4219 mul edx
4220 mov dword ptr [u64], eax
4221 mov dword ptr [u64 + 4], edx
4222 }
4223# endif
4224 return u64;
4225# endif /* !RT_ARCH_AMD64 */
4226}
4227#endif
4228
4229
4230/**
4231 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4232 *
4233 * @returns u32F1 * u32F2.
4234 */
4235#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4236DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4237#else
4238DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
4239{
4240# ifdef RT_ARCH_AMD64
4241 return (int64_t)i32F1 * i32F2;
4242# else /* !RT_ARCH_AMD64 */
4243 int64_t i64;
4244# if RT_INLINE_ASM_GNU_STYLE
4245 __asm__ __volatile__("imull %%edx"
4246 : "=A" (i64)
4247 : "a" (i32F2), "d" (i32F1));
4248# else
4249 __asm
4250 {
4251 mov edx, [i32F1]
4252 mov eax, [i32F2]
4253 imul edx
4254 mov dword ptr [i64], eax
4255 mov dword ptr [i64 + 4], edx
4256 }
4257# endif
4258 return i64;
4259# endif /* !RT_ARCH_AMD64 */
4260}
4261#endif
4262
4263
4264/**
4265 * Devides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
4266 *
4267 * @returns u64 / u32.
4268 */
4269#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4270DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
4271#else
4272DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
4273{
4274# ifdef RT_ARCH_AMD64
4275 return (uint32_t)(u64 / u32);
4276# else /* !RT_ARCH_AMD64 */
4277# if RT_INLINE_ASM_GNU_STYLE
4278 RTCCUINTREG uDummy;
4279 __asm__ __volatile__("divl %3"
4280 : "=a" (u32), "=d"(uDummy)
4281 : "A" (u64), "r" (u32));
4282# else
4283 __asm
4284 {
4285 mov eax, dword ptr [u64]
4286 mov edx, dword ptr [u64 + 4]
4287 mov ecx, [u32]
4288 div ecx
4289 mov [u32], eax
4290 }
4291# endif
4292 return u32;
4293# endif /* !RT_ARCH_AMD64 */
4294}
4295#endif
4296
4297
4298/**
4299 * Devides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4300 *
4301 * @returns u64 / u32.
4302 */
4303#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4304DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4305#else
4306DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4307{
4308# ifdef RT_ARCH_AMD64
4309 return (int32_t)(i64 / i32);
4310# else /* !RT_ARCH_AMD64 */
4311# if RT_INLINE_ASM_GNU_STYLE
4312 RTCCUINTREG iDummy;
4313 __asm__ __volatile__("idivl %3"
4314 : "=a" (i32), "=d"(iDummy)
4315 : "A" (i64), "r" (i32));
4316# else
4317 __asm
4318 {
4319 mov eax, dword ptr [i64]
4320 mov edx, dword ptr [i64 + 4]
4321 mov ecx, [i32]
4322 idiv ecx
4323 mov [i32], eax
4324 }
4325# endif
4326 return i32;
4327# endif /* !RT_ARCH_AMD64 */
4328}
4329#endif
4330
4331
4332/**
4333 * Multiple a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
4334 * using a 96 bit intermediate result.
4335 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4336 * __udivdi3 and __umoddi3 even if this inline function is not used.
4337 *
4338 * @returns (u64A * u32B) / u32C.
4339 * @param u64A The 64-bit value.
4340 * @param u32B The 32-bit value to multiple by A.
4341 * @param u32C The 32-bit value to divide A*B by.
4342 */
4343#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4344DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4345#else
4346DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4347{
4348# if RT_INLINE_ASM_GNU_STYLE
4349# ifdef RT_ARCH_AMD64
4350 uint64_t u64Result, u64Spill;
4351 __asm__ __volatile__("mulq %2\n\t"
4352 "divq %3\n\t"
4353 : "=a" (u64Result),
4354 "=d" (u64Spill)
4355 : "r" ((uint64_t)u32B),
4356 "r" ((uint64_t)u32C),
4357 "0" (u64A),
4358 "1" (0));
4359 return u64Result;
4360# else
4361 uint32_t u32Dummy;
4362 uint64_t u64Result;
4363 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4364 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4365 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4366 eax = u64A.hi */
4367 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
4368 edx = u32C */
4369 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4370 edx = u32B */
4371 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4372 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4373 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4374 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4375 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4376 edx = u64Hi % u32C */
4377 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4378 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4379 "divl %%ecx \n\t" /* u64Result.lo */
4380 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4381 : "=A"(u64Result), "=c"(u32Dummy),
4382 "=S"(u32Dummy), "=D"(u32Dummy)
4383 : "a"((uint32_t)u64A),
4384 "S"((uint32_t)(u64A >> 32)),
4385 "c"(u32B),
4386 "D"(u32C));
4387 return u64Result;
4388# endif
4389# else
4390 RTUINT64U u;
4391 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4392 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4393 u64Hi += (u64Lo >> 32);
4394 u.s.Hi = (uint32_t)(u64Hi / u32C);
4395 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4396 return u.u;
4397# endif
4398}
4399#endif
4400
4401
4402/**
4403 * Probes a byte pointer for read access.
4404 *
4405 * While the function will not fault if the byte is not read accessible,
4406 * the idea is to do this in a safe place like before acquiring locks
4407 * and such like.
4408 *
4409 * Also, this functions guarantees that an eager compiler is not going
4410 * to optimize the probing away.
4411 *
4412 * @param pvByte Pointer to the byte.
4413 */
4414#if RT_INLINE_ASM_EXTERNAL
4415DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4416#else
4417DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4418{
4419 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4420 uint8_t u8;
4421# if RT_INLINE_ASM_GNU_STYLE
4422 __asm__ __volatile__("movb (%1), %0\n\t"
4423 : "=r" (u8)
4424 : "r" (pvByte));
4425# else
4426 __asm
4427 {
4428# ifdef RT_ARCH_AMD64
4429 mov rax, [pvByte]
4430 mov al, [rax]
4431# else
4432 mov eax, [pvByte]
4433 mov al, [eax]
4434# endif
4435 mov [u8], al
4436 }
4437# endif
4438 return u8;
4439}
4440#endif
4441
4442/**
4443 * Probes a buffer for read access page by page.
4444 *
4445 * While the function will fault if the buffer is not fully read
4446 * accessible, the idea is to do this in a safe place like before
4447 * acquiring locks and such like.
4448 *
4449 * Also, this functions guarantees that an eager compiler is not going
4450 * to optimize the probing away.
4451 *
4452 * @param pvBuf Pointer to the buffer.
4453 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4454 */
4455DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4456{
4457 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4458 /* the first byte */
4459 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4460 ASMProbeReadByte(pu8);
4461
4462 /* the pages in between pages. */
4463 while (cbBuf > /*PAGE_SIZE*/0x1000)
4464 {
4465 ASMProbeReadByte(pu8);
4466 cbBuf -= /*PAGE_SIZE*/0x1000;
4467 pu8 += /*PAGE_SIZE*/0x1000;
4468 }
4469
4470 /* the last byte */
4471 ASMProbeReadByte(pu8 + cbBuf - 1);
4472}
4473
4474
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 *
 * Expands to an `int3` in GNU-style inline assembly, or to __debugbreak()
 * otherwise.
 *
 * @remark  In the gnu world we add a nop instruction after the int3 to
 *          force gdb to remain at the int3 source line.
 * @remark  The L4 kernel will try make sense of the breakpoint, thus the jmp.
 * @internal
 */
#if !RT_INLINE_ASM_GNU_STYLE
# define ASMBreakpoint()    __debugbreak()
#elif defined(__L4ENV__)
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
#else
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
#endif
4491
4492
4493
4494/** @defgroup grp_inline_bits Bit Operations
4495 * @{
4496 */
4497
4498
4499/**
4500 * Sets a bit in a bitmap.
4501 *
4502 * @param pvBitmap Pointer to the bitmap.
4503 * @param iBit The bit to set.
4504 */
4505#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4506DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4507#else
4508DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4509{
4510# if RT_INLINE_ASM_USES_INTRIN
4511 _bittestandset((long *)pvBitmap, iBit);
4512
4513# elif RT_INLINE_ASM_GNU_STYLE
4514 __asm__ __volatile__ ("btsl %1, %0"
4515 : "=m" (*(volatile long *)pvBitmap)
4516 : "Ir" (iBit)
4517 : "memory");
4518# else
4519 __asm
4520 {
4521# ifdef RT_ARCH_AMD64
4522 mov rax, [pvBitmap]
4523 mov edx, [iBit]
4524 bts [rax], edx
4525# else
4526 mov eax, [pvBitmap]
4527 mov edx, [iBit]
4528 bts [eax], edx
4529# endif
4530 }
4531# endif
4532}
4533#endif
4534
4535
4536/**
4537 * Atomically sets a bit in a bitmap, ordered.
4538 *
4539 * @param pvBitmap Pointer to the bitmap.
4540 * @param iBit The bit to set.
4541 */
4542#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4543DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4544#else
4545DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4546{
4547# if RT_INLINE_ASM_USES_INTRIN
4548 _interlockedbittestandset((long *)pvBitmap, iBit);
4549# elif RT_INLINE_ASM_GNU_STYLE
4550 __asm__ __volatile__ ("lock; btsl %1, %0"
4551 : "=m" (*(volatile long *)pvBitmap)
4552 : "Ir" (iBit)
4553 : "memory");
4554# else
4555 __asm
4556 {
4557# ifdef RT_ARCH_AMD64
4558 mov rax, [pvBitmap]
4559 mov edx, [iBit]
4560 lock bts [rax], edx
4561# else
4562 mov eax, [pvBitmap]
4563 mov edx, [iBit]
4564 lock bts [eax], edx
4565# endif
4566 }
4567# endif
4568}
4569#endif
4570
4571
4572/**
4573 * Clears a bit in a bitmap.
4574 *
4575 * @param pvBitmap Pointer to the bitmap.
4576 * @param iBit The bit to clear.
4577 */
4578#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4579DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4580#else
4581DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4582{
4583# if RT_INLINE_ASM_USES_INTRIN
4584 _bittestandreset((long *)pvBitmap, iBit);
4585
4586# elif RT_INLINE_ASM_GNU_STYLE
4587 __asm__ __volatile__ ("btrl %1, %0"
4588 : "=m" (*(volatile long *)pvBitmap)
4589 : "Ir" (iBit)
4590 : "memory");
4591# else
4592 __asm
4593 {
4594# ifdef RT_ARCH_AMD64
4595 mov rax, [pvBitmap]
4596 mov edx, [iBit]
4597 btr [rax], edx
4598# else
4599 mov eax, [pvBitmap]
4600 mov edx, [iBit]
4601 btr [eax], edx
4602# endif
4603 }
4604# endif
4605}
4606#endif
4607
4608
4609/**
4610 * Atomically clears a bit in a bitmap, ordered.
4611 *
4612 * @param pvBitmap Pointer to the bitmap.
4613 * @param iBit The bit to toggle set.
4614 * @remark No memory barrier, take care on smp.
4615 */
4616#if RT_INLINE_ASM_EXTERNAL
4617DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4618#else
4619DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4620{
4621# if RT_INLINE_ASM_GNU_STYLE
4622 __asm__ __volatile__ ("lock; btrl %1, %0"
4623 : "=m" (*(volatile long *)pvBitmap)
4624 : "Ir" (iBit)
4625 : "memory");
4626# else
4627 __asm
4628 {
4629# ifdef RT_ARCH_AMD64
4630 mov rax, [pvBitmap]
4631 mov edx, [iBit]
4632 lock btr [rax], edx
4633# else
4634 mov eax, [pvBitmap]
4635 mov edx, [iBit]
4636 lock btr [eax], edx
4637# endif
4638 }
4639# endif
4640}
4641#endif
4642
4643
4644/**
4645 * Toggles a bit in a bitmap.
4646 *
4647 * @param pvBitmap Pointer to the bitmap.
4648 * @param iBit The bit to toggle.
4649 */
4650#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4651DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4652#else
4653DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4654{
4655# if RT_INLINE_ASM_USES_INTRIN
4656 _bittestandcomplement((long *)pvBitmap, iBit);
4657# elif RT_INLINE_ASM_GNU_STYLE
4658 __asm__ __volatile__ ("btcl %1, %0"
4659 : "=m" (*(volatile long *)pvBitmap)
4660 : "Ir" (iBit)
4661 : "memory");
4662# else
4663 __asm
4664 {
4665# ifdef RT_ARCH_AMD64
4666 mov rax, [pvBitmap]
4667 mov edx, [iBit]
4668 btc [rax], edx
4669# else
4670 mov eax, [pvBitmap]
4671 mov edx, [iBit]
4672 btc [eax], edx
4673# endif
4674 }
4675# endif
4676}
4677#endif
4678
4679
4680/**
4681 * Atomically toggles a bit in a bitmap, ordered.
4682 *
4683 * @param pvBitmap Pointer to the bitmap.
4684 * @param iBit The bit to test and set.
4685 */
4686#if RT_INLINE_ASM_EXTERNAL
4687DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4688#else
4689DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4690{
4691# if RT_INLINE_ASM_GNU_STYLE
4692 __asm__ __volatile__ ("lock; btcl %1, %0"
4693 : "=m" (*(volatile long *)pvBitmap)
4694 : "Ir" (iBit)
4695 : "memory");
4696# else
4697 __asm
4698 {
4699# ifdef RT_ARCH_AMD64
4700 mov rax, [pvBitmap]
4701 mov edx, [iBit]
4702 lock btc [rax], edx
4703# else
4704 mov eax, [pvBitmap]
4705 mov edx, [iBit]
4706 lock btc [eax], edx
4707# endif
4708 }
4709# endif
4710}
4711#endif
4712
4713
4714/**
4715 * Tests and sets a bit in a bitmap.
4716 *
4717 * @returns true if the bit was set.
4718 * @returns false if the bit was clear.
4719 * @param pvBitmap Pointer to the bitmap.
4720 * @param iBit The bit to test and set.
4721 */
4722#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4723DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4724#else
4725DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4726{
4727 union { bool f; uint32_t u32; uint8_t u8; } rc;
4728# if RT_INLINE_ASM_USES_INTRIN
4729 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4730
4731# elif RT_INLINE_ASM_GNU_STYLE
4732 __asm__ __volatile__ ("btsl %2, %1\n\t"
4733 "setc %b0\n\t"
4734 "andl $1, %0\n\t"
4735 : "=q" (rc.u32),
4736 "=m" (*(volatile long *)pvBitmap)
4737 : "Ir" (iBit)
4738 : "memory");
4739# else
4740 __asm
4741 {
4742 mov edx, [iBit]
4743# ifdef RT_ARCH_AMD64
4744 mov rax, [pvBitmap]
4745 bts [rax], edx
4746# else
4747 mov eax, [pvBitmap]
4748 bts [eax], edx
4749# endif
4750 setc al
4751 and eax, 1
4752 mov [rc.u32], eax
4753 }
4754# endif
4755 return rc.f;
4756}
4757#endif
4758
4759
4760/**
4761 * Atomically tests and sets a bit in a bitmap, ordered.
4762 *
4763 * @returns true if the bit was set.
4764 * @returns false if the bit was clear.
4765 * @param pvBitmap Pointer to the bitmap.
4766 * @param iBit The bit to set.
4767 */
4768#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4769DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4770#else
4771DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4772{
4773 union { bool f; uint32_t u32; uint8_t u8; } rc;
4774# if RT_INLINE_ASM_USES_INTRIN
4775 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4776# elif RT_INLINE_ASM_GNU_STYLE
4777 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
4778 "setc %b0\n\t"
4779 "andl $1, %0\n\t"
4780 : "=q" (rc.u32),
4781 "=m" (*(volatile long *)pvBitmap)
4782 : "Ir" (iBit)
4783 : "memory");
4784# else
4785 __asm
4786 {
4787 mov edx, [iBit]
4788# ifdef RT_ARCH_AMD64
4789 mov rax, [pvBitmap]
4790 lock bts [rax], edx
4791# else
4792 mov eax, [pvBitmap]
4793 lock bts [eax], edx
4794# endif
4795 setc al
4796 and eax, 1
4797 mov [rc.u32], eax
4798 }
4799# endif
4800 return rc.f;
4801}
4802#endif
4803
4804
4805/**
4806 * Tests and clears a bit in a bitmap.
4807 *
4808 * @returns true if the bit was set.
4809 * @returns false if the bit was clear.
4810 * @param pvBitmap Pointer to the bitmap.
4811 * @param iBit The bit to test and clear.
4812 */
4813#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4814DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4815#else
4816DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4817{
4818 union { bool f; uint32_t u32; uint8_t u8; } rc;
4819# if RT_INLINE_ASM_USES_INTRIN
4820 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4821
4822# elif RT_INLINE_ASM_GNU_STYLE
4823 __asm__ __volatile__ ("btrl %2, %1\n\t"
4824 "setc %b0\n\t"
4825 "andl $1, %0\n\t"
4826 : "=q" (rc.u32),
4827 "=m" (*(volatile long *)pvBitmap)
4828 : "Ir" (iBit)
4829 : "memory");
4830# else
4831 __asm
4832 {
4833 mov edx, [iBit]
4834# ifdef RT_ARCH_AMD64
4835 mov rax, [pvBitmap]
4836 btr [rax], edx
4837# else
4838 mov eax, [pvBitmap]
4839 btr [eax], edx
4840# endif
4841 setc al
4842 and eax, 1
4843 mov [rc.u32], eax
4844 }
4845# endif
4846 return rc.f;
4847}
4848#endif
4849
4850
4851/**
4852 * Atomically tests and clears a bit in a bitmap, ordered.
4853 *
4854 * @returns true if the bit was set.
4855 * @returns false if the bit was clear.
4856 * @param pvBitmap Pointer to the bitmap.
4857 * @param iBit The bit to test and clear.
4858 * @remark No memory barrier, take care on smp.
4859 */
4860#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4861DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4862#else
4863DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4864{
4865 union { bool f; uint32_t u32; uint8_t u8; } rc;
4866# if RT_INLINE_ASM_USES_INTRIN
4867 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4868
4869# elif RT_INLINE_ASM_GNU_STYLE
4870 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
4871 "setc %b0\n\t"
4872 "andl $1, %0\n\t"
4873 : "=q" (rc.u32),
4874 "=m" (*(volatile long *)pvBitmap)
4875 : "Ir" (iBit)
4876 : "memory");
4877# else
4878 __asm
4879 {
4880 mov edx, [iBit]
4881# ifdef RT_ARCH_AMD64
4882 mov rax, [pvBitmap]
4883 lock btr [rax], edx
4884# else
4885 mov eax, [pvBitmap]
4886 lock btr [eax], edx
4887# endif
4888 setc al
4889 and eax, 1
4890 mov [rc.u32], eax
4891 }
4892# endif
4893 return rc.f;
4894}
4895#endif
4896
4897
4898/**
4899 * Tests and toggles a bit in a bitmap.
4900 *
4901 * @returns true if the bit was set.
4902 * @returns false if the bit was clear.
4903 * @param pvBitmap Pointer to the bitmap.
4904 * @param iBit The bit to test and toggle.
4905 */
4906#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4907DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4908#else
4909DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4910{
4911 union { bool f; uint32_t u32; uint8_t u8; } rc;
4912# if RT_INLINE_ASM_USES_INTRIN
4913 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4914
4915# elif RT_INLINE_ASM_GNU_STYLE
4916 __asm__ __volatile__ ("btcl %2, %1\n\t"
4917 "setc %b0\n\t"
4918 "andl $1, %0\n\t"
4919 : "=q" (rc.u32),
4920 "=m" (*(volatile long *)pvBitmap)
4921 : "Ir" (iBit)
4922 : "memory");
4923# else
4924 __asm
4925 {
4926 mov edx, [iBit]
4927# ifdef RT_ARCH_AMD64
4928 mov rax, [pvBitmap]
4929 btc [rax], edx
4930# else
4931 mov eax, [pvBitmap]
4932 btc [eax], edx
4933# endif
4934 setc al
4935 and eax, 1
4936 mov [rc.u32], eax
4937 }
4938# endif
4939 return rc.f;
4940}
4941#endif
4942
4943
4944/**
4945 * Atomically tests and toggles a bit in a bitmap, ordered.
4946 *
4947 * @returns true if the bit was set.
4948 * @returns false if the bit was clear.
4949 * @param pvBitmap Pointer to the bitmap.
4950 * @param iBit The bit to test and toggle.
4951 */
4952#if RT_INLINE_ASM_EXTERNAL
4953DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4954#else
4955DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4956{
4957 union { bool f; uint32_t u32; uint8_t u8; } rc;
4958# if RT_INLINE_ASM_GNU_STYLE
4959 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
4960 "setc %b0\n\t"
4961 "andl $1, %0\n\t"
4962 : "=q" (rc.u32),
4963 "=m" (*(volatile long *)pvBitmap)
4964 : "Ir" (iBit)
4965 : "memory");
4966# else
4967 __asm
4968 {
4969 mov edx, [iBit]
4970# ifdef RT_ARCH_AMD64
4971 mov rax, [pvBitmap]
4972 lock btc [rax], edx
4973# else
4974 mov eax, [pvBitmap]
4975 lock btc [eax], edx
4976# endif
4977 setc al
4978 and eax, 1
4979 mov [rc.u32], eax
4980 }
4981# endif
4982 return rc.f;
4983}
4984#endif
4985
4986
4987/**
4988 * Tests if a bit in a bitmap is set.
4989 *
4990 * @returns true if the bit is set.
4991 * @returns false if the bit is clear.
4992 * @param pvBitmap Pointer to the bitmap.
4993 * @param iBit The bit to test.
4994 */
4995#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4996DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4997#else
4998DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4999{
5000 union { bool f; uint32_t u32; uint8_t u8; } rc;
5001# if RT_INLINE_ASM_USES_INTRIN
5002 rc.u32 = _bittest((long *)pvBitmap, iBit);
5003# elif RT_INLINE_ASM_GNU_STYLE
5004
5005 __asm__ __volatile__ ("btl %2, %1\n\t"
5006 "setc %b0\n\t"
5007 "andl $1, %0\n\t"
5008 : "=q" (rc.u32)
5009 : "m" (*(const volatile long *)pvBitmap),
5010 "Ir" (iBit)
5011 : "memory");
5012# else
5013 __asm
5014 {
5015 mov edx, [iBit]
5016# ifdef RT_ARCH_AMD64
5017 mov rax, [pvBitmap]
5018 bt [rax], edx
5019# else
5020 mov eax, [pvBitmap]
5021 bt [eax], edx
5022# endif
5023 setc al
5024 and eax, 1
5025 mov [rc.u32], eax
5026 }
5027# endif
5028 return rc.f;
5029}
5030#endif
5031
5032
5033/**
5034 * Clears a bit range within a bitmap.
5035 *
5036 * @param pvBitmap Pointer to the bitmap.
5037 * @param iBitStart The First bit to clear.
5038 * @param iBitEnd The first bit not to clear.
5039 */
5040DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5041{
5042 if (iBitStart < iBitEnd)
5043 {
5044 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5045 int iStart = iBitStart & ~31;
5046 int iEnd = iBitEnd & ~31;
5047 if (iStart == iEnd)
5048 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5049 else
5050 {
5051 /* bits in first dword. */
5052 if (iBitStart & 31)
5053 {
5054 *pu32 &= (1 << (iBitStart & 31)) - 1;
5055 pu32++;
5056 iBitStart = iStart + 32;
5057 }
5058
5059 /* whole dword. */
5060 if (iBitStart != iEnd)
5061 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5062
5063 /* bits in last dword. */
5064 if (iBitEnd & 31)
5065 {
5066 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5067 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5068 }
5069 }
5070 }
5071}
5072
5073
5074/**
5075 * Sets a bit range within a bitmap.
5076 *
5077 * @param pvBitmap Pointer to the bitmap.
5078 * @param iBitStart The First bit to set.
5079 * @param iBitEnd The first bit not to set.
5080 */
5081DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5082{
5083 if (iBitStart < iBitEnd)
5084 {
5085 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5086 int iStart = iBitStart & ~31;
5087 int iEnd = iBitEnd & ~31;
5088 if (iStart == iEnd)
5089 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
5090 else
5091 {
5092 /* bits in first dword. */
5093 if (iBitStart & 31)
5094 {
5095 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5096 pu32++;
5097 iBitStart = iStart + 32;
5098 }
5099
5100 /* whole dword. */
5101 if (iBitStart != iEnd)
5102 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5103
5104 /* bits in last dword. */
5105 if (iBitEnd & 31)
5106 {
5107 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5108 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5109 }
5110 }
5111 }
5112}
5113
5114
5115/**
5116 * Finds the first clear bit in a bitmap.
5117 *
5118 * @returns Index of the first zero bit.
5119 * @returns -1 if no clear bit was found.
5120 * @param pvBitmap Pointer to the bitmap.
5121 * @param cBits The number of bits in the bitmap. Multiple of 32.
5122 */
5123#if RT_INLINE_ASM_EXTERNAL
5124DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5125#else
5126DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5127{
5128 if (cBits)
5129 {
5130 int32_t iBit;
5131# if RT_INLINE_ASM_GNU_STYLE
5132 RTCCUINTREG uEAX, uECX, uEDI;
5133 cBits = RT_ALIGN_32(cBits, 32);
5134 __asm__ __volatile__("repe; scasl\n\t"
5135 "je 1f\n\t"
5136# ifdef RT_ARCH_AMD64
5137 "lea -4(%%rdi), %%rdi\n\t"
5138 "xorl (%%rdi), %%eax\n\t"
5139 "subq %5, %%rdi\n\t"
5140# else
5141 "lea -4(%%edi), %%edi\n\t"
5142 "xorl (%%edi), %%eax\n\t"
5143 "subl %5, %%edi\n\t"
5144# endif
5145 "shll $3, %%edi\n\t"
5146 "bsfl %%eax, %%edx\n\t"
5147 "addl %%edi, %%edx\n\t"
5148 "1:\t\n"
5149 : "=d" (iBit),
5150 "=&c" (uECX),
5151 "=&D" (uEDI),
5152 "=&a" (uEAX)
5153 : "0" (0xffffffff),
5154 "mr" (pvBitmap),
5155 "1" (cBits >> 5),
5156 "2" (pvBitmap),
5157 "3" (0xffffffff));
5158# else
5159 cBits = RT_ALIGN_32(cBits, 32);
5160 __asm
5161 {
5162# ifdef RT_ARCH_AMD64
5163 mov rdi, [pvBitmap]
5164 mov rbx, rdi
5165# else
5166 mov edi, [pvBitmap]
5167 mov ebx, edi
5168# endif
5169 mov edx, 0ffffffffh
5170 mov eax, edx
5171 mov ecx, [cBits]
5172 shr ecx, 5
5173 repe scasd
5174 je done
5175
5176# ifdef RT_ARCH_AMD64
5177 lea rdi, [rdi - 4]
5178 xor eax, [rdi]
5179 sub rdi, rbx
5180# else
5181 lea edi, [edi - 4]
5182 xor eax, [edi]
5183 sub edi, ebx
5184# endif
5185 shl edi, 3
5186 bsf edx, eax
5187 add edx, edi
5188 done:
5189 mov [iBit], edx
5190 }
5191# endif
5192 return iBit;
5193 }
5194 return -1;
5195}
5196#endif
5197
5198
5199/**
5200 * Finds the next clear bit in a bitmap.
5201 *
5202 * @returns Index of the first zero bit.
5203 * @returns -1 if no clear bit was found.
5204 * @param pvBitmap Pointer to the bitmap.
5205 * @param cBits The number of bits in the bitmap. Multiple of 32.
5206 * @param iBitPrev The bit returned from the last search.
5207 * The search will start at iBitPrev + 1.
5208 */
5209#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5210DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5211#else
5212DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5213{
5214 int iBit = ++iBitPrev & 31;
5215 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5216 cBits -= iBitPrev & ~31;
5217 if (iBit)
5218 {
5219 /* inspect the first dword. */
5220 uint32_t u32 = (~*(const volatile uint32_t *)pvBitmap) >> iBit;
5221# if RT_INLINE_ASM_USES_INTRIN
5222 unsigned long ulBit = 0;
5223 if (_BitScanForward(&ulBit, u32))
5224 return ulBit + iBitPrev;
5225 iBit = -1;
5226# else
5227# if RT_INLINE_ASM_GNU_STYLE
5228 __asm__ __volatile__("bsf %1, %0\n\t"
5229 "jnz 1f\n\t"
5230 "movl $-1, %0\n\t"
5231 "1:\n\t"
5232 : "=r" (iBit)
5233 : "r" (u32));
5234# else
5235 __asm
5236 {
5237 mov edx, [u32]
5238 bsf eax, edx
5239 jnz done
5240 mov eax, 0ffffffffh
5241 done:
5242 mov [iBit], eax
5243 }
5244# endif
5245 if (iBit >= 0)
5246 return iBit + iBitPrev;
5247# endif
5248 /* Search the rest of the bitmap, if there is anything. */
5249 if (cBits > 32)
5250 {
5251 iBit = ASMBitFirstClear((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5252 if (iBit >= 0)
5253 return iBit + (iBitPrev & ~31) + 32;
5254 }
5255 }
5256 else
5257 {
5258 /* Search the rest of the bitmap. */
5259 iBit = ASMBitFirstClear(pvBitmap, cBits);
5260 if (iBit >= 0)
5261 return iBit + (iBitPrev & ~31);
5262 }
5263 return iBit;
5264}
5265#endif
5266
5267
5268/**
5269 * Finds the first set bit in a bitmap.
5270 *
5271 * @returns Index of the first set bit.
5272 * @returns -1 if no clear bit was found.
5273 * @param pvBitmap Pointer to the bitmap.
5274 * @param cBits The number of bits in the bitmap. Multiple of 32.
5275 */
5276#if RT_INLINE_ASM_EXTERNAL
5277DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
5278#else
5279DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
5280{
5281 if (cBits)
5282 {
5283 int32_t iBit;
5284# if RT_INLINE_ASM_GNU_STYLE
5285 RTCCUINTREG uEAX, uECX, uEDI;
5286 cBits = RT_ALIGN_32(cBits, 32);
5287 __asm__ __volatile__("repe; scasl\n\t"
5288 "je 1f\n\t"
5289# ifdef RT_ARCH_AMD64
5290 "lea -4(%%rdi), %%rdi\n\t"
5291 "movl (%%rdi), %%eax\n\t"
5292 "subq %5, %%rdi\n\t"
5293# else
5294 "lea -4(%%edi), %%edi\n\t"
5295 "movl (%%edi), %%eax\n\t"
5296 "subl %5, %%edi\n\t"
5297# endif
5298 "shll $3, %%edi\n\t"
5299 "bsfl %%eax, %%edx\n\t"
5300 "addl %%edi, %%edx\n\t"
5301 "1:\t\n"
5302 : "=d" (iBit),
5303 "=&c" (uECX),
5304 "=&D" (uEDI),
5305 "=&a" (uEAX)
5306 : "0" (0xffffffff),
5307 "mr" (pvBitmap),
5308 "1" (cBits >> 5),
5309 "2" (pvBitmap),
5310 "3" (0));
5311# else
5312 cBits = RT_ALIGN_32(cBits, 32);
5313 __asm
5314 {
5315# ifdef RT_ARCH_AMD64
5316 mov rdi, [pvBitmap]
5317 mov rbx, rdi
5318# else
5319 mov edi, [pvBitmap]
5320 mov ebx, edi
5321# endif
5322 mov edx, 0ffffffffh
5323 xor eax, eax
5324 mov ecx, [cBits]
5325 shr ecx, 5
5326 repe scasd
5327 je done
5328# ifdef RT_ARCH_AMD64
5329 lea rdi, [rdi - 4]
5330 mov eax, [rdi]
5331 sub rdi, rbx
5332# else
5333 lea edi, [edi - 4]
5334 mov eax, [edi]
5335 sub edi, ebx
5336# endif
5337 shl edi, 3
5338 bsf edx, eax
5339 add edx, edi
5340 done:
5341 mov [iBit], edx
5342 }
5343# endif
5344 return iBit;
5345 }
5346 return -1;
5347}
5348#endif
5349
5350
5351/**
5352 * Finds the next set bit in a bitmap.
5353 *
5354 * @returns Index of the next set bit.
5355 * @returns -1 if no set bit was found.
5356 * @param pvBitmap Pointer to the bitmap.
5357 * @param cBits The number of bits in the bitmap. Multiple of 32.
5358 * @param iBitPrev The bit returned from the last search.
5359 * The search will start at iBitPrev + 1.
5360 */
5361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5362DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5363#else
5364DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5365{
5366 int iBit = ++iBitPrev & 31;
5367 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5368 cBits -= iBitPrev & ~31;
5369 if (iBit)
5370 {
5371 /* inspect the first dword. */
5372 uint32_t u32 = *(const volatile uint32_t *)pvBitmap >> iBit;
5373# if RT_INLINE_ASM_USES_INTRIN
5374 unsigned long ulBit = 0;
5375 if (_BitScanForward(&ulBit, u32))
5376 return ulBit + iBitPrev;
5377 iBit = -1;
5378# else
5379# if RT_INLINE_ASM_GNU_STYLE
5380 __asm__ __volatile__("bsf %1, %0\n\t"
5381 "jnz 1f\n\t"
5382 "movl $-1, %0\n\t"
5383 "1:\n\t"
5384 : "=r" (iBit)
5385 : "r" (u32));
5386# else
5387 __asm
5388 {
5389 mov edx, u32
5390 bsf eax, edx
5391 jnz done
5392 mov eax, 0ffffffffh
5393 done:
5394 mov [iBit], eax
5395 }
5396# endif
5397 if (iBit >= 0)
5398 return iBit + iBitPrev;
5399# endif
5400 /* Search the rest of the bitmap, if there is anything. */
5401 if (cBits > 32)
5402 {
5403 iBit = ASMBitFirstSet((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5404 if (iBit >= 0)
5405 return iBit + (iBitPrev & ~31) + 32;
5406 }
5407
5408 }
5409 else
5410 {
5411 /* Search the rest of the bitmap. */
5412 iBit = ASMBitFirstSet(pvBitmap, cBits);
5413 if (iBit >= 0)
5414 return iBit + (iBitPrev & ~31);
5415 }
5416 return iBit;
5417}
5418#endif
5419
5420
5421/**
5422 * Finds the first bit which is set in the given 32-bit integer.
5423 * Bits are numbered from 1 (least significant) to 32.
5424 *
5425 * @returns index [1..32] of the first set bit.
5426 * @returns 0 if all bits are cleared.
5427 * @param u32 Integer to search for set bits.
5428 * @remark Similar to ffs() in BSD.
5429 */
5430DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5431{
5432# if RT_INLINE_ASM_USES_INTRIN
5433 unsigned long iBit;
5434 if (_BitScanForward(&iBit, u32))
5435 iBit++;
5436 else
5437 iBit = 0;
5438# elif RT_INLINE_ASM_GNU_STYLE
5439 uint32_t iBit;
5440 __asm__ __volatile__("bsf %1, %0\n\t"
5441 "jnz 1f\n\t"
5442 "xorl %0, %0\n\t"
5443 "jmp 2f\n"
5444 "1:\n\t"
5445 "incl %0\n"
5446 "2:\n\t"
5447 : "=r" (iBit)
5448 : "rm" (u32));
5449# else
5450 uint32_t iBit;
5451 _asm
5452 {
5453 bsf eax, [u32]
5454 jnz found
5455 xor eax, eax
5456 jmp done
5457 found:
5458 inc eax
5459 done:
5460 mov [iBit], eax
5461 }
5462# endif
5463 return iBit;
5464}
5465
5466
5467/**
5468 * Finds the first bit which is set in the given 32-bit integer.
5469 * Bits are numbered from 1 (least significant) to 32.
5470 *
5471 * @returns index [1..32] of the first set bit.
5472 * @returns 0 if all bits are cleared.
5473 * @param i32 Integer to search for set bits.
5474 * @remark Similar to ffs() in BSD.
5475 */
5476DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5477{
5478 return ASMBitFirstSetU32((uint32_t)i32);
5479}
5480
5481
5482/**
5483 * Finds the last bit which is set in the given 32-bit integer.
5484 * Bits are numbered from 1 (least significant) to 32.
5485 *
5486 * @returns index [1..32] of the last set bit.
5487 * @returns 0 if all bits are cleared.
5488 * @param u32 Integer to search for set bits.
5489 * @remark Similar to fls() in BSD.
5490 */
5491DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5492{
5493# if RT_INLINE_ASM_USES_INTRIN
5494 unsigned long iBit;
5495 if (_BitScanReverse(&iBit, u32))
5496 iBit++;
5497 else
5498 iBit = 0;
5499# elif RT_INLINE_ASM_GNU_STYLE
5500 uint32_t iBit;
5501 __asm__ __volatile__("bsrl %1, %0\n\t"
5502 "jnz 1f\n\t"
5503 "xorl %0, %0\n\t"
5504 "jmp 2f\n"
5505 "1:\n\t"
5506 "incl %0\n"
5507 "2:\n\t"
5508 : "=r" (iBit)
5509 : "rm" (u32));
5510# else
5511 uint32_t iBit;
5512 _asm
5513 {
5514 bsr eax, [u32]
5515 jnz found
5516 xor eax, eax
5517 jmp done
5518 found:
5519 inc eax
5520 done:
5521 mov [iBit], eax
5522 }
5523# endif
5524 return iBit;
5525}
5526
5527
5528/**
5529 * Finds the last bit which is set in the given 32-bit integer.
5530 * Bits are numbered from 1 (least significant) to 32.
5531 *
5532 * @returns index [1..32] of the last set bit.
5533 * @returns 0 if all bits are cleared.
5534 * @param i32 Integer to search for set bits.
5535 * @remark Similar to fls() in BSD.
5536 */
5537DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5538{
5539 return ASMBitLastSetS32((uint32_t)i32);
5540}
5541
5542/**
5543 * Reverse the byte order of the given 16-bit integer.
5544 *
5545 * @returns Revert
5546 * @param u16 16-bit integer value.
5547 */
5548DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5549{
5550#if RT_INLINE_ASM_USES_INTRIN
5551 u16 = _byteswap_ushort(u16);
5552#elif RT_INLINE_ASM_GNU_STYLE
5553 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5554#else
5555 _asm
5556 {
5557 mov ax, [u16]
5558 ror ax, 8
5559 mov [u16], ax
5560 }
5561#endif
5562 return u16;
5563}
5564
5565/**
5566 * Reverse the byte order of the given 32-bit integer.
5567 *
5568 * @returns Revert
5569 * @param u32 32-bit integer value.
5570 */
5571DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5572{
5573#if RT_INLINE_ASM_USES_INTRIN
5574 u32 = _byteswap_ulong(u32);
5575#elif RT_INLINE_ASM_GNU_STYLE
5576 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5577#else
5578 _asm
5579 {
5580 mov eax, [u32]
5581 bswap eax
5582 mov [u32], eax
5583 }
5584#endif
5585 return u32;
5586}
5587
5588
5589/**
5590 * Reverse the byte order of the given 64-bit integer.
5591 *
5592 * @returns Revert
5593 * @param u64 64-bit integer value.
5594 */
5595DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5596{
5597#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5598 u64 = _byteswap_uint64(u64);
5599#else /* !RT_ARCH_AMD64 (assume x86) */
5600 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5601 | (uint64_t)ASMByteSwapU32(u64 >> 32);
5602#endif
5603 return u64;
5604}
5605
5606
5607/** @} */
5608
5609
5610/** @} */
5611#endif
5612
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette