Changeset 102959 in vbox for trunk/include/iprt/asm.h
Timestamp:
    Jan 18, 2024 8:36:48 PM
File:
    trunk/include/iprt/asm.h (1 edited)
trunk/include/iprt/asm.h
--- trunk/include/iprt/asm.h (r102941)
+++ trunk/include/iprt/asm.h (r102959)

@@ -104,3 +104,8 @@
  * assuming that the host CPU always supports these. */
 # define RTASM_ARM64_USE_FEAT_LSE 1
+/** @def RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB
+ * Set to skip the DMB barrier in most places and rely on the acquire-release
+ * aspects to do the serializing.  The assumption is that the tstRTInline
+ * benchmark may be skewing the results testing an unusual scenario. */
+# define RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB 1
 #endif

@@ -531,10 +536,13 @@
        care of by the DMB.  The SWPB is rather cheap (~70% faster). */
     __asm__ __volatile__("Lstart_ASMAtomicXchgU8_%=:\n\t"
+#  if defined(RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB)
+                         "swpalb    %w[uNew], %w[uOld], %[pMem]\n\t"
+#  else
                          RTASM_ARM_DMB_SY
                          "swpb      %w[uNew], %w[uOld], %[pMem]\n\t"
+#  endif
                          : [pMem] "+Q" (*pu8)
                          , [uOld] "=&r" (uOld)
                          : [uNew] "r" ((uint32_t)u8)
-                           RTASM_ARM_DMB_SY_COMMA_IN_REG
                          : );
 # else

@@ -643,10 +651,13 @@
        The SWPH is rather cheap (~70% faster). */
     __asm__ __volatile__("Lstart_ASMAtomicXchgU16_%=:\n\t"
+#  if defined(RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB)
+                         "swpalh    %w[uNew], %w[uOld], %[pMem]\n\t"
+#  else
                          RTASM_ARM_DMB_SY
                          "swph      %w[uNew], %w[uOld], %[pMem]\n\t"
+#  endif
                          : [pMem] "+Q" (*pu16)
                          , [uOld] "=&r" (uOld)
                          : [uNew] "r" ((uint32_t)u16)
-                           RTASM_ARM_DMB_SY_COMMA_IN_REG
                          : );
 # else

@@ -744,10 +755,13 @@
        The SWP is rather cheap (~70% faster). */
     __asm__ __volatile__("Lstart_ASMAtomicXchgU32_%=:\n\t"
+#  if defined(RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB)
+                         "swpal     %w[uNew], %w[uOld], %[pMem]\n\t"
+#  else
                          RTASM_ARM_DMB_SY
                          "swp       %w[uNew], %w[uOld], %[pMem]\n\t"
+#  endif
                          : [pMem] "+Q" (*pu32)
                          , [uOld] "=&r" (uOld)
                          : [uNew] "r" (u32)
-                           RTASM_ARM_DMB_SY_COMMA_IN_REG
                          : );
 # else

@@ -885,10 +899,13 @@
        The SWP is rather cheap (~70% faster). */
     __asm__ __volatile__("Lstart_ASMAtomicXchgU64_%=:\n\t"
+#  if defined(RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB)
+                         "swpal     %[uNew], %[uOld], %[pMem]\n\t"
+#  else
                          RTASM_ARM_DMB_SY
                          "swp       %[uNew], %[uOld], %[pMem]\n\t"
+#  endif
                          : [pMem] "+Q" (*pu64)
                          , [uOld] "=&r" (uOld)
                          : [uNew] "r" (u64)
-                           RTASM_ARM_DMB_SY_COMMA_IN_REG
                          : );
 # else
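In substance, each hunk replaces the explicit full barrier (DMB SY) followed
by a plain SWP/SWPB/SWPH with the single SWPAL variant of the instruction,
relying on its built-in acquire-release ordering (roughly memory_order_acq_rel
in C11 terms) instead of the full-system serialization of DMB SY. Below is a
minimal standalone sketch of the same pattern for the 32-bit case. It is not
VirtualBox code: the function name xchg_u32 and the USE_SWPAL toggle are made
up for illustration, and it assumes an AArch64 compiler with LSE enabled,
e.g. gcc -O2 -march=armv8.1-a (add -DUSE_SWPAL for the barrier-free variant).

    #include <stdint.h>
    #include <stdio.h>

    /* Exchange *pMem for uNew, returning the previous value.  AArch64 only. */
    static inline uint32_t xchg_u32(volatile uint32_t *pMem, uint32_t uNew)
    {
        uint32_t uOld;
    #if defined(USE_SWPAL)
        /* Post-change pattern: SWPAL's own acquire-release semantics provide
           the ordering; no separate barrier instruction is issued. */
        __asm__ __volatile__("swpal  %w[uNew], %w[uOld], %[pMem]\n\t"
                             : [pMem] "+Q" (*pMem)
                             , [uOld] "=&r" (uOld)
                             : [uNew] "r" (uNew)
                             : "memory");
    #else
        /* Pre-change pattern: full system barrier, then a plain swap. */
        __asm__ __volatile__("dmb    sy\n\t"
                             "swp    %w[uNew], %w[uOld], %[pMem]\n\t"
                             : [pMem] "+Q" (*pMem)
                             , [uOld] "=&r" (uOld)
                             : [uNew] "r" (uNew)
                             : "memory");
    #endif
        return uOld;
    }

    int main(void)
    {
        volatile uint32_t u = 1;
        uint32_t uPrev = xchg_u32(&u, 2);
        printf("previous=%u current=%u\n", (unsigned)uPrev, (unsigned)u);
        return 0;
    }

The sketch keeps a "memory" clobber in both variants so the compiler itself
does not reorder accesses around the exchange; the changeset instead drops the
RTASM_ARM_DMB_SY_COMMA_IN_REG input operand that the DMB macro required.
Whether acquire-release ordering is a sufficient replacement for the full
barrier in all callers is exactly the question the new
RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB toggle leaves open to measurement.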