Changeset 102939 in vbox

- Timestamp: Jan 17, 2024 10:45:27 PM (13 months ago)
- File: 1 edited
trunk/include/iprt/asm.h
r102938 → r102939

@@ -98 +98 @@
 # endif
 #endif
+
+#if (defined(RT_ARCH_ARM64) && defined(RT_OS_DARWIN)) || defined(DOXYGEN_RUNNING)
+/** @def RTASM_ARM64_USE_FEAT_LSE
+ * Use instructions from the FEAT_LSE set to implement atomic operations,
+ * assuming that the host CPU always supports these. */
+# define RTASM_ARM64_USE_FEAT_LSE 1
+#endif
+

 /*

@@ -3514 +3522 @@
 DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    /* The DMB SY will ensure ordering a la x86, the stlrb is probably overkill
+       as all byte accesses are single-copy atomic, which I think suffices here. */
+    __asm__ __volatile__("Lstart_ASMAtomicWriteU8_%=:\n\t"
+# if defined(RTASM_ARM64_USE_FEAT_LSE) && 0 /* this is a lot slower and has no alignment benefits with LSE2 */
+                         RTASM_ARM_DMB_SY
+                         "swpb     %w[uValue], wzr, %[pMem]\n\t"
+# else
+                         RTASM_ARM_DMB_SY
+                         "stlrb    %w[uValue], %[pMem]\n\t" /* single-copy atomic w/ release semantics. */
+# endif
+                         : [pMem]   "+Q" (*pu8)
+                         : [uValue] "r" ((uint32_t)u8)
+                         : );
+#else
     ASMAtomicXchgU8(pu8, u8);
+#endif
 }

@@ -3527 +3550 @@
 DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 improvements here? */
     *pu8 = u8;    /* byte writes are atomic on x86 */
 }

@@ -3540 +3562 @@
 DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    ASMAtomicWriteU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
+#else
     ASMAtomicXchgS8(pi8, i8);
+#endif
 }

@@ -3565 +3590 @@
 DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    __asm__ __volatile__("Lstart_ASMAtomicWriteU16_%=:\n\t"
+# if defined(RTASM_ARM64_USE_FEAT_LSE) /* slower on M1, but benefits from relaxed LSE2 alignment requirements (M2?). */
+                         RTASM_ARM_DMB_SY
+                         "swph     %w[uValue], wzr, %[pMem]\n\t"
+# else
+                         RTASM_ARM_DMB_SY
+                         "stlrh    %w[uValue], %[pMem]\n\t" /* single-copy atomic w/ release semantics. */
+# endif
+                         : [pMem]   "+Q" (*pu16)
+                         : [uValue] "r" ((uint32_t)u16)
+                         : );
+#else
     ASMAtomicXchgU16(pu16, u16);
+#endif
 }

@@ -3591 +3629 @@
 DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    ASMAtomicWriteU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
+#else
     ASMAtomicXchgS16(pi16, i16);
+#endif
 }

@@ -3617 +3658 @@
 DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    __asm__ __volatile__("Lstart_ASMAtomicWriteU32_%=:\n\t"
+# if defined(RTASM_ARM64_USE_FEAT_LSE) /* slower on M1, but benefits from relaxed LSE2 alignment requirements (M2?). */
+                         RTASM_ARM_DMB_SY
+                         "swp      %w[uValue], wzr, %[pMem]\n\t"
+# else
+                         RTASM_ARM_DMB_SY
+                         "stlr     %w[uValue], %[pMem]\n\t" /* single-copy atomic w/ release semantics. */
+# endif
+                         : [pMem]   "+Q" (*pu32)
+                         : [uValue] "r" (u32)
+                         : "cc");
+#else
     ASMAtomicXchgU32(pu32, u32);
+#endif
 }

@@ -3647 +3701 @@
 DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
 {
+#if defined(RT_ARCH_ARM64)
+    ASMAtomicWriteU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
+#else
     ASMAtomicXchgS32(pi32, i32);
+#endif
 }

@@ -3676 +3734 @@
 DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    __asm__ __volatile__("Lstart_ASMAtomicWriteU64_%=:\n\t"
+# if defined(RTASM_ARM64_USE_FEAT_LSE) /* slower on M1, but benefits from relaxed LSE2 alignment requirements (M2?). */
+                         RTASM_ARM_DMB_SY
+                         "swp      %[uValue], xzr, %[pMem]\n\t"
+# else
+                         RTASM_ARM_DMB_SY /** @todo necessary? */
+                         "stlr     %[uValue], %[pMem]\n\t"
+# endif
+                         : [pMem]   "+Q" (*pu64)
+                         : [uValue] "r" (u64)
+                         : );
+#else
     ASMAtomicXchgU64(pu64, u64);
+#endif
 }

@@ -3706 +3777 @@
 DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    ASMAtomicWriteU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
+#else
     ASMAtomicXchgS64(pi64, i64);
+#endif
 }
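For readers who want the pattern in isolation: the non-FEAT_LSE branches above issue a full DMB SY barrier followed by a store-release (STLRB/STLRH/STLR), while the FEAT_LSE branches use SWP with the zero register as the old-value destination so the loaded value is simply discarded. The following is a hypothetical standalone GCC/Clang sketch of the STLR variant, not IPRT code; the name my_atomic_write_u32 is made up for illustration, and the non-AArch64 path falls back to a compiler builtin rather than IPRT's ASMAtomicXchgU32.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical standalone illustration (not IPRT code): DMB SY + STLR,
   mirroring the non-FEAT_LSE branch of ASMAtomicWriteU32 on ARM64. */
static inline void my_atomic_write_u32(volatile uint32_t *pu32, uint32_t u32)
{
#if defined(__aarch64__)
    __asm__ __volatile__("dmb sy\n\t"                   /* full barrier, x86-like ordering */
                         "stlr %w[uValue], %[pMem]\n\t" /* single-copy atomic store w/ release */
                         : [pMem]   "+Q" (*pu32)
                         : [uValue] "r" (u32)
                         : );
#else
    __atomic_store_n(pu32, u32, __ATOMIC_SEQ_CST);      /* portable fallback */
#endif
}

int main(void)
{
    volatile uint32_t uShared = 0;
    my_atomic_write_u32(&uShared, 42);
    printf("uShared = %u\n", (unsigned)uShared);
    return 0;
}

On CPUs with FEAT_LSE, the changeset's alternative is "swp %w[uValue], wzr, %[pMem]", which stores the new value and throws the old one away into wzr; per the commit comments this is slower on Apple M1 but benefits from the relaxed alignment requirements of LSE2.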