VirtualBox

Changeset 102939 in vbox


Timestamp:
Jan 17, 2024 10:45:27 PM
Author:
vboxsync
Message:

iprt/asm.h: Working over the ASMAtomicWrite* functions for arm64 w/ optional use of LSE instructions. bugref:9898
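
For readers unfamiliar with the API being reworked: the ASMAtomicWrite* family performs sequentially consistent stores of plain integer types, and on arm64 it now expands to the DMB SY + STLR (or, with FEAT_LSE, SWP) sequences added below. A minimal caller-side sketch (variable and function names are hypothetical; it only assumes iprt/asm.h is on the include path):

    #include <iprt/asm.h>

    /* Hypothetical shared state written by one thread and read by others. */
    static volatile uint32_t g_fShutdown = 0;
    static volatile uint64_t g_cRequests = 0;

    void requestShutdown(void)
    {
        /* Sequentially consistent write; readers pair it with ASMAtomicReadU32(). */
        ASMAtomicWriteU32(&g_fShutdown, 1);
    }

    void publishRequestCount(uint64_t cRequests)
    {
        ASMAtomicWriteU64(&g_cRequests, cRequests);
    }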

File:
1 edited

Legend: lines prefixed with '+' were added in r102939, lines prefixed with '-' were removed, and unprefixed lines are unchanged context.
  • trunk/include/iprt/asm.h

--- trunk/include/iprt/asm.h    (r102938)
+++ trunk/include/iprt/asm.h    (r102939)
 # endif
 #endif
+
+#if (defined(RT_ARCH_ARM64) && defined(RT_OS_DARWIN)) || defined(DOXYGEN_RUNNING)
+/** @def RTASM_ARM64_USE_FEAT_LSE
+ * Use instructions from the FEAT_LSE set to implement atomic operations,
+ * assuming that the host CPU always supports these. */
+# define RTASM_ARM64_USE_FEAT_LSE 1
+#endif
+
 
 /*
     
 DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    /* The DMB SY will ensure ordering a la x86, the stlrb is probably overkill
+       as all byte accesses are single-copy atomic, which I think suffices here. */
+    __asm__ __volatile__("Lstart_ASMAtomicWriteU8_%=:\n\t"
+# if defined(RTASM_ARM64_USE_FEAT_LSE) && 0 /* this is a lot slower and has no alignment benefits with LSE2 */
+                         RTASM_ARM_DMB_SY
+                         "swpb      %w[uValue], wzr, %[pMem]\n\t"
+# else
+                         RTASM_ARM_DMB_SY
+                         "stlrb     %w[uValue], %[pMem]\n\t" /* single-copy atomic w/ release semantics. */
+# endif
+                         : [pMem]   "+Q" (*pu8)
+                         : [uValue] "r" ((uint32_t)u8)
+                         : );
+#else
     ASMAtomicXchgU8(pu8, u8);
+#endif
 }
 
     
 DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 improvements here? */
     *pu8 = u8;      /* byte writes are atomic on x86 */
 }
     
 DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    ASMAtomicWriteU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
+#else
     ASMAtomicXchgS8(pi8, i8);
+#endif
 }
 
     
 DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    __asm__ __volatile__("Lstart_ASMAtomicWriteU16_%=:\n\t"
+# if defined(RTASM_ARM64_USE_FEAT_LSE) /* slower on M1, but benefits from relaxed LSE2 alignment requirements (M2?). */
+                         RTASM_ARM_DMB_SY
+                         "swph      %w[uValue], wzr, %[pMem]\n\t"
+# else
+                         RTASM_ARM_DMB_SY
+                         "stlrh     %w[uValue], %[pMem]\n\t" /* single-copy atomic w/ release semantics. */
+# endif
+                         : [pMem]   "+Q" (*pu16)
+                         : [uValue] "r" ((uint32_t)u16)
+                         : );
+#else
     ASMAtomicXchgU16(pu16, u16);
+#endif
 }
 
     
 DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    ASMAtomicWriteU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
+#else
     ASMAtomicXchgS16(pi16, i16);
+#endif
 }
 
     
 DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    __asm__ __volatile__("Lstart_ASMAtomicWriteU32_%=:\n\t"
+# if defined(RTASM_ARM64_USE_FEAT_LSE) /* slower on M1, but benefits from relaxed LSE2 alignment requirements (M2?). */
+                         RTASM_ARM_DMB_SY
+                         "swp      %w[uValue], wzr, %[pMem]\n\t"
+# else
+                         RTASM_ARM_DMB_SY
+                         "stlr     %w[uValue], %[pMem]\n\t" /* single-copy atomic w/ release semantics. */
+# endif
+                         : [pMem]   "+Q" (*pu32)
+                         : [uValue] "r" (u32)
+                         : "cc");
+#else
     ASMAtomicXchgU32(pu32, u32);
+#endif
 }
 
     
 DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
 {
+#if defined(RT_ARCH_ARM64)
+    ASMAtomicWriteU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
+#else
     ASMAtomicXchgS32(pi32, i32);
+#endif
 }
 
     
 DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    __asm__ __volatile__("Lstart_ASMAtomicWriteU64_%=:\n\t"
+# if defined(RTASM_ARM64_USE_FEAT_LSE) /* slower on M1, but benefits from relaxed LSE2 alignment requirements (M2?). */
+                         RTASM_ARM_DMB_SY
+                         "swp      %[uValue], xzr, %[pMem]\n\t"
+# else
+                         RTASM_ARM_DMB_SY /** @todo necessary? */
+                         "stlr     %[uValue], %[pMem]\n\t"
+# endif
+                         : [pMem]   "+Q" (*pu64)
+                         : [uValue] "r" (u64)
+                         : );
+#else
     ASMAtomicXchgU64(pu64, u64);
+#endif
 }
 
     
 DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
 {
-    /** @todo Any possible ARM32/ARM64 optimizations here? */
+#if defined(RT_ARCH_ARM64)
+    ASMAtomicWriteU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
+#else
     ASMAtomicXchgS64(pi64, i64);
+#endif
 }
 
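The FEAT_LSE path is enabled unconditionally only for darwin.arm64 hosts (see the RTASM_ARM64_USE_FEAT_LSE block at the top of the diff), where every supported CPU implements the LSE atomics. On other arm64 hosts the feature would have to be established some other way before defining the macro; the following standalone sketch of a runtime probe is purely illustrative and not part of this changeset (the function name and fallback behaviour are assumptions):

    #include <stdbool.h>

    #if defined(__linux__) && defined(__aarch64__)
    # include <sys/auxv.h>
    # include <asm/hwcap.h>
    static bool hostHasFeatLse(void)
    {
        /* The kernel sets HWCAP_ATOMICS when the CPU implements the LSE
           atomic instructions (SWP, CAS, LDADD, ...). */
        return (getauxval(AT_HWCAP) & HWCAP_ATOMICS) != 0;
    }
    #elif defined(__APPLE__) && defined(__aarch64__)
    # include <sys/sysctl.h>
    static bool hostHasFeatLse(void)
    {
        /* Apple exposes the CPU feature bits as hw.optional.arm.FEAT_* sysctls. */
        int    fSupported = 0;
        size_t cb         = sizeof(fSupported);
        if (sysctlbyname("hw.optional.arm.FEAT_LSE", &fSupported, &cb, NULL, 0) == 0)
            return fSupported != 0;
        return false; /* Older macOS versions may not publish this sysctl. */
    }
    #else
    static bool hostHasFeatLse(void)
    {
        return false; /* Unknown host: stay on the non-LSE code path. */
    }
    #endif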