Changeset 102940 in vbox
- Timestamp: Jan 17, 2024 11:23:30 PM
- File: 1 edited
trunk/include/iprt/asm.h
--- trunk/include/iprt/asm.h (r102939)
+++ trunk/include/iprt/asm.h (r102940)

@@ -2711 +2711 @@
 #if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
     uint32_t u32;
+# if defined(RTASM_ARM64_USE_FEAT_LSE) && 0 /* very expensive on M1 */
     __asm__ __volatile__("Lstart_ASMAtomicReadU8_%=:\n\t"
                          RTASM_ARM_DMB_SY
-# if defined(RT_ARCH_ARM64)
+                         "casab %w[uDst], wzr, %[pMem]\n\t"
+                         : [uDst] "=&r" (u32)
+                         : [pMem] "Q" (*pu8),
+                           "0" (0)
+                           RTASM_ARM_DMB_SY_COMMA_IN_REG);
+# else
+    __asm__ __volatile__("Lstart_ASMAtomicReadU8_%=:\n\t"
+                         RTASM_ARM_DMB_SY
+# if defined(RT_ARCH_ARM64)
+# if 1 /* shouldn't be any need for more than single-copy atomicity when we've got a proper barrier, just like on x86. */
+                         "ldurb %w[uDst], %[pMem]\n\t"
+# else
                          "ldxrb %w[uDst], %[pMem]\n\t"
-# else
+                         "clrex\n\t"
+# endif
+# else
                          "ldrexb %[uDst], %[pMem]\n\t"
-# endif
+                         /** @todo clrex */
+# endif
                          : [uDst] "=&r" (u32)
                          : [pMem] "Q" (*pu8)
                            RTASM_ARM_DMB_SY_COMMA_IN_REG);
+# endif
     return (uint8_t)u32;
 #else

@@ -2741 +2757 @@
     __asm__ __volatile__("Lstart_ASMAtomicUoReadU8_%=:\n\t"
 # if defined(RT_ARCH_ARM64)
-                         "ldxrb %w[uDst], %[pMem]\n\t"
-# else
-                         "ldrexb %[uDst], %[pMem]\n\t"
+                         "ldurb %w[uDst], %[pMem]\n\t"
+# else
+                         "ldrexb %[uDst], %[pMem]\n\t" /** @todo fix this */
 # endif
                          : [uDst] "=&r" (u32)

@@ -2762 +2778 @@
 DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
 {
+#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
+    return (int8_t)ASMAtomicReadU8((volatile uint8_t RT_FAR *)pi8);
+#else
     ASMMemoryFence();
-#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
-    int32_t i32;
-    __asm__ __volatile__("Lstart_ASMAtomicReadS8_%=:\n\t"
-                         RTASM_ARM_DMB_SY
-# if defined(RT_ARCH_ARM64)
-                         "ldxrb %w[iDst], %[pMem]\n\t"
-# else
-                         "ldrexb %[iDst], %[pMem]\n\t"
-# endif
-                         : [iDst] "=&r" (i32)
-                         : [pMem] "Q" (*pi8)
-                           RTASM_ARM_DMB_SY_COMMA_IN_REG);
-    return (int8_t)i32;
-#else
     return *pi8; /* byte reads are atomic on x86 */
 #endif

@@ -2794 +2799 @@
     __asm__ __volatile__("Lstart_ASMAtomicUoReadS8_%=:\n\t"
 # if defined(RT_ARCH_ARM64)
-                         "ldxrb %w[iDst], %[pMem]\n\t"
-# else
-                         "ldrexb %[iDst], %[pMem]\n\t"
+                         "ldurb %w[iDst], %[pMem]\n\t"
+# else
+                         "ldrexb %[iDst], %[pMem]\n\t" /** @todo fix this */
 # endif
                          : [iDst] "=&r" (i32)

@@ -2818 +2823 @@
 #if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
     uint32_t u32;
+# if defined(RTASM_ARM64_USE_FEAT_LSE) && 0 /* very expensive on M1, but alignment advantages with LEA2 (M2?). */
     __asm__ __volatile__("Lstart_ASMAtomicReadU16_%=:\n\t"
                          RTASM_ARM_DMB_SY
-# if defined(RT_ARCH_ARM64)
+                         "casah %w[uDst], wzr, %[pMem]\n\t"
+                         : [uDst] "=&r" (u32)
+                         : [pMem] "Q" (*pu16),
+                           "0" (0)
+                           RTASM_ARM_DMB_SY_COMMA_IN_REG);
+# else
+    __asm__ __volatile__("Lstart_ASMAtomicReadU16_%=:\n\t"
+                         RTASM_ARM_DMB_SY
+# if defined(RT_ARCH_ARM64)
+# if 1 /* ASSUMING proper barrier and aligned access, we should be fine with single-copy atomicity, just like on x86. */
+                         "ldurh %w[uDst], %[pMem]\n\t"
+# else
                          "ldxrh %w[uDst], %[pMem]\n\t"
-# else
+                         "clrex\n\t"
+# endif
+# else
                          "ldrexh %[uDst], %[pMem]\n\t"
-# endif
+                         /** @todo clrex */
+# endif
                          : [uDst] "=&r" (u32)
                          : [pMem] "Q" (*pu16)
                            RTASM_ARM_DMB_SY_COMMA_IN_REG);
+# endif
     return (uint16_t)u32;
 #else

@@ -2849 +2870 @@
     __asm__ __volatile__("Lstart_ASMAtomicUoReadU16_%=:\n\t"
 # if defined(RT_ARCH_ARM64)
-                         "ldxrh %w[uDst], %[pMem]\n\t"
-# else
-                         "ldrexh %[uDst], %[pMem]\n\t"
+                         "ldurh %w[uDst], %[pMem]\n\t"
+# else
+                         "ldrexh %[uDst], %[pMem]\n\t" /** @todo fix this */
 # endif
                          : [uDst] "=&r" (u32)

@@ -2872 +2893 @@
     Assert(!((uintptr_t)pi16 & 1));
 #if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
-    int32_t i32;
-    __asm__ __volatile__("Lstart_ASMAtomicReadS16_%=:\n\t"
-                         RTASM_ARM_DMB_SY
-# if defined(RT_ARCH_ARM64)
-                         "ldxrh %w[iDst], %[pMem]\n\t"
-# else
-                         "ldrexh %[iDst], %[pMem]\n\t"
-# endif
-                         : [iDst] "=&r" (i32)
-                         : [pMem] "Q" (*pi16)
-                           RTASM_ARM_DMB_SY_COMMA_IN_REG);
-    return (int16_t)i32;
+    return (int16_t)ASMAtomicReadU16((volatile uint16_t RT_FAR *)pi16);
 #else
     ASMMemoryFence();

@@ -2904 +2914 @@
     __asm__ __volatile__("Lstart_ASMAtomicUoReadS16_%=:\n\t"
 # if defined(RT_ARCH_ARM64)
-                         "ldxrh %w[iDst], %[pMem]\n\t"
-# else
-                         "ldrexh %[iDst], %[pMem]\n\t"
+                         "ldurh %w[iDst], %[pMem]\n\t"
+# else
+                         "ldrexh %[iDst], %[pMem]\n\t" /** @todo fix this */
 # endif
                          : [iDst] "=&r" (i32)

@@ -2928 +2938 @@
 #if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
     uint32_t u32;
+# if defined(RTASM_ARM64_USE_FEAT_LSE) && 0 /* very expensive on M1, but alignment advantages with LEA2 (M2?). */
     __asm__ __volatile__("Lstart_ASMAtomicReadU32_%=:\n\t"
                          RTASM_ARM_DMB_SY
-# if defined(RT_ARCH_ARM64)
+                         "casa %w[uDst], wzr, %[pMem]\n\t"
+                         : [uDst] "=&r" (u32)
+                         : [pMem] "Q" (*pu32),
+                           "0" (0)
+                           RTASM_ARM_DMB_SY_COMMA_IN_REG);
+# else
+    __asm__ __volatile__("Lstart_ASMAtomicReadU32_%=:\n\t"
+                         RTASM_ARM_DMB_SY
+# if defined(RT_ARCH_ARM64)
+# if 1 /* ASSUMING proper barrier and aligned access, we should be fine with single-copy atomicity, just like on x86. */
+                         "ldur %w[uDst], %[pMem]\n\t"
+# else
                          "ldxr %w[uDst], %[pMem]\n\t"
-# else
+                         "clrex\n\t"
+# endif
+# else
                          "ldrex %[uDst], %[pMem]\n\t"
-# endif
+                         /** @todo clrex */
+# endif
                          : [uDst] "=&r" (u32)
                          : [pMem] "Q" (*pu32)
                            RTASM_ARM_DMB_SY_COMMA_IN_REG);
+# endif
     return u32;
 #else

@@ -2962 +2988 @@
     __asm__ __volatile__("Lstart_ASMAtomicUoReadU32_%=:\n\t"
 # if defined(RT_ARCH_ARM64)
-                         "ldxr %w[uDst], %[pMem]\n\t"
-# else
-                         "ldrex %[uDst], %[pMem]\n\t"
+                         "ldur %w[uDst], %[pMem]\n\t"
+# else
+                         "ldrex %[uDst], %[pMem]\n\t" /** @todo fix this */
 # endif
                          : [uDst] "=&r" (u32)

@@ -2988 +3014 @@
     Assert(!((uintptr_t)pi32 & 3));
 #if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
-    int32_t i32;
-    __asm__ __volatile__("Lstart_ASMAtomicReadS32_%=:\n\t"
-                         RTASM_ARM_DMB_SY
-# if defined(RT_ARCH_ARM64)
-                         "ldxr %w[iDst], %[pMem]\n\t"
-# else
-                         "ldrex %[iDst], %[pMem]\n\t"
-# endif
-                         : [iDst] "=&r" (i32)
-                         : [pMem] "Q" (*pi32)
-                           RTASM_ARM_DMB_SY_COMMA_IN_REG);
-    return i32;
+    return (int32_t)ASMAtomicReadU32((volatile uint32_t RT_FAR *)pi32);
 #else
     ASMMemoryFence();

@@ -3023 +3038 @@
     __asm__ __volatile__("Lstart_ASMAtomicUoReadS32_%=:\n\t"
 # if defined(RT_ARCH_ARM64)
-                         "ldxr %w[iDst], %[pMem]\n\t"
-# else
-                         "ldrex %[iDst], %[pMem]\n\t"
+                         "ldur %w[iDst], %[pMem]\n\t"
+# else
+                         "ldrex %[iDst], %[pMem]\n\t" /** @todo thix this */
 # endif
                          : [iDst] "=&r" (i32)

@@ -3121 +3136 @@
 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
     Assert(!((uintptr_t)pu64 & 7));
+
+# if defined(RTASM_ARM64_USE_FEAT_LSE) && 0 /* very expensive on M1, but alignment advantages with LEA2 (M2?). */
     __asm__ __volatile__("Lstart_ASMAtomicReadU64_%=:\n\t"
                          RTASM_ARM_DMB_SY
-# if defined(RT_ARCH_ARM64)
+                         "casa %[uDst], xzr, %[pMem]\n\t"
+                         : [uDst] "=&r" (u64)
+                         : [pMem] "Q" (*pu64),
+                           "0" (0)
+                           RTASM_ARM_DMB_SY_COMMA_IN_REG);
+# else
+    __asm__ __volatile__("Lstart_ASMAtomicReadU64_%=:\n\t"
+                         RTASM_ARM_DMB_SY
+# if defined(RT_ARCH_ARM64)
+# if 1 /* ASSUMING proper barrier and aligned access, we should be fine with single-copy atomicity, just like on x86. */
+                         "ldur %[uDst], %[pMem]\n\t"
+# else
                          "ldxr %[uDst], %[pMem]\n\t"
-# else
+                         "clrex\n\t"
+# endif
+# else
                          "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
-# endif
+                         /** @todo clrex */
+# endif
                          : [uDst] "=&r" (u64)
                          : [pMem] "Q" (*pu64)
                            RTASM_ARM_DMB_SY_COMMA_IN_REG);
+# endif
 
 # else
 # error "Port me"

@@ -3225 +3256 @@
     __asm__ __volatile__("Lstart_ASMAtomicUoReadU64_%=:\n\t"
 # if defined(RT_ARCH_ARM64)
-                         "ldxr %[uDst], %[pMem]\n\t"
-# else
-                         "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
+                         "ldur %[uDst], %[pMem]\n\t"
+# else
+                         "ldrexd %[uDst], %H[uDst], %[pMem]\n\t" /* this is required for atomic access since it's a pair */
+                         /** @todo clrex? */
 # endif
                          : [uDst] "=&r" (u64)