Changeset 102961 in vbox for trunk/include
- Timestamp: Jan 18, 2024 9:59:09 PM
- svn:sync-xref-src-repo-rev: 161189
- Files: 1 edited
Legend:
- Lines prefixed with '+' were added in r102961.
- Unprefixed lines are unmodified context; no lines were removed.
trunk/include/iprt/asm.h
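
In brief (summarizing the diff below): r102961 adds ARMv8.1-A LSE (FEAT_LSE) fast paths to the ARM64/ARM32 atomic add, increment and decrement helpers in iprt/asm.h. When RTASM_ARM64_USE_FEAT_LSE is defined, each helper issues a single ldadd/ldaddal fetch-and-add instead of falling through to the RTASM_ARM_LOAD_MODIFY_STORE_RET_* macros; decrements reuse the same instruction by adding ~0 (i.e. -1). The new "M1 benchmark" comments record the measurements (in ps/call) behind the split: on an Apple M1, a dmb sy followed by a plain ldadd measured roughly 3x faster than the single acquire-release ldaddal (e.g. 2114 vs 6907 for ASMAtomicAddU32), so the ldaddal form is additionally gated behind RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB.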
r102960 → r102961

@@ -4435,8 +4435,24 @@ ASMAtomicAddU32

 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
+    /* M1 benchmark: ldaddal=6907 vs dmb+ldadd=2114 vs non-lse=6249 (ps/call) */
+#  if defined(RTASM_ARM64_USE_FEAT_LSE)
+    uint32_t u32OldRet;
+    __asm__ __volatile__("Lstart_ASMAtomicAddU32_%=:\n\t"
+#   if defined(RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB)
+                         "ldaddal %w[uAddend], %w[uOldActual], %[pMem]\n\t"
+#   else
+                         RTASM_ARM_DMB_SY
+                         "ldadd %w[uAddend], %w[uOldActual], %[pMem]\n\t"
+#   endif
+                         : [pMem]       "+Q" (*pu32)
+                         , [uOldActual] "=&r" (u32OldRet)
+                         : [uAddend]    "r" (u32)
+                         : );
+#  else
     RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
                                            "add %w[uNew], %w[uOld], %w[uVal]\n\t",
                                            "add %[uNew], %[uOld], %[uVal]\n\t",
                                            [uVal] "r" (u32));
+#  endif
     return u32OldRet;

@@ -4492,4 +4508,18 @@ ASMAtomicAddU64

 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
+#  if defined(RTASM_ARM64_USE_FEAT_LSE)
+    uint64_t u64OldRet;
+    __asm__ __volatile__("Lstart_ASMAtomicAddU64_%=:\n\t"
+#   if defined(RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB)
+                         "ldaddal %[uAddend], %[uOldActual], %[pMem]\n\t"
+#   else
+                         RTASM_ARM_DMB_SY
+                         "ldadd %[uAddend], %[uOldActual], %[pMem]\n\t"
+#   endif
+                         : [pMem]       "+Q" (*pu64)
+                         , [uOldActual] "=&r" (u64OldRet)
+                         : [uAddend]    "r" (u64)
+                         : );
+#  else
     RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
                                            "add %[uNew], %[uOld], %[uVal]\n\t"
@@ -4498,4 +4528,5 @@ ASMAtomicAddU64 (continued)
                                            "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
                                            [uVal] "r" (u64));
+#  endif
     return u64OldRet;

@@ -4766,7 +4797,24 @@ ASMAtomicIncU32

 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
+    /* M1 benchmark: ldaddal=6887 vs dmb+ldadd=2117 vs non-lse=6247 (ps/call) */
+#  if defined(RTASM_ARM64_USE_FEAT_LSE)
+    uint32_t u32NewRet;
+    __asm__ __volatile__("Lstart_ASMAtomicIncU32_%=:\n\t"
+#   if defined(RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB)
+                         "ldaddal %w[uAddend], %w[uNewRet], %[pMem]\n\t"
+#   else
+                         RTASM_ARM_DMB_SY
+                         "ldadd %w[uAddend], %w[uNewRet], %[pMem]\n\t"
+#   endif
+                         "add %w[uNewRet], %w[uNewRet], #1\n\t"
+                         : [pMem]    "+Q" (*pu32)
+                         , [uNewRet] "=&r" (u32NewRet)
+                         : [uAddend] "r" ((uint32_t)1)
+                         : );
+#  else
     RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
                                            "add %w[uNew], %w[uNew], #1\n\t",
                                            "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
                                            "X" (0) /* dummy */);
+#  endif
     return u32NewRet;

@@ -4821,4 +4869,19 @@ ASMAtomicIncU64

 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
+#  if defined(RTASM_ARM64_USE_FEAT_LSE)
+    uint64_t u64NewRet;
+    __asm__ __volatile__("Lstart_ASMAtomicIncU64_%=:\n\t"
+#   if defined(RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB)
+                         "ldaddal %[uAddend], %[uNewRet], %[pMem]\n\t"
+#   else
+                         RTASM_ARM_DMB_SY
+                         "ldadd %[uAddend], %[uNewRet], %[pMem]\n\t"
+#   endif
+                         "add %[uNewRet], %[uNewRet], #1\n\t"
+                         : [pMem]    "+Q" (*pu64)
+                         , [uNewRet] "=&r" (u64NewRet)
+                         : [uAddend] "r" ((uint64_t)1)
+                         : );
+#  else
     RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
                                            "add %[uNew], %[uNew], #1\n\t"
@@ -4827,4 +4890,5 @@ ASMAtomicIncU64 (continued)
                                            "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
                                            RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
+#  endif
     return u64NewRet;

@@ -4930,7 +4994,24 @@ ASMAtomicDecU32

 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
+    /* M1 benchmark: ldaddal=6887 vs dmb+ldadd=2120 vs non-lse=6260 (ps/call) */
+#  if defined(RTASM_ARM64_USE_FEAT_LSE)
+    uint32_t u32NewRet;
+    __asm__ __volatile__("Lstart_ASMAtomicDecU32_%=:\n\t"
+#   if defined(RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB)
+                         "ldaddal %w[uAddend], %w[uNewRet], %[pMem]\n\t"
+#   else
+                         RTASM_ARM_DMB_SY
+                         "ldadd %w[uAddend], %w[uNewRet], %[pMem]\n\t"
+#   endif
+                         "sub %w[uNewRet], %w[uNewRet], #1\n\t"
+                         : [pMem]    "+Q" (*pu32)
+                         , [uNewRet] "=&r" (u32NewRet)
+                         : [uAddend] "r" (~(uint32_t)0)
+                         : );
+#  else
     RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
                                            "sub %w[uNew], %w[uNew], #1\n\t",
                                            "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
                                            "X" (0) /* dummy */);
+#  endif
     return u32NewRet;

@@ -4985,4 +5066,19 @@ ASMAtomicDecU64

 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
+#  if defined(RTASM_ARM64_USE_FEAT_LSE)
+    uint64_t u64NewRet;
+    __asm__ __volatile__("Lstart_ASMAtomicDecU64_%=:\n\t"
+#   if defined(RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB)
+                         "ldaddal %[uAddend], %[uNewRet], %[pMem]\n\t"
+#   else
+                         RTASM_ARM_DMB_SY
+                         "ldadd %[uAddend], %[uNewRet], %[pMem]\n\t"
+#   endif
+                         "sub %[uNewRet], %[uNewRet], #1\n\t"
+                         : [pMem]    "+Q" (*pu64)
+                         , [uNewRet] "=&r" (u64NewRet)
+                         : [uAddend] "r" (~(uint64_t)0)
+                         : );
+#  else
     RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
                                            "sub %[uNew], %[uNew], #1\n\t"
@@ -4991,4 +5087,5 @@ ASMAtomicDecU64 (continued)
                                            "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
                                            RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
+#  endif
     return u64NewRet;

@@ -5835,7 +5932,19 @@ ASMAtomicUoIncU32

 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
+    /* M1 benchmark: ldadd=2031 vs non-lse=6301 (ps/call) */
+#  if defined(RTASM_ARM64_USE_FEAT_LSE)
+    uint32_t u32NewRet;
+    __asm__ __volatile__("Lstart_ASMAtomicUoIncU32_%=:\n\t"
+                         "ldadd %w[uAddend], %w[uNewRet], %[pMem]\n\t"
+                         "add %w[uNewRet], %w[uNewRet], #1\n\t"
+                         : [pMem]    "+Q" (*pu32)
+                         , [uNewRet] "=&r" (u32NewRet)
+                         : [uAddend] "r" ((uint32_t)1)
+                         : );
+#  else
     RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
                                            "add %w[uNew], %w[uNew], #1\n\t",
                                            "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
                                            "X" (0) /* dummy */);
+#  endif
     return u32NewRet;

@@ -5889,8 +5998,20 @@ ASMAtomicUoDecU32

 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
+    /* M1 benchmark: ldadd=2101 vs non-lse=6268 (ps/call) */
+#  if defined(RTASM_ARM64_USE_FEAT_LSE)
+    uint32_t u32NewRet;
+    __asm__ __volatile__("Lstart_ASMAtomicUoDecU32_%=:\n\t"
+                         "ldadd %w[uAddend], %w[uNewRet], %[pMem]\n\t"
+                         "sub %w[uNewRet], %w[uNewRet], #1\n\t"
+                         : [pMem]    "+Q" (*pu32)
+                         , [uNewRet] "=&r" (u32NewRet)
+                         : [uAddend] "r" (~(uint32_t)0)
+                         : );
+#  else
     RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
                                            "sub %w[uNew], %w[uNew], #1\n\t",
                                            "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
                                            "X" (0) /* dummy */);
+#  endif
     return u32NewRet;
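
For readers unfamiliar with the pattern, here is a minimal standalone sketch of what this changeset introduces. It is not VirtualBox code: the function name MyAtomicIncU32 is made up for illustration, and it hard-codes the ldaddal variant that the diff places behind RTASM_ARM64_USE_FEAT_LSE_WITHOUT_DMB. It assumes an AArch64 compiler targeting a CPU with FEAT_LSE (e.g. built with -march=armv8.1-a): a single atomic fetch-and-add returns the old value, and a plain register add derives the new one, mirroring the new ASMAtomicIncU32 branch.

    #include <stdint.h>

    /* Hypothetical stand-in for the LSE path added in r102961: atomically
       increments *pu32 with acquire-release ordering, returns the new value. */
    static inline uint32_t MyAtomicIncU32(uint32_t volatile *pu32)
    {
        uint32_t u32NewRet;
        __asm__ __volatile__("ldaddal %w[uAddend], %w[uNewRet], %[pMem]\n\t" /* *pMem += 1; old value -> uNewRet */
                             "add %w[uNewRet], %w[uNewRet], #1\n\t"          /* old value + 1 = new value */
                             : [pMem]    "+Q" (*pu32)
                             , [uNewRet] "=&r" (u32NewRet)
                             : [uAddend] "r" ((uint32_t)1)
                             : );
        return u32NewRet;
    }

Note the "=&r" early-clobber on the result register and the "+Q" read-write memory constraint on the counter, both taken straight from the diff. The unordered helpers (ASMAtomicUoIncU32, ASMAtomicUoDecU32) use the same shape with a plain ldadd and no barrier, and the decrement variants pass ~0 as the addend instead of adding a sub instruction inside the atomic step.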