Changeset 87195 in vbox
- Timestamp:
- Jan 8, 2021 11:18:04 AM (4 years ago)
- svn:sync-xref-src-repo-rev:
- 142148
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/iprt/asm.h
r87192 r87195 6567 6567 * @remarks Similar to ffs() in BSD. 6568 6568 */ 6569 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6569 #if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN 6570 6570 RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO; 6571 6571 #else … … 6578 6578 else 6579 6579 iBit = 0; 6580 # elif RT_INLINE_ASM_GNU_STYLE 6580 6581 # elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) 6582 # if RT_INLINE_ASM_GNU_STYLE 6581 6583 uint32_t iBit; 6582 6584 __asm__ __volatile__("bsf %1, %0\n\t" … … 6590 6592 : "rm" (u32) 6591 6593 : "cc"); 6592 # else6594 # else 6593 6595 uint32_t iBit; 6594 6596 _asm … … 6603 6605 mov [iBit], eax 6604 6606 } 6607 # endif 6608 6609 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32) 6610 /* 6611 * Using the "count leading zeros (clz)" instruction here because there 6612 * is no dedicated instruction to get the first set bit. 6613 * Need to reverse the bits in the value with "rbit" first because 6614 * "clz" starts counting from the most significant bit. 6615 */ 6616 uint32_t iBit; 6617 __asm__ __volatile__( 6618 # if defined(RT_ARCH_ARM64) 6619 "rbit %w[uVal], %w[uVal]\n\t" 6620 "clz %w[iBit], %w[uVal]\n\t" 6621 # else 6622 "rbit %[uVal], %[uVal]\n\t" 6623 "clz %[iBit], %[uVal]\n\t" 6624 # endif 6625 : [uVal] "=r" (u32) 6626 , [iBit] "=r" (iBit) 6627 : "[uVal]" (u32)); 6628 if (iBit != 32) 6629 iBit++; 6630 else 6631 iBit = 0; /* No bit set. */ 6632 6633 # else 6634 # error "Port me" 6605 6635 # endif 6606 6636 return iBit; … … 6634 6664 * @remarks Similar to ffs() in BSD. 
6635 6665 */ 6636 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6666 #if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN 6637 6667 RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO; 6638 6668 #else … … 6654 6684 iBit = 0; 6655 6685 # endif 6656 # elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64 6686 6687 # elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) 6657 6688 uint64_t iBit; 6658 6689 __asm__ __volatile__("bsfq %1, %0\n\t" … … 6666 6697 : "rm" (u64) 6667 6698 : "cc"); 6699 6700 # elif defined(RT_ARCH_ARM64) 6701 uint64_t iBit; 6702 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t" 6703 "clz %[iBit], %[uVal]\n\t" 6704 : [uVal] "=r" (u64) 6705 , [iBit] "=r" (iBit) 6706 : "[uVal]" (u64)); 6707 if (iBit != 64) 6708 iBit++; 6709 else 6710 iBit = 0; /* No bit set. */ 6711 6668 6712 # else 6669 6713 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64); … … 6690 6734 * @remarks For 16-bit bs3kit code. 6691 6735 */ 6692 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6736 #if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN 6693 6737 RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO; 6694 6738 #else … … 6709 6753 * @remark Similar to fls() in BSD. 
6710 6754 */ 6711 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6755 #if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN 6712 6756 RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO; 6713 6757 #else … … 6720 6764 else 6721 6765 iBit = 0; 6722 # elif RT_INLINE_ASM_GNU_STYLE 6766 6767 # elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) 6768 # if RT_INLINE_ASM_GNU_STYLE 6723 6769 uint32_t iBit; 6724 6770 __asm__ __volatile__("bsrl %1, %0\n\t" … … 6732 6778 : "rm" (u32) 6733 6779 : "cc"); 6734 # else6780 # else 6735 6781 uint32_t iBit; 6736 6782 _asm … … 6745 6791 mov [iBit], eax 6746 6792 } 6793 # endif 6794 6795 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32) 6796 uint32_t iBit; 6797 __asm__ __volatile__( 6798 # if defined(RT_ARCH_ARM64) 6799 "clz %w[iBit], %w[uVal]\n\t" 6800 # else 6801 "clz %[iBit], %[uVal]\n\t" 6802 # endif 6803 : [iBit] "=r" (iBit) 6804 : [uVal] "r" (u32)); 6805 iBit = 32 - iBit; 6806 6807 # else 6808 # error "Port me" 6747 6809 # endif 6748 6810 return iBit; … … 6776 6838 * @remark Similar to fls() in BSD. 
6777 6839 */ 6778 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6840 #if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN 6779 6841 RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO; 6780 6842 #else … … 6796 6858 iBit = 0; 6797 6859 # endif 6798 # elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64 6860 6861 # elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) 6799 6862 uint64_t iBit; 6800 6863 __asm__ __volatile__("bsrq %1, %0\n\t" … … 6808 6871 : "rm" (u64) 6809 6872 : "cc"); 6873 6874 # elif defined(RT_ARCH_ARM64) 6875 uint64_t iBit; 6876 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t" 6877 : [iBit] "=r" (iBit) 6878 : [uVal] "r" (u64)); 6879 iBit = 64 - iBit; 6880 6810 6881 # else 6811 6882 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32)); … … 6814 6885 else 6815 6886 iBit = ASMBitLastSetU32((uint32_t)u64); 6816 # endif6887 # endif 6817 6888 return (unsigned)iBit; 6818 6889 } … … 6830 6901 * @remarks For 16-bit bs3kit code. 6831 6902 */ 6832 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6903 #if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN 6833 6904 RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO; 6834 6905 #else … … 6846 6917 * @param u16 16-bit integer value. 
6847 6918 */ 6848 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6919 #if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN 6849 6920 RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO; 6850 6921 #else … … 6852 6923 { 6853 6924 # if RT_INLINE_ASM_USES_INTRIN 6854 u16 = _byteswap_ushort(u16); 6855 # elif RT_INLINE_ASM_GNU_STYLE 6925 return _byteswap_ushort(u16); 6926 6927 # elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) 6928 # if RT_INLINE_ASM_GNU_STYLE 6856 6929 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc"); 6857 # else6930 # else 6858 6931 _asm 6859 6932 { … … 6862 6935 mov [u16], ax 6863 6936 } 6864 # endif6937 # endif 6865 6938 return u16; 6939 6940 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32) 6941 uint32_t u32Ret; 6942 __asm__ __volatile__( 6943 # if defined(RT_ARCH_ARM64) 6944 "rev16 %w[uRet], %w[uVal]\n\t" 6945 # else 6946 "rev16 %[uRet], %[uVal]\n\t" 6947 # endif 6948 : [uRet] "=r" (u32Ret) 6949 : [uVal] "r" (u16)); 6950 return (uint16_t)u32Ret; 6951 6952 # else 6953 # error "Port me" 6954 # endif 6866 6955 } 6867 6956 #endif … … 6874 6963 * @param u32 32-bit integer value. 
6875 6964 */ 6876 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6965 #if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN 6877 6966 RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO; 6878 6967 #else … … 6880 6969 { 6881 6970 # if RT_INLINE_ASM_USES_INTRIN 6882 u32 = _byteswap_ulong(u32); 6883 # elif RT_INLINE_ASM_GNU_STYLE 6971 return _byteswap_ulong(u32); 6972 6973 # elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) 6974 # if RT_INLINE_ASM_GNU_STYLE 6884 6975 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32)); 6885 # else6976 # else 6886 6977 _asm 6887 6978 { … … 6890 6981 mov [u32], eax 6891 6982 } 6892 # endif6983 # endif 6893 6984 return u32; 6985 6986 # elif defined(RT_ARCH_ARM64) 6987 uint64_t u64Ret; 6988 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t" 6989 : [uRet] "=r" (u64Ret) 6990 : [uVal] "r" ((uint64_t)u32)); 6991 return (uint32_t)u64Ret; 6992 6993 # elif defined(RT_ARCH_ARM32) 6994 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t" 6995 : [uRet] "=r" (u32) 6996 : [uVal] "[uRet]" (u32)); 6997 return u32; 6998 6999 # else 7000 # error "Port me" 7001 # endif 6894 7002 } 6895 7003 #endif … … 6905 7013 { 6906 7014 #if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN 6907 u64 = _byteswap_uint64(u64); 6908 #else 6909 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32 6910 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32)); 6911 #endif 7015 return _byteswap_uint64(u64); 7016 7017 # elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) 7018 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64)); 6912 7019 return u64; 7020 7021 # elif defined(RT_ARCH_ARM64) 7022 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t" 7023 : [uRet] "=r" (u64) 7024 : [uVal] "[uRet]" (u64)); 7025 return u64; 7026 7027 #else 7028 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32 7029 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32)); 7030 #endif 6913 7031 } 6914 7032 … … 6928 7046 # if RT_INLINE_ASM_USES_INTRIN 6929 7047 return _rotl(u32, 
cShift); 7048 6930 7049 # elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)) 6931 7050 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc"); 6932 7051 return u32; 7052 7053 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32) 7054 __asm__ __volatile__( 7055 # if defined(RT_ARCH_ARM64) 7056 "ror %w[uRet], %w[uVal], %w[cShift]\n\t" 7057 # else 7058 "ror %[uRet], %[uVal], %[cShift]\n\t" 7059 # endif 7060 : [uRet] "=r" (u32) 7061 : [uVal] "[uRet]" (u32) 7062 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */ 7063 return u32; 7064 6933 7065 # else 6934 7066 cShift &= 31; … … 6953 7085 # if RT_INLINE_ASM_USES_INTRIN 6954 7086 return _rotr(u32, cShift); 7087 6955 7088 # elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)) 6956 7089 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc"); 6957 7090 return u32; 7091 7092 # elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32) 7093 __asm__ __volatile__( 7094 # if defined(RT_ARCH_ARM64) 7095 "ror %w[uRet], %w[uVal], %w[cShift]\n\t" 7096 # else 7097 "ror %[uRet], %[uVal], %[cShift]\n\t" 7098 # endif 7099 : [uRet] "=r" (u32) 7100 : [uVal] "[uRet]" (u32) 7101 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */ 7102 return u32; 7103 6958 7104 # else 6959 7105 cShift &= 31; … … 6975 7121 #if RT_INLINE_ASM_USES_INTRIN 6976 7122 return _rotl64(u64, cShift); 7123 6977 7124 #elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) 6978 7125 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc"); 6979 7126 return u64; 7127 6980 7128 #elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86) 6981 7129 uint32_t uSpill; … … 6997 7145 : "cc"); 6998 7146 return u64; 7147 7148 # elif defined(RT_ARCH_ARM64) 7149 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t" 7150 : [uRet] "=r" (u64) 7151 : [uVal] "[uRet]" (u64) 7152 , [cShift] "r" 
((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */ 7153 return u64; 7154 6999 7155 #else 7000 7156 cShift &= 63; … … 7015 7171 #if RT_INLINE_ASM_USES_INTRIN 7016 7172 return _rotr64(u64, cShift); 7173 7017 7174 #elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) 7018 7175 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc"); 7019 7176 return u64; 7177 7020 7178 #elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86) 7021 7179 uint32_t uSpill; … … 7037 7195 : "cc"); 7038 7196 return u64; 7197 7198 # elif defined(RT_ARCH_ARM64) 7199 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t" 7200 : [uRet] "=r" (u64) 7201 : [uVal] "[uRet]" (u64) 7202 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */ 7203 return u64; 7204 7039 7205 #else 7040 7206 cShift &= 63;
Note:
See TracChangeset
for help on using the changeset viewer.