- Timestamp: Apr 11, 2024 8:47:42 PM
- Location: trunk/src/VBox/VMM/VMMAll
- Files: 2 edited
Legend:
- ' ' Unmodified
- '+' Added
- '-' Removed
trunk/src/VBox/VMM/VMMAll/IEMAllAImpl-arm64.S
r104296 → r104299

         /* Do we need to rotate anything at all? */
         and     w2, w2, #0x1f
+.ifne \a_cBits >= 32
         cbz     w2, 99f
-
-.ifne \a_cBits < 32
+.else
+ .ifeq \a_fIntelFlags
+        cbz     w2, 99f                 /* AMD */
+ .endif
+
         /*
          * 8 and 16 bit: w2 = w2 % (a_cBits + 1).
…
          * subs w8, w2, w9
          */
-        mov     w7, w2
-
  .ifne \a_cBits == 16
         subs    w3, w2, #17
…
 
         /*
-         * Do the rotating: (w8 << w2) | (CF << (w2 - 1)) | (w2 > 1 ? (w8 >> (a_cBits - w2 + 1)) : 0)
-         */
-        and     w3, w0, #X86_EFL_CF
-        subs    w4, w2, #1              /* Also: prep for 'w2 > 1' (w2 can't be zero, btw) - think: cmp w2, #1 */
-        lslv    x3, x3, x4              /* x3 = CF << (w2 - 1) */
-
-        mov     w4, #(\a_cBits + 1)
-        sub     w4, w4, w2              /* w4 = a_cBits - w2 + 1 */
+         * Do the rotating: x9 = RORV(w8[0:a_cBits-1] | (CF << 63) | (w8[1:a_cBits-1] << (64-a_cBits-1)) | (CF << a_cBits)), -w2)
+         */
+        neg     w2, w2                  /* w3 = rorv count - this will be masked by 0x3f so it's the same as 64-w2. */
 
         ldr\a_LdStSuff w8, [x1]
-        lslv    x9, x8, x2
-        lsrv    w10, w8, w4
-        csel    w10, wzr, w10, eq       /* if w2 == 1: w10 = 0; else: w10 = w8 >> (a_cBits - w2 + 1); */
-        orr     x9, x9, x3              /* shifted CF */
-        orr     x9, x9, x10
+.ifne \a_cBits < 32
+        orr     x8, x8, x8, LSL #(64 - \a_cBits - 1)
+ .ifeq \a_fIntelFlags
+        bfi     x8, x0, #(\a_cBits), #1 /* AMD: w8[a_cBits] = CF; Avoids conditional branch for CF calc to cover cShift==0. */
+ .endif
+.else
+        lsr     w9, w8, #1
+        orr     x8, x8, x9, LSL #(64 - \a_cBits)
+.endif
+        bfi     x8, x0, #63, #1         /* w8[63] = CF */
+        rorv    x9, x8, x2
         str\a_LdStSuff w9, [x1]
+
         /*
          * Calculate EFLAGS - only CF and OF.
          */
-.ifeq \a_fIntelFlags
-        cbz     w2, 88f                 /* AMD: CF doesn't change if the modded rotate count is zero (only OF does actually). */
-.endif
-        bfxil   x0, x9, #(\a_cBits), #1 /* CF = last bit rotated out */
-88:
+        bfxil   x0, x9, #(\a_cBits), #1 /* CF = last bit rotated 'out' */
 
 .ifne \a_fIntelFlags
…
         /* Do we need to shift anything at all? */
         and     w2, w2, #0x3f
-        cbz     w2, 99f
+        cbz     w2, 99f                 /** @todo eliminate this for < 32 shift with intel flags */
 
         /*
…
 RCL_64 iemAImpl_rcl_u64_intel, 1
 RCL_64 iemAImpl_rcl_u64_amd, 0
+
+
+/*
+ * Rotate Right thru Carry.
+ */
+
+/* uint32_t iemAImpl_rcr_u8( uint32_t fEFlagsIn, uint8_t *pu8Dst, uint8_t cShift); */
+/* uint32_t iemAImpl_rcr_u16(uint32_t fEFlagsIn, uint16_t *pu16Dst, uint8_t cShift); */
+/* uint32_t iemAImpl_rcr_u32(uint32_t fEFlagsIn, uint16_t *pu32Dst, uint8_t cShift); */
+.macro RCR_8_16_32, a_Name, a_cBits, a_fIntelFlags, a_LdStSuff
+ALIGNCODE(IEM_AIMPL_FUNCTION_ALIGNMENT)
+BEGINPROC_HIDDEN \a_Name
+        .cfi_startproc
+
+        /* Do we need to rotate anything at all? */
+        and     w2, w2, #0x1f
+.ifne \a_cBits >= 32
+        cbz     w2, 99f
+.else
+ .ifeq \a_fIntelFlags
+        cbz     w2, 99f                 /* AMD */
+ .endif
+
+        /*
+         * 8 and 16 bit: w2 = w2 % (a_cBits + 1).  (See RCL for details.)
+         */
+ .ifne \a_cBits == 16
+        subs    w3, w2, #17
+        csel    w2, w3, w2, hs
+ .else
+        subs    w3, w2, #18
+        csel    w2, w3, w2, hs
+        subs    w3, w2, #9
+        csel    w2, w3, w2, hs
+ .endif
+ .ifne \a_fIntelFlags
+        cbz     w2, 99f                 /* Intel: Skip everything if the modded rotate count is zero. */
+ .endif
+.endif
+
+        /*
+         * Do the rotating: x9 = RORV(x8[0:a_cBits-1] | (CF << a_cBits) | ((x8 << (a_cBits + 2)) >> 1) | (CF << 63), x2)
+         */
+        add     w3, w2, #1              /* w3 = w2 + 1 */
+
+        subs    w4, w2, #1
+        mov     w5, #(\a_cBits)
+        csel    w4, w5, w5, lo          /* w4 = w2 >= 1 ? w2 - 1 : a_cBits - for CF extraction */
+
+        ldr\a_LdStSuff w8, [x1]
+        bfi     x8, x0, #(\a_cBits), #1 /* Put CF above the input. */
+        bfi     x8, x8, #(\a_cBits + 1), #(64 - \a_cBits - 1) /* Put repeat the register content above that again. */
+.ifne \a_cBits < 32
+ .ifeq \a_fIntelFlags
+        bfi     x8, x0, #63, #1         /* AMD 8- and 16-bit: Put CF at the very top so w2 == 0 works w/o branching. */
+ .endif
+.endif
+        rorv    x9, x8, x2
+        str\a_LdStSuff w9, [x1]
+
+        /*
+         * Calculate EFLAGS - only CF and OF.
+         */
+        bfxil   x0, x9, #63, #1         /* CF = last bit rotated 'out' */
+
+.ifne \a_fIntelFlags
+        /* Intel: OF = first rotate step: fEFlags |= (fInCarry ^ (uint32_t)(uDst >> (a_cBits - 1))) << X86_EFL_OF_BIT; */
+        eor     x11, x8, x8, LSR #1     /* We've got CF in bit #a_cBits in x8 */
+        lsr     w11, w11, #(\a_cBits - 1)
+        bfi     w0, w11, #X86_EFL_OF_BIT, #1
+.else
+        /* AMD: OF = last rotate step: fEFlags |= X86_EFL_GET_OF_ ## a_cBits(uResult ^ (uResult << 1)); */
+        eor     w11, w9, w9, LSL #1
+        lsr     w11, w11, #(\a_cBits - 1)
+        bfi     w0, w11, #X86_EFL_OF_BIT, #1
+.endif
+
+99:
+        ret
+        .cfi_endproc
+.endm
+
+RCR_8_16_32 iemAImpl_rcr_u8, 8, 1, b
+RCR_8_16_32 iemAImpl_rcr_u8_intel, 8, 1, b
+RCR_8_16_32 iemAImpl_rcr_u8_amd, 8, 0, b
+
+RCR_8_16_32 iemAImpl_rcr_u16, 16, 1, h
+RCR_8_16_32 iemAImpl_rcr_u16_intel, 16, 1, h
+RCR_8_16_32 iemAImpl_rcr_u16_amd, 16, 0, h
+
+RCR_8_16_32 iemAImpl_rcr_u32, 32, 1,
+RCR_8_16_32 iemAImpl_rcr_u32_intel, 32, 1,
+RCR_8_16_32 iemAImpl_rcr_u32_amd, 32, 0,
+
+/** @todo this is slightly slower than the C version (release) on an M2. Investigate why. */
+/* uint32_t iemAImpl_rcr_u64(uint32_t fEFlagsIn, uint16_t *pu64Dst, uint8_t cShift); */
+.macro RCR_64, a_Name, a_fIntelFlags
+ALIGNCODE(IEM_AIMPL_FUNCTION_ALIGNMENT)
+BEGINPROC_HIDDEN \a_Name
+        .cfi_startproc
+
+        /* Do we need to shift anything at all? */
+        and     w2, w2, #0x3f
+        cbz     w2, 99f
+
+        /*
+         * Do the rotating: (w8 >> w2) | (CF << (64 - w2)) | (w2 > 1 ? (w8 << (64 - w2 + 1)) : 0)
+         */
+        and     w5, w0, #X86_EFL_CF     /* x5 = input CF - for intel OF calc */
+        neg     w4, w2
+        lslv    x3, x5, x4              /* x3 = CF << (64 - w2) */
+
+        cmp     w2, #1                  /* prep for w2 > 1 */
+        add     w4, w4, #1              /* w4 = -w2 + 1; which when & 0x3f =^= 64 - 2 + 1 */
+
+        ldr     x8, [x1]
+        lsrv    x9, x8, x2
+        lslv    x10, x8, x4
+        csel    x10, xzr, x10, eq       /* if w2 == 1: x10 = 0; else: x10 = x8 << (64 - w2 + 1); */
+        orr     x9, x9, x3              /* shifted CF */
+        orr     x9, x9, x10
+        str     x9, [x1]
+
+        /*
+         * Calculate EFLAGS - only CF and OF.
+         */
+        sub     x11, x2, #1
+        lsr     x11, x8, x11
+        bfi     w0, w11, #0, #1         /* CF = last bit rotated out. */
+
+.ifne \a_fIntelFlags
+        /* Intel: OF = first rotate step: fEFlags |= (fInCarry ^ (uint32_t)(uDst >> (a_cBits - 1))) << X86_EFL_OF_BIT; */
+        eor     x11, x5, x8, LSR #63
+        bfi     w0, w11, #X86_EFL_OF_BIT, #1
+.else
+        /* AMD: OF = last rotate step: fEFlags |= X86_EFL_GET_OF_ ## a_cBits(uResult ^ (uResult << 1)); */
+        eor     x11, x9, x9, LSL #1
+        lsr     x11, x11, #(64 - 1)
+        bfi     w0, w11, #X86_EFL_OF_BIT, #1
+.endif
+
+99:
+        ret
+        .cfi_endproc
+.endm
+
+RCR_64 iemAImpl_rcr_u64, 1
+RCR_64 iemAImpl_rcr_u64_intel, 1
+RCR_64 iemAImpl_rcr_u64_amd, 0
+
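The RORV-based macros above all build on the same trick: the (N+1)-bit rotate that x86 RCL/RCR performs on the {CF, operand} pair is done as one 64-bit rotate of a register that holds the operand, the incoming CF and a replicated copy of the operand, after the count has been reduced modulo N+1 for the 8- and 16-bit cases. As a rough illustration of the underlying x86 semantics only (not VirtualBox code; the function name below is made up for the example, and the OF and Intel-vs-AMD details are omitted), an 8-bit RCL can be modelled in C as a 9-bit rotate:

    #include <stdint.h>

    /* Model of 8-bit RCL: rotate the 9-bit quantity {CF, operand} left.
       The count is masked to 5 bits and reduced modulo 9, as on x86. */
    static uint8_t rcl8_model(uint8_t uDst, uint8_t cShift, unsigned *pfCarry)
    {
        unsigned cRot = (cShift & 0x1f) % 9;            /* effective rotate count */
        uint32_t u9   = uDst | ((*pfCarry & 1u) << 8);  /* 9-bit {CF, operand} */
        if (cRot)
            u9 = ((u9 << cRot) | (u9 >> (9 - cRot))) & 0x1ff;
        *pfCarry = (u9 >> 8) & 1u;                      /* bit 8 is the new CF */
        return (uint8_t)u9;
    }

RCR is the same construction rotated the other way; the assembly does the modulo with subs/csel pairs and then lets a single RORV perform the whole rotate on the widened value for the 8-, 16- and 32-bit cases, while the 64-bit RCR variant combines LSRV/LSLV/ORR results because there is no spare room left in a 64-bit register.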
trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp
r104296 → r104299

 }
 
+#ifndef RT_ARCH_ARM64
+
 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
 EMIT_RCR(64, uint64_t, RT_NOTHING, 1)
…
 EMIT_RCR(64, uint64_t, _amd, 0)
 
-# if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
+#  if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
 EMIT_RCR(32, uint32_t, RT_NOTHING, 1)
-# endif
+#  endif
 EMIT_RCR(32, uint32_t, _intel, 1)
 EMIT_RCR(32, uint32_t, _amd, 0)
 
-# if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
+#  if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
 EMIT_RCR(16, uint16_t, RT_NOTHING, 1)
-# endif
+#  endif
 EMIT_RCR(16, uint16_t, _intel, 1)
 EMIT_RCR(16, uint16_t, _amd, 0)
 
-# if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
+#  if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
 EMIT_RCR(8, uint8_t, RT_NOTHING, 1)
-# endif
+#  endif
 EMIT_RCR(8, uint8_t, _intel, 1)
 EMIT_RCR(8, uint8_t, _amd, 0)
+
+#endif /* !RT_ARCH_ARM64 */
 