Changeset 103003 in vbox for trunk/src/VBox/VMM/VMMAll
- Timestamp:
- Jan 23, 2024 4:19:17 PM (14 months ago)
- svn:sync-xref-src-repo-rev:
- 161235
- Location:
- trunk/src/VBox/VMM/VMMAll
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/VMM/VMMAll/IEMAllAImpl-arm64.S
/*
 * trunk/src/VBox/VMM/VMMAll/IEMAllAImpl-arm64.S - new (r103003) side of the
 * r102977 -> r103003 changeset, reformatted from the changeset viewer.
 *
 * Syntax: GNU as / clang integrated assembler, AArch64, run through the C
 * preprocessor.  Lines the viewer did not show are marked as elided below.
 */

/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include <iprt/asmdefs-arm.h>
#include <iprt/x86.h>


#if RT_CLANG_PREREQ(15, 0)
        .arch_extension flagm           /* not necessary */
#else
        /* clang 12.0.x defaults to apple-a12.  M1 is more similar to A14, I guess.
           For some reason the +crc makes cfinv work (with clang 12).  'flagm' isn't
           recognized, nor is the 'fmi' in the error message for cfinv.  'flagm'
           works for v15 and is enabled by default it seems. */
        .cpu            apple-a14+crc
#endif


/*
 * CALC_EFLAGS - fold the outcome of a preceding ARM64 subtraction into an x86
 * EFLAGS value.
 *
 * Must be expanded while NZCV still holds the flags of the subtraction (SUBS,
 * optionally adjusted by SETF8/SETF16).  The carry is inverted on the way
 * (cfinv / eor #1 / 'cc'), i.e. x86 CF is set when the ARM C flag is clear,
 * which is the borrow convention of a subtraction.
 *
 * Parameters:
 *   regEfl     - 64-bit register holding EFLAGS; updated in place.
 *   regResult  - 64-bit register holding the result (only bits 0..7 are read).
 *   regLeft    - 64-bit register holding the left operand  (only bit 4 is read).
 *   regRight   - 64-bit register holding the right operand (only bit 4 is read).
 *   regTmp     - 64-bit scratch register; clobbered.
 *   fSkipFlags - X86_EFL_XXX mask of flags the macro must leave alone.  The
 *                fast NZCV path is taken for 0 and for exactly X86_EFL_OF; any
 *                other value takes the per-flag cset path.  Only ZF, CF, OF and
 *                SF can be skipped - PF and AF are always recalculated.
 *
 * Clobbers: regTmp; the fast path additionally inverts the C flag (cfinv).
 */
.macro CALC_EFLAGS, regEfl, regResult, regLeft, regRight, regTmp, fSkipFlags=0
        /*
         * Translate the arm NZCV bits into corresponding EFLAGS bits.
         */
 .if \fSkipFlags == 0 || \fSkipFlags == X86_EFL_OF
#if 0
        /* Maybe just a tiny bit slower than the next one. */
        mrs     \regTmp, NZCV           /* [31] = N; [30] = Z; [29] = C; [28] = V */
  .ifeq \fSkipFlags & X86_EFL_OF
        lsr     \regTmp, \regTmp, #28
        bfi     \regEfl, \regTmp, #X86_EFL_OF_BIT, #1
        lsr     \regTmp, \regTmp, #1
  .else
        lsr     \regTmp, \regTmp, #29
  .endif
        eor     \regTmp, \regTmp, #1    /* inverts the carry flag to x86 style. */
        bfi     \regEfl, \regTmp, #X86_EFL_CF_BIT, #1   /* CF(0) = C */
        lsr     \regTmp, \regTmp, #1
        bfi     \regEfl, \regTmp, #X86_EFL_ZF_BIT, #2   /* SF(7),ZF(6) = NZ */
#else
        /* This seems to be the faster one... */
        cfinv                           /* invert C so it can be copied straight into CF */
        mrs     \regTmp, NZCV           /* [31] = N; [30] = Z; [29] = C; [28] = V */
  .ifeq (\fSkipFlags & X86_EFL_OF)
        lsr     \regTmp, \regTmp, #28
        bfi     \regEfl, \regTmp, #X86_EFL_OF_BIT, #1
        lsr     \regTmp, \regTmp, #1
  .else
        lsr     \regTmp, \regTmp, #29
  .endif
        bfi     \regEfl, \regTmp, #X86_EFL_CF_BIT, #1   /* CF(0) = C */
        lsr     \regTmp, \regTmp, #1
        bfi     \regEfl, \regTmp, #X86_EFL_ZF_BIT, #2   /* SF(7),ZF(6) = NZ */
#endif
 .else
        /* Definitely slower than the above two, but easier to handle wrt skipping parts. */
  .ifeq \fSkipFlags & X86_EFL_ZF
        cset    \regTmp, eq
        bfi     \regEfl, \regTmp, #X86_EFL_ZF_BIT, #1
  .endif
  .ifeq \fSkipFlags & X86_EFL_CF
        cset    \regTmp, cc
        bfi     \regEfl, \regTmp, #X86_EFL_CF_BIT, #1
  .endif
  .ifeq \fSkipFlags & X86_EFL_OF
        cset    \regTmp, vs
        bfi     \regEfl, \regTmp, #X86_EFL_OF_BIT, #1
  .endif
  .ifeq \fSkipFlags & X86_EFL_SF
        cset    \regTmp, mi
        bfi     \regEfl, \regTmp, #X86_EFL_SF_BIT, #1
  .endif
 .endif


        /*
         * Parity calculation for low byte of the result (sucks that there is no popcount for gprs).
         * The three folds XOR bits 0..7 of the result together into bit 0.
         */
        eor     \regTmp, \regResult, \regResult, LSR #4
        eor     \regTmp, \regTmp, \regTmp, LSR #2
        eor     \regTmp, \regTmp, \regTmp, LSR #1
        eor     \regTmp, \regTmp, #1
        bfi     \regEfl, \regTmp, #X86_EFL_PF_BIT, #1   /* PF(2) = popcount(regResult & 0xff) & 1 ^ 1 */

        /*
         * Auxiliary carry / borrow flag.  This is related to 8-bit BCD.
         */
        eor     \regTmp, \regLeft, \regRight
        eor     \regTmp, \regTmp, \regResult
        lsr     \regTmp, \regTmp, #X86_EFL_AF_BIT
        bfi     \regEfl, \regTmp, #X86_EFL_AF_BIT, #1   /* AF(4) = ((regLeft ^ regRight ^ regResult) & X86_EFL_AF) >> X86_EFL_AF_BIT */

        /* done */
.endm


/*
 * ... unchanged lines elided by the changeset viewer (new lines 123-154); the
 * elided text ends inside a block comment ...
 */


/* IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_locked, (uint8_t *pu8Mem, uint8_t *pu8Reg)); */

/*
 * The SUB instruction.
 *
 * All four variants: x0 = puDst (read and written), w1/x1 = uSrc,
 * x2 = pfEFlags (read and written).  Only the scratch registers w8..w12 and
 * NZCV are modified.
 */

/* void iemAImpl_sub_u8(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags); */
        .p2align        2
        .private_extern NAME(iemAImpl_sub_u8)
        .globl          NAME(iemAImpl_sub_u8)
NAME(iemAImpl_sub_u8):
        .cfi_startproc
        /* Do the subtraction. */
        ldrb    w8, [x0]
        /*and     w1, w1, #0xff - should not be necessary. */
        /* NOTE(review): presumably relies on the caller zero-extending uSrc to
           32 bits (Apple arm64 convention) - confirm before building for another ABI. */
        subs    w9, w8, w1              /* w9 = w8 (*puDst) - w1 (uSrc) */
        setf8   w9                      /* redo N and Z for the 8-bit result; C is left as set by subs */
        strb    w9, [x0]

        /* Load EFLAGS. */
        ldr     w10, [x2]               /* w10 = eflags; CF=0 PF=2 AF=4 ZF=6 SF=7 OF=11 */
        and     w9, w9, #0xff           /* 8-bit result (was #0xffff, copied from the 16-bit variant) */
        CALC_EFLAGS x10, x9, x8, x1, x11, X86_EFL_OF

        /* The overflow flag calc done by setf8 isn't correct for subtraction, so we have to
           figure it out ourselves. (See IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC for details.) */
        eor     w11, w8, w1             /* input dst ^ source (simplified from ~(dst ^ (source ^ 0x80)) ). */
        eor     w12, w8, w9             /* input dst ^ result */
        and     w11, w12, w11
        lsr     w11, w11, #7            /* sign bit of the 8-bit operands -> bit 0 */
        bfi     w10, w11, #X86_EFL_OF_BIT, #1

        /* Done with EFLAGS. */
        str     w10, [x2]
        ret
        .cfi_endproc


/* void iemAImpl_sub_u16(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags); */
        .p2align        2
        .private_extern NAME(iemAImpl_sub_u16)
        .globl          NAME(iemAImpl_sub_u16)
NAME(iemAImpl_sub_u16):
        .cfi_startproc
        /* Do the subtraction. */
        ldrh    w8, [x0]
        /*and     w1, w1, #0xffff - should not be necessary. */
        /* NOTE(review): presumably relies on the caller zero-extending uSrc to
           32 bits (Apple arm64 convention) - confirm before building for another ABI. */
        subs    w9, w8, w1              /* w9 = w8 (*puDst) - w1 (uSrc) */
        setf16  w9                      /* redo N and Z for the 16-bit result; C is left as set by subs */
        strh    w9, [x0]

        /* Load EFLAGS. */
        ldr     w10, [x2]               /* w10 = eflags; CF=0 PF=2 AF=4 ZF=6 SF=7 OF=11 */
        and     w9, w9, #0xffff
        CALC_EFLAGS x10, x9, x8, x1, x11, X86_EFL_OF

        /* The overflow flag calc done by setf16 isn't correct for subtraction, so we have to
           figure it out ourselves. (See IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC for details.) */
        eor     w11, w8, w1             /* input dst ^ source (simplified from ~(dst ^ (source ^ 0x8000)) ). */
        eor     w12, w8, w9             /* input dst ^ result */
        and     w11, w12, w11
        lsr     w11, w11, #15           /* sign bit of the 16-bit operands -> bit 0 */
        bfi     w10, w11, #X86_EFL_OF_BIT, #1

        /* Done with EFLAGS. */
        str     w10, [x2]
        ret
        .cfi_endproc


/* void iemAImpl_sub_u32(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags); */
        .p2align        2
        .private_extern NAME(iemAImpl_sub_u32)
        .globl          NAME(iemAImpl_sub_u32)
NAME(iemAImpl_sub_u32):
        .cfi_startproc
        /* Do the subtraction. */
        ldr     w8, [x0]
        subs    w9, w8, w1              /* w9 = w8 (*puDst) - w1 (uSrc) */
        str     w9, [x0]

        /* Load EFLAGS and update all six status flags straight from NZCV
           (the 32-bit subs already produces the right N, Z, C and V). */
        ldr     w10, [x2]               /* w10 = eflags; CF=0 PF=2 AF=4 ZF=6 SF=7 OF=11 */
        CALC_EFLAGS x10, x9, x8, x1, x11

        str     w10, [x2]
        ret
        .cfi_endproc


/* void iemAImpl_sub_u64(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags); */
        .p2align        2
        .private_extern NAME(iemAImpl_sub_u64)
        .globl          NAME(iemAImpl_sub_u64)
NAME(iemAImpl_sub_u64):
        .cfi_startproc
        /* Do the subtraction. */
        ldr     x8, [x0]
        subs    x9, x8, x1              /* x9 = x8 (*puDst) - x1 (uSrc) */
        str     x9, [x0]

        /* Load EFLAGS and update all six status flags straight from NZCV. */
        ldr     w10, [x2]               /* w10 = eflags; CF=0 PF=2 AF=4 ZF=6 SF=7 OF=11 */
        CALC_EFLAGS x10, x9, x8, x1, x11

        str     w10, [x2]
        ret
        .cfi_endproc
trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp
r102896 r103003 757 757 * SUB 758 758 */ 759 # if !defined(RT_ARCH_ARM64) 759 760 760 761 IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags)) … … 766 767 } 767 768 768 # if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)769 # if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) 769 770 770 771 IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags)) … … 794 795 } 795 796 796 # endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */ 797 # endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */ 798 # endif /* !RT_ARCH_ARM64 */ 797 799 798 800 /*
Note:
See TracChangeset
for help on using the changeset viewer.