Changeset 106180 in vbox for trunk/src/VBox/VMM/VMMAll/target-x86
- Timestamp: Sep 30, 2024 1:51:48 PM (4 months ago)
- File: 1 edited
Legend:
- Unmodified (shown without a prefix)
- Added (prefixed with '+')
- Removed (prefixed with '-')
trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h
r106123 → r106180

 #endif /* RT_ARCH_AMD64 */
 
+
+
+/*********************************************************************************************************************************
+*   EFLAGS                                                                                                                       *
+*********************************************************************************************************************************/
+
+#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
+
+/** @def IEMNATIVE_POSTPONING_REG_MASK
+ * Register suitable for keeping the inputs or result for a postponed EFLAGS
+ * calculation.
+ *
+ * We use non-volatile register here so we don't have to save & restore them
+ * accross callouts (i.e. TLB loads).
+ *
+ * @note On x86 we cannot use RDI and RSI because these are used by the
+ *       opcode checking code.  The usual joy of the x86 instruction set.
+ */
+# ifdef RT_ARCH_AMD64
+#  define IEMNATIVE_POSTPONING_REG_MASK \
+    (IEMNATIVE_CALL_NONVOLATILE_GREG_MASK & ~(RT_BIT_32(X86_GREG_xDI) | RT_BIT_32(X86_GREG_xSI)))
+# else
+#  define IEMNATIVE_POSTPONING_REG_MASK     IEMNATIVE_CALL_NONVOLATILE_GREG_MASK
+# endif
+
+/**
+ * This is normally invoked via IEMNATIVE_CLEAR_POSTPONED_EFLAGS().
+ */
+template<uint32_t const a_fEflClobbered>
+DECL_FORCE_INLINE(void) iemNativeClearPostponedEFlags(PIEMRECOMPILERSTATE pReNative)
+{
+    AssertCompile(!(a_fEflClobbered & ~X86_EFL_STATUS_BITS));
+    uint32_t fEFlags = pReNative->PostponedEfl.fEFlags;
+    if (fEFlags)
+    {
+        if RT_CONSTEXPR(a_fEflClobbered != X86_EFL_STATUS_BITS)
+        {
+            fEFlags &= ~a_fEflClobbered;
+            if (!fEFlags)
+            { /* likely */ }
+            else
+            {
+                Log5(("iemNativeClearPostponedEFlags: Clobbering %#x: %#x -> %#x (op=%d bits=%u)\n", a_fEflClobbered,
+                      pReNative->PostponedEfl.fEFlags, fEFlags, pReNative->PostponedEfl.enmOp, pReNative->PostponedEfl.cOpBits));
+                pReNative->PostponedEfl.fEFlags = fEFlags;
+                return;
+            }
+        }
+
+        /* Do cleanup. */
+        Log5(("iemNativeClearPostponedEFlags: Cleanup of op=%u bits=%u efl=%#x upon clobbering %#x\n",
+              pReNative->PostponedEfl.enmOp, pReNative->PostponedEfl.cOpBits, pReNative->PostponedEfl.fEFlags, a_fEflClobbered));
+        pReNative->PostponedEfl.fEFlags = 0;
+        pReNative->PostponedEfl.enmOp   = kIemNativePostponedEflOp_Invalid;
+        pReNative->PostponedEfl.cOpBits = 0;
+        iemNativeRegFreeTmp(pReNative, pReNative->PostponedEfl.idxReg1);
+        if (pReNative->PostponedEfl.idxReg2 != UINT8_MAX)
+            iemNativeRegFreeTmp(pReNative, pReNative->PostponedEfl.idxReg2);
+        pReNative->PostponedEfl.idxReg1 = UINT8_MAX;
+        pReNative->PostponedEfl.idxReg2 = UINT8_MAX;
+    }
+}
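(The IEMNATIVE_CLEAR_POSTPONED_EFLAGS() wrapper mentioned in the doc comment above is not defined in this file or in this changeset. Presumably it forwards to the template when IEMNATIVE_WITH_EFLAGS_POSTPONING is compiled in and collapses to a no-op otherwise; the snippet below is only a hedged sketch of such a wrapper, not the actual VBox definition.)

    /* Hypothetical sketch only; the real macro lives elsewhere in the recompiler headers. */
    #ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
    # define IEMNATIVE_CLEAR_POSTPONED_EFLAGS(a_pReNative, a_fEflClobbered) \
        iemNativeClearPostponedEFlags<(a_fEflClobbered)>(a_pReNative)
    #else
    # define IEMNATIVE_CLEAR_POSTPONED_EFLAGS(a_pReNative, a_fEflClobbered) ((void)0)
    #endif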
+
+DECL_INLINE_THROW(uint32_t) iemNativeEmitPostponedEFlagsCalcLogical(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t cOpBits,
+                                                                    uint8_t idxRegResult, uint8_t idxRegEfl, uint8_t idxRegTmp)
+{
+#ifdef RT_ARCH_AMD64
+    /*
+     * Do an AND and collect flags and merge them with eflags.
+     */
+    /* Do TEST idxRegResult, idxRegResult to set flags. */
+    off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x84, 0x85, cOpBits, idxRegResult, idxRegResult);
+
+    if (idxRegTmp == X86_GREG_xAX)
+    {
+        /* sahf ; AH = EFLAGS */
+        pCodeBuf[off++] = 0x9e;
+        if (idxRegEfl <= X86_GREG_xBX)
+        {
+            /* mov [CDB]L, AH */
+            pCodeBuf[off++] = 0x88;
+            pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4 /*AH*/, idxRegEfl);
+        }
+        else
+        {
+            /* mov AL, AH */
+            pCodeBuf[off++] = 0x88;
+            pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4 /*AH*/, 0 /*AL*/);
+            /* mov xxL, AL */
+            pCodeBuf[off++] = idxRegEfl >= 8 ? X86_OP_REX_B : X86_OP_REX;
+            pCodeBuf[off++] = 0x88;
+            pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0 /*AL*/, idxRegEfl & 7);
+        }
+    }
+    else if (idxRegEfl != X86_GREG_xAX)
+    {
+        /* pushf */
+        pCodeBuf[off++] = 0x9c;
+        /* pop tmp */
+        if (idxRegTmp >= 8)
+            pCodeBuf[off++] = X86_OP_REX_B;
+        pCodeBuf[off++] = 0x58 + (idxRegTmp & 7);
+        /* mov byte(efl), byte(tmp) */
+        pCodeBuf[off++] = (idxRegEfl >= 8 ? X86_OP_REX_B : X86_OP_REX)
+                        | (idxRegTmp >= 8 ? X86_OP_REX_R : 0);
+        pCodeBuf[off++] = 0x88;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegTmp & 7, idxRegEfl & 7);
+    }
+    else
+    {
+        /* xchg al, ah */
+        pCodeBuf[off++] = 0x86;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4 /*AH*/, 0 /*AL*/);
+        /* sahf ; AH = EFLAGS */
+        pCodeBuf[off++] = 0x9e;
+        /* xchg al, ah */
+        pCodeBuf[off++] = 0x86;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4 /*AH*/, 0 /*AL*/);
+    }
+    /* BTC idxEfl, 11; Clear OF */
+    if (idxRegEfl >= 8)
+        pCodeBuf[off++] = X86_OP_REX_B;
+    pCodeBuf[off++] = 0xf;
+    pCodeBuf[off++] = 0xba;
+    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, idxRegEfl & 7);
+    pCodeBuf[off++] = X86_EFL_OF_BIT;
+
+#elif defined(RT_ARCH_ARM64)
+    /*
+     * Calculate flags.
+     */
+    /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be AND immediate, so use idxRegTmp for constant. */
+    off = iemNativeEmitLoadGpr32ImmExT<~X86_EFL_STATUS_BITS>(pCodeBuf, off, idxRegTmp);
+    off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxRegTmp);
+
+    /* N,Z -> SF,ZF */
+    if (cOpBits < 32)
+        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegResult, cOpBits > 8); /* sets NZ */
+    else
+        pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, idxRegResult, idxRegResult, cOpBits > 32 /*f64Bit*/);
+    pCodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
+    pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, 30);
+    pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
+    AssertCompile(X86_EFL_ZF_BIT + 1 == X86_EFL_SF_BIT);
+
+    /* Calculate 8-bit parity of the result. */
+    pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegResult, idxRegResult, false /*f64Bit*/,
+                                         4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
+    pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegTmp, idxRegTmp, false /*f64Bit*/,
+                                         2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
+    pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegTmp, idxRegTmp, false /*f64Bit*/,
+                                         1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
+    Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
+    pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxRegTmp, idxRegTmp, 0, 0, false /*f64Bit*/);
+    pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
+
+#else
+# error "port me"
+#endif
+    return off;
+}
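(For reference: the three shifted EORs above fold the low byte of the result onto itself so that bit 0 ends up holding the XOR of result bits 0 through 7, i.e. its parity; the EorImm step inverts that bit because x86 sets PF when the low byte contains an even number of 1 bits, and the Bfi deposits it at the PF position. A standalone C illustration of the same computation, not part of the changeset and with a made-up function name:)

    #include <stdint.h>

    /* Illustration of the EOR-folding parity calculation; PF is bit 2 of EFLAGS. */
    static uint32_t CalcExamplePF(uint32_t uResult)
    {
        uint32_t uTmp = uResult ^ (uResult >> 4);   /* fold bits 7:4 onto 3:0 (higher bits never reach bit 0) */
        uTmp ^= uTmp >> 2;                          /* fold bits 3:2 onto 1:0 */
        uTmp ^= uTmp >> 1;                          /* bit 0 = XOR of result bits 0..7 */
        uTmp ^= 1;                                  /* invert: 1 means even parity */
        return (uTmp & 1) << 2;                     /* place the value at the PF bit position */
    }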
+
+
+template<uint32_t const a_bmInputRegs>
+static uint32_t iemNativeDoPostponedEFlagsAtTbExitInternal(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVEINSTR pCodeBuf)
+{
+    /*
+     * We can't do regular register allocations here, but since we're in an exit
+     * path where all pending writes has been flushed and we have a known set of
+     * registers with input for the exit label, we do our own simple stuff here.
+     *
+     * Note! On x86 we prefer using RAX as the first TMP register, so we can
+     *       make use of LAHF which is typically faster than PUSHF/POP.  This
+     *       is why the idxRegTmp allocation is first when there is no EFLAG
+     *       shadow, since RAX is represented by bit 0 in the mask.
+     */
+    uint32_t bmAvailableRegs = ~(a_bmInputRegs | IEMNATIVE_REG_FIXED_MASK) & IEMNATIVE_HST_GREG_MASK;
+    if (pReNative->PostponedEfl.idxReg2 != UINT8_MAX)
+        bmAvailableRegs &= ~(RT_BIT_32(pReNative->PostponedEfl.idxReg1) | RT_BIT_32(pReNative->PostponedEfl.idxReg2));
+    else
+        bmAvailableRegs &= ~RT_BIT_32(pReNative->PostponedEfl.idxReg1);
+
+    /* Use existing EFLAGS shadow if available. */
+    uint8_t idxRegEfl, idxRegTmp;
+    if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
+    {
+        idxRegEfl = pReNative->Core.aidxGstRegShadows[kIemNativeGstReg_EFlags];
+        Assert(idxRegEfl < IEMNATIVE_HST_GREG_COUNT && (bmAvailableRegs & RT_BIT_32(idxRegEfl)));
+        bmAvailableRegs &= ~RT_BIT_32(idxRegEfl);
+#ifdef VBOX_STRICT
+        /** @todo check shadow register content. */
+#endif
+
+        idxRegTmp = ASMBitFirstSetU32(bmAvailableRegs) - 1;
+        bmAvailableRegs &= ~RT_BIT_32(idxRegTmp);
+    }
+    else
+    {
+        idxRegTmp = ASMBitFirstSetU32(bmAvailableRegs) - 1; /* allocate the temp register first to prioritize EAX on x86. */
+        bmAvailableRegs &= ~RT_BIT_32(idxRegTmp);
+
+        idxRegEfl = ASMBitFirstSetU32(bmAvailableRegs) - 1;
+        bmAvailableRegs &= ~RT_BIT_32(idxRegTmp);
+        off = iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, idxRegEfl, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
+    }
+    Assert(bmAvailableRegs != 0);
+
+    /*
+     * Do the actual EFLAGS calculation.
+     */
+    switch (pReNative->PostponedEfl.enmOp)
+    {
+        case kIemNativePostponedEflOp_Logical:
+            Assert(pReNative->PostponedEfl.idxReg2 == UINT8_MAX);
+            off = iemNativeEmitPostponedEFlagsCalcLogical(pCodeBuf, off, pReNative->PostponedEfl.cOpBits,
+                                                          pReNative->PostponedEfl.idxReg1, idxRegEfl, idxRegTmp);
+            break;
+
+        default:
+            AssertFailedBreak();
+    }
+
+    /*
+     * Store EFLAGS.
+     */
+    off = iemNativeEmitStoreGprToVCpuU32Ex(pCodeBuf, off, idxRegEfl, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
+    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
+
+    return off;
+}
+
+
+
+template<uint32_t const a_bmInputRegs>
+DECL_FORCE_INLINE_THROW(uint32_t)
+iemNativeDoPostponedEFlagsAtTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
+{
+    if (pReNative->PostponedEfl.fEFlags)
+    {
+        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
+        return iemNativeDoPostponedEFlagsAtTbExitInternal<a_bmInputRegs>(pReNative, off, pCodeBuf);
+    }
+    return off;
+}
+
+
+template<uint32_t const a_bmInputRegs>
+DECL_FORCE_INLINE_THROW(uint32_t)
+iemNativeDoPostponedEFlagsAtTbExitEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVEINSTR pCodeBuf)
+{
+    if (pReNative->PostponedEfl.fEFlags)
+        return iemNativeDoPostponedEFlagsAtTbExitInternal<a_bmInputRegs>(pReNative, off, pCodeBuf);
+    return off;
+}
+
+
+#endif /* IEMNATIVE_WITH_EFLAGS_POSTPONING */
+
+
 /**
  * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGICAL.
…
  */
 DECL_INLINE_THROW(uint32_t)
-iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl
+iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl,
+                              uint8_t cOpBits, uint8_t idxRegResult
 #ifndef RT_ARCH_AMD64
-                              , uint8_t cOpBits, uint8_t idxRegResult, bool fNativeFlags = false
+                              , bool fNativeFlags = false
 #endif
                               )
 {
     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflTotalLogical);
+    IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, X86_EFL_STATUS_BITS);
+    RT_NOREF(cOpBits, idxRegResult);
 
 #ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
…
     */
     PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
-    if (   IEMLIVENESS_STATE_ARE_STATUS_EFL_TO_BE_CLOBBERED(pLivenessEntry)
+    uint64_t const fEflClobbered = IEMLIVENESS_STATE_GET_WILL_BE_CLOBBERED_SET(pLivenessEntry)
+                                 & IEMLIVENESSBIT_STATUS_EFL_MASK;
+# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
+    uint64_t fEflPostponing;
+# endif
+    if (   fEflClobbered == IEMLIVENESSBIT_STATUS_EFL_MASK
         && !(pReNative->fMc & IEM_MC_F_WITH_FLAGS))
     {
…
         off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, X86_EFL_STATUS_BITS, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
 # endif
-    }
+        Log5(("iemNativeEmitEFlagsForLogical: Skipping %#x\n", X86_EFL_STATUS_BITS));
+    }
+# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
+    else if (   (   (fEflPostponing = IEMLIVENESS_STATE_GET_CAN_BE_POSTPONED_SET(pLivenessEntry) & IEMLIVENESSBIT_STATUS_EFL_MASK)
+                 | fEflClobbered)
+             == IEMLIVENESSBIT_STATUS_EFL_MASK
+             && idxRegResult != UINT8_MAX)
+    {
+        STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflPostponedLogical);
+        pReNative->fSkippingEFlags      = 0;
+        pReNative->PostponedEfl.fEFlags = X86_EFL_STATUS_BITS;
+        pReNative->PostponedEfl.enmOp   = kIemNativePostponedEflOp_Logical;
+        pReNative->PostponedEfl.cOpBits = cOpBits;
+        pReNative->PostponedEfl.idxReg1 = iemNativeRegAllocTmpEx(pReNative, &off, IEMNATIVE_POSTPONING_REG_MASK, false);
+        /** @todo it would normally be possible to use idxRegResult, iff it is
+         *        already a non-volatile register and we can be user the caller
+         *        doesn't modify it.  That'll save a register move and allocation. */
+        off = iemNativeEmitLoadGprFromGpr(pReNative, off, pReNative->PostponedEfl.idxReg1, idxRegResult);
+        Log5(("iemNativeEmitEFlagsForLogical: Postponing %#x op=%u bits=%u reg1=%u\n", X86_EFL_STATUS_BITS,
+              kIemNativePostponedEflOp_Logical, cOpBits, pReNative->PostponedEfl.idxReg1));
+    }
+# endif
     else
 #endif
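(To summarize the strategy selection just added to iemNativeEmitEFlagsForLogical: the status flags are skipped entirely when liveness analysis says every one of them will be clobbered before being read, postponed to the translation-block exit when the remaining ones may at least be postponed and a result register is available, and otherwise calculated inline as before. An illustrative, self-contained restatement in plain C, with made-up enum and function names, not code from the changeset:)

    #include <stdbool.h>
    #include <stdint.h>

    typedef enum { EFL_CALC_NOW, EFL_SKIP, EFL_POSTPONE } EFLSTRATEGY;

    /* Reduces the liveness bitmaps and recompiler state to plain values so the three outcomes are easy to see. */
    static EFLSTRATEGY ChooseEflStrategy(uint64_t fStatusEflMask, uint64_t fEflClobbered, uint64_t fEflPostponable,
                                         bool fMcWithFlags, bool fHaveResultReg)
    {
        /* 1. Every status flag is overwritten before anyone reads it: emit no calculation at all. */
        if (fEflClobbered == fStatusEflMask && !fMcWithFlags)
            return EFL_SKIP;
        /* 2. Whatever is not clobbered may at least be postponed, and the result register is known:
              park the result in a non-volatile register and calculate EFLAGS only on TB exit. */
        if ((fEflPostponable | fEflClobbered) == fStatusEflMask && fHaveResultReg)
            return EFL_POSTPONE;
        /* 3. Otherwise calculate the flags inline, as before this changeset. */
        return EFL_CALC_NOW;
    }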
…
 {
     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflTotalArithmetic);
+    IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, X86_EFL_STATUS_BITS);
 
 #ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
…
 
 
+
+/*********************************************************************************************************************************
+*   Bitwise Logical Operations                                                                                                   *
+*********************************************************************************************************************************/
+
 /**
  * The AND instruction will clear OF, CF and AF (latter is undefined) and
…
     iemNativeVarRegisterRelease(pReNative, idxVarSrc);
 
-    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
+    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
 
 #elif defined(RT_ARCH_ARM64)
…
     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 
-    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
+    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
 
 #elif defined(RT_ARCH_ARM64)
…
 
 #ifdef RT_ARCH_AMD64
-    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
+    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, UINT8_MAX);
 #else
     off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
…
     iemNativeVarRegisterRelease(pReNative, idxVarDst);
 
-    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
+    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, UINT8_MAX);
 
 #elif defined(RT_ARCH_ARM64)
…
     iemNativeVarRegisterRelease(pReNative, idxVarSrc);
 
-    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
+    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
 
 #elif defined(RT_ARCH_ARM64)
…
     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 
-    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
+    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
 
 #elif defined(RT_ARCH_ARM64)
…
     iemNativeVarRegisterRelease(pReNative, idxVarSrc);
 
-    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
+    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
 
 #elif defined(RT_ARCH_ARM64)
…
     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 
-    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
+    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
 
 #elif defined(RT_ARCH_ARM64)
…
     /* tmp &= mxcsr */
     off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegMxCsrXcptFlags, idxRegTmp);
-    off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegMxCsrXcptFlags, X86_MXCSR_XCPT_FLAGS,
-                                                         kIemNativeLabelType_RaiseSseAvxFpRelated);
+    off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseSseAvxFpRelated>(pReNative, off, idxRegMxCsrXcptFlags,
+                                                                                         X86_MXCSR_XCPT_FLAGS);
 
     uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
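(The iemNativeDoPostponedEFlagsAtTbExit wrappers introduced above take the set of host registers carrying live inputs for the exit label as a template argument, so the postponed EFLAGS calculation can avoid clobbering them. Their call sites are not part of this hunk; presumably the translation-block exit emitters invoke them along the following lines. This is a hypothetical sketch with a made-up function name, not code from the changeset:)

    /* Hypothetical TB-exit emitter: flush any postponed EFLAGS work before emitting the exit code. */
    template<uint32_t const a_bmInputRegs>
    uint32_t iemExampleEmitTbExitWithPostponedEfl(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    {
        off = iemNativeDoPostponedEFlagsAtTbExit<a_bmInputRegs>(pReNative, off);
        /* ... emit the actual exit/label code here ... */
        return off;
    }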