Changeset 103750 in vbox
- Timestamp: Mar 10, 2024 8:12:55 PM (11 months ago)
- Location: trunk/src/VBox/VMM
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp
r103739 r103750 5745 5745 case kIemNativeGstSimdRegLdStSz_256: 5746 5746 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc); 5747 break; 5747 5748 case kIemNativeGstSimdRegLdStSz_Low128: 5748 5749 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc); 5750 break; 5749 5751 case kIemNativeGstSimdRegLdStSz_High128: 5750 5752 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1); 5753 break; 5751 5754 default: 5752 5755 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE)); … … 5977 5980 5978 5981 #ifdef RT_ARCH_AMD64 5979 # error "Port me" 5982 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg)) 5983 { 5984 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256 5985 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128); 5986 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm); 5987 } 5988 5989 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg)) 5990 { 5991 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256 5992 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128); 5993 AssertReleaseFailed(); 5994 //off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm); 5995 } 5980 5996 #elif defined(RT_ARCH_ARM64) 5981 5997 /* ASSUMING there are two consecutive host registers to store the potential 256-bit guest register. 
*/ … … 5991 6007 { 5992 6008 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256 5993 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_ Low128);6009 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128); 5994 6010 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg + 1, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm); 5995 6011 } … … 6394 6410 { 6395 6411 # ifdef RT_ARCH_AMD64 6396 # error "Port me!" 6412 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */ 6413 6414 /* movdqa vectmp0, idxSimdReg */ 6415 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg); 6416 6417 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44); 6418 6419 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */ 6420 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 6421 if (idxSimdReg >= 8) 6422 pbCodeBuf[off++] = X86_OP_REX_R; 6423 pbCodeBuf[off++] = 0x0f; 6424 pbCodeBuf[off++] = 0x38; 6425 pbCodeBuf[off++] = 0x29; 6426 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm); 6427 6428 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */ 6429 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 6430 pbCodeBuf[off++] = X86_OP_REX_W 6431 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R) 6432 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B); 6433 pbCodeBuf[off++] = 0x0f; 6434 pbCodeBuf[off++] = 0x3a; 6435 pbCodeBuf[off++] = 0x16; 6436 pbCodeBuf[off++] = 0xeb; 6437 pbCodeBuf[off++] = 0x00; 6438 6439 /* test tmp0, 0xffffffff. */ 6440 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 
0 : X86_OP_REX_B); 6441 pbCodeBuf[off++] = 0xf7; 6442 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7); 6443 pbCodeBuf[off++] = 0xff; 6444 pbCodeBuf[off++] = 0xff; 6445 pbCodeBuf[off++] = 0xff; 6446 pbCodeBuf[off++] = 0xff; 6447 6448 /* je/jz +1 */ 6449 pbCodeBuf[off++] = 0x74; 6450 pbCodeBuf[off++] = 0x01; 6451 6452 /* int3 */ 6453 pbCodeBuf[off++] = 0xcc; 6454 6455 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */ 6456 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 6457 pbCodeBuf[off++] = X86_OP_REX_W 6458 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R) 6459 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B); 6460 pbCodeBuf[off++] = 0x0f; 6461 pbCodeBuf[off++] = 0x3a; 6462 pbCodeBuf[off++] = 0x16; 6463 pbCodeBuf[off++] = 0xeb; 6464 pbCodeBuf[off++] = 0x01; 6465 6466 /* test tmp0, 0xffffffff. */ 6467 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B); 6468 pbCodeBuf[off++] = 0xf7; 6469 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7); 6470 pbCodeBuf[off++] = 0xff; 6471 pbCodeBuf[off++] = 0xff; 6472 pbCodeBuf[off++] = 0xff; 6473 pbCodeBuf[off++] = 0xff; 6474 6475 /* je/jz +1 */ 6476 pbCodeBuf[off++] = 0x74; 6477 pbCodeBuf[off++] = 0x01; 6478 6479 /* int3 */ 6480 pbCodeBuf[off++] = 0xcc; 6481 6397 6482 # elif defined(RT_ARCH_ARM64) 6398 6483 /* mov vectmp0, [gstreg] */ … … 6413 6498 /* brk #0x1000+enmGstReg */ 6414 6499 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000)); 6415 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);6416 6500 } 6417 6501 … … 6430 6514 /* brk #0x1000+enmGstReg */ 6431 6515 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000)); 6432 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);6433 6516 } 6434 6517 … … 6436 6519 # error "Port me!" 
6437 6520 # endif 6521 6522 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off); 6438 6523 return off; 6439 6524 } … … 8169 8254 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm); 8170 8255 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd); 8171 8172 #if 18173 off = iemNativeEmitBrk(pReNative, off, 0x4223); /** @todo Test this when AVX gets actually available. */8174 #endif8175 8256 8176 8257 /** @todo r=aeichner Optimize this more later to have less compares and branches, -
trunk/src/VBox/VMM/include/IEMN8veRecompiler.h (r103741 → r103750; columns: old line, new line, "+" marks an added line)
  173  173
  174  174    # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
       175  + # define IEMNATIVE_SIMD_REG_FIXED_TMP0 5 /* xmm5/ymm5 */
  175  176    # if defined(IEMNATIVE_WITH_SIMD_REG_ACCESS_ALL_REGISTERS) || !defined(_MSC_VER)
  176  177    # define IEMNATIVE_SIMD_REG_FIXED_MASK 0
trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h
r103728 r103750 6831 6831 { 6832 6832 #ifdef RT_ARCH_AMD64 6833 AssertReleaseFailed(); 6833 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */ 6834 pCodeBuf[off++] = 0x66; 6835 if (iVecReg >= 8) 6836 pCodeBuf[off++] = X86_OP_REX_R; 6837 pCodeBuf[off++] = 0x0f; 6838 pCodeBuf[off++] = 0x7f; 6839 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu); 6834 6840 #elif defined(RT_ARCH_ARM64) 6835 6841 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U)); … … 6849 6855 { 6850 6856 #ifdef RT_ARCH_AMD64 6851 off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iVecReg, offVCpu);6857 off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu); 6852 6858 #elif defined(RT_ARCH_ARM64) 6853 6859 off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu); … … 6867 6873 { 6868 6874 #ifdef RT_ARCH_AMD64 6869 AssertReleaseFailed(); 6875 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. 
*/ 6876 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 6877 if (iVecReg >= 8) 6878 pCodeBuf[off++] = X86_OP_REX_R; 6879 pCodeBuf[off++] = 0x0f; 6880 pCodeBuf[off++] = 0x6f; 6881 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu); 6870 6882 #elif defined(RT_ARCH_ARM64) 6871 6883 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U)); … … 6885 6897 { 6886 6898 #ifdef RT_ARCH_AMD64 6887 off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iVecReg, offVCpu);6899 off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu); 6888 6900 #elif defined(RT_ARCH_ARM64) 6889 6901 off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu); … … 6896 6908 6897 6909 6910 #if 0 /* unused */ 6898 6911 /** 6899 6912 * Emits a 256-bit vector register store to a VCpu value. … … 6914 6927 return off; 6915 6928 } 6929 #endif 6916 6930 6917 6931 … … 6924 6938 #ifdef RT_ARCH_AMD64 6925 6939 AssertReleaseFailed(); 6940 RT_NOREF(pReNative, off, iVecReg, offVCpuLow, offVCpuHigh); 6926 6941 #elif defined(RT_ARCH_ARM64) 6927 6942 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */ … … 6972 6987 { 6973 6988 #ifdef RT_ARCH_AMD64 6974 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iVecRegDst, iVecRegSrc);6989 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc); 6975 6990 #elif defined(RT_ARCH_ARM64) 6976 6991 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc); … … 6984 6999 6985 7000 /** 6986 * Emits a gprdst = gprsrc load, 256-bit.7001 * Emits a vecdst = vecsrc load, 256-bit. 
6987 7002 */ 6988 7003 DECL_INLINE_THROW(uint32_t) … … 6990 7005 { 6991 7006 #ifdef RT_ARCH_AMD64 6992 AssertReleaseFailed(); 7007 /* vmovdqa ymm, ymm */ 7008 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5); 7009 if (iVecRegDst >= 8 && iVecRegSrc >= 8) 7010 { 7011 pbCodeBuf[off++] = 0xc4; 7012 pbCodeBuf[off++] = 0x41; 7013 pbCodeBuf[off++] = 0x7d; 7014 pbCodeBuf[off++] = 0x6f; 7015 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7); 7016 } 7017 else 7018 { 7019 pbCodeBuf[off++] = 0xc5; /* Two byte VEX prefix */ 7020 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd; 7021 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f; 7022 pbCodeBuf[off++] = iVecRegSrc >= 8 7023 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7) 7024 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7); 7025 } 6993 7026 #elif defined(RT_ARCH_ARM64) 6994 7027 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
Note: See TracChangeset for help on using the changeset viewer.