- Timestamp: Apr 4, 2024 1:21:36 AM (10 months ago)
- Location: trunk
- Files: 4 edited
trunk/include/iprt/armv8.h (r104056 → r104147)

 }

+
+/** A64: ldp x1, x2, [x3] */
+DECL_FORCE_INLINE(uint32_t) Armv8A64MkInstrLdPairGpr(uint32_t iReg1, uint32_t iReg2, uint32_t iBaseReg, int32_t iImm7 = 0,
+                                                     ARM64INSTRSTLDPAIRTYPE enmType = kArm64InstrStLdPairType_Signed,
+                                                     bool f64Bit = true)
+{
+    return Armv8A64MkInstrStLdPair(true /*fLoad*/, f64Bit ? 2 : 0, enmType, iReg1, iReg2, iBaseReg, iImm7);
+}
+
+
+/** A64: stp x1, x2, [x3] */
+DECL_FORCE_INLINE(uint32_t) Armv8A64MkInstrStPairGpr(uint32_t iReg1, uint32_t iReg2, uint32_t iBaseReg, int32_t iImm7 = 0,
+                                                     ARM64INSTRSTLDPAIRTYPE enmType = kArm64InstrStLdPairType_Signed,
+                                                     bool f64Bit = true)
+{
+    return Armv8A64MkInstrStLdPair(false /*fLoad*/, f64Bit ? 2 : 0, enmType, iReg1, iReg2, iBaseReg, iImm7);
+}
+
+
 typedef enum  /* Size VR Opc */
 {             /*   \  |  /   */
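Not part of the changeset, but a quick usage sketch: with the default iImm7, enmType and f64Bit arguments, and assuming the register indices map straight to x-register numbers as the doc comments suggest, the helpers should encode the instructions named above. Note that iImm7 is the scaled pair offset, which is why the callers further down divide byte offsets by 8.

    /* Hedged example: encode the instructions from the doc comments above. */
    uint32_t const uLdp = Armv8A64MkInstrLdPairGpr(1, 2, 3);  /* ldp x1, x2, [x3] */
    uint32_t const uStp = Armv8A64MkInstrStPairGpr(1, 2, 3);  /* stp x1, x2, [x3] */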
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp (r104144 → r104147)

         ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
 #endif
-        ENTRY(iem.s.DataTlb.aEntries),
         ENTRY(iem.s.DataTlb.uTlbRevision),
         ENTRY(iem.s.DataTlb.uTlbPhysRev),
         ENTRY(iem.s.DataTlb.cTlbHits),
-        ENTRY(iem.s.CodeTlb.aEntries),
+        ENTRY(iem.s.DataTlb.aEntries),
         ENTRY(iem.s.CodeTlb.uTlbRevision),
         ENTRY(iem.s.CodeTlb.uTlbPhysRev),
         ENTRY(iem.s.CodeTlb.cTlbHits),
+        ENTRY(iem.s.CodeTlb.aEntries),
         ENTRY(pVMR3),
         ENTRY(cpum.GstCtx.rax),
…
 DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
 {
+#if 0 /* For profiling the native recompiler code. */
+l_profile_again:
+#endif
     STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
…
 #endif

+#if 0 /* For profiling the native recompiler code. */
+    if (pTb->Thrd.cCalls >= 136)
+    {
+        STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
+        goto l_profile_again;
+    }
+#endif
+
     /*
      * Allocate executable memory, copy over the code we've generated.
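The two `#if 0` blocks form a disabled profiling aid: when switched on, translation blocks with 136 or more threaded calls are recompiled over and over so the native recompiler itself dominates the sample profile. A standalone sketch of the same pattern, with hypothetical names and an added iteration cap so it terminates (the real code loops unconditionally while profiling):

    /* Hypothetical illustration of the profiling-loop pattern; not IEM code. */
    #include <stdio.h>

    static void recompileOnce(unsigned cCalls) { (void)cCalls; /* stand-in for the real work */ }

    int main(void)
    {
        unsigned const cCalls = 200;   /* pretend this TB has 200 threaded calls */
        unsigned cIterations = 0;
    l_profile_again:
        recompileOnce(cCalls);
        if (cCalls >= 136 && ++cIterations < 1000)  /* same 136-call threshold as the patch */
            goto l_profile_again;
        printf("recompiled %u times\n", cIterations);
        return 0;
    }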
trunk/src/VBox/VMM/include/IEMInternal.h (r104135 → r104147)

 typedef struct IEMTLB
 {
-    /** The TLB entries.
-     * We've choosen 256 because that way we can obtain the result directly from a
-     * 8-bit register without an additional AND instruction. */
-    IEMTLBENTRY aEntries[256];
     /** The TLB revision.
      * This is actually only 28 bits wide (see IEMTLBENTRY::uTag) and is incremented
…
     /** Alignment padding. */
     uint32_t au32Padding[6];
+
+    /** The TLB entries.
+     * We've choosen 256 because that way we can obtain the result directly from a
+     * 8-bit register without an additional AND instruction. */
+    IEMTLBENTRY aEntries[256];
 } IEMTLB;
 AssertCompileSizeAlignment(IEMTLB, 64);
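Moving aEntries to the end keeps the scalar members at small offsets, which matters for the ARM64 pair-load path added below: an A64 LDP/STP of 64-bit registers takes a signed 7-bit immediate scaled by 8, i.e. offsets in the range -512 to +504 bytes, so uTlbRevision and uTlbPhysRev must stay within reach of the &aEntries[0] base address the lookup code uses. A standalone sketch of that constraint using a hypothetical stand-in struct (not the real IEMTLB):

    /* Hypothetical layout check; the field set is reduced for illustration. */
    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef struct TLBDEMO
    {
        uint64_t uTlbRevision;
        uint64_t uTlbPhysRev;
        uint64_t cTlbHits;
        uint32_t au32Padding[6];
        struct { uint64_t uTag, fFlagsAndPhysRev; } aEntries[256]; /* large array last */
    } TLBDEMO;

    /* LDP imm7 is scaled by 8 for 64-bit registers: the reachable window is [-512, +504]. */
    static_assert(offsetof(TLBDEMO, aEntries) - offsetof(TLBDEMO, uTlbRevision) <= 512,
                  "uTlbRevision must stay reachable from &aEntries[0] via a single LDP");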
trunk/src/VBox/VMM/include/IEMN8veRecompilerTlbLookup.h (r104145 → r104147)

 #if defined(RT_ARCH_ARM64)
     uint8_t const idxReg3;
+/** @def IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+ * Use LDP and STDP to reduce number of instructions accessing memory at the
+ * cost of using more registers.  This will typically reduce the number of
+ * instructions emitted as well. */
+//# define IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+    uint8_t const idxReg4;
+    uint8_t const idxReg5;
+# endif
 #endif
     uint64_t const uAbsPtr;
…
 #if defined(RT_ARCH_ARM64)
         , idxReg3(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
+# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+        , idxReg4(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
+        , idxReg5(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
+# endif
 #endif
         , uAbsPtr( a_pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(a_idxVarGCPtrMem)].enmKind
…
 #if defined(RT_ARCH_ARM64)
         , idxReg3(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
+# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+        , idxReg4(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
+        , idxReg5(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
+# endif
 #endif
         , uAbsPtr(UINT64_MAX)
…
 #if defined(RT_ARCH_ARM64)
         , idxReg3(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
+# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+        , idxReg4(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
+        , idxReg5(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
+# endif
 #endif
         , uAbsPtr(UINT64_MAX)
…
         iemNativeRegFreeTmp(a_pReNative, idxRegSegAttrib);
 #if defined(RT_ARCH_ARM64)
+# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+        iemNativeRegFreeTmp(a_pReNative, idxReg5);
+        iemNativeRegFreeTmp(a_pReNative, idxReg4);
+# endif
         iemNativeRegFreeTmp(a_pReNative, idxReg3);
 #endif
…
 #if defined(RT_ARCH_ARM64)
                | RT_BIT_32(idxReg3)
+# ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+               | RT_BIT_32(idxReg4)
+               | RT_BIT_32(idxReg5)
+# endif
 #endif
                ;
…
         off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, pTlbState->idxReg1, offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbRevision));
 # else
+#  ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+        /* Load uTlbRevision into reg3 and uTlbPhysRev into reg5.
+           We load the offVCpuTlb + aEntries into reg4 and use it for addressing here
+           and later when calculating pTble (save an instruction). */
+        AssertCompileMemberAlignment(IEMTLB, uTlbRevision, 16); /* It is said that misaligned pair loads doesn't perform well. */
+        AssertCompileAdjacentMembers(IEMTLB, uTlbRevision, uTlbPhysRev);
+        AssertCompile(RTASSERT_OFFSET_OF(IEMTLB, uTlbPhysRev) < RTASSERT_OFFSET_OF(IEMTLB, aEntries));
+        AssertCompile(RTASSERT_OFFSET_OF(VMCPUCC, iem.s.DataTlb.aEntries) < _64K);
+        AssertCompile(RTASSERT_OFFSET_OF(VMCPUCC, iem.s.CodeTlb.aEntries) < _64K); /* if larger do: ADD x3, x27, x3, LSL #y */
+        pCodeBuf[off++] = Armv8A64MkInstrMovZ(pTlbState->idxReg4, offVCpuTlb + RT_UOFFSETOF(IEMTLB, aEntries));
+        pCodeBuf[off++] = Armv8A64MkInstrAddReg(pTlbState->idxReg4, IEMNATIVE_REG_FIXED_PVMCPU, pTlbState->idxReg4);
+        pCodeBuf[off++] = Armv8A64MkInstrLdPairGpr(pTlbState->idxReg3, pTlbState->idxReg5, pTlbState->idxReg4,
+                                                   (RT_OFFSETOF(IEMTLB, uTlbRevision) - RT_OFFSETOF(IEMTLB, aEntries)) / 8);
+#  else
         off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg3, offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbRevision));
+#  endif
         off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
 # endif
…
      * 3b. Calc pTlbe.
      */
+# if !defined(RT_ARCH_ARM64) || !defined(IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR)
     uint32_t const offTlbEntries = offVCpuTlb + RT_UOFFSETOF(IEMTLB, aEntries);
+# endif
 # if defined(RT_ARCH_AMD64)
     /* movzx reg2, byte reg1 */
…
     /* reg2 = (reg1 & 0xff) << 5 */
     pCodeBuf[off++] = Armv8A64MkInstrUbfiz(pTlbState->idxReg2, pTlbState->idxReg1, 5, 8);
+#  ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+    /* reg2 += &pVCpu->iem.s.DataTlb.aEntries / CodeTlb.aEntries */
+    pCodeBuf[off++] = Armv8A64MkInstrAddReg(pTlbState->idxReg2, pTlbState->idxReg2, pTlbState->idxReg4);
+#  else
     /* reg2 += offsetof(VMCPUCC, iem.s.DataTlb.aEntries) */
     off = iemNativeEmitAddGprImmEx(pCodeBuf, off, pTlbState->idxReg2, offTlbEntries, pTlbState->idxReg3 /*iGprTmp*/);
     /* reg2 += pVCpu */
     off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, pTlbState->idxReg2, IEMNATIVE_REG_FIXED_PVMCPU);
+#  endif
 # else
 #  error "Port me"
…
     off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
 # elif defined(RT_ARCH_ARM64)
+#  ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+    AssertCompileMemberAlignment(IEMTLBENTRY, uTag, 16); /* It is said that misaligned pair loads doesn't perform well. */
+    AssertCompile(RT_UOFFSETOF(IEMTLBENTRY, uTag) + sizeof(uint64_t) == RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
+    pCodeBuf[off++] = Armv8A64MkInstrLdPairGpr(pTlbState->idxReg3, pTlbState->idxReg4,
+                                               pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag) / 8);
+#  else
     off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg3, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
+#  endif
     off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
 # else
…
                                         offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbPhysRev));
 # elif defined(RT_ARCH_ARM64)
-    off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg3, pTlbState->idxReg2,
-                                         RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
+#  ifdef IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR
+    pCodeBuf[off++] = Armv8A64MkInstrAnd(pTlbState->idxReg1, pTlbState->idxReg1, pTlbState->idxReg4);
+    off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg5);
+#  else
+    off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg3,
+                                         pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
     pCodeBuf[off++] = Armv8A64MkInstrAnd(pTlbState->idxReg1, pTlbState->idxReg1, pTlbState->idxReg3);
     off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg3, offVCpuTlb + RT_UOFFSETOF(IEMTLB, uTlbPhysRev));
     off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
+#  endif
 # else
 #  error "Port me"
…
      * IEMCPU members and we return a GCPhys address rather than a host pointer.
      */
-    /* mov reg1, [reg2->pbMappingR3] */
-    off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
-                                         RT_UOFFSETOF(IEMTLBENTRY, pbMappingR3));
+# if defined(RT_ARCH_ARM64) && defined(IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR)
+    if (!a_fDataTlb)
+    {
+        /* ldp reg4, reg1, [reg2->GCPhys+pbMappingR3] */
+        AssertCompileMemberAlignment(IEMTLBENTRY, GCPhys, 16);
+        AssertCompileAdjacentMembers(IEMTLBENTRY, GCPhys, pbMappingR3);
+        pCodeBuf[off++] = Armv8A64MkInstrLdPairGpr(pTlbState->idxReg4, pTlbState->idxReg1,
+                                                   pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, GCPhys) / 8);
+    }
+    else
+# endif
+    {
+        /* mov reg1, [reg2->pbMappingR3] */
+        off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
+                                             RT_UOFFSETOF(IEMTLBENTRY, pbMappingR3));
+    }
     /* if (!reg1) goto tlbmiss; */
     /** @todo eliminate the need for this test? */
…
      * Note. We do not need to set offCurInstrStart or offInstrNextByte.
      */
-#  ifdef RT_ARCH_AMD64
+#  if !defined(RT_ARCH_ARM64) || !defined(IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR)
+#   ifdef RT_ARCH_AMD64
     uint8_t const idxReg3 = UINT8_MAX;
-#  else
+#   else
     uint8_t const idxReg3 = pTlbState->idxReg3;
-#  endif
+#   endif
     /* Set pbInstrBuf first since we've got it loaded already. */
     off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg1,
…
                                            pTlbState->idxReg1, idxReg3);
     /* Now set GCPhysInstrBuf last as we'll be returning it in idxRegMemResult. */
+#   if defined(RT_ARCH_ARM64) && defined(IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR)
+    off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg4,
+                                           RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf), idxReg3);
+#   else
     off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg1,
                                          pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, GCPhys));
     off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg1,
                                            RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf), idxReg3);
+#   endif
+#  else
+    /* ARM64: Same as above but using STP.  This ASSUMES that we can trash
+       the 6 bytes following iem.s.cbInstrBufTotal! */
+    AssertCompileMemberAlignment(VMCPUCC, iem.s.pbInstrBuf, 16);
+    AssertCompileAdjacentMembers(VMCPUCC, iem.s.pbInstrBuf, iem.s.uInstrBufPc);
+    AssertCompile(RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf) < 512);
+    /* idxReg1 = reg2->pbMappingR3 (see previous LDP) */
+    /* idxReg3 = FlatPC & ~GUEST_PAGE_OFFSET_MASK. */
+    off = iemNativeEmitGprEqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg3, idxRegFlatPtr, ~(RTGCPTR)GUEST_PAGE_OFFSET_MASK);
+    pCodeBuf[off++] = Armv8A64MkInstrStPairGpr(pTlbState->idxReg1, pTlbState->idxReg3,
+                                               IEMNATIVE_REG_FIXED_PVMCPU, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf) / 8);
+
+    AssertCompileMemberAlignment(VMCPUCC, iem.s.GCPhysInstrBuf, 16);
+    AssertCompileAdjacentMembers(VMCPUCC, iem.s.GCPhysInstrBuf, iem.s.cbInstrBufTotal);
+    AssertCompile(RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf) < 512);
+#   ifndef IEM_WITH_OPAQUE_DECODER_STATE
+    AssertCompileAdjacentMembers(VMCPUCC, iem.s.cbInstrBufTotal, iem.s.offCurInstrStart);
+    AssertCompileAdjacentMembers(VMCPUCC, iem.s.offCurInstrStart, iem.s.fPrefixes); /* these two will be set to ~0. */
+#   endif
+    /* idxReg4 = reg2->GCPhys (see previous LDP) */
+    /* idxReg3 = GUEST_PAGE_SIZE | UINT64_C(0xffffffffffff0000) */
+    pCodeBuf[off++] = Armv8A64MkInstrMovN(pTlbState->idxReg3, ~GUEST_PAGE_SIZE & 0xffff);
+    pCodeBuf[off++] = Armv8A64MkInstrStPairGpr(pTlbState->idxReg4, pTlbState->idxReg3,
+                                               IEMNATIVE_REG_FIXED_PVMCPU, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf) / 8);
+#  endif
     if (!a_fNoReturn) /* (We skip this for iemNativeEmitBltLoadTlbAfterBranch.) */
     {
…
     else
         off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, idxRegMemResult, idxRegFlatPtr, GUEST_PAGE_OFFSET_MASK);
+# if defined(RT_ARCH_ARM64) && defined(IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR)
+    off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxReg4);
+# else
     off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxReg1);
+# endif
     }
 }
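For reference, the pTlbe address that step "3b" builds up reduces to the following C, reflecting the 256-entry table (the index is simply the low byte of the hashed tag, no extra AND) and the shift-by-5 implying a 32-byte TLB entry. This is a hedged translation of the emitted code, not code from the changeset:

    /* C-level equivalent of: UBFIZ reg2, reg1, #5, #8; then ADD reg2, reg2, <base of aEntries>. */
    #include <stdint.h>

    static uintptr_t calcTlbeAddr(uintptr_t uEntriesBase, uint64_t uHashedTag)
    {
        return uEntriesBase + ((uHashedTag & 0xff) << 5); /* 256 entries of 32 bytes each */
    }

With IEMNATIVE_WITH_TLB_LOOKUP_LOAD_STORE_PAIR enabled, uEntriesBase is the value materialized in reg4 (pVCpu plus the aEntries offset), which is also the base register the LDP of uTlbRevision/uTlbPhysRev addresses from.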