VirtualBox

Changeset 103875 in vbox


Ignore:
Timestamp:
Mar 16, 2024 1:27:39 AM (13 months ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
162246
Message:

VMM/IEM: Reworked iemNativeEmitMaybeRaiseAvxRelatedXcpt to avoid relative 14-bit jumps on arm, optimizing it to a single branch while at it. bugref:10614 bugref:10370

Location:
trunk/src/VBox/VMM
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h

    r103865 r103875  
    933933        /* Allocate a temporary CR0 and CR4 register. */
    934934        uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
    935         uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
    936         uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
     935        uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
     936        uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
    937937        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    938938
     
    969969        /* -> idxTmpReg[0]=OSFXSR;  idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
    970970        Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
    971         pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);              /* -> bit 0 = ~OSFXSR */
     971        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
    972972        /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
    973973        off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
    974974                                                             idxLabelRaiseSseRelated);
     975
     976#else
     977# error "Port me!"
    975978#endif
    976979
     
    10141017         * Make sure we don't have any outstanding guest register writes as we may
    10151018         * raise an \#UD or \#NM and all guest register must be up to date in CPUMCTX.
    1016          *
    1017          * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
    10181019         */
     1020        /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
    10191021        off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
    10201022
     
    10261028
    10271029        /* Allocate a temporary CR0, CR4 and XCR0 register. */
    1028         uint8_t const idxCr0Reg       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
    1029         uint8_t const idxCr4Reg       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
    1030         uint8_t const idxXcr0Reg      = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
    1031         uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
    1032         uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
    1033 
    1034         /** @todo r=aeichner Optimize this more later to have less compares and branches,
    1035          *                   (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h but check that it has some
    1036          *                   actual performance benefit first). */
     1030        uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
     1031        uint8_t const idxCr0Reg  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
     1032        uint8_t const idxCr4Reg  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
     1033        uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
     1034        uint8_t const idxTmpReg  = iemNativeRegAllocTmp(pReNative, &off);
     1035
    10371036        /*
    1038          * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
    1039          *     return raisexcpt();
     1037         * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
     1038         *  if (RT_LIKELY(   (  (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
     1039         *                    | (pVCpu->cpum.GstCtx.cr4     & X86_CR4_OSXSAVE)
     1040         *                    | (pVCpu->cpum.GstCtx.cr0     & X86_CR0_TS))
     1041         *                == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
     1042         *       { likely }
     1043         *  else { goto RaiseAvxRelated; }
    10401044         */
    1041         const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
    1042         off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxXcr0Reg);
    1043         off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
    1044         iemNativeRegFreeTmp(pReNative, idxRegTmp);
    1045 
    1046         /*
    1047          * if (!(cr4 & X86_CR4_OSXSAVE))
    1048          *     return raisexcpt();
    1049          */
    1050         off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT, idxLabelRaiseUd);
    1051         /*
    1052          * if (cr0 & X86_CR0_TS)
    1053          *     return raisexcpt();
    1054          */
    1055         off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
    1056 
    1057         /* Free but don't flush the CR0, CR4 and XCR0 register. */
     1045#ifdef RT_ARCH_AMD64
     1046        /*  if (!(  (  ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
     1047                     | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)   << 1)
     1048                     | ((cr0 >> X86_CR0_TS_BIT)      & 1)         )
     1049                  ^ 0x1a) ) { likely }
     1050            else            { goto RaiseAvxRelated; } */
     1051        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
     1052        //pCodeBuf[off++] = 0xcc;
     1053        off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off,                 idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
     1054        off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off,              idxTmpReg, idxXcr0Reg);
     1055        off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off,            idxCr4Reg, X86_CR4_OSXSAVE_BIT);
     1056        off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
     1057        /* -> idxTmpReg[0]=CR4.OSXSAVE;  idxTmpReg[1]=0; idxTmpReg[2]=SSE;  idxTmpReg[3]=YMM; (the rest is zero) */
     1058        off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off,            idxCr0Reg, X86_CR0_TS_BIT);
     1059        off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
     1060        /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
     1061        off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off,                idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
     1062        /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
     1063        off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
     1064
     1065#elif defined(RT_ARCH_ARM64)
     1066        /*  if (!(  (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
     1067                  | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
     1068            else                                    { goto RaiseAvxRelated; } */
     1069        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
     1070        //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
     1071        Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
     1072        pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
     1073        pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
     1074        /* -> idxTmpReg[0]=CR4.OSXSAVE;  idxTmpReg[1]=SSE;  idxTmpReg[2]=YMM; (the rest is zero) */
     1075        Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
     1076        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
     1077        /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
     1078        pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
     1079        pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
     1080        /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
     1081        off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
     1082                                                             idxLabelRaiseAvxRelated);
     1083
     1084#else
     1085# error "Port me!"
     1086#endif
     1087
     1088        iemNativeRegFreeTmp(pReNative, idxTmpReg);
    10581089        iemNativeRegFreeTmp(pReNative, idxCr0Reg);
    10591090        iemNativeRegFreeTmp(pReNative, idxCr4Reg);
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r103872 r103875  
    52615261
    52625262
     5263#if defined(RT_ARCH_AMD64)
     5264/**
     5265 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
          *
          * This emits the AMD64 'rcl' instruction with a register destination.  No
          * buffer space check is done here; the bytes are stored directly into
          * @a pCodeBuf, so the caller must have reserved room beforehand (at most 4
          * bytes are written: optional REX prefix, opcode, ModR/M and optional imm8).
          *
          * @returns New code buffer offset (first byte after the emitted instruction).
          * @param   pCodeBuf    The instruction buffer to write to.
          * @param   off         The current code buffer offset.
          * @param   iGprDst     The host register to rotate (0..15); values of 8 and
          *                      above get a REX.B prefix.
          * @param   cShift      The rotate count, asserted to be in the range 1..31.
     5266 */
     5267DECL_FORCE_INLINE(uint32_t)
     5268iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
     5269{
     5270    Assert(cShift > 0 && cShift < 32);
     5271
     5272    /* rcl dst, cShift */
     5273    if (iGprDst >= 8)
     5274        pCodeBuf[off++] = X86_OP_REX_B; /* Only the low 3 bits of the register go into ModR/M below. */
     5275    if (cShift != 1)
     5276    {
     5277        pCodeBuf[off++] = 0xc1; /* Opcode form taking an imm8 rotate count. */
     5278        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7); /* ModR/M reg field 2 selects rcl. */
     5279        pCodeBuf[off++] = cShift;
     5280    }
     5281    else
     5282    {
              /* Rotate by one: use the form without an immediate, saving a byte. */
     5283        pCodeBuf[off++] = 0xd1;
     5284        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
     5285    }
     5286
     5287    return off;
     5288}
     5289#endif /* RT_ARCH_AMD64 */
     5290
     5291
     5292
    52635293/**
    52645294 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
     
    63246354#endif
    63256355}
     6356
     6357
     6358#ifdef RT_ARCH_AMD64
     6359/**
     6360 * For doing bt on a register.
          *
          * Emits the AMD64 'bt r/m, imm8' instruction with a register operand.  No
          * buffer space check is done here; the bytes are stored directly into
          * @a pCodeBuf, so the caller must have reserved room beforehand (at most 5
          * bytes are written: optional REX prefix, two opcode bytes, ModR/M and imm8).
          *
          * @returns New code buffer offset (first byte after the emitted instruction).
          * @param   pCodeBuf    The instruction buffer to write to.
          * @param   off         The current code buffer offset.
          * @param   iGprSrc     The host register to test (0..15); values of 8 and
          *                      above get REX.B set.
          * @param   iBitNo      The bit to test, asserted to be below 64.  Bit numbers
          *                      of 32 and above additionally get REX.W set.
     6361 */
     6362DECL_INLINE_THROW(uint32_t)
     6363iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
     6364{
     6365    Assert(iBitNo < 64);
     6366    /* bt Ev, imm8 */
     6367    if (iBitNo >= 32)
     6368        pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B); /* Upper-half bit: REX.W is needed. */
     6369    else if (iGprSrc >= 8)
     6370        pCodeBuf[off++] = X86_OP_REX_B; /* Lower-half bit: a prefix is only needed for the high registers. */
     6371    pCodeBuf[off++] = 0x0f; /* Two byte opcode: 0x0f 0xba. */
     6372    pCodeBuf[off++] = 0xba;
     6373    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7); /* ModR/M reg field 4 selects bt. */
     6374    pCodeBuf[off++] = iBitNo; /* The bit number goes in as the imm8 operand. */
     6375    return off;
     6376}
     6377#endif /* RT_ARCH_AMD64 */
    63266378
    63276379
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette