VirtualBox

Ignore:
Timestamp:
Mar 11, 2024 10:02:34 PM (11 months ago)
Author:
vboxsync
Message:

VMM/IEM: Optimized the flag calculations for AND, OR, XOR and TEST on ARM64 a little - at least reduced the number of instructions. bugref:10376

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h

    r103797 r103798  
    202202iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl
    203203#ifndef RT_ARCH_AMD64
    204                               , uint8_t cOpBits, uint8_t idxRegResult
     204                              , uint8_t cOpBits, uint8_t idxRegResult, bool fNativeFlags = false
    205205#endif
    206206                              )
     
    248248        off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxTmpReg);
    249249
    250         /* Calculate zero: mov tmp, zf; cmp result,zero; csel.eq tmp,tmp,wxr */
    251         if (cOpBits > 32)
    252             off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, idxRegResult, ARMV8_A64_REG_XZR);
    253         else
    254             off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, idxRegResult, ARMV8_A64_REG_XZR);
    255         pCodeBuf[off++] = Armv8A64MkInstrCSet(idxTmpReg, kArmv8InstrCond_Eq, false /*f64Bit*/);
    256         pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegEfl, idxRegEfl, idxTmpReg, false /*f64Bit*/, X86_EFL_ZF_BIT);
    257 
    258         /* Calculate signed: We could use the native SF flag, but it's just as simple to calculate it by shifting. */
    259         pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxRegResult, cOpBits - 1, cOpBits > 32 /*f64Bit*/);
    260 # if 0 /* BFI and ORR should have the same performance characteristics, so use BFI like we'll have to do for SUB/ADD/++. */
    261         pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegEfl, idxRegEfl, idxTmpReg, false /*f64Bit*/, X86_EFL_SF_BIT);
    262 # else
    263         pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_SF_BIT, 1, false /*f64Bit*/);
    264 # endif
     250        /* N,Z -> SF,ZF */
     251        if (cOpBits < 32)
     252            pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegResult, cOpBits > 8); /* sets NZ */
     253        else if (!fNativeFlags)
     254            pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, idxRegResult, idxRegResult, cOpBits > 32 /*f64Bit*/);
     255        pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
     256        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 30);
     257        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
     258        AssertCompile(X86_EFL_ZF_BIT + 1 == X86_EFL_SF_BIT);
    265259
    266260        /* Calculate 8-bit parity of the result. */
     
    482476#elif defined(RT_ARCH_ARM64)
    483477    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones. */
    484     /** @todo we should use ANDS on ARM64 and get the ZF for free for all
    485      *        variants, and SF for 32-bit and 64-bit.  */
    486478    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    487     pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    488     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    489     iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    490 
    491     off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
     479    pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
     480    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     481    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
     482
     483    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, true /*fNativeFlags*/);
    492484#else
    493485# error "Port me"
     
    516508#elif defined(RT_ARCH_ARM64)
    517509    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones. */
    518     /** @todo we should use ANDS on ARM64 and get the ZF for free for all
    519      *        variants, and SF for 32-bit and 64-bit.  */
    520510    uint32_t uImmSizeLen, uImmRotations;
    521511    if (  cOpBits > 32
     
    524514    {
    525515        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    526         pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
     516        if (cOpBits >= 32)
     517            pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
     518        else
     519            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
    527520    }
    528521    else
     
    530523        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
    531524        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    532         pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
     525        if (cOpBits >= 32)
     526            pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
     527        else
     528            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
    533529        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    534530    }
    535531    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    536532
    537     off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
     533    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, cOpBits >= 32 /*fNativeFlags*/);
    538534    RT_NOREF_PV(cImmBits)
    539535
     
    566562    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones.  We also
    567563       need to keep the result in order to calculate the flags. */
    568     /** @todo we should use ANDS on ARM64 and get the ZF for free for all
    569      *        variants, and SF for 32-bit and 64-bit.  */
    570564    uint8_t const         idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    571565    PIEMNATIVEINSTR const pCodeBuf     = iemNativeInstrBufEnsure(pReNative, off, 1);
    572     pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
     566    if (cOpBits >= 32)
     567        pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
     568    else
     569        pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    573570    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    574571
     
    583580    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
    584581#else
    585     off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult);
     582    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
    586583    iemNativeRegFreeTmp(pReNative, idxRegResult);
    587584#endif
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette