VirtualBox

Changeset 103894 in vbox for trunk/src/VBox/VMM


Timestamp: Mar 18, 2024 1:48:31 PM
Author: vboxsync
svn:sync-xref-src-repo-rev: 162266
Message:
VMM/IEM: Add SIMD local variable support and implement native emitters for IEM_MC_FETCH_YREG_U256() and IEM_MC_STORE_YREG_U256_ZX_VLMAX(), bugref:10614
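For orientation, here is a rough sketch of the kind of MC block these two statements serve: a full-width 256-bit register-to-register move with upper-lane zeroing. It is illustrative only (not lifted from the instruction tables); the real IEM_MC_BEGIN takes additional arguments, and pVCpu/bRm are assumed to be in scope of the decoder body:

    /* Hypothetical, simplified MC block using the newly recompilable statements. */
    IEM_MC_BEGIN(0, 1);
    IEM_MC_LOCAL(RTUINT256U, u256Tmp);                                       /* the new SIMD local variable support */
    IEM_MC_FETCH_YREG_U256(u256Tmp, IEM_GET_MODRM_RM(pVCpu, bRm));           /* now has a native emitter */
    IEM_MC_STORE_YREG_U256_ZX_VLMAX(IEM_GET_MODRM_REG(pVCpu, bRm), u256Tmp); /* ditto, zero-extending to VLMAX */
    IEM_MC_ADVANCE_RIP_AND_FINISH();
    IEM_MC_END();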

Location: trunk/src/VBox/VMM
Files: 5 edited

  • trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py

    r103865 r103894  
    30993099    'IEM_MC_FETCH_XREG_PAIR_XMM':                                (McBlock.parseMcGeneric,           False, False, False, ),
    31003100    'IEM_MC_FETCH_YREG_U128':                                    (McBlock.parseMcGeneric,           False, False, False, ),
    3101     'IEM_MC_FETCH_YREG_U256':                                    (McBlock.parseMcGeneric,           False, False, False, ),
     3101    'IEM_MC_FETCH_YREG_U256':                                    (McBlock.parseMcGeneric,           False, False, g_fNativeSimd),
    31023102    'IEM_MC_FETCH_YREG_U32':                                     (McBlock.parseMcGeneric,           False, False, g_fNativeSimd),
    31033103    'IEM_MC_FETCH_YREG_U64':                                     (McBlock.parseMcGeneric,           False, False, g_fNativeSimd),
     
    33433343    'IEM_MC_STORE_YREG_U128':                                    (McBlock.parseMcGeneric,           True,  True,  False, ),
    33443344    'IEM_MC_STORE_YREG_U128_ZX_VLMAX':                           (McBlock.parseMcGeneric,           True,  True,  False, ),
    3345     'IEM_MC_STORE_YREG_U256_ZX_VLMAX':                           (McBlock.parseMcGeneric,           True,  True,  False, ),
     3345    'IEM_MC_STORE_YREG_U256_ZX_VLMAX':                           (McBlock.parseMcGeneric,           True,  True,  g_fNativeSimd),
    33463346    'IEM_MC_STORE_YREG_U32_ZX_VLMAX':                            (McBlock.parseMcGeneric,           True,  True,  g_fNativeSimd),
    33473347    'IEM_MC_STORE_YREG_U64_ZX_VLMAX':                            (McBlock.parseMcGeneric,           True,  True,  g_fNativeSimd),
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h

    r103891 r103894  
    75087508
    75097509
     7510#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
     7511    off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
     7512
     7513
     7514/** Emits code for IEM_MC_FETCH_YREG_U256. */
     7515DECL_INLINE_THROW(uint32_t)
     7516iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
     7517{
     7518    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
     7519    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
     7520
     7521    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
     7522                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
     7523    uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
     7524
     7525    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
     7526
     7527    /* Free but don't flush the source register. */
     7528    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
     7529    iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
     7530
     7531    return off;
     7532}
     7533
     7534
     7535#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
     7536    off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
     7537
     7538
     7539/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
     7540DECL_INLINE_THROW(uint32_t)
     7541iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
     7542{
     7543    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
     7544    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
     7545
     7546    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
     7547                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
     7548    uint8_t const idxVarRegSrc  = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
     7549
     7550    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
     7551    IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
     7552    IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
     7553
     7554    /* Free but don't flush the source register. */
     7555    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
     7556    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
     7557
     7558    return off;
     7559}
     7560
     7561
    75107562
    75117563/*********************************************************************************************************************************
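Both emitters above follow the same shape: pin the guest YMM register to a host SIMD register, acquire the variable's host SIMD register, emit one 256-bit register copy, mark the destination dirty on writes, then release everything without flushing. A hypothetical emitter that copies one guest YMM register straight to another (not part of this changeset) would reuse only the helpers already shown:

    /* Sketch only, assuming the helpers used above; not present in the sources. */
    DECL_INLINE_THROW(uint32_t)
    iemNativeEmitSimdCopyYregU256Sketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
    {
        /* Pin the source read-only and the destination for a full 256-bit write. */
        uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
                                                                              kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
        uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                              kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);

        /* One host-register to host-register 256-bit copy. */
        off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);

        /* The CPUMCTX copy of the destination is now stale in both halves. */
        IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
        IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);

        /* Free but don't flush either register. */
        iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
        iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
        return off;
    }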
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r103876 r103894  
    30933093        pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
    30943094        pReNative->Core.aHstSimdRegs[i].enmWhat        = kIemNativeWhat_Invalid;
     3095        pReNative->Core.aHstSimdRegs[i].idxVar         = UINT8_MAX;
    30953096        pReNative->Core.aHstSimdRegs[i].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
    30963097    }
     
    39303931    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    39313932    Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
     3933#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     3934    Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
     3935#endif
    39323936    RT_NOREF(pszCaller);
    39333937
     
    44664470                                   IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
    44674471                        Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
     4472#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     4473                        Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
     4474#endif
    44684475
    44694476                        if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
     
    45834590    uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    45844591    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    4585     Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
     4592    Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
     4593#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     4594    Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
     4595#endif
    45864596
    45874597    pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
     
    46094619    }
    46104620}
     4621
     4622
     4623#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     4624# ifdef LOG_ENABLED
     4625/** Host CPU SIMD register names. */
     4626DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
     4627{
     4628#  ifdef RT_ARCH_AMD64
     4629    "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
     4630#  elif RT_ARCH_ARM64
     4631    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",  "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
     4632    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
     4633#  else
     4634#   error "port me"
     4635#  endif
     4636};
     4637# endif
     4638
     4639
     4640/**
     4641 * Frees a SIMD register assigned to a variable.
     4642 *
     4643 * The register will be disassociated from the variable.
     4644 */
     4645DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
     4646{
     4647    Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
     4648    Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
     4649    uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
     4650    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
     4651    Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
     4652    Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
     4653
     4654    pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
     4655    pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
     4656    if (!fFlushShadows)
     4657        Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
     4658               g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
     4659    else
     4660    {
     4661        pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
     4662        uint64_t const fGstRegShadowsOld        = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
     4663        pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
     4664        pReNative->Core.bmGstSimdRegShadows    &= ~fGstRegShadowsOld;
     4665        uint64_t       fGstRegShadows           = fGstRegShadowsOld;
     4666        while (fGstRegShadows)
     4667        {
     4668            unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
     4669            fGstRegShadows &= ~RT_BIT_64(idxGstReg);
     4670
     4671            Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
     4672            pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
     4673        }
     4674        Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
     4675               g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
     4676    }
     4677}
     4678#endif
    46114679
    46124680
     
    49525020};
    49535021AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
    4954 
    4955 
    4956 #ifdef LOG_ENABLED
    4957 /** Host CPU SIMD register names. */
    4958 DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
    4959 {
    4960 #ifdef RT_ARCH_AMD64
    4961     "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
    4962 #elif RT_ARCH_ARM64
    4963     "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",  "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    4964     "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
    4965 #else
    4966 # error "port me"
    4967 #endif
    4968 };
    4969 #endif
    49705022
    49715023
     
    69046956    pReNative->Core.aVars[idxVar].fRegAcquired   = false;
    69056957    pReNative->Core.aVars[idxVar].u.uValue       = 0;
     6958#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     6959    pReNative->Core.aVars[idxVar].fSimdReg       = false;
     6960#endif
    69066961    return idxVar;
    69076962}
     
    69777032    uint32_t       fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
    69787033    uint32_t       bmStack       = ~pReNative->Core.bmStack;
    6979     while (bmStack != UINT32_MAX)
     7034    while (bmStack != 0)
    69807035    {
    69817036/** @todo allocate from the top to reduce BP displacement. */
     
    69937048            }
    69947049        }
    6995         bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
     7050        bmStack &= ~(fBitAlignMask << (iSlot & ~fBitAlignMask));
    69967051    }
    69977052    AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
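The two changed lines above matter because bmStack at this point holds the inverted allocation bitmap, i.e. set bits are free slots: the scan has to keep going while any candidate bit remains (!= 0, not != UINT32_MAX) and has to clear a rejected window instead of OR'ing it in, otherwise the loop exits too early or never terminates. A simplified, self-contained sketch of that corrected scan (illustrative names and a simplified alignment rule, not the real allocator):

    #include <iprt/asm.h>    /* ASMBitFirstSetU32 */
    #include <iprt/types.h>  /* uint32_t, RT_BIT_32 */

    /* Find an aligned run of cSlots free slots in a 32-slot bitmap where a set
     * bit in bmAllocated means "in use".  Returns the first slot or -1. */
    static int iemSketchFindFreeSlotRun(uint32_t bmAllocated, unsigned cSlots /* power of two, <= 32 */)
    {
        uint32_t const fRunMask = cSlots < 32 ? RT_BIT_32(cSlots) - 1 : UINT32_MAX;
        uint32_t       bmFree   = ~bmAllocated;                   /* set bits are free slots */
        while (bmFree != 0)                                       /* not: while (bmFree != UINT32_MAX) */
        {
            unsigned const iBit  = ASMBitFirstSetU32(bmFree) - 1; /* lowest free slot */
            unsigned const iSlot = iBit & ~(cSlots - 1);          /* round down to the alignment window */
            if ((bmFree & (fRunMask << iSlot)) == (fRunMask << iSlot))
                return (int)iSlot;                                /* whole aligned run is free */
            bmFree &= ~(fRunMask << iSlot);                       /* reject the window; was: bmFree |= ... */
        }
        return -1;                                                /* out of stack slots */
    }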
     
    72987353    pVar->idxReg = idxReg;
    72997354
     7355#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     7356    pVar->fSimdReg = false;
     7357#endif
     7358
    73007359    /*
    73017360     * Load it off the stack if we've got a stack slot.
     
    73247383    return idxReg;
    73257384}
     7385
     7386
     7387#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     7388/**
     7389 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
     7390 * fixed till we call iemNativeVarRegisterRelease.
     7391 *
     7392 * @returns The host register number.
     7393 * @param   pReNative   The recompiler state.
     7394 * @param   idxVar      The variable.
     7395 * @param   poff        Pointer to the instruction buffer offset.
     7396 *                      In case a register needs to be freed up or the value
     7397 *                      loaded off the stack.
     7398 * @param  fInitialized Set if the variable must already have been initialized.
     7399 *                      Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
     7400 *                      the case.
     7401 * @param  idxRegPref   Preferred SIMD register number or UINT8_MAX.
     7402 */
     7403DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
     7404                                                           bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
     7405{
     7406    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
     7407    PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
     7408    Assert(   pVar->cbVar == sizeof(RTUINT128U)
     7409           || pVar->cbVar == sizeof(RTUINT256U));
     7410    Assert(!pVar->fRegAcquired);
     7411
     7412    uint8_t idxReg = pVar->idxReg;
     7413    if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
     7414    {
     7415        Assert(   pVar->enmKind > kIemNativeVarKind_Invalid
     7416               && pVar->enmKind < kIemNativeVarKind_End);
     7417        pVar->fRegAcquired = true;
     7418        return idxReg;
     7419    }
     7420
     7421    /*
     7422     * If the kind of variable has not yet been set, default to 'stack'.
     7423     */
     7424    Assert(   pVar->enmKind >= kIemNativeVarKind_Invalid
     7425           && pVar->enmKind < kIemNativeVarKind_End);
     7426    if (pVar->enmKind == kIemNativeVarKind_Invalid)
     7427        iemNativeVarSetKindToStack(pReNative, idxVar);
     7428
     7429    /*
     7430     * We have to allocate a register for the variable, even if its a stack one
     7431     * as we don't know if there are modification being made to it before its
     7432     * finalized (todo: analyze and insert hints about that?).
     7433     *
     7434     * If we can, we try get the correct register for argument variables. This
     7435     * is assuming that most argument variables are fetched as close as possible
     7436     * to the actual call, so that there aren't any interfering hidden calls
     7437     * (memory accesses, etc) inbetween.
     7438     *
     7439     * If we cannot or it's a variable, we make sure no argument registers
     7440     * that will be used by this MC block will be allocated here, and we always
     7441     * prefer non-volatile registers to avoid needing to spill stuff for internal
     7442     * call.
     7443     */
     7444    /** @todo Detect too early argument value fetches and warn about hidden
     7445     * calls causing less optimal code to be generated in the python script. */
     7446
     7447    uint8_t const uArgNo = pVar->uArgNo;
     7448    Assert(uArgNo == UINT8_MAX); /* No SIMD registers as arguments for now. */
     7449
     7450    /* SIMD is bit simpler for now because there is no support for arguments. */
     7451    if (   idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
     7452        || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
     7453    {
     7454        uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
     7455        uint32_t const fRegs        = ~pReNative->Core.bmHstSimdRegs
     7456                                    & ~pReNative->Core.bmHstSimdRegsWithGstShadow
     7457                                    & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
     7458                                    & fNotArgsMask;
     7459        if (fRegs)
     7460        {
     7461            idxReg = (uint8_t)ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
     7462                                               ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
     7463            Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
     7464            Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
     7465            Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
     7466        }
     7467        else
     7468        {
     7469            idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
     7470                                                   IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
     7471            AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
     7472            Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
     7473        }
     7474    }
     7475    else
     7476    {
     7477        idxReg = idxRegPref;
     7478        AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
     7479        Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
     7480    }
     7481    iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
     7482
     7483    pVar->fSimdReg = true;
     7484    pVar->idxReg = idxReg;
     7485
     7486    /*
     7487     * Load it off the stack if we've got a stack slot.
     7488     */
     7489    uint8_t const idxStackSlot = pVar->idxStackSlot;
     7490    if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
     7491    {
     7492        Assert(fInitialized);
     7493        int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
     7494        switch (pVar->cbVar)
     7495        {
     7496            case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
     7497            default: AssertFailed(); RT_FALL_THRU();
     7498            case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
     7499        }
     7500    }
     7501    else
     7502    {
     7503        Assert(idxStackSlot == UINT8_MAX);
     7504        AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
     7505    }
     7506    pVar->fRegAcquired = true;
     7507    return idxReg;
     7508}
     7509#endif
    73267510
    73277511
     
    75147698        } while (fHstRegs);
    75157699    }
     7700#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     7701    fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
     7702    if (fHstRegs)
     7703    {
     7704        do
     7705        {
     7706            unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
     7707            fHstRegs &= ~RT_BIT_32(idxHstReg);
     7708
     7709            /*
     7710             * Guest registers are flushed to CPUMCTX at the moment and don't need allocating a stack slot
     7711             * which would be more difficult due to spanning multiple stack slots and different sizes
     7712             * (besides we only have a limited amount of slots at the moment). Fixed temporary registers
     7713             * don't need saving.
     7714             */
     7715            if (   pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
     7716                || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
     7717                continue;
     7718
     7719            Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
     7720
     7721            uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
     7722            IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
     7723            AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
     7724                       && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
     7725                       && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
     7726                       && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
     7727                       && (   pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
     7728                           || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
     7729                       IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
     7730            switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
     7731            {
     7732                case kIemNativeVarKind_Stack:
     7733                {
     7734                    /* Temporarily spill the variable register. */
     7735                    uint8_t const cbVar        = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
     7736                    uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
     7737                    Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
     7738                           idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
     7739                    if (cbVar == sizeof(RTUINT128U))
     7740                        off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
     7741                    else
     7742                        off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
     7743                    continue;
     7744                }
     7745
     7746                case kIemNativeVarKind_Immediate:
     7747                case kIemNativeVarKind_VarRef:
     7748                case kIemNativeVarKind_GstRegRef:
     7749                    /* It is weird to have any of these loaded at this point. */
     7750                    AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
     7751                    continue;
     7752
     7753                case kIemNativeVarKind_End:
     7754                case kIemNativeVarKind_Invalid:
     7755                    break;
     7756            }
     7757            AssertFailed();
     7758        } while (fHstRegs);
     7759    }
     7760#endif
    75167761    return off;
    75177762}
     
    75867831        } while (fHstRegs);
    75877832    }
     7833#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     7834    fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
     7835    if (fHstRegs)
     7836    {
     7837        do
     7838        {
     7839            unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
     7840            fHstRegs &= ~RT_BIT_32(idxHstReg);
     7841
     7842            if (   pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
     7843                || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
     7844                continue;
     7845            Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
     7846
     7847            uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
     7848            IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
     7849            AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
     7850                       && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
     7851                       && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
     7852                       && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
     7853                       && (   pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
     7854                           || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
     7855                       IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
     7856            switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
     7857            {
     7858                case kIemNativeVarKind_Stack:
     7859                {
     7860                    /* Unspill the variable register. */
     7861                    uint8_t const cbVar        = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
     7862                    uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
     7863                    Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
     7864                           idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
     7865
     7866                    if (cbVar == sizeof(RTUINT128U))
     7867                        off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
     7868                    else
     7869                        off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
     7870                    continue;
     7871                }
     7872
     7873                case kIemNativeVarKind_Immediate:
     7874                case kIemNativeVarKind_VarRef:
     7875                case kIemNativeVarKind_GstRegRef:
     7876                    /* It is weird to have any of these loaded at this point. */
     7877                    AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
     7878                    continue;
     7879
     7880                case kIemNativeVarKind_End:
     7881                case kIemNativeVarKind_Invalid:
     7882                    break;
     7883            }
     7884            AssertFailed();
     7885        } while (fHstRegs);
     7886    }
     7887#endif
    75887888    return off;
    75897889}
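The two blocks above bracket helper calls: the first spills any volatile host SIMD register that currently backs a 128/256-bit variable onto its stack slot, the second reloads it afterwards. A call-site sketch of how a helper-call emitter might use that pair; the save/restore names are taken from the log strings above, while the call-emit helper, pfnHelper and the exact signatures are assumptions for illustration:

    /* Hypothetical call site, signatures simplified. */
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off);      /* spill live volatile SIMD variable regs */
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);  /* assumed call-emit helper */
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off);  /* reload what was spilled */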
     
    76317931    /* Free the host register first if any assigned. */
    76327932    uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
     7933#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     7934    if (   idxHstReg != UINT8_MAX
     7935        && pReNative->Core.aVars[idxVar].fSimdReg)
     7936    {
     7937        Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
     7938        Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
     7939        pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
     7940        pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
     7941    }
     7942    else
     7943#endif
    76337944    if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    76347945    {
     
    78328143            {
    78338144                uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
     8145#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     8146                if (   idxRegOld != UINT8_MAX
     8147                    && pReNative->Core.aVars[idxVar].fSimdReg)
     8148                {
     8149                    Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
     8150                    Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
     8151
     8152                    uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
     8153                    Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
     8154                           idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
     8155                           idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
     8156                    if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
     8157                        off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
     8158                    else
     8159                        off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
     8160
     8161                    Assert(!(   (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
     8162                              & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
     8163
     8164                    pReNative->Core.aVars[idxVar].idxReg       = UINT8_MAX;
     8165                    pReNative->Core.bmHstSimdRegs              &= ~RT_BIT_32(idxRegOld);
     8166                    pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
     8167                    pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
     8168                    pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
     8169                }
     8170                else
     8171#endif
    78348172                if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
    78358173                {
     
    79648302                        int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    79658303                        uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
     8304# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     8305                        bool    const fSimdReg       = pReNative->Core.aVars[idxOtherVar].fSimdReg;
     8306                        uint8_t const cbVar          = pReNative->Core.aVars[idxOtherVar].cbVar;
     8307                        if (   fSimdReg
     8308                            && idxRegOther != UINT8_MAX)
     8309                        {
     8310                            Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
     8311                            if (cbVar == sizeof(RTUINT128U))
     8312                                off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
     8313                            else
     8314                                off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
     8315                            iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
     8316                            Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
     8317                        }
     8318                        else
     8319# endif
    79668320                        if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    79678321                        {
     
    80508404                            int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    80518405                            uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
     8406#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     8407                            bool    const fSimdReg       = pReNative->Core.aVars[idxOtherVar].fSimdReg;
     8408                            uint8_t const cbVar          = pReNative->Core.aVars[idxOtherVar].cbVar;
     8409                            if (   fSimdReg
     8410                                && idxRegOther != UINT8_MAX)
     8411                            {
     8412                                Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
     8413                                if (cbVar == sizeof(RTUINT128U))
     8414                                    off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
     8415                                else
     8416                                    off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
     8417                                iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
     8418                                Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
     8419                            }
     8420                            else
     8421#endif
    80528422                            if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    80538423                            {
  • trunk/src/VBox/VMM/include/IEMN8veRecompiler.h

    r103876 r103894  
    189189#  define IEMNATIVE_SIMD_REG_FIXED_TMP0    5 /* xmm5/ymm5 */
    190190#  if defined(IEMNATIVE_WITH_SIMD_REG_ACCESS_ALL_REGISTERS) || !defined(_MSC_VER)
    191 #   define IEMNATIVE_SIMD_REG_FIXED_MASK   RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
     191#   define IEMNATIVE_SIMD_REG_FIXED_MASK   (RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0))
    192192#  else
    193193/** On Windows xmm6 through xmm15 are marked as callee saved. */
     
    977977     * @todo not sure what this really is for...   */
    978978    IEMNATIVEGSTREG     enmGstReg;
     979#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     980    /** Flag whether this variable is held in a SIMD register (only supported for 128-bit and 256-bit variables),
     981     * only valid when idxReg is not UINT8_MAX. */
     982    bool                fSimdReg     : 1;
     983    /** Set if the registered is currently used exclusively, false if the
     984     *  variable is idle and the register can be grabbed. */
     985    bool                fRegAcquired : 1;
     986#else
    979987    /** Set if the registered is currently used exclusively, false if the
    980988     *  variable is idle and the register can be grabbed. */
    981989    bool                fRegAcquired;
     990#endif
    982991
    983992    union
     
    10951104    /** What is being kept in this register. */
    10961105    IEMNATIVEWHAT             enmWhat;
     1106    /** Variable index (packed) if holding a variable, otherwise UINT8_MAX. */
     1107    uint8_t                   idxVar;
    10971108    /** Flag what is currently loaded, low 128-bits, high 128-bits or complete 256-bits. */
    10981109    IEMNATIVEGSTSIMDREGLDSTSZ enmLoaded;
    10991110    /** Alignment padding. */
    1100     uint8_t                   abAlign[6];
     1111    uint8_t                   abAlign[5];
    11011112} IEMNATIVEHSTSIMDREG;
    11021113#endif
     
    15191530DECLHIDDEN(void)            iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
    15201531DECLHIDDEN(void)            iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT;
     1532DECLHIDDEN(void)            iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, bool fFlushShadows) RT_NOEXCEPT;
    15211533DECLHIDDEN(void)            iemNativeRegFreeAndFlushMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegMask) RT_NOEXCEPT;
    15221534DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs,
     
    15661578DECL_HIDDEN_THROW(uint8_t)  iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
    15671579                                                        bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX);
     1580#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1581DECL_HIDDEN_THROW(uint8_t)  iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
     1582                                                            bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX);
     1583#endif
    15681584DECL_HIDDEN_THROW(uint8_t)  iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    15691585                                                                   IEMNATIVEGSTREG enmGstReg, uint32_t *poff);
     
    18491865    pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = false;
    18501866}
     1867
     1868
     1869#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1870DECL_INLINE_THROW(void) iemNativeVarSimdRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
     1871{
     1872    Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
     1873    iemNativeVarRegisterRelease(pReNative, idxVar);
     1874}
     1875#endif
    18511876
    18521877
     
    22332258
    22342259    pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat        = enmWhat;
     2260    pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar         = idxVar;
    22352261    pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
    2236     RT_NOREF(idxVar);
    22372262    return idxSimdReg;
    22382263}
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r103892 r103894  
    21062106
    21072107
     2108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     2109/**
     2110 * Emits a 128-bit vector register load instruction with an BP relative source address.
     2111 */
     2112DECL_FORCE_INLINE_THROW(uint32_t)
     2113iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
     2114{
     2115#ifdef RT_ARCH_AMD64
     2116    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
     2117
     2118    /* movdqu reg128, mem128 */
     2119    pbCodeBuf[off++] = 0xf3;
     2120    if (iVecRegDst >= 8)
     2121        pbCodeBuf[off++] = X86_OP_REX_R;
     2122    pbCodeBuf[off++] = 0x0f;
     2123    pbCodeBuf[off++] = 0x6f;
     2124    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
     2125#elif defined(RT_ARCH_ARM64)
     2126    return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
     2127#else
     2128# error "port me"
     2129#endif
     2130}
     2131
     2132
     2133/**
     2134 * Emits a 256-bit vector register load instruction with an BP relative source address.
     2135 */
     2136DECL_FORCE_INLINE_THROW(uint32_t)
     2137iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
     2138{
     2139#ifdef RT_ARCH_AMD64
     2140    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
     2141
     2142    /* vmovdqu reg256, mem256 */
     2143    pbCodeBuf[off++] = X86_OP_VEX2;
     2144    pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
     2145    pbCodeBuf[off++] = 0x6f;
     2146    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
     2147#elif defined(RT_ARCH_ARM64)
     2148    /* ASSUMES two consecutive vector registers for the 256-bit value. */
     2149    Assert(!(iVecRegDst & 0x1));
     2150    off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst,     offDisp,                      kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
     2151    return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
     2152#else
     2153# error "port me"
     2154#endif
     2155}
     2156
     2157#endif
     2158
     2159
    21082160/**
    21092161 * Emits a load effective address to a GRP with an BP relative source address.
     
    22512303    return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
    22522304}
     2305
     2306
     2307#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     2308/**
     2309 * Emits a 128-bit vector register store with an BP relative destination address.
     2310 *
     2311 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
     2312 */
     2313DECL_INLINE_THROW(uint32_t)
     2314iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
     2315{
     2316#ifdef RT_ARCH_AMD64
     2317    /* movdqu [rbp + offDisp], vecsrc */
     2318    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
     2319    pbCodeBuf[off++] = 0xf3;
     2320    if (iVecRegSrc >= 8)
     2321        pbCodeBuf[off++] =  X86_OP_REX_R;
     2322    pbCodeBuf[off++] = 0x0f;
     2323    pbCodeBuf[off++] = 0x7f;
     2324    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
     2325
     2326#elif defined(RT_ARCH_ARM64)
     2327    if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
     2328    {
     2329        /* str w/ unsigned imm12 (scaled) */
     2330        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
     2331        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
     2332                                                      ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
     2333    }
     2334    else if (offDisp >= -256 && offDisp <= 256)
     2335    {
     2336        /* stur w/ signed imm9 (unscaled) */
     2337        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
     2338        pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
     2339    }
     2340    else if ((uint32_t)-offDisp < (unsigned)_4K)
     2341    {
     2342        /* Use temporary indexing register w/ sub uimm12. */
     2343        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
     2344        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
     2345                                                         ARMV8_A64_REG_BP, (uint32_t)-offDisp);
     2346        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
     2347    }
     2348    else
     2349    {
     2350        /* Use temporary indexing register. */
     2351        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
     2352        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
     2353        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
     2354                                                       IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
     2355    }
     2356    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     2357    return off;
     2358
     2359#else
     2360# error "Port me!"
     2361#endif
     2362}
     2363
     2364
     2365/**
     2366 * Emits a 256-bit vector register store with an BP relative destination address.
     2367 *
     2368 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
     2369 */
     2370DECL_INLINE_THROW(uint32_t)
     2371iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
     2372{
     2373#ifdef RT_ARCH_AMD64
     2374    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
     2375
     2376    /* vmovdqu mem256, reg256 */
     2377    pbCodeBuf[off++] = X86_OP_VEX2;
     2378    pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
     2379    pbCodeBuf[off++] = 0x7f;
     2380    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
     2381#elif defined(RT_ARCH_ARM64)
     2382    Assert(!(iVecRegSrc & 0x1));
     2383    off =  iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp,                      iVecRegSrc);
     2384    return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
     2385#else
     2386# error "Port me!"
     2387#endif
     2388}
     2389#endif
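As a quick sanity check of the AMD64 paths above: for a small negative BP displacement such as -0x40 and register 2, and assuming the common 8-bit displacement ModRM form is emitted, the movdqu/vmovdqu load and store emitters should produce the following hand-assembled byte sequences (the ARM64 paths instead pick between the scaled uimm12, unscaled imm9 and register-indexed forms depending on offDisp):

    /* movdqu  xmm2, [rbp - 0x40]  -- iemNativeEmitLoadVecRegByBpU128(..., 2, -0x40)  */
    static const uint8_t s_abLoadXmm2[]  = { 0xf3, 0x0f, 0x6f, 0x55, 0xc0 };
    /* vmovdqu ymm2, [rbp - 0x40]  -- iemNativeEmitLoadVecRegByBpU256(..., 2, -0x40)  */
    static const uint8_t s_abLoadYmm2[]  = { 0xc5, 0xfe, 0x6f, 0x55, 0xc0 };
    /* movdqu  [rbp - 0x40], xmm2  -- iemNativeEmitStoreVecRegByBpU128(..., -0x40, 2) */
    static const uint8_t s_abStoreXmm2[] = { 0xf3, 0x0f, 0x7f, 0x55, 0xc0 };
    /* vmovdqu [rbp - 0x40], ymm2  -- iemNativeEmitStoreVecRegByBpU256(..., -0x40, 2) */
    static const uint8_t s_abStoreYmm2[] = { 0xc5, 0xfe, 0x7f, 0x55, 0xc0 };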
    22532390
    22542391#if defined(RT_ARCH_ARM64)
     
    71557292
    71567293/**
    7157  * Emits code to load the variable address into an argument GRP.
     7294 * Emits code to load the variable address into an argument GPR.
    71587295 *
    71597296 * This only works for uninitialized and stack variables.
     
    71737310
    71747311    uint8_t const idxRegVar      = pVar->idxReg;
     7312#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     7313    if (   idxRegVar != UINT8_MAX
     7314        && pVar->fSimdReg)
     7315    {
     7316        Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
     7317        Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
     7318
     7319        if (pVar->cbVar == sizeof(RTUINT128U))
     7320            off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
     7321        else
     7322            off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
     7323
     7324        iemNativeSimdRegFreeVar(pReNative, idxRegVar, fFlushShadows);
     7325        Assert(pVar->idxReg == UINT8_MAX);
     7326    }
     7327    else
     7328#endif
    71757329    if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
    71767330    {
     
    74007554{
    74017555#ifdef RT_ARCH_AMD64
    7402     off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
     7556    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
    74037557#elif defined(RT_ARCH_ARM64)
    74047558    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    74057559    Assert(!(iVecReg & 0x1));
    7406     off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
     7560    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
    74077561#else
    74087562# error "port me"