VirtualBox

Changeset 103728 in vbox


Timestamp:
Mar 7, 2024 12:11:33 PM
Author:
vboxsync
Message:

VMM/IEM: Initial implementation of a SIMD register allocator and associated code in order to be able to recompile SSE/AVX instructions (disabled by default and only working on ARM64 right now), bugref:10614

Location:
trunk/src/VBox/VMM
Files:
4 edited

  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r103671 r103728  
    133133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    134134                                                uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
     135# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     136static uint32_t iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
     137                                                    IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
     138# endif
    135139static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
    136140#endif
     
    30153019    pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat     = kIemNativeWhat_PcShadow;
    30163020#endif
     3021
     3022#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     3023# ifdef RT_ARCH_ARM64
      3024    /*
      3025     * Arm64 only has 32 128-bit registers. In order to support emulating 256-bit registers we statically pair
      3026     * two real registers to one virtual register for now, leaving us with only 16 256-bit registers.
      3027     * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register as fixed here during init,
      3028     * and the register allocator assumes that it will always be free when the lower one is picked.
      3029     */
     3030    uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
     3031# else
     3032    uint32_t const fFixedAdditional = 0;
     3033# endif
     3034
     3035    pReNative->Core.bmHstSimdRegs          = IEMNATIVE_SIMD_REG_FIXED_MASK
     3036                                           | fFixedAdditional
     3037# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
     3038                                           | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
     3039# endif
     3040                                           ;
     3041    pReNative->Core.bmHstSimdRegsWithGstShadow   = 0;
     3042    pReNative->Core.bmGstSimdRegShadows          = 0;
     3043    pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
     3044    pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
     3045
     3046    /* Full host register reinit: */
     3047    for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
     3048    {
     3049        pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
     3050        pReNative->Core.aHstSimdRegs[i].enmWhat        = kIemNativeWhat_Invalid;
     3051        pReNative->Core.aHstSimdRegs[i].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
     3052    }
     3053
     3054    fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
     3055    for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
     3056    {
     3057        fRegs &= ~RT_BIT_32(idxReg);
     3058        pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
     3059    }
     3060
     3061#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
     3062    pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
     3063#endif
     3064
     3065#endif
     3066
    30173067    return pReNative;
    30183068}
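
A minimal standalone sketch (illustrative only, not part of this changeset) of why fFixedAdditional is UINT32_C(0xaaaaaaaa) on ARM64: every odd-numbered host SIMD register is marked fixed so that each even/odd pair (v0/v1, v2/v3, ...) can jointly hold one 256-bit guest value, leaving 16 allocatable pairs.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa); /* bits 1, 3, 5, ..., 31 */
        unsigned cLowerHalves = 0;
        for (unsigned iReg = 0; iReg < 32; iReg++)
            if (fFixedAdditional & (UINT32_C(1) << iReg))
                printf("v%u is fixed (upper half of the v%u/v%u pair)\n", iReg, iReg - 1, iReg);
            else
                cLowerHalves++;
        /* Prints 16: the even-numbered registers that remain visible to the allocator. */
        printf("%u allocatable 256-bit pairs\n", cLowerHalves);
        return 0;
    }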
     
    34363486    pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
    34373487}
     3488
     3489
     3490# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     3491/**
     3492 * Debug Info: Record info about guest register shadowing.
     3493 */
     3494static void iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
     3495                                                     uint8_t idxHstSimdReg = UINT8_MAX, uint8_t idxHstSimdRegPrev = UINT8_MAX)
     3496{
     3497    PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
     3498    pEntry->GuestSimdRegShadowing.uType             = kIemTbDbgEntryType_GuestSimdRegShadowing;
     3499    pEntry->GuestSimdRegShadowing.uUnused           = 0;
     3500    pEntry->GuestSimdRegShadowing.idxGstSimdReg     = enmGstSimdReg;
     3501    pEntry->GuestSimdRegShadowing.idxHstSimdReg     = idxHstSimdReg;
     3502    pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
     3503}
     3504# endif
    34383505
    34393506
     
    51345201
    51355202
     5203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     5204/*********************************************************************************************************************************
     5205*   SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge)                            *
     5206*********************************************************************************************************************************/
     5207
     5208/**
     5209 * Info about shadowed guest SIMD register values.
     5210 * @see IEMNATIVEGSTSIMDREG
     5211 */
     5212static struct
     5213{
     5214    /** Offset in VMCPU of XMM (low 128-bit) registers. */
     5215    uint32_t    offXmm;
     5216    /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
     5217    uint32_t    offYmm;
     5218    /** Name (for logging). */
     5219    const char *pszName;
     5220} const g_aGstSimdShadowInfo[] =
     5221{
     5222#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
     5223                                         (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
     5224    /* [kIemNativeGstSimdReg_SimdRegFirst +  0] = */  { CPUMCTX_OFF_AND_SIZE(0),  "ymm0",  },
     5225    /* [kIemNativeGstSimdReg_SimdRegFirst +  1] = */  { CPUMCTX_OFF_AND_SIZE(1),  "ymm1",  },
     5226    /* [kIemNativeGstSimdReg_SimdRegFirst +  2] = */  { CPUMCTX_OFF_AND_SIZE(2),  "ymm2",  },
     5227    /* [kIemNativeGstSimdReg_SimdRegFirst +  3] = */  { CPUMCTX_OFF_AND_SIZE(3),  "ymm3",  },
     5228    /* [kIemNativeGstSimdReg_SimdRegFirst +  4] = */  { CPUMCTX_OFF_AND_SIZE(4),  "ymm4",  },
     5229    /* [kIemNativeGstSimdReg_SimdRegFirst +  5] = */  { CPUMCTX_OFF_AND_SIZE(5),  "ymm5",  },
     5230    /* [kIemNativeGstSimdReg_SimdRegFirst +  6] = */  { CPUMCTX_OFF_AND_SIZE(6),  "ymm6",  },
     5231    /* [kIemNativeGstSimdReg_SimdRegFirst +  7] = */  { CPUMCTX_OFF_AND_SIZE(7),  "ymm7",  },
     5232    /* [kIemNativeGstSimdReg_SimdRegFirst +  8] = */  { CPUMCTX_OFF_AND_SIZE(8),  "ymm8",  },
     5233    /* [kIemNativeGstSimdReg_SimdRegFirst +  9] = */  { CPUMCTX_OFF_AND_SIZE(9),  "ymm9",  },
     5234    /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */  { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
     5235    /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */  { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
     5236    /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */  { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
     5237    /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */  { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
     5238    /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */  { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
     5239    /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */  { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
     5240#undef CPUMCTX_OFF_AND_SIZE
     5241};
     5242AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
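
To illustrate what CPUMCTX_OFF_AND_SIZE records, here is a small standalone sketch using a simplified stand-in for the guest context layout (MOCKCTX is not the real VMCPU/CPUMCTX structure, just an assumption for the example): each table entry holds the byte offsets of the low 128 bits (XMM) and the high 128 bits (YmmHi) of a guest YMM register, so load/store emitters can address either half independently. It relies on offsetof with an array element designator, the same idiom RT_UOFFSETOF uses above.

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct MOCKCTX
    {
        uint8_t aXMM[16][16];    /* low 128-bit halves (XMM),    16 bytes each */
        uint8_t aYmmHi[16][16];  /* high 128-bit halves (YmmHi), 16 bytes each */
    } MOCKCTX;

    typedef struct MOCKSHADOWINFO
    {
        uint32_t    offXmm;
        uint32_t    offYmm;
        const char *pszName;
    } MOCKSHADOWINFO;

    #define MOCK_OFF_AND_NAME(a_iReg) \
        { (uint32_t)offsetof(MOCKCTX, aXMM[a_iReg]), (uint32_t)offsetof(MOCKCTX, aYmmHi[a_iReg]), "ymm" #a_iReg }

    static MOCKSHADOWINFO const g_aInfo[] = { MOCK_OFF_AND_NAME(0), MOCK_OFF_AND_NAME(1), MOCK_OFF_AND_NAME(15) };

    int main(void)
    {
        for (size_t i = 0; i < sizeof(g_aInfo) / sizeof(g_aInfo[0]); i++)
            printf("%-5s: low 128 bits at offset %3u, high 128 bits at offset %3u\n",
                   g_aInfo[i].pszName, g_aInfo[i].offXmm, g_aInfo[i].offYmm);
        return 0;
    }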
     5243
     5244
     5245#ifdef LOG_ENABLED
     5246/** Host CPU SIMD register names. */
     5247DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
     5248{
     5249#ifdef RT_ARCH_AMD64
     5250    "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
     5251#elif RT_ARCH_ARM64
     5252    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",  "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
     5253    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
     5254#else
     5255# error "port me"
     5256#endif
     5257};
     5258#endif
     5259
     5260
     5261DECL_FORCE_INLINE(uint8_t) iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg,
     5262                                                         IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
     5263{
     5264    pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
     5265
     5266    pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat        = enmWhat;
     5267    pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
     5268    RT_NOREF(idxVar);
     5269    return idxSimdReg;
     5270}
     5271
     5272
     5273/**
     5274 * Frees a temporary SIMD register.
     5275 *
     5276 * Any shadow copies of guest registers assigned to the host register will not
     5277 * be flushed by this operation.
     5278 */
     5279DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
     5280{
     5281    Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
     5282    Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
     5283    pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
     5284    Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
     5285           g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
     5286}
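
A tiny standalone model (not VBox code) of the bookkeeping described above: freeing a temporary clears only its bit in the allocation bitmap, while any guest-shadow association is deliberately left in place so the cached value can still be reused later.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t bmHstSimdRegs              = 0;   /* allocation bitmap                 */
        uint32_t bmHstSimdRegsWithGstShadow = 0;   /* host regs shadowing guest regs    */
        unsigned const idxReg = 2;

        bmHstSimdRegs              |= UINT32_C(1) << idxReg;   /* allocated as temporary      */
        bmHstSimdRegsWithGstShadow |= UINT32_C(1) << idxReg;   /* happens to shadow a guest reg */

        bmHstSimdRegs              &= ~(UINT32_C(1) << idxReg); /* "free tmp": only this bit   */

        printf("allocated=%u shadowing=%u\n",                   /* prints: allocated=0 shadowing=1 */
               !!(bmHstSimdRegs & (UINT32_C(1) << idxReg)),
               !!(bmHstSimdRegsWithGstShadow & (UINT32_C(1) << idxReg)));
        return 0;
    }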
     5287
     5288
     5289/**
     5290 * Locate a register, possibly freeing one up.
     5291 *
     5292 * This ASSUMES the caller has done the minimal/optimal allocation checks and
     5293 * failed.
     5294 *
      5295 * @returns Host register number on success. Returns UINT8_MAX if no register was
      5296 *          found; the caller is supposed to deal with this and raise an
      5297 *          allocation-type-specific status code (if desired).
      5298 *
      5299 * @throws  VBox status code if we run into trouble spilling a variable or
      5300 *          recording debug info.  Does NOT throw anything if we're out of
     5301 *          registers, though.
     5302 */
     5303static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
     5304                                             uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
     5305{
     5306    //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
     5307    Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
     5308    Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
     5309
     5310    AssertFailed();
     5311
     5312    /*
     5313     * Try a freed register that's shadowing a guest register.
     5314     */
     5315    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
     5316    if (fRegs)
     5317    {
     5318        //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
     5319
     5320#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
     5321        /*
      5322         * When we have liveness information, we use it to kick out all shadowed
      5323         * guest registers that will not be needed any more in this TB.  If we're
     5324         * lucky, this may prevent us from ending up here again.
     5325         *
     5326         * Note! We must consider the previous entry here so we don't free
     5327         *       anything that the current threaded function requires (current
     5328         *       entry is produced by the next threaded function).
     5329         */
     5330        uint32_t const idxCurCall = pReNative->idxCurCall;
     5331        if (idxCurCall > 0)
     5332        {
     5333            PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
     5334
     5335# ifndef IEMLIVENESS_EXTENDED_LAYOUT
     5336            /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
     5337            AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
     5338            uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
     5339#else
     5340            /* Construct a mask of the registers not in the read or write state.
      5341               Note! We could skip writes, if they aren't from us, as this is just
     5342                     a hack to prevent trashing registers that have just been written
     5343                     or will be written when we retire the current instruction. */
     5344            uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
     5345                                 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
     5346                                 & IEMLIVENESSBIT_MASK;
     5347#endif
     5348            /* Merge EFLAGS. */
     5349            uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3);   /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
     5350            fTmp &= fTmp >> 2;                                  /*         CF3,Other3 = AF2,PF2 & CF2,Other2  */
     5351            fTmp &= fTmp >> 1;                                  /*             Other4 = CF3 & Other3 */
     5352            fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
     5353            fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
     5354
     5355            /* If it matches any shadowed registers. */
     5356            if (pReNative->Core.bmGstRegShadows & fToFreeMask)
     5357            {
     5358                STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
     5359                iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
     5360                Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
     5361
     5362                /* See if we've got any unshadowed registers we can return now. */
     5363                uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
     5364                if (fUnshadowedRegs)
     5365                {
     5366                    STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
     5367                    return (fPreferVolatile
     5368                            ? ASMBitFirstSetU32(fUnshadowedRegs)
     5369                            : ASMBitLastSetU32(  fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
     5370                                               ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
     5371                         - 1;
     5372                }
     5373            }
     5374        }
     5375#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
     5376
     5377        unsigned const idxReg = (fPreferVolatile
     5378                                 ? ASMBitFirstSetU32(fRegs)
     5379                                 : ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
     5380                                                    ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
     5381                              - 1;
     5382
     5383        Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
     5384        Assert(   (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
     5385               == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
     5386        Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
     5387        Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
     5388
     5389        pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
     5390        pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
     5391        pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
     5392        return idxReg;
     5393    }
     5394
     5395    /*
     5396     * Try free up a variable that's in a register.
     5397     *
      5398     * We do two rounds here, first evacuating variables that don't need to be
      5399     * saved on the stack, then in the second round moving things to the stack.
     5400     */
     5401    //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
     5402    AssertReleaseFailed(); /** @todo */
     5403#if 0
     5404    for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
     5405    {
     5406        uint32_t fVars = pReNative->Core.bmSimdVars;
     5407        while (fVars)
     5408        {
     5409            uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
     5410            uint8_t const  idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
     5411            if (   idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
     5412                && (RT_BIT_32(idxReg) & fRegMask)
     5413                && (  iLoop == 0
     5414                    ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
     5415                    : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
     5416                && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
     5417            {
     5418                Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
     5419                Assert(   (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
     5420                       == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
     5421                Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
     5422                Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
     5423                       == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
     5424
     5425                if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
     5426                {
     5427                    uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
     5428                    *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
     5429                }
     5430
     5431                pReNative->Core.aSimdVars[idxVar].idxReg    = UINT8_MAX;
     5432                pReNative->Core.bmHstSimdRegs              &= ~RT_BIT_32(idxReg);
     5433
     5434                pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
     5435                pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
     5436                pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
     5437                return idxReg;
     5438            }
     5439            fVars &= ~RT_BIT_32(idxVar);
     5440        }
     5441    }
     5442#else
     5443    RT_NOREF(poff);
     5444#endif
     5445
     5446    return UINT8_MAX;
     5447}
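
The register picked above follows a small idiom that is easy to miss: with fPreferVolatile the lowest set bit (a call-volatile register) is taken, otherwise the highest set bit outside the volatile mask, falling back to any free register if that intersection is empty. Below is a standalone sketch (not VBox code); the helpers mimic the 1-based ASMBitFirstSetU32/ASMBitLastSetU32 convention where 0 means no bit is set.

    #include <stdint.h>
    #include <stdio.h>

    static unsigned bitFirstSetU32(uint32_t f) { for (unsigned i = 0;  i < 32; i++) if (f & (1U << i))       return i + 1; return 0; }
    static unsigned bitLastSetU32(uint32_t f)  { for (unsigned i = 32; i > 0;  i--) if (f & (1U << (i - 1))) return i;     return 0; }

    /* Mirrors the selection expression above; assumes fRegs is non-zero. */
    static unsigned pickReg(uint32_t fRegs, uint32_t fVolatileMask, int fPreferVolatile)
    {
        return (fPreferVolatile
                ? bitFirstSetU32(fRegs)
                : bitLastSetU32(fRegs & ~fVolatileMask ? fRegs & ~fVolatileMask : fRegs))
             - 1;
    }

    int main(void)
    {
        uint32_t const fFree     = 0x000000f0;  /* v4..v7 free          */
        uint32_t const fVolatile = 0x0000003f;  /* v0..v5 call-volatile */
        printf("prefer volatile     -> v%u\n", pickReg(fFree, fVolatile, 1)); /* v4 */
        printf("prefer non-volatile -> v%u\n", pickReg(fFree, fVolatile, 0)); /* v7 */
        return 0;
    }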
     5448
     5449
     5450/**
     5451 * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest
     5452 * SIMD register @a enmGstSimdReg.
     5453 *
     5454 * ASSUMES that caller has made sure @a enmGstSimdReg is not associated with any
     5455 * host register before calling.
     5456 */
     5457DECL_FORCE_INLINE(void)
     5458iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
     5459{
     5460    Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)));
     5461    Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
     5462    Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End);
     5463
     5464    pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg]       = idxHstSimdReg;
     5465    pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
     5466    pReNative->Core.bmGstSimdRegShadows                        |= RT_BIT_64(enmGstSimdReg);
     5467    pReNative->Core.bmHstSimdRegsWithGstShadow                 |= RT_BIT_32(idxHstSimdReg);
     5468#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     5469    iemNativeDbgInfoAddNativeOffset(pReNative, off);
     5470    iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg);
     5471#else
     5472    RT_NOREF(off);
     5473#endif
     5474}
     5475
     5476
     5477/**
     5478 * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom
     5479 * to @a idxSimdRegTo.
     5480 */
     5481DECL_FORCE_INLINE(void)
     5482iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo,
     5483                                            IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
     5484{
     5485    Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
     5486    Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom);
     5487    Assert(      (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)
     5488              == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows
     5489           && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
     5490    Assert(   (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows)
     5491           == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows);
     5492    Assert(   RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom))
     5493           == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows));
     5494    Assert(   pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded
     5495           == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded);
     5496
     5497
     5498    uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg);
     5499    pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows  = fGstRegShadowsFrom;
     5500    if (!fGstRegShadowsFrom)
     5501    {
     5502        pReNative->Core.bmHstSimdRegsWithGstShadow               &= ~RT_BIT_32(idxSimdRegFrom);
     5503        pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded    = kIemNativeGstSimdRegLdStSz_Invalid;
     5504    }
     5505    pReNative->Core.bmHstSimdRegsWithGstShadow                |= RT_BIT_32(idxSimdRegTo);
     5506    pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
     5507    pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg]       = idxSimdRegTo;
     5508#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     5509    iemNativeDbgInfoAddNativeOffset(pReNative, off);
     5510    iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom);
     5511#else
     5512    RT_NOREF(off);
     5513#endif
     5514}
     5515
     5516
     5517/**
     5518 * Clear any guest register shadow claims from @a idxHstSimdReg.
     5519 *
     5520 * The register does not need to be shadowing any guest registers.
     5521 */
     5522DECL_FORCE_INLINE(void)
     5523iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off)
     5524{
     5525    Assert(      (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
     5526              == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows
     5527           && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
     5528    Assert(   RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg))
     5529           == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
     5530    Assert(   !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128)
     5531           && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128));
     5532
     5533#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     5534    uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
     5535    if (fGstRegs)
     5536    {
     5537        Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End));
     5538        iemNativeDbgInfoAddNativeOffset(pReNative, off);
     5539        while (fGstRegs)
     5540        {
     5541            unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
     5542            fGstRegs &= ~RT_BIT_64(iGstReg);
     5543            iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg);
     5544        }
     5545    }
     5546#else
     5547    RT_NOREF(off);
     5548#endif
     5549
     5550    pReNative->Core.bmHstSimdRegsWithGstShadow        &= ~RT_BIT_32(idxHstSimdReg);
     5551    pReNative->Core.bmGstSimdRegShadows               &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
     5552    pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
     5553    pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
     5554}
     5555
     5556
     5557/**
     5558 * Flushes a set of guest register shadow copies.
     5559 *
     5560 * This is usually done after calling a threaded function or a C-implementation
     5561 * of an instruction.
     5562 *
     5563 * @param   pReNative       The native recompile state.
     5564 * @param   fGstSimdRegs    Set of guest SIMD registers to flush.
     5565 */
     5566DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
     5567{
     5568    /*
     5569     * Reduce the mask by what's currently shadowed
     5570     */
     5571    uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
     5572    fGstSimdRegs &= bmGstSimdRegShadows;
     5573    if (fGstSimdRegs)
     5574    {
     5575        uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
     5576        Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
     5577        pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
     5578        if (bmGstSimdRegShadowsNew)
     5579        {
     5580            /*
     5581             * Partial.
     5582             */
     5583            do
     5584            {
     5585                unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
     5586                uint8_t const  idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
     5587                Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
     5588                Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
     5589                Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
     5590                Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
     5591
     5592                uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
     5593                fGstSimdRegs &= ~fInThisHstReg;
     5594                uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
     5595                pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
     5596                if (!fGstRegShadowsNew)
     5597                {
     5598                    pReNative->Core.bmHstSimdRegsWithGstShadow        &= ~RT_BIT_32(idxHstReg);
     5599                    pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded  = kIemNativeGstSimdRegLdStSz_Invalid;
     5600                }
     5601            } while (fGstSimdRegs != 0);
     5602        }
     5603        else
     5604        {
     5605            /*
     5606             * Clear all.
     5607             */
     5608            do
     5609            {
     5610                unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
     5611                uint8_t const  idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
     5612                Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
     5613                Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
     5614                Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
     5615                Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
     5616
     5617                fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
     5618                pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
     5619                pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
     5620            } while (fGstSimdRegs != 0);
     5621            pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
     5622        }
     5623    }
     5624}
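
As a usage illustration (a hedged sketch, not code from this changeset): before handing control to a C instruction helper that may modify guest ymm0 and ymm1, a recompiler call site would drop those shadow associations so stale host copies are not reused afterwards. The fragment assumes the surrounding recompiler context (pReNative) and the IEMNATIVEGSTSIMDREG_SIMD() index macro used elsewhere in this file.

    /* Hypothetical call site, shown for illustration only. */
    iemNativeSimdRegFlushGuestShadows(pReNative,
                                        RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(0))   /* ymm0 */
                                      | RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(1))); /* ymm1 */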
     5625
     5626
     5627/**
     5628 * Allocates a temporary host SIMD register.
     5629 *
     5630 * This may emit code to save register content onto the stack in order to free
     5631 * up a register.
     5632 *
     5633 * @returns The host register number; throws VBox status code on failure,
     5634 *          so no need to check the return value.
     5635 * @param   pReNative       The native recompile state.
     5636 * @param   poff            Pointer to the variable with the code buffer position.
      5637 *                          This will be updated if we need to move a variable from
     5638 *                          register to stack in order to satisfy the request.
     5639 * @param   fPreferVolatile Whether to prefer volatile over non-volatile
     5640 *                          registers (@c true, default) or the other way around
     5641 *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
     5642 */
     5643DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
     5644{
     5645    /*
     5646     * Try find a completely unused register, preferably a call-volatile one.
     5647     */
     5648    uint8_t  idxSimdReg;
     5649    uint32_t fRegs = ~pReNative->Core.bmHstRegs
     5650                   & ~pReNative->Core.bmHstRegsWithGstShadow
     5651                   & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
     5652    if (fRegs)
     5653    {
     5654        if (fPreferVolatile)
     5655            idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
     5656                                                    ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
     5657        else
     5658            idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
     5659                                                    ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
     5660        Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
     5661        Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
     5662        Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
     5663    }
     5664    else
     5665    {
     5666        idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
     5667        AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
     5668        Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
     5669    }
     5670
     5671    Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
     5672    return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
     5673}
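
A hedged usage sketch (not part of this changeset) of the allocate/free pairing, assuming the recompiler context and the helpers defined in this file; the emitted SIMD work itself is elided.

    /* Hypothetical emitter fragment. */
    uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
    /* ... emit instructions that use host SIMD register idxSimdTmp ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);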
     5674
     5675
     5676/**
      5677 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
      5678 * registers.
     5679 *
     5680 * @returns The host register number; throws VBox status code on failure,
     5681 *          so no need to check the return value.
     5682 * @param   pReNative       The native recompile state.
     5683 * @param   poff            Pointer to the variable with the code buffer position.
      5684 *                          This will be updated if we need to move a variable from
     5685 *                          register to stack in order to satisfy the request.
     5686 * @param   fRegMask        Mask of acceptable registers.
     5687 * @param   fPreferVolatile Whether to prefer volatile over non-volatile
     5688 *                          registers (@c true, default) or the other way around
     5689 *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
     5690 */
     5691DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
     5692                                                      bool fPreferVolatile /*= true*/)
     5693{
     5694    Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
     5695    Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
     5696
     5697    /*
     5698     * Try find a completely unused register, preferably a call-volatile one.
     5699     */
     5700    uint8_t  idxSimdReg;
     5701    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
     5702                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
     5703                   & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
     5704                   & fRegMask;
     5705    if (fRegs)
     5706    {
     5707        if (fPreferVolatile)
     5708            idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
     5709                                                    ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
     5710        else
     5711            idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
     5712                                                    ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
     5713        Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
     5714        Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
     5715        Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
     5716    }
     5717    else
     5718    {
     5719        idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
     5720        AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
     5721        Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
     5722    }
     5723
     5724    Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
     5725    return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
     5726}
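
A hedged sketch (not part of this changeset) of when the masked variant is useful: requesting a register that is neither fixed nor call-volatile, so its content survives a subsequent helper call. The mask macros are the ones used elsewhere in this file.

    /* Hypothetical fragment; the helper call and the rest of the emitter are elided. */
    uint8_t const idxSimdSaved = iemNativeSimdRegAllocTmpEx(pReNative, &off,
                                                              IEMNATIVE_HST_SIMD_REG_MASK
                                                            & ~IEMNATIVE_SIMD_REG_FIXED_MASK
                                                            & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK,
                                                            false /*fPreferVolatile*/);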
     5727
     5728
     5729static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
     5730                                                            uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
     5731{
     5732    /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
     5733    if (   pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == enmLoadSzDst
     5734        || pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
     5735    {
     5736# ifdef RT_ARCH_ARM64
     5737        /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
     5738        Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
     5739# endif
     5740
     5741        switch (enmLoadSzDst)
     5742        {
     5743            case kIemNativeGstSimdRegLdStSz_256:
     5744                off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
     5745            case kIemNativeGstSimdRegLdStSz_Low128:
     5746                off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
     5747            case kIemNativeGstSimdRegLdStSz_High128:
     5748                off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
     5749            default:
     5750                AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
     5751        }
     5752
     5753        pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded = enmLoadSzDst;
     5754        return off;
     5755    }
     5756    else
     5757    {
     5758        /* Complicated stuff where the source is currently missing something, later. */
     5759        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
     5760    }
     5761
     5762    return off;
     5763}
     5764
     5765
     5766/**
     5767 * Allocates a temporary host SIMD register for keeping a guest
     5768 * SIMD register value.
     5769 *
     5770 * Since we may already have a register holding the guest register value,
     5771 * code will be emitted to do the loading if that's not the case. Code may also
      5772 * be emitted if we have to free up a register to satisfy the request.
     5773 *
     5774 * @returns The host register number; throws VBox status code on failure, so no
     5775 *          need to check the return value.
     5776 * @param   pReNative       The native recompile state.
     5777 * @param   poff            Pointer to the variable with the code buffer
      5778 *                          position. This will be updated if we need to move a
     5779 *                          variable from register to stack in order to satisfy
     5780 *                          the request.
      5781 * @param   enmGstSimdReg   The guest SIMD register that is to be used.
     5782 * @param   enmIntendedUse  How the caller will be using the host register.
     5783 * @param   fNoVolatileRegs Set if no volatile register allowed, clear if any
     5784 *                          register is okay (default).  The ASSUMPTION here is
     5785 *                          that the caller has already flushed all volatile
     5786 *                          registers, so this is only applied if we allocate a
     5787 *                          new register.
     5788 * @sa      iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
     5789 */
     5790DECL_HIDDEN_THROW(uint8_t)
     5791iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
     5792                                        IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
     5793                                        bool fNoVolatileRegs /*= false*/)
     5794{
     5795    Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
     5796#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
     5797    AssertMsg(   pReNative->idxCurCall == 0
     5798              || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
     5799                  ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
     5800                  : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
     5801                  ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
     5802                  : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(  iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
     5803              ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
     5804#endif
     5805#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
     5806    static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
     5807#endif
     5808    uint32_t const fRegMask = !fNoVolatileRegs
     5809                            ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
     5810                            : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
     5811
     5812    /*
     5813     * First check if the guest register value is already in a host register.
     5814     */
     5815    if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
     5816    {
     5817        uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
     5818        Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
     5819        Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
     5820        Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
     5821
     5822        /* It's not supposed to be allocated... */
     5823        if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
     5824        {
     5825            /*
     5826             * If the register will trash the guest shadow copy, try find a
     5827             * completely unused register we can use instead.  If that fails,
     5828             * we need to disassociate the host reg from the guest reg.
     5829             */
     5830            /** @todo would be nice to know if preserving the register is in any way helpful. */
     5831            /* If the purpose is calculations, try duplicate the register value as
     5832               we'll be clobbering the shadow. */
     5833            if (   enmIntendedUse == kIemNativeGstRegUse_Calculation
     5834                && (  ~pReNative->Core.bmHstSimdRegs
     5835                    & ~pReNative->Core.bmHstSimdRegsWithGstShadow
     5836                    & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
     5837            {
     5838                uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
     5839
     5840                *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
     5841
     5842                Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
     5843                       g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
     5844                       g_apszIemNativeHstSimdRegNames[idxRegNew]));
     5845                idxSimdReg = idxRegNew;
     5846            }
     5847            /* If the current register matches the restrictions, go ahead and allocate
     5848               it for the caller. */
     5849            else if (fRegMask & RT_BIT_32(idxSimdReg))
     5850            {
     5851                pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
     5852                pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
     5853                if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
     5854                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
     5855                           g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
     5856                else
     5857                {
     5858                    iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
     5859                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
     5860                           g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
     5861                }
     5862            }
     5863            /* Otherwise, allocate a register that satisfies the caller and transfer
     5864               the shadowing if compatible with the intended use.  (This basically
     5865               means the call wants a non-volatile register (RSP push/pop scenario).) */
     5866            else
     5867            {
     5868                Assert(fNoVolatileRegs);
     5869                uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
     5870                                                                    !fNoVolatileRegs
     5871                                                                 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
     5872                *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
     5873                if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
     5874                {
     5875                    iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
      5876                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
     5877                           g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
     5878                           g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
     5879                }
     5880                else
     5881                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
     5882                           g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
     5883                           g_apszIemNativeHstSimdRegNames[idxRegNew]));
     5884                idxSimdReg = idxRegNew;
     5885            }
     5886        }
     5887        else
     5888        {
     5889            /*
     5890             * Oops. Shadowed guest register already allocated!
     5891             *
     5892             * Allocate a new register, copy the value and, if updating, the
     5893             * guest shadow copy assignment to the new register.
     5894             */
     5895            AssertMsg(   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
     5896                      && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
     5897                      ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
     5898                       idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
     5899
     5900            /** @todo share register for readonly access. */
     5901            uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
     5902                                                                 enmIntendedUse == kIemNativeGstRegUse_Calculation);
     5903
     5904            if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
     5905                *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
     5906            else
     5907            {
     5908                /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
     5909                pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
     5910            }
     5911
     5912            if (   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
     5913                && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
     5914                Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
     5915                       g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
     5916                       g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
     5917            else
     5918            {
     5919                iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
     5920                Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
     5921                       g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
     5922                       g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
     5923            }
     5924            idxSimdReg = idxRegNew;
     5925        }
     5926        Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
     5927
     5928#ifdef VBOX_STRICT
     5929        /* Strict builds: Check that the value is correct. */
     5930        *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
     5931#endif
     5932
     5933        return idxSimdReg;
     5934    }
     5935
     5936    /*
      5937     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
     5938     */
     5939    uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
     5940
     5941    if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
     5942        *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
     5943    else
     5944    {
     5945        /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
     5946        pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
     5947    }
     5948
     5949    if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
     5950        iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
     5951
     5952    Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
     5953           g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
     5954
     5955    return idxRegNew;
     5956}
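
A hedged usage sketch (not part of this changeset) of how an SSE emitter might drive this allocator for a two-operand instruction: the source guest register is fetched read-only, while the destination is requested for a full write so no load is emitted for it. Dirty-state bookkeeping and the actual instruction emission are omitted; the fragment assumes the recompiler context and the IEMNATIVEGSTSIMDREG_SIMD() macro from this file.

    /* Hypothetical emitter fragment. */
    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(1),
                                                                          kIemNativeGstSimdRegLdStSz_Low128,
                                                                          kIemNativeGstRegUse_ReadOnly);
    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(0),
                                                                          kIemNativeGstSimdRegLdStSz_Low128,
                                                                          kIemNativeGstRegUse_ForFullWrite);
    /* ... emit the SIMD operation that writes idxSimdRegDst using idxSimdRegSrc ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);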
     5957
     5958
     5959/**
      5960 * Emits code to flush a pending write of the given SIMD register, if any; this also flushes the guest to host SIMD register association.
     5961 *
      5962 * @returns New code buffer offset.
     5963 * @param   pReNative       The native recompile state.
     5964 * @param   off             Current code buffer position.
     5965 * @param   idxGstSimdReg   The guest SIMD register to flush.
     5966 */
     5967static uint32_t iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGstSimdReg)
     5968{
     5969    uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
     5970
     5971    Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
     5972           g_aGstSimdShadowInfo[idxGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
     5973           IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg),
     5974           IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg)));
     5975
     5976#ifdef RT_ARCH_AMD64
     5977# error "Port me"
     5978#elif defined(RT_ARCH_ARM64)
     5979    /* ASSUMING there are two consecutive host registers to store the potential 256-bit guest register. */
     5980    Assert(!(idxHstSimdReg & 0x1));
     5981    if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg))
     5982    {
     5983        Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
     5984               || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
     5985        off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm);
     5986    }
     5987
     5988    if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg))
     5989    {
     5990        Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
     5991               || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
     5992        off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg + 1, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
     5993    }
     5994#endif
     5995
     5996    IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, idxGstSimdReg);
     5997    return off;
     5998}
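
A standalone model (not VBox code) of the dirty tracking this flush relies on: each guest SIMD register carries separate dirty bits for its low and high 128-bit halves, so only the halves that were actually modified are written back to the guest context.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t bmDirtyLo128 = 0, bmDirtyHi128 = 0;
        unsigned const idxGstReg = 3;                 /* pretend this is ymm3 */

        bmDirtyLo128 |= UINT64_C(1) << idxGstReg;     /* an SSE op touched only the low half (xmm3) */

        if (bmDirtyLo128 & (UINT64_C(1) << idxGstReg))
            printf("store low 128 bits of ymm%u back to the guest context\n", idxGstReg);
        if (bmDirtyHi128 & (UINT64_C(1) << idxGstReg))
            printf("store high 128 bits of ymm%u back to the guest context\n", idxGstReg);

        /* Clear both dirty bits for this register, analogous to IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY. */
        bmDirtyLo128 &= ~(UINT64_C(1) << idxGstReg);
        bmDirtyHi128 &= ~(UINT64_C(1) << idxGstReg);
        return 0;
    }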
     5999
     6000#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
     6001
     6002
     6003
     6004/*********************************************************************************************************************************
     6005*   Code emitters for flushing pending guest register writes and sanity checks                                                   *
     6006*********************************************************************************************************************************/
     6007
    51366008/**
    51376009 * Flushes delayed write of a specific guest register.
     
    51496021    /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
    51506022#endif
     6023
     6024#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     6025    if (   enmClass == kIemNativeGstRegRef_XReg
     6026        && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
     6027    {
     6028        off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxReg);
     6029        /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now, that the referenced register doesn't change). */
     6030        uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
     6031
     6032        iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
     6033        iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
     6034    }
     6035#endif
    51516036    RT_NOREF(pReNative, enmClass, idxReg);
    51526037    return off;
     
    51636048 * RIP updates, since these are the most common ones.
    51646049 */
    5165 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/)
     6050DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/,
     6051                                                           bool fFlushShadows /*= true*/)
    51666052{
    51676053#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
     
    51706056#else
    51716057    RT_NOREF(pReNative, fGstShwExcept);
     6058#endif
     6059
     6060#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     6061    /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
     6062    for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
     6063    {
      6064        Assert(   (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
      6065               || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
     6066
     6067        if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
     6068            off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxGstSimdReg);
     6069
     6070        if (   fFlushShadows
     6071            && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
     6072        {
     6073            uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
     6074
     6075            iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
     6076            iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
     6077        }
     6078    }
     6079#else
     6080    RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
    51726081#endif
    51736082
     
    52776186}
    52786187
     6188
     6189#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     6190/**
     6191 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
     6192 *
     6193 * @returns New code buffer offset on success, UINT32_MAX on failure.
     6194 * @param   pReNative       The recompiler state.
     6195 * @param   off             The current code buffer position.
     6196 * @param   idxHstSimdReg   The host register to load the guest register value into.
     6197 * @param   enmGstSimdReg   The guest register to load.
     6198 * @param   enmLoadSz       The load size of the register.
     6199 *
      6200 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
     6201 *       that is something the caller needs to do if applicable.
     6202 */
     6203DECL_HIDDEN_THROW(uint32_t)
     6204iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
     6205                                             IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
     6206{
     6207    Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
     6208
     6209    pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
     6210    switch (enmLoadSz)
     6211    {
     6212        case kIemNativeGstSimdRegLdStSz_256:
     6213            return iemNativeEmitSimdLoadVecRegFromVCpuU256(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm,
     6214                                                           g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
     6215        case kIemNativeGstSimdRegLdStSz_Low128:
     6216            return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
     6217        case kIemNativeGstSimdRegLdStSz_High128:
     6218            return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
     6219        default:
     6220            AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
     6221    }
     6222}
     6223#endif
    52796224
    52806225#ifdef VBOX_STRICT
     
    54316376    return off;
    54326377}
     6378
     6379
     6380# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     6381/**
     6382 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
     6383 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
     6384 * instruction if that's not the case.
     6385 *
     6386 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
     6387 *       Trashes EFLAGS on AMD64.
     6388 */
     6389static uint32_t
     6390iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
     6391                                    IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
     6392{
     6393#  ifdef RT_ARCH_AMD64
     6394#   error "Port me!"
     6395#  elif defined(RT_ARCH_ARM64)
     6396    /* mov vectmp0, [gstreg] */
     6397    off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
     6398
     6399    if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
     6400    {
     6401        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
     6402        /* eor vectmp0, vectmp0, idxSimdReg */
     6403        pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
      6404        /* cnt vectmp0.16b, vectmp0.16b */
     6405        pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
     6406        /* umov tmp0, vectmp0.D[0] */
     6407        pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
     6408                                                    0 /*idxElem*/, kArmv8InstrUmovSz_U64);
     6409        /* cbz tmp0, +1 */
     6410        pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
      6411        /* brk #0x1000+enmGstSimdReg */
     6412        pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
     6413        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     6414    }
     6415
     6416    if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
     6417    {
     6418        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
      6419        /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg + 1 */
      6420        pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
      6421        /* cnt (vectmp0 + 1).16b, (vectmp0 + 1).16b */
     6422        pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
     6423        /* umov tmp0, (vectmp0 + 1).D[0] */
     6424        pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
     6425                                                    0 /*idxElem*/, kArmv8InstrUmovSz_U64);
     6426        /* cbz tmp0, +1 */
     6427        pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
      6428        /* brk #0x1000+enmGstSimdReg */
     6429        pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
     6430        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     6431    }
     6432
     6433#  else
     6434#   error "Port me!"
     6435#  endif
     6436    return off;
     6437}
     6438# endif
    54336439#endif /* VBOX_STRICT */
    54346440
     
    70868092     * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
    70878093     */
    7088     off = iemNativeRegFlushPendingWrites(pReNative, off);
     8094    off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
    70898095
    70908096#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
     
    71478153     * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
    71488154     */
    7149     off = iemNativeRegFlushPendingWrites(pReNative, off);
     8155    off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
    71508156
    71518157#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
     
    1097011976    off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
    1097111977
    10972     /** @todo r=aeichner This needs to be done as soon as we shadow SSE registers in host registers, needs
    10973      *                   figuring out the semantics on how this is tracked.
    10974      *                   For now this is safe though as the reference will directly operate on the CPUMCTX
    10975      *                   structure so the value can't get out of sync.
    10976      */
    10977 #if 0
     11978#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    1097811979    /* If it's not a const reference we need to flush the shadow copy of the register now. */
    1097911980    if (!fConst)
    10980         iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_XREG(iXReg)));
     11981        iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
    1098111982#else
    1098211983    RT_NOREF(fConst);
     
    1405615057
    1405715058
     15059#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     15060/*********************************************************************************************************************************
     15061*   Emitters for SSE/AVX specific operations.                                                                                    *
     15062*********************************************************************************************************************************/
     15063
     15064#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
     15065    off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
     15066
      15067/** Emits code for IEM_MC_COPY_XREG_U128. */
     15068DECL_INLINE_THROW(uint32_t)
     15069iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
     15070{
     15071    /* Allocate destination and source register. */
     15072    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
     15073                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
     15074    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
     15075                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
     15076
     15077    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
     15078    IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
     15079    /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
     15080
     15081    /* Free but don't flush the source and destination register. */
     15082    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
     15083    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
     15084
     15085    return off;
     15086}
     15087#endif
     15088
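
The same allocation pattern extends naturally to full 256-bit copies. A hypothetical sketch (not part of this changeset; the function name is illustrative) using the 256-bit load size and marking both halves dirty:

    /* Hypothetical sketch of a 256-bit register copy emitter built on the same APIs. */
    DECL_INLINE_THROW(uint32_t)
    iemNativeEmitSimdCopyYregU256Sketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
    {
        /* Allocate the destination for a full write and the source read-only, 256 bits each. */
        uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                              kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
        uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
                                                                              kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);

        /* Copy both 128-bit halves (adjacent register pairs on ARM64). */
        off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);

        /* Both halves changed; writeback is deferred to iemNativeSimdRegFlushPendingWrite. */
        IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
        IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);

        iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
        iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
        return off;
    }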
     15089
    1405815090/*********************************************************************************************************************************
    1405915091*   The native code generator functions for each MC block.                                                                       *
     
    1416915201        ENTRY(cpum.GstCtx.eflags),
    1417015202        ENTRY(cpum.GstCtx.uRipInhibitInt),
     15203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     15204        ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
     15205        ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
     15206        ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
     15207        ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
     15208        ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
     15209        ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
     15210        ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
     15211        ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
     15212        ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
     15213        ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
     15214        ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
     15215        ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
     15216        ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
     15217        ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
     15218        ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
     15219        ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
     15220        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
     15221        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
     15222        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
     15223        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
     15224        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
     15225        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
     15226        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
     15227        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
     15228        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
     15229        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
     15230        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
     15231        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
     15232        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
     15233        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
     15234        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
     15235        ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
     15236#endif
    1417115237#undef ENTRY
    1417215238    };
     
    1450215568                            continue;
    1450315569                        }
     15570
     15571#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     15572                        case kIemTbDbgEntryType_GuestSimdRegShadowing:
     15573                        {
     15574                            PCIEMTBDBGENTRY const pEntry    = &pDbgInfo->aEntries[iDbgEntry];
     15575                            const char * const    pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
     15576                            if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
     15577                                pHlp->pfnPrintf(pHlp, "  Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
     15578                                                g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
     15579                            else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
     15580                                pHlp->pfnPrintf(pHlp, "  Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
     15581                                                g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
     15582                            else
     15583                                pHlp->pfnPrintf(pHlp, "  Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
     15584                                                g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
     15585                                                g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
     15586                            continue;
     15587                        }
     15588#endif
    1450415589
    1450515590                        case kIemTbDbgEntryType_Label:
     
    1526216347        Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
    1526316348        iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
    15264 # ifdef DEBUG_bird
     16349# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
    1526516350        RTLogFlush(NULL);
    1526616351# endif
  • trunk/src/VBox/VMM/include/IEMInternal.h

    r103700 r103728  
    9292# define IEMNATIVE_WITH_DELAYED_PC_UPDATING
    9393#endif
     94
     95/** @def IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     96 * Enables the SIMD register allocator @bugref{10614}.  */
     97//# define IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
      98/** Enables access even to the callee saved registers. */
     99//# define IEMNATIVE_WITH_SIMD_REG_ACCESS_ALL_REGISTERS
    94100
    95101/** @def VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
     
    964970    /** Info about a host register shadowing a guest register. */
    965971    kIemTbDbgEntryType_GuestRegShadowing,
     972#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     973    /** Info about a host SIMD register shadowing a guest SIMD register. */
     974    kIemTbDbgEntryType_GuestSimdRegShadowing,
     975#endif
    966976#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    967977    /** Info about a delayed RIP update. */
     
    10391049        uint32_t    idxHstRegPrev : 8;
    10401050    } GuestRegShadowing;
     1051
     1052#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1053    struct
     1054    {
     1055        /* kIemTbDbgEntryType_GuestSimdRegShadowing. */
     1056        uint32_t    uType             : 4;
     1057        uint32_t    uUnused           : 4;
     1058        /** The guest register being shadowed (IEMNATIVEGSTSIMDREG). */
     1059        uint32_t    idxGstSimdReg     : 8;
     1060        /** The host new register number, UINT8_MAX if dropped. */
     1061        uint32_t    idxHstSimdReg     : 8;
     1062        /** The previous host register number, UINT8_MAX if new.   */
     1063        uint32_t    idxHstSimdRegPrev : 8;
     1064    } GuestSimdRegShadowing;
     1065#endif
    10411066
    10421067#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
  • trunk/src/VBox/VMM/include/IEMN8veRecompiler.h

    r103671 r103728  
    157157 * Mask GPRs with fixes assignments, either by us or dictated by the CPU/OS
    158158 * architecture. */
     159#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
      160/** @def IEMNATIVE_SIMD_REG_FIXED_MASK
      161 * Mask SIMD registers with fixed assignments, either by us or dictated by the CPU/OS
     162 * architecture. */
     163/** @def IEMNATIVE_SIMD_REG_FIXED_TMP0
     164 * Dedicated temporary SIMD register. */
     165#endif
    159166#if defined(RT_ARCH_AMD64) && !defined(DOXYGEN_RUNNING)
    160167# define IEMNATIVE_REG_FIXED_PVMCPU         X86_GREG_xBX
     
    164171                                            | RT_BIT_32(X86_GREG_xSP) \
    165172                                            | RT_BIT_32(X86_GREG_xBP) )
     173
     174# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     175#  if defined(IEMNATIVE_WITH_SIMD_REG_ACCESS_ALL_REGISTERS) || !defined(_MSC_VER)
     176#   define IEMNATIVE_SIMD_REG_FIXED_MASK   0
     177#  else
     178/** On Windows xmm6 through xmm15 are marked as callee saved. */
     179#   define IEMNATIVE_SIMD_REG_FIXED_MASK   (UINT32_C(0xffc0))
     180#  endif
     181# endif
    166182
    167183#elif defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
     
    186202                                             | IEMNATIVE_REG_FIXED_MASK_ADD)
    187203
     204# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
      205#  define IEMNATIVE_SIMD_REG_FIXED_TMP0    ARMV8_A64_REG_Q30
     206#  if defined(IEMNATIVE_WITH_SIMD_REG_ACCESS_ALL_REGISTERS)
     207#   define IEMNATIVE_SIMD_REG_FIXED_MASK   RT_BIT_32(ARMV8_A64_REG_Q30)
     208#  else
     209/** arm64 declares the low 64-bit of v8-v15 as callee saved. */
     210#   define IEMNATIVE_SIMD_REG_FIXED_MASK   (  UINT32_C(0xff00) \
     211                                            | RT_BIT_32(ARMV8_A64_REG_Q30))
     212#  endif
     213# endif
     214
    188215#else
    189216# error "port me"
     
    227254                                             | RT_BIT_32(X86_GREG_x10) \
    228255                                             | RT_BIT_32(X86_GREG_x11) )
     256#  ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     257/* xmm0 - xmm5 are marked as volatile. */
     258#   define IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK (UINT32_C(0x3f))
     259#  endif
     260
    229261# else
    230262#  define IEMNATIVE_CALL_ARG_GREG_COUNT     6
     
    250282                                             | RT_BIT_32(X86_GREG_x10) \
    251283                                             | RT_BIT_32(X86_GREG_x11) )
     284#  ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     285/* xmm0 - xmm15 are marked as volatile. */
     286#   define IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK (UINT32_C(0xffff))
     287#  endif
    252288# endif
    253289
     
    289325                                             | RT_BIT_32(ARMV8_A64_REG_X16) \
    290326                                             | RT_BIT_32(ARMV8_A64_REG_X17) )
     327# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
      328/* The low 64 bits of v8 - v15 are marked as callee saved but the rest is volatile,
     329 * so to simplify our life a bit we just mark everything as volatile. */
     330#  define IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK (UINT32_C(0xffffffff))
     331# endif
    291332
    292333#endif
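
Nothing in this changeset consumes IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK yet; the intent mirrors the GPR side, where shadow copies sitting in call-volatile registers have to be dealt with before calling out of the recompiled code. A rough, illustrative sketch of that idea (assumes pending writebacks have already been flushed):

    /* Illustrative only: drop guest shadows living in call-volatile SIMD registers
       before a helper call, as their contents will not survive the call. */
    uint32_t fHstSimdRegs = pReNative->Core.bmHstSimdRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
    while (fHstSimdRegs)
    {
        uint8_t const idxHstSimdReg = (uint8_t)(ASMBitFirstSetU32(fHstSimdRegs) - 1);
        fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
        iemNativeSimdRegFlushGuestShadows(pReNative, pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
    }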
     
    306347 * Mask corresponding to IEMNATIVE_HST_GREG_COUNT that can be applied to
    307348 * inverted register masks and such to get down to a correct set of regs. */
     349#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     350/** @def IEMNATIVE_HST_SIMD_REG_COUNT
     351 * Number of host SIMD registers we track. */
     352/** @def IEMNATIVE_HST_SIMD_REG_MASK
     353 * Mask corresponding to IEMNATIVE_HST_SIMD_REG_COUNT that can be applied to
     354 * inverted register masks and such to get down to a correct set of regs. */
     355#endif
    308356#ifdef RT_ARCH_AMD64
    309357# define IEMNATIVE_HST_GREG_COUNT           16
    310358# define IEMNATIVE_HST_GREG_MASK            UINT32_C(0xffff)
    311359
     360# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     361#  define IEMNATIVE_HST_SIMD_REG_COUNT      16
     362#  define IEMNATIVE_HST_SIMD_REG_MASK       UINT32_C(0xffff)
     363# endif
     364
    312365#elif defined(RT_ARCH_ARM64)
    313366# define IEMNATIVE_HST_GREG_COUNT           32
    314367# define IEMNATIVE_HST_GREG_MASK            UINT32_MAX
     368
     369# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     370#  define IEMNATIVE_HST_SIMD_REG_COUNT      32
     371#  define IEMNATIVE_HST_SIMD_REG_MASK       UINT32_MAX
     372# endif
     373
    315374#else
    316375# error "Port me!"
     
    735794/** @} */
    736795
     796
     797#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     798/**
     799 * Guest registers that can be shadowed in host SIMD registers.
     800 *
     801 * @todo r=aeichner Liveness tracking
     802 * @todo r=aeichner Given that we can only track xmm/ymm here does this actually make sense?
     803 */
     804typedef enum IEMNATIVEGSTSIMDREG : uint8_t
     805{
     806    kIemNativeGstSimdReg_SimdRegFirst  = 0,
     807    kIemNativeGstSimdReg_SimdRegLast   = kIemNativeGstSimdReg_SimdRegFirst + 15,
     808    kIemNativeGstSimdReg_End
     809} IEMNATIVEGSTSIMDREG;
     810
     811/** @name Helpers for converting register numbers to IEMNATIVEGSTSIMDREG values.
     812 * @{  */
     813#define IEMNATIVEGSTSIMDREG_SIMD(a_iSimdReg)   ((IEMNATIVEGSTSIMDREG)(kIemNativeGstSimdReg_SimdRegFirst + (a_iSimdReg)))
     814/** @} */
     815
     816/**
     817 * The Load/store size for a SIMD guest register.
     818 */
     819typedef enum IEMNATIVEGSTSIMDREGLDSTSZ : uint8_t
     820{
     821    /** Invalid size. */
     822    kIemNativeGstSimdRegLdStSz_Invalid = 0,
     823    /** Loads the low 128-bit of a guest SIMD register. */
     824    kIemNativeGstSimdRegLdStSz_Low128,
     825    /** Loads the high 128-bit of a guest SIMD register. */
     826    kIemNativeGstSimdRegLdStSz_High128,
     827    /** Loads the whole 256-bits of a guest SIMD register. */
     828    kIemNativeGstSimdRegLdStSz_256,
     829    /** End value. */
     830    kIemNativeGstSimdRegLdStSz_End
     831} IEMNATIVEGSTSIMDREGLDSTSZ;
     832#endif
     833
     834
    737835/**
    738836 * Intended use statement for iemNativeRegAllocTmpForGuestReg().
     
    9121010
    9131011
     1012#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1013/**
     1014 * Host SIMD register entry - this tracks a virtual 256-bit register split into two 128-bit
      1015 * halves.  On architectures where no 256-bit register is available, this entry tracks two
      1016 * adjacent 128-bit host registers.
     1017 *
     1018 * The actual allocation status is kept in IEMRECOMPILERSTATE::bmHstSimdRegs.
     1019 */
     1020typedef struct IEMNATIVEHSTSIMDREG
     1021{
     1022    /** Set of guest registers this one shadows.
     1023     *
     1024     * Using a bitmap here so we can designate the same host register as a copy
     1025     * for more than one guest register.  This is expected to be useful in
     1026     * situations where one value is copied to several registers in a sequence.
     1027     * If the mapping is 1:1, then we'd have to pick which side of a 'MOV SRC,DST'
     1028     * sequence we'd want to let this register follow to be a copy of and there
     1029     * will always be places where we'd be picking the wrong one.
     1030     */
     1031    uint64_t                  fGstRegShadows;
     1032    /** What is being kept in this register. */
     1033    IEMNATIVEWHAT             enmWhat;
      1034    /** Flags what is currently loaded: the low 128 bits, the high 128 bits or the complete 256 bits. */
     1035    IEMNATIVEGSTSIMDREGLDSTSZ enmLoaded;
     1036    /** Alignment padding. */
     1037    uint8_t                   abAlign[6];
     1038} IEMNATIVEHSTSIMDREG;
     1039#endif
     1040
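
Since enmLoaded can describe either half or the full 256 bits, an allocator hit on an already shadowed register may need to widen what is loaded. A small hypothetical helper illustrating the size-combining rule (not part of this changeset):

    /* Hypothetical sketch: combine the currently loaded size with a newly requested one;
       any mix of different halves degenerates to a full 256-bit load. */
    DECL_FORCE_INLINE(IEMNATIVEGSTSIMDREGLDSTSZ)
    iemNativeSimdRegCombineLoadSz(IEMNATIVEGSTSIMDREGLDSTSZ enmLoaded, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
    {
        if (enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid || enmLoaded == enmLoadSz)
            return enmLoadSz;
        return kIemNativeGstSimdRegLdStSz_256; /* Low128 + High128, or anything combined with 256. */
    }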
     1041
    9141042/**
    9151043 * Core state for the native recompiler, that is, things that needs careful
     
    9351063    uint64_t                    bmGstRegShadows;
    9361064
     1065#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1066    /** Allocation bitmap for aHstSimdRegs. */
     1067    uint32_t                    bmHstSimdRegs;
     1068
      1069    /** Bitmap marking which host SIMD registers contain guest SIMD register shadow copies.
     1070     * This is used during register allocation to try preserve copies.  */
     1071    uint32_t                    bmHstSimdRegsWithGstShadow;
      1072    /** Bitmap marking valid entries in aidxGstSimdRegShadows. */
     1073    uint64_t                    bmGstSimdRegShadows;
      1074    /** Bitmap marking whether the low 128 bits of the shadowed guest registers are dirty and need writeback. */
     1075    uint64_t                    bmGstSimdRegShadowDirtyLo128;
      1076    /** Bitmap marking whether the high 128 bits of the shadowed guest registers are dirty and need writeback. */
     1077    uint64_t                    bmGstSimdRegShadowDirtyHi128;
     1078#endif
     1079
    9371080    union
    9381081    {
     
    9531096     * there are no duplicate copies or ambiguities like that). */
    9541097    uint8_t                     aidxGstRegShadows[kIemNativeGstReg_End];
     1098#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
      1099    /** Maps a guest SIMD register to a host SIMD register (indexed by IEMNATIVEGSTSIMDREG).
     1100     * Entries are only valid if the corresponding bit in bmGstSimdRegShadows is set.
      1101     * (A shadow copy of a guest register can only be held in one host register,
     1102     * there are no duplicate copies or ambiguities like that). */
     1103    uint8_t                     aidxGstSimdRegShadows[kIemNativeGstSimdReg_End];
     1104#endif
    9551105
    9561106    /** Host register allocation tracking. */
    9571107    IEMNATIVEHSTREG             aHstRegs[IEMNATIVE_HST_GREG_COUNT];
     1108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1109    /** Host SIMD register allocation tracking. */
     1110    IEMNATIVEHSTSIMDREG         aHstSimdRegs[IEMNATIVE_HST_SIMD_REG_COUNT];
     1111#endif
    9581112
    9591113    /** Variables and arguments. */
     
    9821136# define IEMNATIVE_VAR_IDX_UNPACK(a_idxVar) (a_idxVar)
    9831137# define IEMNATIVE_VAR_IDX_PACK(a_idxVar)   (a_idxVar)
     1138#endif
     1139
     1140
     1141#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1142/** Clear the dirty state of the given guest SIMD register. */
     1143# define IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(a_pReNative, a_iSimdReg) \
     1144    do { \
     1145        (a_pReNative)->Core.bmGstSimdRegShadowDirtyLo128 &= ~RT_BIT_64(a_iSimdReg); \
     1146        (a_pReNative)->Core.bmGstSimdRegShadowDirtyHi128 &= ~RT_BIT_64(a_iSimdReg); \
     1147    } while (0)
     1148
     1149/** Returns whether the low 128-bits of the given guest SIMD register are dirty. */
     1150# define IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(a_pReNative, a_iSimdReg) \
     1151    RT_BOOL((a_pReNative)->Core.bmGstSimdRegShadowDirtyLo128 & RT_BIT_64(a_iSimdReg))
     1152/** Returns whether the high 128-bits of the given guest SIMD register are dirty. */
     1153# define IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(a_pReNative, a_iSimdReg) \
     1154    RT_BOOL((a_pReNative)->Core.bmGstSimdRegShadowDirtyHi128 & RT_BIT_64(a_iSimdReg))
     1155/** Returns whether the given guest SIMD register is dirty. */
     1156# define IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(a_pReNative, a_iSimdReg) \
     1157    RT_BOOL(((a_pReNative)->Core.bmGstSimdRegShadowDirtyLo128 | (a_pReNative)->Core.bmGstSimdRegShadowDirtyHi128) & RT_BIT_64(a_iSimdReg))
     1158
     1159/** Set the low 128-bits of the given guest SIMD register to the dirty state. */
     1160# define IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(a_pReNative, a_iSimdReg) \
     1161    ((a_pReNative)->Core.bmGstSimdRegShadowDirtyLo128 |= RT_BIT_64(a_iSimdReg))
     1162/** Set the high 128-bits of the given guest SIMD register to the dirty state. */
     1163# define IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(a_pReNative, a_iSimdReg) \
     1164    ((a_pReNative)->Core.bmGstSimdRegShadowDirtyHi128 |= RT_BIT_64(a_iSimdReg))
    9841165#endif
    9851166
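
The dirty state is tracked per 128-bit half on purpose: an emitter that only touches one half sets just that bit, and iemNativeSimdRegFlushPendingWrite then stores back only the corresponding offXmm or offYmm part. A minimal sketch, assuming only the upper half of the shadow was modified:

    /* Only the upper 128 bits changed; the flush will write back just the YMM-high part. */
    IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, idxGstSimdReg);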
     
    12381419DECLHIDDEN(void)            iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT;
    12391420DECLHIDDEN(void)            iemNativeRegFreeAndFlushMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegMask) RT_NOEXCEPT;
    1240 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExept = 0);
      1421DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept = 0, bool fFlushShadows = true);
    12411422DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs,
    12421423                                                                  uint32_t fKeepVars = 0);
     
    12641445DECL_HIDDEN_THROW(uint32_t) iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    12651446                                                      PCIEMTHRDEDCALLENTRY pCallEntry);
     1447
     1448#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1449DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile = true);
     1450DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
     1451                                                      bool fPreferVolatile = true);
     1452DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
     1453                                                                   IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse = kIemNativeGstRegUse_ReadOnly,
     1454                                                                   bool fNoVolatileRegs = false);
     1455DECL_HIDDEN_THROW(uint32_t) iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     1456                                                                         uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
     1457                                                                         IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
     1458#endif
    12661459
    12671460extern DECL_HIDDEN_DATA(const char * const) g_apszIemNativeHstRegNames[];
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r103646 r103728  
    68226822
    68236823
     6824#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     6825
     6826/**
     6827 * Emits a 128-bit vector register store to a VCpu value.
     6828 */
     6829DECL_FORCE_INLINE_THROW(uint32_t)
     6830iemNativeEmitSimdStoreVecRegToVCpuU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
     6831{
     6832#ifdef RT_ARCH_AMD64
     6833    AssertReleaseFailed();
     6834#elif defined(RT_ARCH_ARM64)
     6835    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
     6836
     6837#else
     6838# error "port me"
     6839#endif
     6840    return off;
     6841}
     6842
     6843
     6844/**
      6845 * Emits a 128-bit vector register store to a VCpu value.
     6846 */
     6847DECL_INLINE_THROW(uint32_t)
     6848iemNativeEmitSimdStoreVecRegToVCpuU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
     6849{
     6850#ifdef RT_ARCH_AMD64
     6851    off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iVecReg, offVCpu);
     6852#elif defined(RT_ARCH_ARM64)
     6853    off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
     6854#else
     6855# error "port me"
     6856#endif
     6857    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     6858    return off;
     6859}
     6860
     6861
     6862/**
     6863 * Emits a 128-bit vector register load of a VCpu value.
     6864 */
     6865DECL_FORCE_INLINE_THROW(uint32_t)
     6866iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
     6867{
     6868#ifdef RT_ARCH_AMD64
     6869    AssertReleaseFailed();
     6870#elif defined(RT_ARCH_ARM64)
     6871    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
     6872
     6873#else
     6874# error "port me"
     6875#endif
     6876    return off;
     6877}
     6878
     6879
     6880/**
     6881 * Emits a 128-bit vector register load of a VCpu value.
     6882 */
     6883DECL_INLINE_THROW(uint32_t)
     6884iemNativeEmitSimdLoadVecRegFromVCpuU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
     6885{
     6886#ifdef RT_ARCH_AMD64
     6887    off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iVecReg, offVCpu);
     6888#elif defined(RT_ARCH_ARM64)
     6889    off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
     6890#else
     6891# error "port me"
     6892#endif
     6893    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     6894    return off;
     6895}
     6896
     6897
     6898/**
     6899 * Emits a 256-bit vector register store to a VCpu value.
     6900 */
     6901DECL_FORCE_INLINE_THROW(uint32_t)
     6902iemNativeEmitSimdStoreVecRegToVCpuU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpuLow, uint32_t offVCpuHigh)
     6903{
     6904#ifdef RT_ARCH_AMD64
     6905    AssertReleaseFailed();
     6906#elif defined(RT_ARCH_ARM64)
     6907    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
     6908    Assert(!(iVecReg & 0x1));
     6909    off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, iVecReg,     offVCpuLow);
     6910    off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, iVecReg + 1, offVCpuHigh);
     6911#else
     6912# error "port me"
     6913#endif
     6914    return off;
     6915}
     6916
     6917
     6918/**
     6919 * Emits a 256-bit vector register load of a VCpu value.
     6920 */
     6921DECL_FORCE_INLINE_THROW(uint32_t)
     6922iemNativeEmitSimdLoadVecRegFromVCpuU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpuLow, uint32_t offVCpuHigh)
     6923{
     6924#ifdef RT_ARCH_AMD64
     6925    AssertReleaseFailed();
     6926#elif defined(RT_ARCH_ARM64)
     6927    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
     6928    Assert(!(iVecReg & 0x1));
     6929    off = iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, iVecReg,     offVCpuLow);
     6930    off = iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, iVecReg + 1, offVCpuHigh);
     6931#else
     6932# error "port me"
     6933#endif
     6934    return off;
     6935}
     6936
     6937
     6938/**
     6939 * Emits a vecdst = vecsrc load.
     6940 */
     6941DECL_FORCE_INLINE(uint32_t)
     6942iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
     6943{
     6944#ifdef RT_ARCH_AMD64
     6945    /* movdqu vecdst, vecsrc */
     6946    pCodeBuf[off++] = 0xf3;
     6947
     6948    if ((iVecRegDst | iVecRegSrc) >= 8)
     6949        pCodeBuf[off++] = iVecRegDst < 8  ? X86_OP_REX_B
     6950                        : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
     6951                        :                   X86_OP_REX_R;
     6952    pCodeBuf[off++] = 0x0f;
     6953    pCodeBuf[off++] = 0x6f;
     6954    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
     6955
     6956#elif defined(RT_ARCH_ARM64)
     6957    /* mov dst, src;   alias for: orr dst, src, src */
     6958    pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
     6959
     6960#else
     6961# error "port me"
     6962#endif
     6963    return off;
     6964}
     6965
     6966
     6967/**
      6968 * Emits a vecdst = vecsrc load, 128-bit.
     6969 */
     6970DECL_INLINE_THROW(uint32_t)
     6971iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
     6972{
     6973#ifdef RT_ARCH_AMD64
      6974    off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
     6975#elif defined(RT_ARCH_ARM64)
     6976    off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
     6977#else
     6978# error "port me"
     6979#endif
     6980    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     6981    return off;
     6982}
     6983
     6984
     6985/**
      6986 * Emits a vecdst = vecsrc load, 256-bit.
     6987 */
     6988DECL_INLINE_THROW(uint32_t)
     6989iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
     6990{
     6991#ifdef RT_ARCH_AMD64
     6992    AssertReleaseFailed();
     6993#elif defined(RT_ARCH_ARM64)
     6994    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
     6995    Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
     6996    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst,     iVecRegSrc    );
     6997    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
     6998#else
     6999# error "port me"
     7000#endif
     7001    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     7002    return off;
     7003}
     7004
     7005#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
     7006
    68247007/** @} */
    68257008