VirtualBox

Changeset 103942 in vbox for trunk/src/VBox/VMM/include


Ignore:
Timestamp:
Mar 20, 2024 10:22:38 AM (11 months ago)
Author:
vboxsync
Message:

VMM/IEM: Implement memory stores from SIMD registers and implement native emitters for IEM_MC_STORE_MEM_U128_ALIGN_SSE()/IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(), bugref:10614

Location:
trunk/src/VBox/VMM/include
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/include/IEMN8veRecompiler.h

    r103934 r103942  
    16551655IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value));
    16561656IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value));
     1657#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1658IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src));
     1659IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src));
     1660#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
    16571661IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value));
    16581662IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value));
     
    16831687IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value));
    16841688IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value));
     1689#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1690IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src));
     1691IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src));
     1692#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
    16851693IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value));
    16861694IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value));
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r103916 r103942  
    25022502# endif
    25032503
     2504
     2505/**
     2506 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends: emits one vector register load or store (selected by @a enmOperation) addressed via @a iGprBase + @a offDisp and returns the updated @a off.
     2507 */
     2508DECL_FORCE_INLINE_THROW(uint32_t)
     2509iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
     2510                             uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
     2511{
     2512    /*
     2513     * There are a couple of ldr/str variants that take an immediate offset, so
     2514     * try to use those if we can; otherwise we have to use a temporary register
     2515     * to help with the addressing.
     2516     */
     2517    if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
     2518    {
     2519        /* Displacement is non-negative, a multiple of cbData and below 4K * cbData: use the unsigned immediate offset form, scaled by cbData. */
     2520        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
     2521        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
     2522    }
     2523    else
     2524    {
     2525        /* The offset is negative, misaligned or too large, so we must load it
     2526           into a temporary register and use the register-indexed form. */
     2527        /** @todo reduce by offVCpu by >> 3 or >> 2? if it saves instructions? */
     2528        uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
     2529
     2530        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
     2531        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
     2532
     2533        iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
     2534    }
     2535    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     2536    return off;
     2537}
    25042538#endif /* RT_ARCH_ARM64 */
    25052539
     
    29372971
    29382972#elif defined(RT_ARCH_ARM64)
    2939     off = iemNativeEmitGprByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
     2973    off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
    29402974
    29412975#else
     
    29953029#elif defined(RT_ARCH_ARM64)
    29963030    Assert(!(iVecRegDst & 0x1));
    2997     off = iemNativeEmitGprByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
    2998                                     kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
    2999     off = iemNativeEmitGprByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
    3000                                     kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
     3031    off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
     3032                                       kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
     3033    off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
     3034                                       kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
    30013035
    30023036#else
     
    33193353    return off;
    33203354}
     3355
     3356
     3357#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     3358/**
     3359 * Emits a 128-bit vector register store via a GPR base address with a displacement.
     3360 *
     3361 * @note ARM64: Misaligned @a offDisp values and values not in the
     3362 *       -0x7ff8...0x7ff8 range (TODO confirm for 128-bit accesses) will require a
     3363 *       temporary register (@a iGprTmp); presumably asserts / throws if the caller
     3364 *       does not heed this.  AMD64 ignores @a iGprTmp (RT_NOREF).
     3365 */
     3366DECL_FORCE_INLINE_THROW(uint32_t)
     3367iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
     3368                                   int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
     3369{
     3370#ifdef RT_ARCH_AMD64
     3371    /* movdqu mem128, reg128 - despite its name, iVecRegDst is the register being stored. */
     3372    pCodeBuf[off++] = 0xf3;
     3373    if (iVecRegDst >= 8 || iGprBase >= 8) /* A REX prefix is emitted when either register index is 8 or above. */
     3374        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
     3375    pCodeBuf[off++] = 0x0f;
     3376    pCodeBuf[off++] = 0x7f;
     3377    off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
     3378    RT_NOREF(iGprTmp);
     3379
     3380#elif defined(RT_ARCH_ARM64)
     3381    off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
     3382                                         kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
     3383
     3384#else
     3385# error "port me"
     3386#endif
     3387    return off;
     3388}
     3389
     3390
     3391/**
     3392 * Emits a 128-bit vector register store via a GPR base address with a displacement; AMD64 ensures instruction buffer space and uses the Ex variant, ARM64 goes through iemNativeEmitVecRegByGprLdSt.
     3393 */
     3394DECL_INLINE_THROW(uint32_t)
     3395iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
     3396{
     3397#ifdef RT_ARCH_AMD64
     3398    off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
     3399    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     3400
     3401#elif defined(RT_ARCH_ARM64)
     3402    off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
     3403
     3404#else
     3405# error "port me"
     3406#endif
     3407    return off;
     3408}
     3409
     3410
     3411/**
     3412 * Emits a 256-bit vector register store via a GPR base address with a displacement.
     3413 *
     3414 * @note ARM64: Misaligned @a offDisp values and values not in the
     3415 *       -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
     3416 *       @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
     3417 *       does not heed this.
     3418 */
     3419DECL_FORCE_INLINE_THROW(uint32_t)
     3420iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
     3421                                    int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
     3422{
     3423#ifdef RT_ARCH_AMD64
     3424    /* vmovdqu reg256, mem256 */
     3425    AssertFailed();
     3426    pCodeBuf[off++] = X86_OP_VEX3;
     3427    pCodeBuf[off++] =   (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
     3428                      | X86_OP_VEX3_BYTE1_X
     3429                      | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
     3430                      | UINT8_C(0x01);
     3431    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
     3432    pCodeBuf[off++] = 0x7f;
     3433    off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
     3434    RT_NOREF(iGprTmp);
     3435
     3436#elif defined(RT_ARCH_ARM64)
     3437    Assert(!(iVecRegDst & 0x1));
     3438    off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
     3439                                         kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
     3440    off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
     3441                                         kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
     3442#else
     3443# error "port me"
     3444#endif
     3445    return off;
     3446}
     3447
     3448
     3449/**
     3450 * Emits a 256-bit vector register store via a GPR base address with a displacement.
     3451 */
     3452DECL_INLINE_THROW(uint32_t)
     3453iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
     3454{
     3455#ifdef RT_ARCH_AMD64
     3456    off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
     3457    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     3458
     3459#elif defined(RT_ARCH_ARM64)
     3460    Assert(!(iVecRegDst & 0x1)); /* The 256-bit value is stored as two 128-bit halves from iVecRegDst and iVecRegDst + 1. */
     3461    off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
     3462                                       kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
     3463    off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
     3464                                       kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
     3465
     3466#else
     3467# error "port me"
     3468#endif
     3469    return off;
     3470}
     3471#endif
    33213472
    33223473
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette