VirtualBox

Changeset 103750 in vbox


Timestamp:
Mar 10, 2024 8:12:55 PM
Author:
vboxsync
Message:

VMM/IEM: Implement some of the amd64 emitters for SSE/AVX, bugref:10614

Location:
trunk/src/VBox/VMM
Files:
3 edited

Legend:

      unchanged
  -   removed
  +   added
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r103739 → r103750

@@ -5745,8 +5745,11 @@
             case kIemNativeGstSimdRegLdStSz_256:
                 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
+                break;
             case kIemNativeGstSimdRegLdStSz_Low128:
                 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
+                break;
             case kIemNativeGstSimdRegLdStSz_High128:
                 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
+                break;
             default:
                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
@@ -5977,5 +5980,18 @@

 #ifdef RT_ARCH_AMD64
-# error "Port me"
+    if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg))
+    {
+        Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
+               || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
+        off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm);
+    }
+
+    if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg))
+    {
+        Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
+               || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
+        AssertReleaseFailed();
+        //off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
+    }
 #elif defined(RT_ARCH_ARM64)
     /* ASSUMING there are two consecutive host registers to store the potential 256-bit guest register. */

@@ -5991,5 +6007,5 @@
     {
         Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
-               || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
+               || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
         off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg + 1, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
     }
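The amd64 path above replaces the old "Port me" stub: it writes back only the
128-bit halves that are actually dirty, storing the low half to the guest XMM
area (offXmm), while the high-half store (offYmm) is still stubbed out with
AssertReleaseFailed() until the 256-bit emitters land. A minimal sketch of the
per-half dirty-tracking idea, with hypothetical names and bit layout (the real
macros live in IEMN8veRecompiler.h and may differ):

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical sketch only: one dirty bit per 128-bit half of each guest
       SIMD register, low halves in bits 0..31, high halves in bits 32..63. */
    static inline bool isDirtyLoU128(uint64_t bmDirty, uint8_t idxGstSimdReg)
    {
        return (bmDirty & (UINT64_C(1) << idxGstSimdReg)) != 0;
    }

    static inline bool isDirtyHiU128(uint64_t bmDirty, uint8_t idxGstSimdReg)
    {
        return (bmDirty & (UINT64_C(1) << (idxGstSimdReg + 32))) != 0;
    }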
     
@@ -6394,5 +6410,74 @@
 {
 #  ifdef RT_ARCH_AMD64
-#   error "Port me!"
+    Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
+
+    /* movdqa vectmp0, idxSimdReg */
+    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
+
+    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
+
+    /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
+    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
+    if (idxSimdReg >= 8)
+        pbCodeBuf[off++] = X86_OP_REX_R;
+    pbCodeBuf[off++] = 0x0f;
+    pbCodeBuf[off++] = 0x38;
+    pbCodeBuf[off++] = 0x29;
+    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
+
+    /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
+    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
+    pbCodeBuf[off++] =   X86_OP_REX_W
+                       | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
+                       | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
+    pbCodeBuf[off++] = 0x0f;
+    pbCodeBuf[off++] = 0x3a;
+    pbCodeBuf[off++] = 0x16;
+    pbCodeBuf[off++] = 0xeb;
+    pbCodeBuf[off++] = 0x00;
+
+    /* test tmp0, 0xffffffff. */
+    pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
+    pbCodeBuf[off++] = 0xf7;
+    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
+    pbCodeBuf[off++] = 0xff;
+    pbCodeBuf[off++] = 0xff;
+    pbCodeBuf[off++] = 0xff;
+    pbCodeBuf[off++] = 0xff;
+
+    /* je/jz +1 */
+    pbCodeBuf[off++] = 0x74;
+    pbCodeBuf[off++] = 0x01;
+
+    /* int3 */
+    pbCodeBuf[off++] = 0xcc;
+
+    /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
+    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
+    pbCodeBuf[off++] =   X86_OP_REX_W
+                       | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
+                       | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
+    pbCodeBuf[off++] = 0x0f;
+    pbCodeBuf[off++] = 0x3a;
+    pbCodeBuf[off++] = 0x16;
+    pbCodeBuf[off++] = 0xeb;
+    pbCodeBuf[off++] = 0x01;
+
+    /* test tmp0, 0xffffffff. */
+    pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
+    pbCodeBuf[off++] = 0xf7;
+    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
+    pbCodeBuf[off++] = 0xff;
+    pbCodeBuf[off++] = 0xff;
+    pbCodeBuf[off++] = 0xff;
+    pbCodeBuf[off++] = 0xff;
+
+    /* je/jz +1 */
+    pbCodeBuf[off++] = 0x74;
+    pbCodeBuf[off++] = 0x01;
+
+    /* int3 */
+    pbCodeBuf[off++] = 0xcc;
+
 #  elif defined(RT_ARCH_ARM64)
     /* mov vectmp0, [gstreg] */
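The bytes added above decode to an inline self-check: pcmpeqq (66 0f 38 29)
compares the host shadow register against the guest copy in the VMCPU, pextrq
(66 REX.W 0f 3a 16) pulls each 64-bit lane into tmp0, and a test/je pair either
skips or hits an int3 so any divergence stops in the debugger. A rough C
intrinsics sketch of the check this sequence is meant to perform at runtime
(hypothetical helper, not part of the changeset; requires SSE4.1, e.g. compile
with -msse4.1):

    #include <immintrin.h>

    /* Illustrative only: trap when the 128-bit shadow copy diverges from the
       guest value.  pcmpeqq yields all-ones per matching 64-bit lane. */
    static void checkSimdShadowU128(__m128i xmmShadow, const __m128i *pGuestXmm)
    {
        __m128i const uEq = _mm_cmpeq_epi64(xmmShadow, _mm_load_si128(pGuestXmm));
        if (   _mm_extract_epi64(uEq, 0) != -1LL   /* pextrq #0 + test */
            || _mm_extract_epi64(uEq, 1) != -1LL)  /* pextrq #1 + test */
            __builtin_trap();                      /* stands in for int3 */
    }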
     
@@ -6413,5 +6498,4 @@
         /* brk #0x1000+enmGstReg */
         pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
-        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     }


@@ -6430,5 +6514,4 @@
         /* brk #0x1000+enmGstReg */
         pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
-        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     }


@@ -6436,4 +6519,6 @@
 #   error "Port me!"
 #  endif
+
+    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     return off;
 }

@@ -8169,8 +8254,4 @@
     uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
     uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
-
-#if 1
-    off = iemNativeEmitBrk(pReNative, off, 0x4223); /** @todo Test this when AVX gets actually available. */
-#endif

     /** @todo r=aeichner Optimize this more later to have less compares and branches,
  • trunk/src/VBox/VMM/include/IEMN8veRecompiler.h

    r103741 → r103750

@@ -173,4 +173,5 @@

 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+#  define IEMNATIVE_SIMD_REG_FIXED_TMP0    5 /* xmm5/ymm5 */
 #  if defined(IEMNATIVE_WITH_SIMD_REG_ACCESS_ALL_REGISTERS) || !defined(_MSC_VER)
 #   define IEMNATIVE_SIMD_REG_FIXED_MASK   0
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r103728 → r103750

@@ -6831,5 +6831,11 @@
 {
 #ifdef RT_ARCH_AMD64
-    AssertReleaseFailed();
+    /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
+    pCodeBuf[off++] = 0x66;
+    if (iVecReg >= 8)
+        pCodeBuf[off++] = X86_OP_REX_R;
+    pCodeBuf[off++] = 0x0f;
+    pCodeBuf[off++] = 0x7f;
+    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
 #elif defined(RT_ARCH_ARM64)
     off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
     
@@ -6849,5 +6855,5 @@
 {
 #ifdef RT_ARCH_AMD64
-    off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iVecReg, offVCpu);
+    off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
 #elif defined(RT_ARCH_ARM64)
     off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);

@@ -6867,5 +6873,11 @@
 {
 #ifdef RT_ARCH_AMD64
-    AssertReleaseFailed();
+    /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
+    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
+    if (iVecReg >= 8)
+        pCodeBuf[off++] = X86_OP_REX_R;
+    pCodeBuf[off++] = 0x0f;
+    pCodeBuf[off++] = 0x6f;
+    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
 #elif defined(RT_ARCH_ARM64)
     off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));

@@ -6885,5 +6897,5 @@
 {
 #ifdef RT_ARCH_AMD64
-    off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iVecReg, offVCpu);
+    off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
 #elif defined(RT_ARCH_ARM64)
     off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
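With real instruction bytes replacing the AssertReleaseFailed() stubs, the
iemNativeInstrBufEnsure reservations grow from 3 to 9 bytes: the 66 prefix, an
optional REX.R, two opcode bytes, and the ModRM plus displacement appended by
iemNativeEmitGprByVCpuDisp can total nine. A toy sketch of the store encoding
under the assumption that pVCpu sits in rbx and a 32-bit displacement is used
(illustrative only; the real displacement logic lives in
iemNativeEmitGprByVCpuDisp):

    #include <stdint.h>

    /* Hypothetical stand-alone encoder for 'movdqa [rbx + disp32], xmmN'. */
    static unsigned encodeMovdqaStore(uint8_t *pb, uint8_t iVecReg, int32_t disp)
    {
        unsigned off = 0;
        pb[off++] = 0x66;                       /* operand-size prefix */
        if (iVecReg >= 8)
            pb[off++] = 0x44;                   /* REX.R for xmm8..xmm15 */
        pb[off++] = 0x0f;
        pb[off++] = 0x7f;                       /* movdqa xmm/m128, xmm */
        pb[off++] = (uint8_t)(0x80 | ((iVecReg & 7) << 3) | 3); /* ModRM: [rbx]+disp32 */
        pb[off++] = (uint8_t)disp;              /* little-endian disp32 */
        pb[off++] = (uint8_t)(disp >> 8);
        pb[off++] = (uint8_t)(disp >> 16);
        pb[off++] = (uint8_t)(disp >> 24);
        return off;                             /* at most 9 bytes with REX */
    }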
     
@@ -6896,4 +6908,5 @@


+#if 0 /* unused */
 /**
  * Emits a 256-bit vector register store to a VCpu value.

@@ -6914,4 +6927,5 @@
     return off;
 }
+#endif


@@ -6924,4 +6938,5 @@
 #ifdef RT_ARCH_AMD64
     AssertReleaseFailed();
+    RT_NOREF(pReNative, off, iVecReg, offVCpuLow, offVCpuHigh);
 #elif defined(RT_ARCH_ARM64)
     /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
     
@@ -6972,5 +6987,5 @@
 {
 #ifdef RT_ARCH_AMD64
-    off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iVecRegDst, iVecRegSrc);
+    off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
 #elif defined(RT_ARCH_ARM64)
     off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
     
@@ -6984,5 +6999,5 @@

 /**
- * Emits a gprdst = gprsrc load, 256-bit.
+ * Emits a vecdst = vecsrc load, 256-bit.
  */
 DECL_INLINE_THROW(uint32_t)

@@ -6990,5 +7005,23 @@
 {
 #ifdef RT_ARCH_AMD64
-    AssertReleaseFailed();
+    /* vmovdqa ymm, ymm */
+    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
+    if (iVecRegDst >= 8 && iVecRegSrc >= 8)
+    {
+        pbCodeBuf[off++] = 0xc4;
+        pbCodeBuf[off++] = 0x41;
+        pbCodeBuf[off++] = 0x7d;
+        pbCodeBuf[off++] = 0x6f;
+        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
+    }
+    else
+    {
+        pbCodeBuf[off++] = 0xc5;                                               /* Two byte VEX prefix */
+        pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
+        pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
+        pbCodeBuf[off++] =   iVecRegSrc >= 8
+                           ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
+                           : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
+    }
 #elif defined(RT_ARCH_ARM64)
     /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
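The branching above exists because the two-byte VEX prefix (c5) carries only
the inverted R bit, which extends the ModRM reg field but not the rm field.
When just the source register is ymm8+, the emitter therefore switches to the
store form of the opcode (7f instead of 6f) and swaps the ModRM operands; only
when both registers are ymm8+ does it need the three-byte c4 prefix. A small
stand-alone sketch of the same selection logic (hypothetical helper; byte
values mirror the emitter above):

    #include <stdint.h>

    /* Encode 'vmovdqa ymmDst, ymmSrc', choosing between 2- and 3-byte VEX. */
    static unsigned vexMovdqaYmmYmm(uint8_t *pb, uint8_t iDst, uint8_t iSrc)
    {
        unsigned off = 0;
        if (iDst >= 8 && iSrc >= 8)
        {
            pb[off++] = 0xc4;   /* 3-byte VEX */
            pb[off++] = 0x41;   /* ~R=0, ~X=1, ~B=0, map 0f */
            pb[off++] = 0x7d;   /* W=0, vvvv=1111, L=1 (256-bit), pp=66 */
            pb[off++] = 0x6f;   /* vmovdqa reg, rm */
            pb[off++] = (uint8_t)(0xc0 | ((iDst & 7) << 3) | (iSrc & 7));
        }
        else
        {
            pb[off++] = 0xc5;                                   /* 2-byte VEX */
            pb[off++] = (iDst >= 8 || iSrc >= 8) ? 0x7d : 0xfd; /* ~R, vvvv, L, pp */
            pb[off++] = iSrc >= 8 ? 0x7f : 0x6f;   /* store form puts src in reg */
            pb[off++] = iSrc >= 8
                      ? (uint8_t)(0xc0 | ((iSrc & 7) << 3) | (iDst & 7))
                      : (uint8_t)(0xc0 | ((iDst & 7) << 3) | (iSrc & 7));
        }
        return off;   /* 4 or 5 bytes, hence the 5-byte buffer reservation */
    }

For example, vexMovdqaYmmYmm(pb, 9, 2) yields c5 7d 6f ca, i.e. vmovdqa ymm9,
ymm2 via the two-byte prefix with R extending the destination.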