VirtualBox

Changeset 96064 in vbox


Ignore:
Timestamp:
Aug 5, 2022 3:56:29 PM (3 years ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
152863
Message:

VMM/IEM: Implement [v]pmaddubsw instructions, bugref:9898

Location:
trunk/src/VBox/VMM
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm

    r96054 r96064  
    36583658IEMIMPL_MEDIA_F2 phaddsw, 1
    36593659IEMIMPL_MEDIA_F2 phsubsw, 1
     3660IEMIMPL_MEDIA_F2 pmaddubsw, 1
    36603661
    36613662
     
    42034204IEMIMPL_MEDIA_OPT_F3 vphaddsw
    42044205IEMIMPL_MEDIA_OPT_F3 vphsubsw
     4206IEMIMPL_MEDIA_OPT_F3 vpmaddubsw
    42054207
    42064208
  • trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp

    r96054 r96064  
    1286612866
    1286712867/*
     12868 * PMADDUBSW / VPMADDUBSW
     12869 */
     12870IEM_DECL_IMPL_DEF(void, iemAImpl_pmaddubsw_u64_fallback,(PCX86FXSTATE pFpuState, uint64_t *puDst, uint64_t const *puSrc))
     12871{
     12872    RTUINT64U uSrc1 = { *puDst };
     12873    RTUINT64U uSrc2 = { *puSrc };
     12874    RTUINT64U uDst = { 0 }; /* Shut up MSVC. */
     12875
     12876    uDst.ai16[0] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[0] * uSrc2.ai8[0] + (uint16_t)uSrc1.au8[1] * uSrc2.ai8[1]);
     12877    uDst.ai16[1] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[2] * uSrc2.ai8[2] + (uint16_t)uSrc1.au8[3] * uSrc2.ai8[3]);
     12878    uDst.ai16[2] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[4] * uSrc2.ai8[4] + (uint16_t)uSrc1.au8[5] * uSrc2.ai8[5]);
     12879    uDst.ai16[3] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[6] * uSrc2.ai8[6] + (uint16_t)uSrc1.au8[7] * uSrc2.ai8[7]);
     12880    *puDst = uDst.u;
     12881    RT_NOREF(pFpuState);
     12882}
     12883
     12884
     12885IEM_DECL_IMPL_DEF(void, iemAImpl_pmaddubsw_u128_fallback,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
     12886{
     12887    RTUINT128U uSrc1 = *puDst;
     12888
     12889    puDst->ai16[0] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[ 0] * puSrc->ai8[ 0] + (uint16_t)uSrc1.au8[ 1] * puSrc->ai8[ 1]);
     12890    puDst->ai16[1] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[ 2] * puSrc->ai8[ 2] + (uint16_t)uSrc1.au8[ 3] * puSrc->ai8[ 3]);
     12891    puDst->ai16[2] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[ 4] * puSrc->ai8[ 4] + (uint16_t)uSrc1.au8[ 5] * puSrc->ai8[ 5]);
     12892    puDst->ai16[3] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[ 6] * puSrc->ai8[ 6] + (uint16_t)uSrc1.au8[ 7] * puSrc->ai8[ 7]);
     12893    puDst->ai16[4] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[ 8] * puSrc->ai8[ 8] + (uint16_t)uSrc1.au8[ 9] * puSrc->ai8[ 9]);
     12894    puDst->ai16[5] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[10] * puSrc->ai8[10] + (uint16_t)uSrc1.au8[11] * puSrc->ai8[11]);
     12895    puDst->ai16[6] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[12] * puSrc->ai8[12] + (uint16_t)uSrc1.au8[13] * puSrc->ai8[13]);
     12896    puDst->ai16[7] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)uSrc1.au8[14] * puSrc->ai8[14] + (uint16_t)uSrc1.au8[15] * puSrc->ai8[15]);
     12897    RT_NOREF(pFpuState);
     12898}
     12899
     12900
     12901IEM_DECL_IMPL_DEF(void, iemAImpl_vpmaddubsw_u128_fallback,(PRTUINT128U puDst, PCRTUINT128U puSrc1, PCRTUINT128U puSrc2))
     12902{
     12903    RTUINT128U uDst; /* puDst can be the same as one of the source operands. */
     12904
     12905    uDst.ai16[0] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[ 0] * puSrc2->ai8[ 0] + (uint16_t)puSrc1->au8[ 1] * puSrc2->ai8[ 1]);
     12906    uDst.ai16[1] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[ 2] * puSrc2->ai8[ 2] + (uint16_t)puSrc1->au8[ 3] * puSrc2->ai8[ 3]);
     12907    uDst.ai16[2] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[ 4] * puSrc2->ai8[ 4] + (uint16_t)puSrc1->au8[ 5] * puSrc2->ai8[ 5]);
     12908    uDst.ai16[3] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[ 6] * puSrc2->ai8[ 6] + (uint16_t)puSrc1->au8[ 7] * puSrc2->ai8[ 7]);
     12909    uDst.ai16[4] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[ 8] * puSrc2->ai8[ 8] + (uint16_t)puSrc1->au8[ 9] * puSrc2->ai8[ 9]);
     12910    uDst.ai16[5] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[10] * puSrc2->ai8[10] + (uint16_t)puSrc1->au8[11] * puSrc2->ai8[11]);
     12911    uDst.ai16[6] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[12] * puSrc2->ai8[12] + (uint16_t)puSrc1->au8[13] * puSrc2->ai8[13]);
     12912    uDst.ai16[7] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[14] * puSrc2->ai8[14] + (uint16_t)puSrc1->au8[15] * puSrc2->ai8[15]);
     12913
     12914    puDst->au64[0] = uDst.au64[0];
     12915    puDst->au64[1] = uDst.au64[1];
     12916}
     12917
     12918
     12919IEM_DECL_IMPL_DEF(void, iemAImpl_vpmaddubsw_u256_fallback,(PRTUINT256U puDst, PCRTUINT256U puSrc1, PCRTUINT256U puSrc2))
     12920{
     12921    RTUINT256U uDst; /* puDst can be the same as one of the source operands. */
     12922
     12923    uDst.ai16[ 0] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[ 0] * puSrc2->ai8[ 0] + (uint16_t)puSrc1->au8[ 1] * puSrc2->ai8[ 1]);
     12924    uDst.ai16[ 1] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[ 2] * puSrc2->ai8[ 2] + (uint16_t)puSrc1->au8[ 3] * puSrc2->ai8[ 3]);
     12925    uDst.ai16[ 2] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[ 4] * puSrc2->ai8[ 4] + (uint16_t)puSrc1->au8[ 5] * puSrc2->ai8[ 5]);
     12926    uDst.ai16[ 3] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[ 6] * puSrc2->ai8[ 6] + (uint16_t)puSrc1->au8[ 7] * puSrc2->ai8[ 7]);
     12927    uDst.ai16[ 4] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[ 8] * puSrc2->ai8[ 8] + (uint16_t)puSrc1->au8[ 9] * puSrc2->ai8[ 9]);
     12928    uDst.ai16[ 5] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[10] * puSrc2->ai8[10] + (uint16_t)puSrc1->au8[11] * puSrc2->ai8[11]);
     12929    uDst.ai16[ 6] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[12] * puSrc2->ai8[12] + (uint16_t)puSrc1->au8[13] * puSrc2->ai8[13]);
     12930    uDst.ai16[ 7] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[14] * puSrc2->ai8[14] + (uint16_t)puSrc1->au8[15] * puSrc2->ai8[15]);
     12931    uDst.ai16[ 8] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[16] * puSrc2->ai8[16] + (uint16_t)puSrc1->au8[17] * puSrc2->ai8[17]);
     12932    uDst.ai16[ 9] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[18] * puSrc2->ai8[18] + (uint16_t)puSrc1->au8[19] * puSrc2->ai8[19]);
     12933    uDst.ai16[10] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[20] * puSrc2->ai8[20] + (uint16_t)puSrc1->au8[21] * puSrc2->ai8[21]);
     12934    uDst.ai16[11] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[22] * puSrc2->ai8[22] + (uint16_t)puSrc1->au8[23] * puSrc2->ai8[23]);
     12935    uDst.ai16[12] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[24] * puSrc2->ai8[24] + (uint16_t)puSrc1->au8[25] * puSrc2->ai8[25]);
     12936    uDst.ai16[13] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[26] * puSrc2->ai8[26] + (uint16_t)puSrc1->au8[27] * puSrc2->ai8[27]);
     12937    uDst.ai16[14] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[28] * puSrc2->ai8[28] + (uint16_t)puSrc1->au8[29] * puSrc2->ai8[29]);
     12938    uDst.ai16[15] = SATURATED_SIGNED_DWORD_TO_SIGNED_WORD((uint16_t)puSrc1->au8[30] * puSrc2->ai8[30] + (uint16_t)puSrc1->au8[31] * puSrc2->ai8[31]);
     12939
     12940    puDst->au64[0] = uDst.au64[0];
     12941    puDst->au64[1] = uDst.au64[1];
     12942    puDst->au64[2] = uDst.au64[2];
     12943    puDst->au64[3] = uDst.au64[3];
     12944}
     12945
     12946
     12947/*
    1286812948 * CRC32 (SEE 4.2).
    1286912949 */
  • trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsThree0f38.cpp.h

    r96054 r96064  
    338338
    339339/** Opcode      0x0f 0x38 0x04. */
    340 FNIEMOP_STUB(iemOp_pmaddubsw_Pq_Qq);
     340FNIEMOP_DEF(iemOp_pmaddubsw_Pq_Qq)
     341{
     342    IEMOP_MNEMONIC2(RM, PMADDUBSW, pmaddubsw, Pq, Qq, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZES);
     343    return FNIEMOP_CALL_2(iemOpCommonMmx_FullFull_To_Full_Ex,
     344                          IEM_SELECT_HOST_OR_FALLBACK(fSsse3, iemAImpl_pmaddubsw_u64, &iemAImpl_pmaddubsw_u64_fallback),
     345                          IEM_GET_GUEST_CPU_FEATURES(pVCpu)->fSsse3);
     346}
     347
     348
    341349/** Opcode 0x66 0x0f 0x38 0x04. */
    342 FNIEMOP_STUB(iemOp_pmaddubsw_Vx_Wx);
     350FNIEMOP_DEF(iemOp_pmaddubsw_Vx_Wx)
     351{
     352    IEMOP_MNEMONIC2(RM, PMADDUBSW, pmaddubsw, Vx, Wx, DISOPTYPE_HARMLESS, IEMOPHINT_IGNORES_OP_SIZES);
     353    return FNIEMOP_CALL_1(iemOpCommonSsse3_FullFull_To_Full,
     354                          IEM_SELECT_HOST_OR_FALLBACK(fSsse3, iemAImpl_pmaddubsw_u128, iemAImpl_pmaddubsw_u128_fallback));
     355
     356}
    343357
    344358
  • trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsVexMap2.cpp.h

    r96054 r96064  
    7373
    7474/*  Opcode VEX.0F38 0x04 - invalid. */
     75
     76
    7577/** Opcode VEX.66.0F38 0x04. */
    76 FNIEMOP_STUB(iemOp_vpmaddubsw_Vx_Hx_Wx);
     78FNIEMOP_DEF(iemOp_vpmaddubsw_Vx_Hx_Wx)
     79{
     80    IEMOP_MNEMONIC3(VEX_RVM, VPMADDUBSW, vpmaddubsw, Vx, Hx, Wx, DISOPTYPE_HARMLESS, 0);
     81    IEMOPMEDIAOPTF3_INIT_VARS(vpmaddubsw);
     82    return FNIEMOP_CALL_1(iemOpCommonAvxAvx2_Vx_Hx_Wx_Opt, IEM_SELECT_HOST_OR_FALLBACK(fAvx2, &s_Host, &s_Fallback));
     83}
     84
     85
    7786/* Opcode VEX.0F38 0x05 - invalid. */
    7887
  • trunk/src/VBox/VMM/include/IEMInternal.h

    r96054 r96064  
    17931793FNIEMAIMPLMEDIAF2U64     iemAImpl_phaddsw_u64, iemAImpl_phaddsw_u64_fallback;
    17941794FNIEMAIMPLMEDIAF2U64     iemAImpl_phsubsw_u64, iemAImpl_phsubsw_u64_fallback;
     1795FNIEMAIMPLMEDIAF2U64     iemAImpl_pmaddubsw_u64, iemAImpl_pmaddubsw_u64_fallback;
    17951796FNIEMAIMPLMEDIAOPTF2U64  iemAImpl_psllw_u64, iemAImpl_psrlw_u64, iemAImpl_psraw_u64;
    17961797FNIEMAIMPLMEDIAOPTF2U64  iemAImpl_pslld_u64, iemAImpl_psrld_u64, iemAImpl_psrad_u64;
     
    18431844FNIEMAIMPLMEDIAF2U128    iemAImpl_phaddsw_u128, iemAImpl_phaddsw_u128_fallback;
    18441845FNIEMAIMPLMEDIAF2U128    iemAImpl_phsubsw_u128, iemAImpl_phsubsw_u128_fallback;
     1846FNIEMAIMPLMEDIAF2U128    iemAImpl_pmaddubsw_u128, iemAImpl_pmaddubsw_u128_fallback;
    18451847FNIEMAIMPLMEDIAOPTF2U128 iemAImpl_packsswb_u128, iemAImpl_packuswb_u128;
    18461848FNIEMAIMPLMEDIAOPTF2U128 iemAImpl_packssdw_u128, iemAImpl_packusdw_u128;
     
    19031905FNIEMAIMPLMEDIAOPTF3U128 iemAImpl_vphaddsw_u128,   iemAImpl_vphaddsw_u128_fallback;
    19041906FNIEMAIMPLMEDIAOPTF3U128 iemAImpl_vphsubsw_u128,   iemAImpl_vphsubsw_u128_fallback;
     1907FNIEMAIMPLMEDIAOPTF3U128 iemAImpl_vpmaddubsw_u128, iemAImpl_vpmaddubsw_u128_fallback;
    19051908
    19061909FNIEMAIMPLMEDIAOPTF2U128 iemAImpl_vpabsb_u128,     iemAImpl_vpabsb_u128_fallback;
     
    19601963FNIEMAIMPLMEDIAOPTF3U256 iemAImpl_vphaddsw_u256,   iemAImpl_vphaddsw_u256_fallback;
    19611964FNIEMAIMPLMEDIAOPTF3U256 iemAImpl_vphsubsw_u256,   iemAImpl_vphsubsw_u256_fallback;
     1965FNIEMAIMPLMEDIAOPTF3U256 iemAImpl_vpmaddubsw_u256, iemAImpl_vpmaddubsw_u256_fallback;
    19621966
    19631967FNIEMAIMPLMEDIAOPTF2U256 iemAImpl_vpabsb_u256,     iemAImpl_vpabsb_u256_fallback;
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette