VirtualBox

Changeset 96094 in vbox


Ignore:
Timestamp:
Aug 7, 2022 6:46:04 PM (3 years ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
152893
Message:

VMM/IEM: Implement [v]psadbw instructions, bugref:9898

Location:
trunk/src/VBox/VMM
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm

    r96087 r96094  
    37163716IEMIMPL_MEDIA_OPT_F2 pavgb,    1
    37173717IEMIMPL_MEDIA_OPT_F2 pavgw,    1
     3718IEMIMPL_MEDIA_OPT_F2 psadbw,   1
    37183719
    37193720
     
    42074208IEMIMPL_MEDIA_OPT_F3 vpmaddubsw
    42084209IEMIMPL_MEDIA_OPT_F3 vpmulhrsw
     4210IEMIMPL_MEDIA_OPT_F3 vpsadbw
    42094211
    42104212
  • trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp

    r96087 r96094  
    1302913029
    1303013030/*
     13031 * PSADBW / VPSADBW
     13032 */
     13033#ifdef IEM_WITHOUT_ASSEMBLY
     13034
     13035IEM_DECL_IMPL_DEF(void, iemAImpl_psadbw_u64,(uint64_t *puDst, uint64_t const *puSrc))
     13036{
     13037    RTUINT64U uSrc1 = { *puDst };
     13038    RTUINT64U uSrc2 = { *puSrc };
     13039    RTUINT64U uDst;
     13040    uint16_t uSum = RT_ABS((int16_t)uSrc1.au8[0] - uSrc2.au8[0]);
     13041    uSum += RT_ABS((int16_t)uSrc1.au8[1] - uSrc2.au8[1]);
     13042    uSum += RT_ABS((int16_t)uSrc1.au8[2] - uSrc2.au8[2]);
     13043    uSum += RT_ABS((int16_t)uSrc1.au8[3] - uSrc2.au8[3]);
     13044    uSum += RT_ABS((int16_t)uSrc1.au8[4] - uSrc2.au8[4]);
     13045    uSum += RT_ABS((int16_t)uSrc1.au8[5] - uSrc2.au8[5]);
     13046    uSum += RT_ABS((int16_t)uSrc1.au8[6] - uSrc2.au8[6]);
     13047    uSum += RT_ABS((int16_t)uSrc1.au8[7] - uSrc2.au8[7]);
     13048
     13049    uDst.au64[0] = 0;
     13050    uDst.au16[0] = uSum;
     13051    *puDst = uDst.u;
     13052}
     13053
     13054
     13055IEM_DECL_IMPL_DEF(void, iemAImpl_psadbw_u128,(PRTUINT128U puDst, PCRTUINT128U puSrc))
     13056{
     13057    RTUINT128U uSrc1 = *puDst;
     13058
     13059    puDst->au64[0] = 0;
     13060    puDst->au64[1] = 0;
     13061
     13062    uint16_t uSum = RT_ABS((int16_t)uSrc1.ai8[0] - puSrc->ai8[0]);
     13063    uSum += RT_ABS((int16_t)uSrc1.au8[1] - puSrc->au8[1]);
     13064    uSum += RT_ABS((int16_t)uSrc1.au8[2] - puSrc->au8[2]);
     13065    uSum += RT_ABS((int16_t)uSrc1.au8[3] - puSrc->au8[3]);
     13066    uSum += RT_ABS((int16_t)uSrc1.au8[4] - puSrc->au8[4]);
     13067    uSum += RT_ABS((int16_t)uSrc1.au8[5] - puSrc->au8[5]);
     13068    uSum += RT_ABS((int16_t)uSrc1.au8[6] - puSrc->au8[6]);
     13069    uSum += RT_ABS((int16_t)uSrc1.au8[7] - puSrc->au8[7]);
     13070    puDst->au16[0] = uSum;
     13071
     13072    uSum  = RT_ABS((int16_t)uSrc1.au8[ 8] - puSrc->au8[ 8]);
     13073    uSum += RT_ABS((int16_t)uSrc1.au8[ 9] - puSrc->au8[ 9]);
     13074    uSum += RT_ABS((int16_t)uSrc1.au8[10] - puSrc->au8[10]);
     13075    uSum += RT_ABS((int16_t)uSrc1.au8[11] - puSrc->au8[11]);
     13076    uSum += RT_ABS((int16_t)uSrc1.au8[12] - puSrc->au8[12]);
     13077    uSum += RT_ABS((int16_t)uSrc1.au8[13] - puSrc->au8[13]);
     13078    uSum += RT_ABS((int16_t)uSrc1.au8[14] - puSrc->au8[14]);
     13079    uSum += RT_ABS((int16_t)uSrc1.au8[15] - puSrc->au8[15]);
     13080    puDst->au16[4] = uSum;
     13081}
     13082
     13083#endif
     13084
     13085IEM_DECL_IMPL_DEF(void, iemAImpl_vpsadbw_u128_fallback,(PRTUINT128U puDst, PCRTUINT128U puSrc1, PCRTUINT128U puSrc2))
     13086{
     13087    RTUINT128U uSrc1 = *puSrc1;
     13088    RTUINT128U uSrc2 = *puSrc2;
     13089
     13090    puDst->au64[0] = 0;
     13091    puDst->au64[1] = 0;
     13092
     13093    uint16_t uSum = RT_ABS((int16_t)uSrc1.ai8[0] - uSrc2.ai8[0]);
     13094    uSum += RT_ABS((int16_t)uSrc1.au8[1] - uSrc2.au8[1]);
     13095    uSum += RT_ABS((int16_t)uSrc1.au8[2] - uSrc2.au8[2]);
     13096    uSum += RT_ABS((int16_t)uSrc1.au8[3] - uSrc2.au8[3]);
     13097    uSum += RT_ABS((int16_t)uSrc1.au8[4] - uSrc2.au8[4]);
     13098    uSum += RT_ABS((int16_t)uSrc1.au8[5] - uSrc2.au8[5]);
     13099    uSum += RT_ABS((int16_t)uSrc1.au8[6] - uSrc2.au8[6]);
     13100    uSum += RT_ABS((int16_t)uSrc1.au8[7] - uSrc2.au8[7]);
     13101    puDst->au16[0] = uSum;
     13102
     13103    uSum  = RT_ABS((int16_t)uSrc1.au8[ 8] - uSrc2.au8[ 8]);
     13104    uSum += RT_ABS((int16_t)uSrc1.au8[ 9] - uSrc2.au8[ 9]);
     13105    uSum += RT_ABS((int16_t)uSrc1.au8[10] - uSrc2.au8[10]);
     13106    uSum += RT_ABS((int16_t)uSrc1.au8[11] - uSrc2.au8[11]);
     13107    uSum += RT_ABS((int16_t)uSrc1.au8[12] - uSrc2.au8[12]);
     13108    uSum += RT_ABS((int16_t)uSrc1.au8[13] - uSrc2.au8[13]);
     13109    uSum += RT_ABS((int16_t)uSrc1.au8[14] - uSrc2.au8[14]);
     13110    uSum += RT_ABS((int16_t)uSrc1.au8[15] - uSrc2.au8[15]);
     13111    puDst->au16[4] = uSum;
     13112}
     13113
     13114IEM_DECL_IMPL_DEF(void, iemAImpl_vpsadbw_u256_fallback,(PRTUINT256U puDst, PCRTUINT256U puSrc1, PCRTUINT256U puSrc2))
     13115{
     13116    RTUINT256U uSrc1 = *puSrc1;
     13117    RTUINT256U uSrc2 = *puSrc2;
     13118
     13119    puDst->au64[0] = 0;
     13120    puDst->au64[1] = 0;
     13121    puDst->au64[2] = 0;
     13122    puDst->au64[3] = 0;
     13123
     13124    uint16_t uSum = RT_ABS((int16_t)uSrc1.au8[0] - uSrc2.au8[0]);
     13125    uSum += RT_ABS((int16_t)uSrc1.au8[1] - uSrc2.au8[1]);
     13126    uSum += RT_ABS((int16_t)uSrc1.au8[2] - uSrc2.au8[2]);
     13127    uSum += RT_ABS((int16_t)uSrc1.au8[3] - uSrc2.au8[3]);
     13128    uSum += RT_ABS((int16_t)uSrc1.au8[4] - uSrc2.au8[4]);
     13129    uSum += RT_ABS((int16_t)uSrc1.au8[5] - uSrc2.au8[5]);
     13130    uSum += RT_ABS((int16_t)uSrc1.au8[6] - uSrc2.au8[6]);
     13131    uSum += RT_ABS((int16_t)uSrc1.au8[7] - uSrc2.au8[7]);
     13132    puDst->au16[0] = uSum;
     13133
     13134    uSum  = RT_ABS((int16_t)uSrc1.au8[ 8] - uSrc2.au8[ 8]);
     13135    uSum += RT_ABS((int16_t)uSrc1.au8[ 9] - uSrc2.au8[ 9]);
     13136    uSum += RT_ABS((int16_t)uSrc1.au8[10] - uSrc2.au8[10]);
     13137    uSum += RT_ABS((int16_t)uSrc1.au8[11] - uSrc2.au8[11]);
     13138    uSum += RT_ABS((int16_t)uSrc1.au8[12] - uSrc2.au8[12]);
     13139    uSum += RT_ABS((int16_t)uSrc1.au8[13] - uSrc2.au8[13]);
     13140    uSum += RT_ABS((int16_t)uSrc1.au8[14] - uSrc2.au8[14]);
     13141    uSum += RT_ABS((int16_t)uSrc1.au8[15] - uSrc2.au8[15]);
     13142    puDst->au16[4] = uSum;
     13143
     13144    uSum  = RT_ABS((int16_t)uSrc1.au8[16] - uSrc2.au8[16]);
     13145    uSum += RT_ABS((int16_t)uSrc1.au8[17] - uSrc2.au8[17]);
     13146    uSum += RT_ABS((int16_t)uSrc1.au8[18] - uSrc2.au8[18]);
     13147    uSum += RT_ABS((int16_t)uSrc1.au8[19] - uSrc2.au8[19]);
     13148    uSum += RT_ABS((int16_t)uSrc1.au8[20] - uSrc2.au8[20]);
     13149    uSum += RT_ABS((int16_t)uSrc1.au8[21] - uSrc2.au8[21]);
     13150    uSum += RT_ABS((int16_t)uSrc1.au8[22] - uSrc2.au8[22]);
     13151    uSum += RT_ABS((int16_t)uSrc1.au8[23] - uSrc2.au8[23]);
     13152    puDst->au16[8] = uSum;
     13153
     13154    uSum  = RT_ABS((int16_t)uSrc1.au8[24] - uSrc2.au8[24]);
     13155    uSum += RT_ABS((int16_t)uSrc1.au8[25] - uSrc2.au8[25]);
     13156    uSum += RT_ABS((int16_t)uSrc1.au8[26] - uSrc2.au8[26]);
     13157    uSum += RT_ABS((int16_t)uSrc1.au8[27] - uSrc2.au8[27]);
     13158    uSum += RT_ABS((int16_t)uSrc1.au8[28] - uSrc2.au8[28]);
     13159    uSum += RT_ABS((int16_t)uSrc1.au8[29] - uSrc2.au8[29]);
     13160    uSum += RT_ABS((int16_t)uSrc1.au8[30] - uSrc2.au8[30]);
     13161    uSum += RT_ABS((int16_t)uSrc1.au8[31] - uSrc2.au8[31]);
     13162    puDst->au16[12] = uSum;
     13163}
     13164
     13165
     13166/*
    1303113167 * CRC32 (SSE 4.2).
    1303213168 */
  • trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsTwoByte0f.cpp.h

    r96028 r96094  
    1058810588
    1058910589/** Opcode      0x0f 0xf6 - psadbw Pq, Qq */
    10590 FNIEMOP_STUB(iemOp_psadbw_Pq_Qq);
     10590FNIEMOP_DEF(iemOp_psadbw_Pq_Qq)
     10591{
     10592    IEMOP_MNEMONIC2(RM, PSADBW, psadbw, Pq, Qq, DISOPTYPE_HARMLESS | DISOPTYPE_MMX, IEMOPHINT_IGNORES_OP_SIZES);
     10593    return FNIEMOP_CALL_1(iemOpCommonMmxSseOpt_FullFull_To_Full, iemAImpl_psadbw_u64);
     10594}
     10595
     10596
    1059110597/** Opcode 0x66 0x0f 0xf6 - psadbw Vx, Wx */
    10592 FNIEMOP_STUB(iemOp_psadbw_Vx_Wx);
     10598FNIEMOP_DEF(iemOp_psadbw_Vx_Wx)
     10599{
     10600    IEMOP_MNEMONIC2(RM, PSADBW, psaddw, Vx, Wx, DISOPTYPE_HARMLESS | DISOPTYPE_SSE, IEMOPHINT_IGNORES_OP_SIZES);
     10601    return FNIEMOP_CALL_1(iemOpCommonSse2Opt_FullFull_To_Full, iemAImpl_psadbw_u128);
     10602}
     10603
     10604
    1059310605/*  Opcode 0xf2 0x0f 0xf6 - invalid */
    1059410606
  • trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsVexMap1.cpp.h

    r96032 r96094  
    44854485
    44864486/*  Opcode VEX.0F 0xf6 - invalid */
     4487
     4488
    44874489/** Opcode VEX.66.0F 0xf6 - vpsadbw Vx, Hx, Wx */
    4488 FNIEMOP_STUB(iemOp_vpsadbw_Vx_Hx_Wx);
     4490FNIEMOP_DEF(iemOp_vpsadbw_Vx_Hx_Wx)
     4491{
     4492    IEMOP_MNEMONIC3(VEX_RVM, VPSADBW, vpsadbw, Vx, Hx, Wx, DISOPTYPE_HARMLESS, 0);
     4493    IEMOPMEDIAOPTF3_INIT_VARS(vpsadbw);
     4494    return FNIEMOP_CALL_1(iemOpCommonAvxAvx2_Vx_Hx_Wx_Opt, IEM_SELECT_HOST_OR_FALLBACK(fAvx2, &s_Host, &s_Fallback));
     4495}
     4496
     4497
    44894498/*  Opcode VEX.F2.0F 0xf6 - invalid */
    44904499
  • trunk/src/VBox/VMM/include/IEMInternal.h

    r96087 r96094  
    18021802FNIEMAIMPLMEDIAOPTF2U64  iemAImpl_pmulhuw_u64;
    18031803FNIEMAIMPLMEDIAOPTF2U64  iemAImpl_pavgb_u64, iemAImpl_pavgw_u64;
     1804FNIEMAIMPLMEDIAOPTF2U64  iemAImpl_psadbw_u64;
    18041805
    18051806FNIEMAIMPLMEDIAF2U128    iemAImpl_pshufb_u128, iemAImpl_pshufb_u128_fallback;
     
    18541855FNIEMAIMPLMEDIAOPTF2U128 iemAImpl_pmulhuw_u128;
    18551856FNIEMAIMPLMEDIAOPTF2U128 iemAImpl_pavgb_u128, iemAImpl_pavgw_u128;
     1857FNIEMAIMPLMEDIAOPTF2U128 iemAImpl_psadbw_u128;
    18561858
    18571859FNIEMAIMPLMEDIAF3U128    iemAImpl_vpshufb_u128,    iemAImpl_vpshufb_u128_fallback;
     
    19091911FNIEMAIMPLMEDIAOPTF3U128 iemAImpl_vpmaddubsw_u128, iemAImpl_vpmaddubsw_u128_fallback;
    19101912FNIEMAIMPLMEDIAOPTF3U128 iemAImpl_vpmulhrsw_u128,  iemAImpl_vpmulhrsw_u128_fallback;
     1913FNIEMAIMPLMEDIAOPTF3U128 iemAImpl_vpsadbw_u128,    iemAImpl_vpsadbw_u128_fallback;
    19111914
    19121915FNIEMAIMPLMEDIAOPTF2U128 iemAImpl_vpabsb_u128,     iemAImpl_vpabsb_u128_fallback;
     
    19681971FNIEMAIMPLMEDIAOPTF3U256 iemAImpl_vpmaddubsw_u256, iemAImpl_vpmaddubsw_u256_fallback;
    19691972FNIEMAIMPLMEDIAOPTF3U256 iemAImpl_vpmulhrsw_u256,  iemAImpl_vpmulhrsw_u256_fallback;
     1973FNIEMAIMPLMEDIAOPTF3U256 iemAImpl_vpsadbw_u256,    iemAImpl_vpsadbw_u256_fallback;
    19701974
    19711975FNIEMAIMPLMEDIAOPTF2U256 iemAImpl_vpabsb_u256,     iemAImpl_vpabsb_u256_fallback;
  • trunk/src/VBox/VMM/testcase/tstIEMCheckMc.cpp

    r96028 r96094  
    446446#define iemAImpl_pavgb_u64              NULL
    447447#define iemAImpl_pavgw_u64              NULL
     448#define iemAImpl_psadbw_u64             NULL
    448449
    449450#define iemAImpl_pmaxub_u128            NULL
     
    453454#define iemAImpl_pavgb_u128             NULL
    454455#define iemAImpl_pavgw_u128             NULL
     456#define iemAImpl_psadbw_u128            NULL
    455457
    456458/** @}  */
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette