VirtualBox

Changeset 103819 in vbox for trunk/src/VBox/VMM/include


Ignore:
Timestamp:
Mar 13, 2024 9:31:09 AM (13 months ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
162188
Message:

VMM/IEM: amd64 bugfixes for r162184 and r162185, bugref:10614

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r103816 r103819  
    74077407     *        vbroadcast needs a memory operand or another xmm register to work... */
    74087408
    7409     /* pinsrd vecsrc, gpr, #0 (ASSUMES SSE4.1). */
     7409    /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
    74107410    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    74117411    if (iVecRegDst >= 8 || iGprSrc >= 8)
     
    74247424        pCodeBuf[off++] = X86_OP_VEX3;
    74257425        pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
     7426                          | 0x02                 /* opcode map. */
    74267427                          | (  iVecRegDst >= 8
    74277428                             ? 0
     
    74337434    else
    74347435    {
    7435         /* pinsrd vecsrc, gpr, #1 (ASSUMES SSE4.1). */
    7436         pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    7437         if (iVecRegDst >= 8 || iGprSrc >= 8)
    7438             pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
    7439                               | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    7440         pCodeBuf[off++] = 0x0f;
    7441         pCodeBuf[off++] = 0x3a;
    7442         pCodeBuf[off++] = 0x22;
    7443         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    7444         pCodeBuf[off++] = 0x00;
    7445 
    7446         /* pinsrd vecsrc, gpr, #2 (ASSUMES SSE4.1). */
    7447         pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    7448         if (iVecRegDst >= 8 || iGprSrc >= 8)
    7449             pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
    7450                               | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    7451         pCodeBuf[off++] = 0x0f;
    7452         pCodeBuf[off++] = 0x3a;
    7453         pCodeBuf[off++] = 0x22;
    7454         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    7455         pCodeBuf[off++] = 0x00;
    7456 
    7457         /* pinsrd vecsrc, gpr, #3 (ASSUMES SSE4.1). */
    7458         pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    7459         if (iVecRegDst >= 8 || iGprSrc >= 8)
    7460             pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
    7461                               | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    7462         pCodeBuf[off++] = 0x0f;
    7463         pCodeBuf[off++] = 0x3a;
    7464         pCodeBuf[off++] = 0x22;
    7465         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    7466         pCodeBuf[off++] = 0x00;
    7467     }
    7468 #elif defined(RT_ARCH_ARM64)
    7469     /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    7470     Assert(!(iVecRegDst & 0x1) || !f256Bit);
    7471 
    7472     /* dup vecsrc, gpr */
    7473     pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
    7474     if (f256Bit)
    7475         pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
    7476 #else
    7477 # error "port me"
    7478 #endif
    7479     return off;
    7480 }
    7481 
    7482 
    7483 /**
    7484  * Emits a vecdst[x] = gprsrc broadcast, 32-bit.
    7485  */
    7486 DECL_INLINE_THROW(uint32_t)
    7487 iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
    7488 {
    7489 #ifdef RT_ARCH_AMD64
    7490     off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 12 : 28), off, iVecRegDst, iGprSrc, f256Bit);
    7491 #elif defined(RT_ARCH_ARM64)
    7492     off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
    7493 #else
    7494 # error "port me"
    7495 #endif
    7496     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7497     return off;
    7498 }
    7499 
    7500 
    7501 /**
    7502  * Emits a vecdst = gprsrc broadcast, 64-bit.
    7503  */
    7504 DECL_FORCE_INLINE(uint32_t)
    7505 iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
    7506 {
    7507 #ifdef RT_ARCH_AMD64
    7508     /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
    7509      *        vbroadcast needs a memory operand or another xmm register to work... */
    7510 
    7511     /* pinsrq vecsrc, gpr, #0 (ASSUMES SSE4.1). */
    7512     pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    7513     if (iVecRegDst >= 8 || iGprSrc >= 8)
    7514         pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
    7515                           | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    7516     pCodeBuf[off++] = 0x0f;
    7517     pCodeBuf[off++] = 0x3a;
    7518     pCodeBuf[off++] = 0x22;
    7519     pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    7520     pCodeBuf[off++] = 0x00;
    7521 
    7522     if (f256Bit)
    7523     {
    7524         /* When broadcasting the entire ymm register we can use vbroadcastsd now. */
    7525         /* vbroadcastsd ymm, xmm (ASSUMES AVX2). */
    7526         pCodeBuf[off++] = X86_OP_VEX3;
    7527         pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
    7528                           | (  iVecRegDst >= 8
    7529                              ? 0
    7530                              : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    7531         pCodeBuf[off++] = 0x7d;
    7532         pCodeBuf[off++] = 0x19;
    7533         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
    7534     }
    7535     else
    7536     {
    7537         /* pinsrq vecsrc, gpr, #1 (ASSUMES SSE4.1). */
     7436        /* pinsrd vecdst, gpr, #1 (ASSUMES SSE4.1). */
    75387437        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    75397438        if (iVecRegDst >= 8 || iGprSrc >= 8)
     
    75457444        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    75467445        pCodeBuf[off++] = 0x01;
     7446
     7447        /* pinsrd vecdst, gpr, #2 (ASSUMES SSE4.1). */
     7448        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     7449        if (iVecRegDst >= 8 || iGprSrc >= 8)
     7450            pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
     7451                              | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     7452        pCodeBuf[off++] = 0x0f;
     7453        pCodeBuf[off++] = 0x3a;
     7454        pCodeBuf[off++] = 0x22;
     7455        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
     7456        pCodeBuf[off++] = 0x02;
     7457
     7458        /* pinsrd vecdst, gpr, #3 (ASSUMES SSE4.1). */
     7459        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     7460        if (iVecRegDst >= 8 || iGprSrc >= 8)
     7461            pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
     7462                              | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     7463        pCodeBuf[off++] = 0x0f;
     7464        pCodeBuf[off++] = 0x3a;
     7465        pCodeBuf[off++] = 0x22;
     7466        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
     7467        pCodeBuf[off++] = 0x03;
     7468    }
     7469#elif defined(RT_ARCH_ARM64)
     7470    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
     7471    Assert(!(iVecRegDst & 0x1) || !f256Bit);
     7472
     7473    /* dup vecsrc, gpr */
     7474    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
     7475    if (f256Bit)
     7476        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
     7477#else
     7478# error "port me"
     7479#endif
     7480    return off;
     7481}
     7482
     7483
     7484/**
     7485 * Emits a vecdst[x] = gprsrc broadcast, 32-bit.
     7486 */
     7487DECL_INLINE_THROW(uint32_t)
     7488iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
     7489{
     7490#ifdef RT_ARCH_AMD64
     7491    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 12 : 28), off, iVecRegDst, iGprSrc, f256Bit);
     7492#elif defined(RT_ARCH_ARM64)
     7493    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
     7494#else
     7495# error "port me"
     7496#endif
     7497    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     7498    return off;
     7499}
     7500
     7501
     7502/**
     7503 * Emits a vecdst = gprsrc broadcast, 64-bit.
     7504 */
     7505DECL_FORCE_INLINE(uint32_t)
     7506iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
     7507{
     7508#ifdef RT_ARCH_AMD64
     7509    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
     7510     *        vbroadcast needs a memory operand or another xmm register to work... */
     7511
     7512    /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
     7513    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     7514    pCodeBuf[off++] =   X86_OP_REX_W
     7515                      | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
     7516                      | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     7517    pCodeBuf[off++] = 0x0f;
     7518    pCodeBuf[off++] = 0x3a;
     7519    pCodeBuf[off++] = 0x22;
     7520    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
     7521    pCodeBuf[off++] = 0x00;
     7522
     7523    if (f256Bit)
     7524    {
     7525        /* When broadcasting the entire ymm register we can use vbroadcastsd now. */
     7526        /* vbroadcastsd ymm, xmm (ASSUMES AVX2). */
     7527        pCodeBuf[off++] = X86_OP_VEX3;
     7528        pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
     7529                          | 0x02                 /* opcode map. */
     7530                          | (  iVecRegDst >= 8
     7531                             ? 0
     7532                             : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
     7533        pCodeBuf[off++] = 0x7d;
     7534        pCodeBuf[off++] = 0x19;
     7535        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
     7536    }
     7537    else
     7538    {
     7539        /* pinsrq vecdst, gpr, #1 (ASSUMES SSE4.1). */
     7540        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     7541        pCodeBuf[off++] =   X86_OP_REX_W
     7542                          | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
     7543                          | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     7544        pCodeBuf[off++] = 0x0f;
     7545        pCodeBuf[off++] = 0x3a;
     7546        pCodeBuf[off++] = 0x22;
     7547        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
     7548        pCodeBuf[off++] = 0x01;
    75477549    }
    75487550#elif defined(RT_ARCH_ARM64)
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette