VirtualBox

Timestamp:
Apr 23, 2024 9:49:16 AM
Author:
vboxsync
Message:

VMM/IEM: Improved loading 32-bit constants via iemNativeEmitLoadGprImmEx. bugref:10370

File:
1 edited

Legend:

    ' '  Unmodified
    '+'  Added
    '-'  Removed
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

r104372 → r104402

@@ -179,4 +179,64 @@
 #endif
     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
+    return off;
+}
+
+
+/**
+ * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficent
+ * buffer space.
+ *
+ * Max buffer consumption:
+ *      - AMD64: 6 instruction bytes.
+ *      - ARM64: 2 instruction words (8 bytes).
+ *
+ * @note The top 32 bits will be cleared.
+ */
+DECL_FORCE_INLINE(uint32_t)
+iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
+{
+#ifdef RT_ARCH_AMD64
+    if (uImm32 == 0)
+    {
+        /* xor gpr, gpr */
+        if (iGpr >= 8)
+            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
+        pCodeBuf[off++] = 0x33;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
+    }
+    else
+    {
+        /* mov gpr, imm32 */
+        if (iGpr >= 8)
+            pCodeBuf[off++] = X86_OP_REX_B;
+        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
+        pCodeBuf[off++] = RT_BYTE1(uImm32);
+        pCodeBuf[off++] = RT_BYTE2(uImm32);
+        pCodeBuf[off++] = RT_BYTE3(uImm32);
+        pCodeBuf[off++] = RT_BYTE4(uImm32);
+    }
+
+#elif defined(RT_ARCH_ARM64)
+    if ((uImm32 >> 16) == 0)
+        /* movz gpr, imm16 */
+        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32,                    0, false /*f64Bit*/);
+    else if ((uImm32 & UINT32_C(0xffff)) == 0)
+        /* movz gpr, imm16, lsl #16 */
+        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
+    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
+        /* movn gpr, imm16, lsl #16 */
+        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16,             1, false /*f64Bit*/);
+    else if ((uImm32 >> 16) == UINT32_C(0xffff))
+        /* movn gpr, imm16 */
+        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32,                   0, false /*f64Bit*/);
+    else
+    {
+        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
+        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
+    }
+
+#else
+# error "port me"
+#endif
     return off;
 }
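The ARM64 branch of the new emitter picks a single MOVZ or MOVN whenever one 16-bit half of the constant is all zeroes or all ones, and only falls back to a MOVZ + MOVK pair otherwise, which is what bounds the worst case at 2 instruction words. The following standalone sketch (not part of the changeset) mirrors that decision ladder; the MovZ/MovN/MovK helpers are simplified stand-ins for the Armv8A64MkInstrMovZ/MovN/MovK builders and encode only the 32-bit 'w' register forms.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for Armv8A64MkInstrMovZ/MovN/MovK, 32-bit ('w') forms only:
     MOVZ wRd, #imm16, LSL #(16*hw)  ->  0x52800000 | hw<<21 | imm16<<5 | Rd
     MOVN wRd, #imm16, LSL #(16*hw)  ->  0x12800000 | hw<<21 | imm16<<5 | Rd
     MOVK wRd, #imm16, LSL #(16*hw)  ->  0x72800000 | hw<<21 | imm16<<5 | Rd */
static uint32_t MovZ(uint8_t iRd, uint16_t uImm16, unsigned iHw) { return UINT32_C(0x52800000) | (uint32_t)iHw << 21 | (uint32_t)uImm16 << 5 | iRd; }
static uint32_t MovN(uint8_t iRd, uint16_t uImm16, unsigned iHw) { return UINT32_C(0x12800000) | (uint32_t)iHw << 21 | (uint32_t)uImm16 << 5 | iRd; }
static uint32_t MovK(uint8_t iRd, uint16_t uImm16, unsigned iHw) { return UINT32_C(0x72800000) | (uint32_t)iHw << 21 | (uint32_t)uImm16 << 5 | iRd; }

/* Same decision ladder as the ARM64 branch above; returns 1 or 2 instruction words. */
static unsigned EmitLoadW(uint32_t *pau32Code, uint8_t iRd, uint32_t uImm32)
{
    if ((uImm32 >> 16) == 0)
        pau32Code[0] = MovZ(iRd, (uint16_t)uImm32, 0);              /* movz w, #lo */
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        pau32Code[0] = MovZ(iRd, (uint16_t)(uImm32 >> 16), 1);      /* movz w, #hi, lsl #16 */
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        pau32Code[0] = MovN(iRd, (uint16_t)(~uImm32 >> 16), 1);     /* movn w, #~hi, lsl #16 */
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        pau32Code[0] = MovN(iRd, (uint16_t)~uImm32, 0);             /* movn w, #~lo */
    else
    {
        pau32Code[0] = MovZ(iRd, (uint16_t)uImm32, 0);              /* movz w, #lo */
        pau32Code[1] = MovK(iRd, (uint16_t)(uImm32 >> 16), 1);      /* movk w, #hi, lsl #16 */
        return 2;
    }
    return 1;
}

int main(void)
{
    static uint32_t const s_auImm32[] = { 0x00001234, 0x56780000, 0x1234ffff, 0xffff1234, 0x12345678 };
    for (unsigned i = 0; i < sizeof(s_auImm32) / sizeof(s_auImm32[0]); i++)
    {
        uint32_t au32Code[2];
        unsigned const cWords = EmitLoadW(au32Code, 0 /*w0*/, s_auImm32[i]);
        printf("%#010x -> %u word(s):", s_auImm32[i], cWords);
        for (unsigned j = 0; j < cWords; j++)
            printf(" %#010x", au32Code[j]);
        printf("\n");
    }
    return 0;
}

Only the last test value, 0x12345678, has two "interesting" halves and therefore needs the MOVZ + MOVK pair; the other four are covered by a single instruction.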
     
@@ -247,4 +307,10 @@
 
 #elif defined(RT_ARCH_ARM64)
+    /*
+     * Quick simplification: Do 32-bit load if top half is zero.
+     */
+    if (uImm64 <= UINT32_MAX)
+        return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);
+
     /*
      * We need to start this sequence with a 'mov grp, imm16, lsl #x' and
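The quick path added in this hunk is valid because an AArch64 write to a 'w' register architecturally clears bits 63:32 of the corresponding 'x' register, so any 64-bit constant whose top half is zero can be handed to the 32-bit emitter above and still yields the full 64-bit value in at most two instruction words. It also makes the @todo removed in the next hunk moot. A minimal standalone illustration of the dispatch condition (only the condition is sketched here, not the VirtualBox emitter itself):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* 0x00000000deadbeef takes the quick path: the 32-bit ladder emits
       movz w0, #0xbeef; movk w0, #0xdead, lsl #16, and the 'w' write zeroes
       bits 63:32 of x0.  0x0000000100000000 does not and needs the 64-bit path. */
    static uint64_t const s_auImm64[] = { UINT64_C(0xdeadbeef), UINT64_C(0x100000000) };
    for (unsigned i = 0; i < 2; i++)
        printf("%#018llx: %s\n", (unsigned long long)s_auImm64[i],
               s_auImm64[i] <= UINT32_MAX ? "32-bit quick path" : "full 64-bit sequence");
    return 0;
}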
     
@@ -339,8 +405,4 @@
     }
 
-    /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
-     *        clang 12.x does that, only to use the 'x' version for the
-     *        addressing in the following ldr). */
-
 #else
 # error "port me"
     
@@ -364,63 +426,4 @@
 #endif
     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
-    return off;
-}
-
-
-/**
- * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficent
- * buffer space.
- *
- * Max buffer consumption:
- *      - AMD64: 6 instruction bytes.
- *      - ARM64: 2 instruction words (8 bytes).
- *
- * @note The top 32 bits will be cleared.
- */
-DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
-{
-#ifdef RT_ARCH_AMD64
-    if (uImm32 == 0)
-    {
-        /* xor gpr, gpr */
-        if (iGpr >= 8)
-            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
-        pCodeBuf[off++] = 0x33;
-        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
-    }
-    else
-    {
-        /* mov gpr, imm32 */
-        if (iGpr >= 8)
-            pCodeBuf[off++] = X86_OP_REX_B;
-        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
-        pCodeBuf[off++] = RT_BYTE1(uImm32);
-        pCodeBuf[off++] = RT_BYTE2(uImm32);
-        pCodeBuf[off++] = RT_BYTE3(uImm32);
-        pCodeBuf[off++] = RT_BYTE4(uImm32);
-    }
-
-#elif defined(RT_ARCH_ARM64)
-    if ((uImm32 >> 16) == 0)
-        /* movz gpr, imm16 */
-        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32,                    0, false /*f64Bit*/);
-    else if ((uImm32 & UINT32_C(0xffff)) == 0)
-        /* movz gpr, imm16, lsl #16 */
-        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
-    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
-        /* movn gpr, imm16, lsl #16 */
-        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16,             1, false /*f64Bit*/);
-    else if ((uImm32 >> 16) == UINT32_C(0xffff))
-        /* movn gpr, imm16 */
-        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32,                   0, false /*f64Bit*/);
-    else
-    {
-        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
-        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
-    }
-
-#else
-# error "port me"
-#endif
     return off;
 }
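The block removed in this last hunk is the old DECLINLINE copy of the same 32-bit emitter, now superseded by the DECL_FORCE_INLINE version added at the top of the changeset; its body is unchanged. For reference, its AMD64 branch emits 'xor r32, r32' for zero and otherwise 'mov r32, imm32' (opcode 0xB8 + reg) with a REX.B prefix for r8d-r15d, giving the 6-byte worst case stated in the doc comment. Below is a standalone sketch of that encoding; the plain hex constants stand in for the X86_OP_REX_* and X86_MODRM_MAKE macros, and it is not the VirtualBox code itself.

#include <stdint.h>
#include <stdio.h>

/* Standalone sketch of the AMD64 branch: 0x45 = REX.R|REX.B, 0x41 = REX.B,
   0x33 = xor r32, r/m32, 0xB8+reg = mov r32, imm32 (imm32 stored little endian). */
static uint32_t EmitLoadGpr32Imm(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pbCodeBuf[off++] = 0x45;
        pbCodeBuf[off++] = 0x33;
        pbCodeBuf[off++] = (uint8_t)(0xc0 | (iGpr & 7) << 3 | (iGpr & 7));
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pbCodeBuf[off++] = 0x41;
        pbCodeBuf[off++] = (uint8_t)(0xb8 + (iGpr & 7));
        pbCodeBuf[off++] = (uint8_t)uImm32;
        pbCodeBuf[off++] = (uint8_t)(uImm32 >>  8);
        pbCodeBuf[off++] = (uint8_t)(uImm32 >> 16);
        pbCodeBuf[off++] = (uint8_t)(uImm32 >> 24);
    }
    return off;
}

int main(void)
{
    uint8_t abBuf[6];
    uint32_t const cbCode = EmitLoadGpr32Imm(abBuf, 0, 10 /*r10d*/, 0x12345678); /* worst case: 6 bytes */
    for (uint32_t offByte = 0; offByte < cbCode; offByte++)
        printf("%02x ", abBuf[offByte]);
    printf("\n");  /* expected: 41 ba 78 56 34 12 */
    return 0;
}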