VirtualBox

Changeset 102623 in vbox for trunk/src/VBox/VMM/include


Ignore:
Timestamp:
Dec 16, 2023 12:00:51 AM (14 months ago)
Author:
vboxsync
Message:

VMM/IEM: ARM64 version of BODY_CHECK_OPCODES, enabled two more builtins. Optimized immediate loading a little bit. bugref:10371

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r102593 r102623  
    167167
    168168/**
    169  * Emits loading a constant into a 64-bit GPR
    170  */
    171 DECL_INLINE_THROW(uint32_t)
    172 iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
    173 {
    174     if (!uImm64)
    175         return iemNativeEmitGprZero(pReNative, off, iGpr);
    176 
    177 #ifdef RT_ARCH_AMD64
    178     if (uImm64 <= UINT32_MAX)
     169 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
     170 * buffer space.
     171 *
     172 * Max buffer consumption:
     173 *      - AMD64: 10 instruction bytes.
     174 *      - ARM64: 4 instruction words (16 bytes).
     175 */
     176DECLINLINE(uint32_t) iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
     177{
     178#ifdef RT_ARCH_AMD64
     179    if (uImm64 == 0)
     180    {
     181        /* xor gpr, gpr */
     182        if (iGpr >= 8)
     183            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
     184        pCodeBuf[off++] = 0x33;
     185        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
     186    }
     187    else if (uImm64 <= UINT32_MAX)
    179188    {
    180189        /* mov gpr, imm32 */
    181         uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    182190        if (iGpr >= 8)
    183             pbCodeBuf[off++] = X86_OP_REX_B;
    184         pbCodeBuf[off++] = 0xb8 + (iGpr & 7);
    185         pbCodeBuf[off++] = RT_BYTE1(uImm64);
    186         pbCodeBuf[off++] = RT_BYTE2(uImm64);
    187         pbCodeBuf[off++] = RT_BYTE3(uImm64);
    188         pbCodeBuf[off++] = RT_BYTE4(uImm64);
     191            pCodeBuf[off++] = X86_OP_REX_B;
     192        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
     193        pCodeBuf[off++] = RT_BYTE1(uImm64);
     194        pCodeBuf[off++] = RT_BYTE2(uImm64);
     195        pCodeBuf[off++] = RT_BYTE3(uImm64);
     196        pCodeBuf[off++] = RT_BYTE4(uImm64);
    189197    }
    190198    else
    191199    {
    192200        /* mov gpr, imm64 */
    193         uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    194201        if (iGpr < 8)
    195             pbCodeBuf[off++] = X86_OP_REX_W;
     202            pCodeBuf[off++] = X86_OP_REX_W;
    196203        else
    197             pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
    198         pbCodeBuf[off++] = 0xb8 + (iGpr & 7);
    199         pbCodeBuf[off++] = RT_BYTE1(uImm64);
    200         pbCodeBuf[off++] = RT_BYTE2(uImm64);
    201         pbCodeBuf[off++] = RT_BYTE3(uImm64);
    202         pbCodeBuf[off++] = RT_BYTE4(uImm64);
    203         pbCodeBuf[off++] = RT_BYTE5(uImm64);
    204         pbCodeBuf[off++] = RT_BYTE6(uImm64);
    205         pbCodeBuf[off++] = RT_BYTE7(uImm64);
    206         pbCodeBuf[off++] = RT_BYTE8(uImm64);
    207     }
    208 
    209 #elif defined(RT_ARCH_ARM64)
    210     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    211 
     204            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
     205        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
     206        pCodeBuf[off++] = RT_BYTE1(uImm64);
     207        pCodeBuf[off++] = RT_BYTE2(uImm64);
     208        pCodeBuf[off++] = RT_BYTE3(uImm64);
     209        pCodeBuf[off++] = RT_BYTE4(uImm64);
     210        pCodeBuf[off++] = RT_BYTE5(uImm64);
     211        pCodeBuf[off++] = RT_BYTE6(uImm64);
     212        pCodeBuf[off++] = RT_BYTE7(uImm64);
     213        pCodeBuf[off++] = RT_BYTE8(uImm64);
     214    }
     215
     216#elif defined(RT_ARCH_ARM64)
    212217    /*
    213218     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     
    220225     * the remainder.
    221226     */
    222     uint32_t fMovK = 0;
    223     /* mov  gpr, imm16 */
    224     uint32_t uImmPart = ((uint32_t)((uImm64 >>  0) & UINT32_C(0xffff)) << 5);
    225     if (uImmPart)
    226     {
    227         pu32CodeBuf[off++] = UINT32_C(0xd2800000) |         (UINT32_C(0) << 21) | uImmPart | iGpr;
    228         fMovK |= RT_BIT_32(29);
    229     }
    230     /* mov[k] gpr, imm16, lsl #16 */
    231     uImmPart = ((uint32_t)((uImm64 >> 16) & UINT32_C(0xffff)) << 5);
    232     if (uImmPart)
    233     {
    234         pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(1) << 21) | uImmPart | iGpr;
    235         fMovK |= RT_BIT_32(29);
    236     }
    237     /* mov[k] gpr, imm16, lsl #32 */
    238     uImmPart = ((uint32_t)((uImm64 >> 32) & UINT32_C(0xffff)) << 5);
    239     if (uImmPart)
    240     {
    241         pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(2) << 21) | uImmPart | iGpr;
    242         fMovK |= RT_BIT_32(29);
    243     }
    244     /* mov[k] gpr, imm16, lsl #48 */
    245     uImmPart = ((uint32_t)((uImm64 >> 48) & UINT32_C(0xffff)) << 5);
    246     if (uImmPart)
    247         pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(3) << 21) | uImmPart | iGpr;
    248 
    249     /** @todo there is an inverted mask variant we might want to explore if it
    250      *        reduces the number of instructions... */
     227    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
     228                            + !((uImm64 >> 16) & UINT16_MAX)
     229                            + !((uImm64 >> 32) & UINT16_MAX)
     230                            + !((uImm64 >> 48) & UINT16_MAX);
     231    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
     232                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
     233                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
     234                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
     235                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
     236    if (cFfffHalfWords <= cZeroHalfWords)
     237    {
     238        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
     239
     240        /* movz gpr, imm16 */
     241        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
     242        if (uImmPart || cZeroHalfWords == 4)
     243        {
     244            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
     245            fMovBase |= RT_BIT_32(29);
     246        }
     247        /* mov[z/k] gpr, imm16, lsl #16 */
     248        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
     249        if (uImmPart)
     250        {
     251            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
     252            fMovBase |= RT_BIT_32(29);
     253        }
     254        /* mov[z/k] gpr, imm16, lsl #32 */
     255        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
     256        if (uImmPart)
     257        {
     258            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
     259            fMovBase |= RT_BIT_32(29);
     260        }
     261        /* mov[z/k] gpr, imm16, lsl #48 */
     262        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
     263        if (uImmPart)
     264            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
     265    }
     266    else
     267    {
     268        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
     269
     270        /* find the first half-word that isn't UINT16_MAX. */
     271        uint32_t const iHwNotFfff =  (uImm64        & UINT16_MAX) != UINT16_MAX ? 0
     272                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
     273                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
     274
     275        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
     276        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
     277        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
     278        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
     279        /* movk gpr, imm16 */
     280        if (iHwNotFfff != 0)
     281        {
     282            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
     283            if (uImmPart != UINT32_C(0xffff))
     284                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
     285        }
     286        /* movk gpr, imm16, lsl #16 */
     287        if (iHwNotFfff != 1)
     288        {
     289            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
     290            if (uImmPart != UINT32_C(0xffff))
     291                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
     292        }
     293        /* movk gpr, imm16, lsl #32 */
     294        if (iHwNotFfff != 2)
     295        {
     296            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
     297            if (uImmPart != UINT32_C(0xffff))
     298                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
     299        }
     300        /* movk gpr, imm16, lsl #48 */
     301        if (iHwNotFfff != 3)
     302        {
     303            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
     304            if (uImmPart != UINT32_C(0xffff))
     305                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
     306        }
     307    }
     308
    251309    /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
    252310     *        clang 12.x does that, only to use the 'x' version for the
    253311     *        addressing in the following ldr). */
    254312
     313#else
     314# error "port me"
     315#endif
     316    return off;
     317}
     318
     319
     320/**
     321 * Emits loading a constant into a 64-bit GPR
     322 */
     323DECL_INLINE_THROW(uint32_t)
     324iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
     325{
     326#ifdef RT_ARCH_AMD64
     327    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
     328#elif defined(RT_ARCH_ARM64)
     329    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
     330#else
     331# error "port me"
     332#endif
     333    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     334    return off;
     335}
     336
     337
     338/**
      339 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
     340 * buffer space.
     341 *
     342 * Max buffer consumption:
     343 *      - AMD64: 6 instruction bytes.
     344 *      - ARM64: 2 instruction words (8 bytes).
     345 *
     346 * @note The top 32 bits will be cleared.
     347 */
     348DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
     349{
     350#ifdef RT_ARCH_AMD64
     351    if (uImm32 == 0)
     352    {
     353        /* xor gpr, gpr */
     354        if (iGpr >= 8)
     355            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
     356        pCodeBuf[off++] = 0x33;
     357        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
     358    }
     359    else
     360    {
     361        /* mov gpr, imm32 */
     362        if (iGpr >= 8)
     363            pCodeBuf[off++] = X86_OP_REX_B;
     364        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
     365        pCodeBuf[off++] = RT_BYTE1(uImm32);
     366        pCodeBuf[off++] = RT_BYTE2(uImm32);
     367        pCodeBuf[off++] = RT_BYTE3(uImm32);
     368        pCodeBuf[off++] = RT_BYTE4(uImm32);
     369    }
     370
     371#elif defined(RT_ARCH_ARM64)
     372    if ((uImm32 >> 16) == 0)
     373        /* movz gpr, imm16 */
     374        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32,                    0, false /*f64Bit*/);
     375    else if ((uImm32 & UINT32_C(0xffff)) == 0)
     376        /* movz gpr, imm16, lsl #16 */
     377        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
     378    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
     379        /* movn gpr, imm16, lsl #16 */
     380        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16,             1, false /*f64Bit*/);
     381    else if ((uImm32 >> 16) == UINT32_C(0xffff))
     382        /* movn gpr, imm16 */
     383        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32,                   0, false /*f64Bit*/);
     384    else
     385    {
     386        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
     387        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
     388    }
     389
     390#else
     391# error "port me"
     392#endif
     393    return off;
     394}
     395
     396
     397/**
     398 * Emits loading a constant into a 32-bit GPR.
     399 * @note The top 32 bits will be cleared.
     400 */
     401DECL_INLINE_THROW(uint32_t)
     402iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
     403{
     404#ifdef RT_ARCH_AMD64
     405    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
     406#elif defined(RT_ARCH_ARM64)
     407    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
    255408#else
    256409# error "port me"
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette