VirtualBox

Changeset 101247 in vbox for trunk


Timestamp:
Sep 22, 2023 11:48:24 PM
Author:
vboxsync
svn:sync-xref-src-repo-rev:
159230
Message:

VMM/IEM: Working on emitting native arm64 instructions... bugref:10370

Location:
trunk/src/VBox/VMM
Files:
2 edited

Legend:

    +   added line
    -   removed line
        unprefixed lines are unmodified; … marks elided context
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r101203 → r101247
  #include <iprt/mem.h>
  #include <iprt/string.h>
+ #if   defined(RT_ARCH_AMD64)
+ # include <iprt/x86.h>
+ #elif defined(RT_ARCH_ARM64)
+ # include <iprt/armv8.h>
+ #endif

  #ifdef RT_OS_WINDOWS
…
  # error The setjmp approach must be enabled for the recompiler.
  #endif
-
-
- /*********************************************************************************************************************************
- *   Defined Constants And Macros                                                                                                 *
- *********************************************************************************************************************************/
- /** @name Stack Frame Layout
-  *
-  * @{  */
- /** The size of the area for stack variables and spills and stuff. */
- #define IEMNATIVE_FRAME_VAR_SIZE            0x40
- #ifdef RT_ARCH_AMD64
- /** Number of stack arguments slots for calls made from the frame. */
- # define IEMNATIVE_FRAME_STACK_ARG_COUNT    4
- /** An stack alignment adjustment (between non-volatile register pushes and
-  *  the stack variable area, so the latter better aligned). */
- # define IEMNATIVE_FRAME_ALIGN_SIZE         8
- /** Number of any shadow arguments (spill area) for calls we make. */
- # ifdef RT_OS_WINDOWS
- #  define IEMNATIVE_FRAME_SHADOW_ARG_COUNT  4
- # else
- #  define IEMNATIVE_FRAME_SHADOW_ARG_COUNT  0
- # endif
-
- /** Frame pointer (RBP) relative offset of the last push. */
- # ifdef RT_OS_WINDOWS
- #  define IEMNATIVE_FP_OFF_LAST_PUSH        (7 * -8)
- # else
- #  define IEMNATIVE_FP_OFF_LAST_PUSH        (5 * -8)
- # endif
- /** Frame pointer (RBP) relative offset of the stack variable area (the lowest
-  * address for it). */
- # define IEMNATIVE_FP_OFF_STACK_VARS        (IEMNATIVE_FP_OFF_LAST_PUSH - IEMNATIVE_FRAME_ALIGN_SIZE - IEMNATIVE_FRAME_VAR_SIZE)
- /** Frame pointer (RBP) relative offset of the first stack argument for calls. */
- # define IEMNATIVE_FP_OFF_STACK_ARG0        (IEMNATIVE_FP_OFF_STACK_VARS - IEMNATIVE_FRAME_STACK_ARG_COUNT * 8)
- /** Frame pointer (RBP) relative offset of the second stack argument for calls. */
- # define IEMNATIVE_FP_OFF_STACK_ARG1        (IEMNATIVE_FP_OFF_STACK_ARG0 + 8)
- /** Frame pointer (RBP) relative offset of the third stack argument for calls. */
- # define IEMNATIVE_FP_OFF_STACK_ARG2        (IEMNATIVE_FP_OFF_STACK_ARG0 + 16)
- /** Frame pointer (RBP) relative offset of the fourth stack argument for calls. */
- # define IEMNATIVE_FP_OFF_STACK_ARG3        (IEMNATIVE_FP_OFF_STACK_ARG0 + 24)
-
- # ifdef RT_OS_WINDOWS
- /** Frame pointer (RBP) relative offset of the first incoming shadow argument. */
- #  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG0   (16)
- /** Frame pointer (RBP) relative offset of the second incoming shadow argument. */
- #  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG1   (24)
- /** Frame pointer (RBP) relative offset of the third incoming shadow argument. */
- #  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG2   (32)
- /** Frame pointer (RBP) relative offset of the fourth incoming shadow argument. */
- #  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG3   (40)
- # endif
-
- #elif RT_ARCH_ARM64
-
- #else
- # error "port me"
- #endif
- /** @} */


…
      Ptr = iemDwarfPutLeb128(Ptr, 1);                        /* Code alignment factor (LEB128 = 1). */
      Ptr = iemDwarfPutLeb128(Ptr, -8);                       /* Data alignment factor (LEB128 = -8). */
+ #  ifdef RT_ARCH_AMD64
      Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA);          /* Return address column (ULEB128) */
+ #  elif defined(RT_ARCH_ARM64)
+     Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_PC);          /* Return address column (ULEB128) */
+ #  else
+ #   error "port me"
+ #  endif
      /* Initial instructions: */
+ #  ifdef RT_ARCH_AMD64
      Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   /* CFA     = RBP + 0x10 - first stack parameter */
      Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,  1);    /* Ret RIP = [CFA + 1*-8] */
…
      Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6);    /* R14     = [CFA + 6*-8] */
      Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7);    /* R15     = [CFA + 7*-8] */
+ #  elif defined(RT_ARCH_ARM64)
+     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP,  0);    /* CFA     = BP + 0x00 - first stack parameter */
+     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_PC,  1);    /* Ret PC  = [CFA + 1*-8] */
+     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP,  2);    /* Ret BP  = [CFA + 2*-8] */
+ #  endif
      while ((Ptr.u - PtrCie.u) & 3)
          *Ptr.pb++ = DW_CFA_nop;
…
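A note on the arithmetic in the rules above: with the CIE's data alignment factor of -8, a DW_CFA_offset rule with factored offset N places a register at CFA + N * -8. Applied to the AMD64 frame (CFA = RBP + 0x10), the return RIP rule thus resolves to RBP + 8, exactly where the call left it. A minimal standalone check of this (plain C sketch, not VBox code):

    #include <assert.h>

    int main(void)
    {
        int const cfaRelRbp   = 16;  /* DW_CFA_def_cfa: CFA = RBP + 0x10 */
        int const cbDataAlign = -8;  /* CIE data alignment factor */
        assert(cfaRelRbp + 1 * cbDataAlign ==   8);  /* Ret RIP   at [RBP+8] */
        assert(cfaRelRbp + 2 * cbDataAlign ==   0);  /* saved RBP at [RBP+0] */
        assert(cfaRelRbp + 7 * cbDataAlign == -40);  /* R15 at [RBP-40], matching IEMNATIVE_FP_OFF_LAST_PUSH (5 * -8) in the non-Windows case */
        return 0;
    }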
  # ifdef RT_OS_WINDOWS
  #  ifndef VBOXSTRICTRC_STRICT_ENABLED
-     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, X86_GREG_xBX);
+     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
      AssertReturn(off != UINT32_MAX, UINT32_MAX);
      if (cParams > 0)
…
      }
  #  else  /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
-     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xBX);
+     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
      AssertReturn(off != UINT32_MAX, UINT32_MAX);
      if (cParams > 0)
…
  #  endif /* VBOXSTRICTRC_STRICT_ENABLED */
  # else
-     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, X86_GREG_xBX);
+     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
      AssertReturn(off != UINT32_MAX, UINT32_MAX);
      if (cParams > 0)
…

  #elif RT_ARCH_ARM64
-     RT_NOREF(pReNative, pCallEntry);
+     RT_NOREF(pReNative, pCallEntry, cParams);
      off = UINT32_MAX;

…
          off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8,  X86_GREG_xCX); /* cl = instruction number */
          AssertReturn(off != UINT32_MAX, UINT32_MAX);
-         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, X86_GREG_xBX);
+         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
          AssertReturn(off != UINT32_MAX, UINT32_MAX);
          off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
          AssertReturn(off != UINT32_MAX, UINT32_MAX);
  # else
-         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, X86_GREG_xBX);
+         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
          AssertReturn(off != UINT32_MAX, UINT32_MAX);
          off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
…


+ typedef enum
+ {
+     kArm64InstrStLdPairType_kPostIndex = 1,
+     kArm64InstrStLdPairType_kSigned    = 2,
+     kArm64InstrStLdPairType_kPreIndex  = 3
+ } ARM64INSTRSTLDPAIRTYPE;
+
+ DECL_FORCE_INLINE(uint32_t) Armv8A64MkInstrStLdPair(bool fLoad, uint32_t iOpc, ARM64INSTRSTLDPAIRTYPE enmType,
+                                                     uint32_t iReg1, uint32_t iReg2, uint32_t iBaseReg, int32_t iImm7 = 0)
+ {
+     Assert(iOpc < 3); Assert(iReg1 <= 31); Assert(iReg2 <= 31); Assert(iBaseReg <= 31); Assert(iImm7 < 64 && iImm7 >= -64);
+     return (iOpc << 30)
+          | UINT32_C(0x28000000)
+          | ((uint32_t)enmType << 23)
+          | ((uint32_t)fLoad << 22)
+          | (((uint32_t)iImm7 & UINT32_C(0x7f)) << 15) /* mask to the 7-bit field so negative offsets don't spill into bits 22+ */
+          | (iReg2 << 10)
+          | (iBaseReg << 5)
+          | iReg1;
+ }
+
+
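As a quick sanity check of the encoder just added: feeding it the values the prolog below uses for its first store should reproduce the canonical encoding of stp x19, x20, [sp, #-0x60]!, which is 0xa9ba53f3. A standalone sketch with the register numbers and the masked imm7 written out (assumed values, not VBox code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* stp x19, x20, [sp, #-96]! : iOpc=2 (64-bit pair), pre-index, imm7 = -96/8 = -12. */
        uint32_t const uInstr = (UINT32_C(2) << 30)                      /* 64-bit pair */
                              | UINT32_C(0x28000000)                     /* base opcode */
                              | (UINT32_C(3) << 23)                      /* kPreIndex */
                              | (UINT32_C(0) << 22)                      /* store, not load */
                              | (((uint32_t)-12 & UINT32_C(0x7f)) << 15) /* imm7, masked */
                              | (UINT32_C(20) << 10)                     /* x20 */
                              | (UINT32_C(31) <<  5)                     /* sp as base */
                              |  UINT32_C(19);                           /* x19 */
        assert(uInstr == UINT32_C(0xa9ba53f3));
        return 0;
    }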
  /**
   * Emits a standard prolog.
…
      pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
      pbCodeBuf[off++] = 0x50 + X86_GREG_xBX;     /* push rbx */
+     AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
  # ifdef RT_OS_WINDOWS
      pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbx, rcx ; RBX = pVCpu */
…

  #elif RT_ARCH_ARM64
-     RT_NOREF(pReNative);
-     off = UINT32_MAX;
+     /*
+      * We set up a stack frame exactly like on x86, only we have to push the
+      * return address ourselves here.  We save all non-volatile registers.
+      */
+     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
+     AssertReturn(pu32CodeBuf, UINT32_MAX);
+     /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
+     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
+     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kPreIndex,
+                                                  ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
+                                                  -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
+     /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
+     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
+                                                  ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
+     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
+                                                  ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
+     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
+                                                  ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
+     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
+                                                  ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
+     /* Save the BP and LR (ret address) registers at the top of the frame. */
+     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
+                                                  ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
+     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
+     /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address (above the current SP). */
+     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 < 4096);
+     pu32CodeBuf[off++] = UINT32_C(0x91000000) | ((IEMNATIVE_FRAME_SAVE_REG_SIZE - 16) << 10)
+                        | (ARMV8_A64_REG_SP << 5) | ARMV8_A64_REG_BP;
+
+     /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ;  Allocate the variable area from SP. */
+     AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 4096);
+     pu32CodeBuf[off++] = UINT32_C(0xd1000000) | (IEMNATIVE_FRAME_VAR_SIZE << 10)
+                        | (ARMV8_A64_REG_SP << 5) | ARMV8_A64_REG_SP;

  #else
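For orientation, the frame the ARM64 prolog above builds can be sketched as follows; the offsets assume the IEMNATIVE_FRAME_SAVE_REG_SIZE = 96 and IEMNATIVE_FRAME_VAR_SIZE = 0x40 values from the header below (an illustration, not emitted output):

    /* After the prolog, relative to the new BP (x29):
     *   BP + 8   : saved LR (x30)     -- top of the register save area
     *   BP + 0   : saved BP (x29)     -- where the new BP points
     *   BP - 8   : saved x28
     *   ...      : the x27..x20 pairs
     *   BP - 80  : saved x19          -- SP right after the pre-index stp
     *   BP - 144 : bottom of the 0x40-byte variable area -- final SP
     */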
  • trunk/src/VBox/VMM/include/IEMN8veRecompiler.h

    r101203 → r101247
   * @{
   */
+
+ /** @name Stack Frame Layout
+  *
+  * @{  */
+ /** The size of the area for stack variables and spills and stuff. */
+ #define IEMNATIVE_FRAME_VAR_SIZE            0x40
+ #ifdef RT_ARCH_AMD64
+ /** Number of stack argument slots for calls made from the frame. */
+ # define IEMNATIVE_FRAME_STACK_ARG_COUNT    4
+ /** A stack alignment adjustment (between the non-volatile register pushes and
+  *  the stack variable area, so the latter is better aligned). */
+ # define IEMNATIVE_FRAME_ALIGN_SIZE         8
+ /** Number of any shadow arguments (spill area) for calls we make. */
+ # ifdef RT_OS_WINDOWS
+ #  define IEMNATIVE_FRAME_SHADOW_ARG_COUNT  4
+ # else
+ #  define IEMNATIVE_FRAME_SHADOW_ARG_COUNT  0
+ # endif
+
+ /** Frame pointer (RBP) relative offset of the last push. */
+ # ifdef RT_OS_WINDOWS
+ #  define IEMNATIVE_FP_OFF_LAST_PUSH        (7 * -8)
+ # else
+ #  define IEMNATIVE_FP_OFF_LAST_PUSH        (5 * -8)
+ # endif
+ /** Frame pointer (RBP) relative offset of the stack variable area (the lowest
+  * address for it). */
+ # define IEMNATIVE_FP_OFF_STACK_VARS        (IEMNATIVE_FP_OFF_LAST_PUSH - IEMNATIVE_FRAME_ALIGN_SIZE - IEMNATIVE_FRAME_VAR_SIZE)
+ /** Frame pointer (RBP) relative offset of the first stack argument for calls. */
+ # define IEMNATIVE_FP_OFF_STACK_ARG0        (IEMNATIVE_FP_OFF_STACK_VARS - IEMNATIVE_FRAME_STACK_ARG_COUNT * 8)
+ /** Frame pointer (RBP) relative offset of the second stack argument for calls. */
+ # define IEMNATIVE_FP_OFF_STACK_ARG1        (IEMNATIVE_FP_OFF_STACK_ARG0 + 8)
+ /** Frame pointer (RBP) relative offset of the third stack argument for calls. */
+ # define IEMNATIVE_FP_OFF_STACK_ARG2        (IEMNATIVE_FP_OFF_STACK_ARG0 + 16)
+ /** Frame pointer (RBP) relative offset of the fourth stack argument for calls. */
+ # define IEMNATIVE_FP_OFF_STACK_ARG3        (IEMNATIVE_FP_OFF_STACK_ARG0 + 24)
+
+ # ifdef RT_OS_WINDOWS
+ /** Frame pointer (RBP) relative offset of the first incoming shadow argument. */
+ #  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG0   (16)
+ /** Frame pointer (RBP) relative offset of the second incoming shadow argument. */
+ #  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG1   (24)
+ /** Frame pointer (RBP) relative offset of the third incoming shadow argument. */
+ #  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG2   (32)
+ /** Frame pointer (RBP) relative offset of the fourth incoming shadow argument. */
+ #  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG3   (40)
+ # endif
+
+ #elif RT_ARCH_ARM64
+ /** No stack argument slots; with 8 argument registers there is no need for any. */
+ # define IEMNATIVE_FRAME_STACK_ARG_COUNT    0
+ /** There is no argument spill area. */
+ # define IEMNATIVE_FRAME_SHADOW_ARG_COUNT   0
+
+ /** Number of saved registers at the top of our stack frame.
+  * This includes the return address and old frame pointer, so x19 thru x30. */
+ # define IEMNATIVE_FRAME_SAVE_REG_COUNT     (12)
+ /** The size of the register save area (IEMNATIVE_FRAME_SAVE_REG_COUNT * 8 bytes). */
+ # define IEMNATIVE_FRAME_SAVE_REG_SIZE      (IEMNATIVE_FRAME_SAVE_REG_COUNT * 8)
+
+ /** Frame pointer (BP) relative offset of the last push. */
+ # define IEMNATIVE_FP_OFF_LAST_PUSH         (7 * -8)
+
+ /** Frame pointer (BP) relative offset of the stack variable area (the lowest
+  * address for it). */
+ # define IEMNATIVE_FP_OFF_STACK_VARS        (IEMNATIVE_FP_OFF_LAST_PUSH - IEMNATIVE_FRAME_ALIGN_SIZE - IEMNATIVE_FRAME_VAR_SIZE)
+
+ #else
+ # error "port me"
+ #endif
+ /** @} */
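To make the AMD64 numbers concrete: in the non-Windows case (five pushes after rbp, no shadow area) the macros above resolve to RBP-0x70 for the variable area and RBP-0x90 for the first stack argument. A compile-time check with the values mirrored locally (illustrative names, not part of the header):

    #include <assert.h>

    #define MY_FRAME_VAR_SIZE       0x40
    #define MY_FRAME_ALIGN_SIZE     8
    #define MY_FP_OFF_LAST_PUSH     (5 * -8)   /* rbx, r12-r15: last push at RBP-40 */
    #define MY_FP_OFF_STACK_VARS    (MY_FP_OFF_LAST_PUSH - MY_FRAME_ALIGN_SIZE - MY_FRAME_VAR_SIZE)
    #define MY_FP_OFF_STACK_ARG0    (MY_FP_OFF_STACK_VARS - 4 * 8)

    static_assert(MY_FP_OFF_STACK_VARS == -0x70, "variable area starts at RBP-0x70");
    static_assert(MY_FP_OFF_STACK_ARG0 == -0x90, "first stack argument lands at RBP-0x90");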
+
+
+ /** @name Fixed Register Allocation(s)
+  * @{ */
+ /** @def IEMNATIVE_REG_FIXED_PVMCPU
+  * The register number holding the pVCpu pointer.  */
+ #ifdef RT_ARCH_AMD64
+ # define IEMNATIVE_REG_FIXED_PVMCPU         X86_GREG_xBX
+ #elif RT_ARCH_ARM64
+ # define IEMNATIVE_REG_FIXED_PVMCPU         ARMV8_A64_REG_X28
+ /** Dedicated temporary register.
+  * @todo replace this by a register allocator and content tracker.  */
+ # define IEMNATIVE_REG_FIXED_TMP0           ARMV8_A64_REG_X15
+ #else
+ # error "port me"
+ #endif
+ /** @} */

  /** Native code generator label types. */
…
   *          failure.
   * @param   pReNative   The native recompile state.
-  * @param   off         Current instruction offset.
+  * @param   off         Current instruction offset.  Works safely for UINT32_MAX
+  *                      as well.
   * @param   cInstrReq   Number of instructions about to be added.  It's okay to
   *                      overestimate this a bit.
…
  DECL_FORCE_INLINE(PIEMNATIVEINSTR) iemNativeInstrBufEnsure(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
  {
-     if (RT_LIKELY(off + cInstrReq <= pReNative->cInstrBufAlloc))
+     if (RT_LIKELY(off + (uint64_t)cInstrReq <= pReNative->cInstrBufAlloc))
          return pReNative->pInstrBuf;
      return iemNativeInstrBufEnsureSlow(pReNative, off, cInstrReq);
…
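The (uint64_t) cast in the change above matters because off can legitimately be UINT32_MAX, the error value returned by a failed emit: in pure 32-bit arithmetic the sum wraps around and the fast path would wrongly succeed. A small demonstration of the failure mode (standalone sketch, not VBox code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t const off       = UINT32_MAX; /* error value from an earlier emitter */
        uint32_t const cInstrReq = 4;
        uint32_t const cAlloc    = 256;
        /* 32-bit addition wraps to 3, so the bogus offset would pass the bounds check... */
        assert(off + cInstrReq <= cAlloc);
        /* ...while widening one operand keeps the comparison honest. */
        assert(!(off + (uint64_t)cInstrReq <= cAlloc));
        return 0;
    }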
      uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
      AssertReturn(pbCodeBuf, UINT32_MAX);
-     pbCodeBuf[off++] = 0x90;                    /* nop */
+     /* nop */
+     pbCodeBuf[off++] = 0x90;

  #elif RT_ARCH_ARM64
      uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
-     pu32CodeBuf[off++] = 0xe503201f;            /* nop? */
+     AssertReturn(pu32CodeBuf, UINT32_MAX);
+     /* nop */
+     pu32CodeBuf[off++] = 0xd503201f;

  #else
…
      uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
      AssertReturn(pbCodeBuf, UINT32_MAX);
-     if (iGpr >= 8)                          /* xor gpr32, gpr32 */
+     /* xor gpr32, gpr32 */
+     if (iGpr >= 8)
          pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
      pbCodeBuf[off++] = 0x33;
…

  #elif RT_ARCH_ARM64
-     RT_NOREF(pReNative, iGpr, uImm64);
-     off = UINT32_MAX;
+     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+     AssertReturn(pu32CodeBuf, UINT32_MAX);
+     /* mov gpr, #0x0 */
+     pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

  #else
…

  #elif RT_ARCH_ARM64
-     RT_NOREF(pReNative, iGpr, uImm64);
-     off = UINT32_MAX;
+     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
+     AssertReturn(pu32CodeBuf, UINT32_MAX);
+
+     /*
+      * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
+      * supply remaining bits using 'movk gpr, imm16, lsl #x'.
+      *
+      * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
+      * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
+      * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
+      * after the first non-zero immediate component so we switch to movk for
+      * the remainder.
+      */
+     uint32_t fMovK = 0;
+     /* mov  gpr, imm16 */
+     uint32_t uImmPart = ((uint32_t)((uImm64 >>  0) & UINT32_C(0xffff)) << 5);
+     if (uImmPart)
+     {
+         pu32CodeBuf[off++] = UINT32_C(0xd2800000) |         (UINT32_C(0) << 21) | uImmPart | iGpr;
+         fMovK |= RT_BIT_32(29);
+     }
+     /* mov[k] gpr, imm16, lsl #16 */
+     uImmPart = ((uint32_t)((uImm64 >> 16) & UINT32_C(0xffff)) << 5);
+     if (uImmPart)
+     {
+         pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(1) << 21) | uImmPart | iGpr;
+         fMovK |= RT_BIT_32(29);
+     }
+     /* mov[k] gpr, imm16, lsl #32 */
+     uImmPart = ((uint32_t)((uImm64 >> 32) & UINT32_C(0xffff)) << 5);
+     if (uImmPart)
+     {
+         pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(2) << 21) | uImmPart | iGpr;
+         fMovK |= RT_BIT_32(29);
+     }
+     /* mov[k] gpr, imm16, lsl #48 */
+     uImmPart = ((uint32_t)((uImm64 >> 48) & UINT32_C(0xffff)) << 5);
+     if (uImmPart)
+         pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(3) << 21) | uImmPart | iGpr;
+
+     /** @todo there is an inverted mask variant we might want to explore if it
+      *        reduces the number of instructions... */
+     /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
+      *        clang 12.x does that, only to use the 'x' version for the
+      *        addressing in the following ldr. */

  #else
…
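To illustrate the sequence above: loading the hypothetical constant 0x12345678 takes two instructions, since the upper two halfwords are zero: a movz for bits 0-15 and, with bit 29 now set via fMovK, a movk for bits 16-31. A standalone sketch of the resulting words for x0:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t const iGpr = 0; /* x0 */
        /* movz x0, #0x5678 */
        uint32_t const uMovZ = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | (UINT32_C(0x5678) << 5) | iGpr;
        /* movk x0, #0x1234, lsl #16  (0xd2800000 with bit 29 set) */
        uint32_t const uMovK = UINT32_C(0xd2800000) | (UINT32_C(1) << 29) | (UINT32_C(1) << 21) | (UINT32_C(0x1234) << 5) | iGpr;
        assert(uMovZ == UINT32_C(0xd28acf00));
        assert(uMovK == UINT32_C(0xf2a24680));
        return 0;
    }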
      if (offVCpu < 128)
      {
-         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGpr & 7, X86_GREG_xBX);
+         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGpr & 7, IEMNATIVE_REG_FIXED_PVMCPU);
          pbCodeBuf[off++] = (uint8_t)offVCpu;
      }
      else
      {
-         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGpr & 7, X86_GREG_xBX);
+         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGpr & 7, IEMNATIVE_REG_FIXED_PVMCPU);
          pbCodeBuf[off++] = RT_BYTE1(offVCpu);
          pbCodeBuf[off++] = RT_BYTE2(offVCpu);
…

  #elif RT_ARCH_ARM64
-     RT_NOREF(pReNative, idxInstr);
-     off = UINT32_MAX;
+     /*
+      * There are a couple of ldr variants that take an immediate offset, so
+      * try to use those if we can, otherwise we have to use the temporary
+      * register to help with the addressing.
+      */
+     if (offVCpu < _16K)
+     {
+         /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off] (imm12 is scaled by the access size, 4). */
+         uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+         AssertReturn(pu32CodeBuf, UINT32_MAX);
+         Assert(!(offVCpu & 3));
+         pu32CodeBuf[off++] = UINT32_C(0xb9400000) | ((offVCpu >> 2) << 10) | (IEMNATIVE_REG_FIXED_PVMCPU << 5) | iGpr;
+     }
+     else
+     {
+         /* The offset is too large, so we must load it into a register and use
+            ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
+         /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
+         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
+         uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+         AssertReturn(pu32CodeBuf, UINT32_MAX);
+         pu32CodeBuf[off++] = UINT32_C(0xb8600800) | ((uint32_t)IEMNATIVE_REG_FIXED_TMP0 << 16)
+                            | ((uint32_t)IEMNATIVE_REG_FIXED_PVMCPU << 5) | iGpr;
+     }

  #else
…
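As an example of the fast path above, reading a hypothetical 32-bit field at offset 0x100 into w0 encodes as ldr w0, [x28, #0x100] (x28 being IEMNATIVE_REG_FIXED_PVMCPU), with the byte offset scaled down by the access size into the imm12 field. A standalone sketch:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t const offVCpu = 0x100; /* hypothetical field offset, 4-byte aligned */
        uint32_t const iGpr    = 0;     /* w0 */
        uint32_t const iBase   = 28;    /* x28 = IEMNATIVE_REG_FIXED_PVMCPU */
        uint32_t const uLdr    = UINT32_C(0xb9400000) | ((offVCpu >> 2) << 10) | (iBase << 5) | iGpr;
        assert(uLdr == UINT32_C(0xb9410380)); /* ldr w0, [x28, #0x100] */
        return 0;
    }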

  #elif RT_ARCH_ARM64
-     RT_NOREF(pReNative, iGprDst, iGprSrc);
-     off = UINT32_MAX;
+     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+     AssertReturn(pu32CodeBuf, UINT32_MAX);
+     /* mov dst, src;   alias for: orr dst, xzr, src */
+     pu32CodeBuf[off++] = UINT32_C(0xaa000000) | ((uint32_t)iGprSrc << 16) | ((uint32_t)ARMV8_A64_REG_XZR << 5) | iGprDst;

  #else
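The alias in the final comment is the architectural one: mov Xd, Xm assembles as orr Xd, xzr, Xm. For instance (registers chosen arbitrarily), mov x0, x1 comes out as 0xaa0103e0:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* mov x0, x1  ==  orr x0, xzr, x1 */
        uint32_t const uInstr = UINT32_C(0xaa000000) | (UINT32_C(1) << 16) | (UINT32_C(31) << 5) | UINT32_C(0);
        assert(uInstr == UINT32_C(0xaa0103e0));
        return 0;
    }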