Timestamp: Sep 22, 2023 11:48:24 PM
svn:sync-xref-src-repo-rev: 159230
Location: trunk/src/VBox/VMM
Files: 2 edited
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp
(diff: r101203 → r101247)

 #include <iprt/mem.h>
 #include <iprt/string.h>
+#if defined(RT_ARCH_AMD64)
+# include <iprt/x86.h>
+#elif defined(RT_ARCH_ARM64)
+# include <iprt/armv8.h>
+#endif

 #ifdef RT_OS_WINDOWS
…
 # error The setjmp approach must be enabled for the recompiler.
 #endif
-
-
-/*********************************************************************************************************************************
-*   Defined Constants And Macros                                                                                                 *
-*********************************************************************************************************************************/
-/** @name Stack Frame Layout
- *
- * @{ */
-/** The size of the area for stack variables and spills and stuff. */
-#define IEMNATIVE_FRAME_VAR_SIZE            0x40
-#ifdef RT_ARCH_AMD64
…
(the remainder of the removed block — the IEMNATIVE_FRAME_* and IEMNATIVE_FP_OFF_* definitions — is identical to what the header diff below adds, minus the new ARM64 parts)
…
-#elif RT_ARCH_ARM64
-
-#else
-# error "port me"
-#endif
-/** @} */

…
     Ptr = iemDwarfPutLeb128(Ptr, 1);                    /* Code alignment factor (LEB128 = 1). */
     Ptr = iemDwarfPutLeb128(Ptr, -8);                   /* Data alignment factor (LEB128 = -8). */
+# ifdef RT_ARCH_AMD64
     Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA);      /* Return address column (ULEB128) */
+# elif defined(RT_ARCH_ARM64)
+    Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_PC);      /* Return address column (ULEB128) */
+# else
+#  error "port me"
+# endif
     /* Initial instructions: */
+# ifdef RT_ARCH_AMD64
     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   /* CFA     = RBP + 0x10 - first stack parameter */
     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,  1);    /* Ret RIP = [CFA + 1*-8] */
…
     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6);    /* R14     = [CFA + 6*-8] */
     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7);    /* R15     = [CFA + 7*-8] */
+# elif defined(RT_ARCH_ARM64)
+    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 0);     /* CFA     = BP + 0x00 - first stack parameter */
+    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_PC, 1);     /* Ret PC  = [CFA + 1*-8] */
+    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2);     /* Ret BP  = [CFA + 2*-8] */
+# endif
     while ((Ptr.u - PtrCie.u) & 3)
         *Ptr.pb++ = DW_CFA_nop;
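To make the factored offsets in the unwind info concrete: the CIE's data alignment factor is -8, so each DW_CFA_offset rule "reg, n" says the register was saved at CFA + n * -8. A small standalone sanity check (my sketch, not part of the changeset) that prints the AMD64 rules above and recovers the push order of the prolog:

    #include <cstdio>

    int main()
    {
        int const cbDataAlignFactor = -8;   /* from the CIE emitted above */
        struct { const char *pszReg; unsigned uFactored; } const s_aRules[] =
        {
            { "RIP", 1 }, { "RBP", 2 }, { "RBX", 3 }, { "R12", 4 },
            { "R13", 5 }, { "R14", 6 }, { "R15", 7 },
        };
        for (auto const &rRule : s_aRules)  /* DW_CFA_offset: reg = [CFA + n * dataAlign] */
            printf("%s = [CFA%+d]\n", rRule.pszReg, (int)rRule.uFactored * cbDataAlignFactor);
        return 0;   /* prints CFA-8 through CFA-56, matching the emitter comments above */
    }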
…
 # ifdef RT_OS_WINDOWS
 #  ifndef VBOXSTRICTRC_STRICT_ENABLED
-    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, X86_GREG_xBX);
+    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
     AssertReturn(off != UINT32_MAX, UINT32_MAX);
     if (cParams > 0)
…
     }
 #  else  /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
-    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xBX);
+    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
     AssertReturn(off != UINT32_MAX, UINT32_MAX);
     if (cParams > 0)
…
 #  endif /* VBOXSTRICTRC_STRICT_ENABLED */
 # else
-    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, X86_GREG_xBX);
+    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
     AssertReturn(off != UINT32_MAX, UINT32_MAX);
     if (cParams > 0)
…

 #elif RT_ARCH_ARM64
-    RT_NOREF(pReNative, pCallEntry);
+    RT_NOREF(pReNative, pCallEntry, cParams);
     off = UINT32_MAX;

…
     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8,  X86_GREG_xCX); /* cl = instruction number */
     AssertReturn(off != UINT32_MAX, UINT32_MAX);
-    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, X86_GREG_xBX);
+    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
     AssertReturn(off != UINT32_MAX, UINT32_MAX);
     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
     AssertReturn(off != UINT32_MAX, UINT32_MAX);
 # else
-    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, X86_GREG_xBX);
+    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
     AssertReturn(off != UINT32_MAX, UINT32_MAX);
     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
…


+typedef enum
+{
+    kArm64InstrStLdPairType_kPostIndex = 1,
+    kArm64InstrStLdPairType_kSigned    = 2,
+    kArm64InstrStLdPairType_kPreIndex  = 3
+} ARM64INSTRSTLDPAIRTYPE;
+
+DECL_FORCE_INLINE(uint32_t) Armv8A64MkInstrStLdPair(bool fLoad, uint32_t iOpc, ARM64INSTRSTLDPAIRTYPE enmType,
+                                                    uint32_t iReg1, uint32_t iReg2, uint32_t iBaseReg, int32_t iImm7 = 0)
+{
+    Assert(iOpc < 3); Assert(iReg1 <= 31); Assert(iReg2 <= 31); Assert(iBaseReg <= 31); Assert(iImm7 < 64 && iImm7 >= -64);
+    return (iOpc                               << 30)
+         | UINT32_C(0x28000000)
+         | ((uint32_t)enmType                  << 23)
+         | ((uint32_t)fLoad                    << 22)
+         | (((uint32_t)iImm7 & UINT32_C(0x7f)) << 15) /* mask to 7 bits, or a negative offset smears into the fields above */
+         | (iReg2                              << 10)
+         | (iBaseReg                           <<  5)
+         | iReg1;
+}
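As a cross-check of the pair store/load encoder just added (my sketch, not part of the changeset): the first prolog instruction below, stp x19, x20, [sp, #-96]!, should come out as 0xa9ba53f3, which is what an assembler produces for that mnemonic.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    enum ARM64INSTRSTLDPAIRTYPE { kPostIndex = 1, kSigned = 2, kPreIndex = 3 };

    /* Stand-in for Armv8A64MkInstrStLdPair above (same field layout). */
    static uint32_t MkInstrStLdPair(bool fLoad, uint32_t iOpc, ARM64INSTRSTLDPAIRTYPE enmType,
                                    uint32_t iReg1, uint32_t iReg2, uint32_t iBaseReg, int32_t iImm7)
    {
        return (iOpc << 30) | UINT32_C(0x28000000) | ((uint32_t)enmType << 23) | ((uint32_t)fLoad << 22)
             | (((uint32_t)iImm7 & UINT32_C(0x7f)) << 15) | (iReg2 << 10) | (iBaseReg << 5) | iReg1;
    }

    int main()
    {
        /* stp x19, x20, [sp, #-96]!  -- the imm7 field holds the displacement divided by 8. */
        uint32_t const uInstr = MkInstrStLdPair(false /*store*/, 2 /*64-bit*/, kPreIndex,
                                                19 /*x19*/, 20 /*x20*/, 31 /*SP*/, -96 / 8);
        printf("%08x\n", uInstr);   /* 0xa9ba53f3 */
        assert(uInstr == UINT32_C(0xa9ba53f3));
        return 0;
    }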
…
 /**
  * Emits a standard prolog.
…
     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
     pbCodeBuf[off++] = 0x50 + X86_GREG_xBX;         /* push rbx */
+    AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
 # ifdef RT_OS_WINDOWS
     pbCodeBuf[off++] = X86_OP_REX_W;                /* mov rbx, rcx ; RBX = pVCpu */
…

 #elif RT_ARCH_ARM64
-    RT_NOREF(pReNative);
-    off = UINT32_MAX;
+    /*
+     * We set up a stack frame exactly like on x86, only we have to push the
+     * return address ourselves here.  We save all non-volatile registers.
+     */
+    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
+    AssertReturn(pu32CodeBuf, UINT32_MAX);
+    /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
+    AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
+    pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kPreIndex,
+                                                 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
+                                                 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
+    /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
+    pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
+                                                 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
+    pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
+                                                 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
+    pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
+                                                 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
+    pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
+                                                 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
+    /* Save the BP and LR (ret address) registers at the top of the frame. */
+    pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
+                                                 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
+    AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
+    /* add bp, sp, #(IEMNATIVE_FRAME_SAVE_REG_SIZE - 16) ; Set BP to point to the old BP stack address. */
+    AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 < 4096);
+    pu32CodeBuf[off++] = UINT32_C(0x91000000) | ((IEMNATIVE_FRAME_SAVE_REG_SIZE - 16) << 10)
+                       | ((uint32_t)ARMV8_A64_REG_SP << 5) | ARMV8_A64_REG_BP;
+
+    /* sub sp, sp, #IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
+    AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 4096);
+    pu32CodeBuf[off++] = UINT32_C(0xd1000000) | (IEMNATIVE_FRAME_VAR_SIZE << 10)
+                       | ((uint32_t)ARMV8_A64_REG_SP << 5) | ARMV8_A64_REG_SP;

 #else
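Spelled out with the header's values (IEMNATIVE_FRAME_SAVE_REG_COUNT = 12, IEMNATIVE_FRAME_VAR_SIZE = 0x40), the prolog above should disassemble roughly as shown in the comment of this standalone sketch of the frame arithmetic (mine, not changeset code):

    #include <cstdint>

    constexpr int32_t IEMNATIVE_FRAME_SAVE_REG_COUNT = 12;      /* x19..x28 + bp(x29) + lr(x30) */
    constexpr int32_t IEMNATIVE_FRAME_SAVE_REG_SIZE  = IEMNATIVE_FRAME_SAVE_REG_COUNT * 8;
    constexpr int32_t IEMNATIVE_FRAME_VAR_SIZE       = 0x40;

    /* The emitted sequence, as it should disassemble:
     *   stp x19, x20, [sp, #-96]!   ; pre-index: SP -= 96, pair stored at [SP+0]
     *   stp x21, x22, [sp, #16]     ; signed offset: imm7 = 2 slots
     *   stp x23, x24, [sp, #32]
     *   stp x25, x26, [sp, #48]
     *   stp x27, x28, [sp, #64]
     *   stp x29, x30, [sp, #80]     ; old BP and return address at the top
     *   add x29, sp, #0x50          ; BP -> saved BP, like 'mov rbp, rsp' after 'push rbp'
     *   sub sp, sp, #0x40           ; variable/spill area
     */
    static_assert(-IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == -12, "imm7 for the pre-index stp (scaled by 8)");
    static_assert(IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 == 80,  "BP offset: the top pair sits 16 bytes below the frame end");
    static_assert(10 * 8 == 80,                              "slot index 10 passed for the bp/lr pair");
    int main() { return 0; }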
trunk/src/VBox/VMM/include/IEMN8veRecompiler.h
(diff: r101203 → r101247)

  * @{
  */
+
+/** @name Stack Frame Layout
+ *
+ * @{ */
+/** The size of the area for stack variables and spills and stuff. */
+#define IEMNATIVE_FRAME_VAR_SIZE            0x40
+#ifdef RT_ARCH_AMD64
+/** Number of stack argument slots for calls made from the frame. */
+# define IEMNATIVE_FRAME_STACK_ARG_COUNT    4
+/** A stack alignment adjustment (between non-volatile register pushes and
+ *  the stack variable area, so the latter is better aligned). */
+# define IEMNATIVE_FRAME_ALIGN_SIZE         8
+/** Number of any shadow arguments (spill area) for calls we make. */
+# ifdef RT_OS_WINDOWS
+#  define IEMNATIVE_FRAME_SHADOW_ARG_COUNT  4
+# else
+#  define IEMNATIVE_FRAME_SHADOW_ARG_COUNT  0
+# endif
+
+/** Frame pointer (RBP) relative offset of the last push. */
+# ifdef RT_OS_WINDOWS
+#  define IEMNATIVE_FP_OFF_LAST_PUSH        (7 * -8)
+# else
+#  define IEMNATIVE_FP_OFF_LAST_PUSH        (5 * -8)
+# endif
+/** Frame pointer (RBP) relative offset of the stack variable area (the lowest
+ *  address for it). */
+# define IEMNATIVE_FP_OFF_STACK_VARS        (IEMNATIVE_FP_OFF_LAST_PUSH - IEMNATIVE_FRAME_ALIGN_SIZE - IEMNATIVE_FRAME_VAR_SIZE)
+/** Frame pointer (RBP) relative offset of the first stack argument for calls. */
+# define IEMNATIVE_FP_OFF_STACK_ARG0        (IEMNATIVE_FP_OFF_STACK_VARS - IEMNATIVE_FRAME_STACK_ARG_COUNT * 8)
+/** Frame pointer (RBP) relative offset of the second stack argument for calls. */
+# define IEMNATIVE_FP_OFF_STACK_ARG1        (IEMNATIVE_FP_OFF_STACK_ARG0 + 8)
+/** Frame pointer (RBP) relative offset of the third stack argument for calls. */
+# define IEMNATIVE_FP_OFF_STACK_ARG2        (IEMNATIVE_FP_OFF_STACK_ARG0 + 16)
+/** Frame pointer (RBP) relative offset of the fourth stack argument for calls. */
+# define IEMNATIVE_FP_OFF_STACK_ARG3        (IEMNATIVE_FP_OFF_STACK_ARG0 + 24)
+
+# ifdef RT_OS_WINDOWS
+/** Frame pointer (RBP) relative offset of the first incoming shadow argument. */
+#  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG0   (16)
+/** Frame pointer (RBP) relative offset of the second incoming shadow argument. */
+#  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG1   (24)
+/** Frame pointer (RBP) relative offset of the third incoming shadow argument. */
+#  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG2   (32)
+/** Frame pointer (RBP) relative offset of the fourth incoming shadow argument. */
+#  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG3   (40)
+# endif
+
+#elif RT_ARCH_ARM64
+/** No stack argument slots; the 8 register arguments are enough. */
+# define IEMNATIVE_FRAME_STACK_ARG_COUNT    0
+/** There is no argument spill area. */
+# define IEMNATIVE_FRAME_SHADOW_ARG_COUNT   0
+
+/** Number of saved registers at the top of our stack frame.
+ *  This includes the return address and old frame pointer, so x19 thru x30. */
+# define IEMNATIVE_FRAME_SAVE_REG_COUNT     (12)
+/** The size of the save-register area (IEMNATIVE_FRAME_SAVE_REG_COUNT * 8). */
+# define IEMNATIVE_FRAME_SAVE_REG_SIZE      (IEMNATIVE_FRAME_SAVE_REG_COUNT * 8)
+
+/** Frame pointer (BP) relative offset of the last push. */
+# define IEMNATIVE_FP_OFF_LAST_PUSH         (7 * -8)
+
+/** Frame pointer (BP) relative offset of the stack variable area (the lowest
+ *  address for it). */
+# define IEMNATIVE_FP_OFF_STACK_VARS        (IEMNATIVE_FP_OFF_LAST_PUSH - IEMNATIVE_FRAME_ALIGN_SIZE - IEMNATIVE_FRAME_VAR_SIZE)
+
+#else
+# error "port me"
+#endif
+/** @} */
+
+
+/** @name Fixed Register Allocation(s)
+ * @{ */
+/** @def IEMNATIVE_REG_FIXED_PVMCPU
+ * The register holding the pVCpu pointer. */
+#ifdef RT_ARCH_AMD64
+# define IEMNATIVE_REG_FIXED_PVMCPU         X86_GREG_xBX
+#elif RT_ARCH_ARM64
+# define IEMNATIVE_REG_FIXED_PVMCPU         ARMV8_A64_REG_X28
+/** Dedicated temporary register.
+ * @todo replace this by a register allocator and content tracker. */
+# define IEMNATIVE_REG_FIXED_TMP0           ARMV8_A64_REG_X15
+#else
+# error "port me"
+#endif
+/** @} */

 /** Native code generator label types. */
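To sanity-check the AMD64 numbers (my sketch, not part of the changeset), plugging in the Windows values: seven non-volatile pushes after rbp put the last one at rbp-56, the 0x40-byte variable area plus 8 alignment bytes bottoms out at rbp-128, and the four outgoing stack-argument slots start at rbp-160.

    #include <cstdint>

    /* Windows/AMD64 values from the header above. */
    constexpr int IEMNATIVE_FRAME_VAR_SIZE        = 0x40;
    constexpr int IEMNATIVE_FRAME_STACK_ARG_COUNT = 4;
    constexpr int IEMNATIVE_FRAME_ALIGN_SIZE      = 8;
    constexpr int IEMNATIVE_FP_OFF_LAST_PUSH      = 7 * -8;   /* rbx, r12-r15, rsi, rdi pushed after rbp */
    constexpr int IEMNATIVE_FP_OFF_STACK_VARS     = IEMNATIVE_FP_OFF_LAST_PUSH - IEMNATIVE_FRAME_ALIGN_SIZE - IEMNATIVE_FRAME_VAR_SIZE;
    constexpr int IEMNATIVE_FP_OFF_STACK_ARG0     = IEMNATIVE_FP_OFF_STACK_VARS - IEMNATIVE_FRAME_STACK_ARG_COUNT * 8;

    static_assert(IEMNATIVE_FP_OFF_LAST_PUSH  ==  -56, "last non-volatile push lands at rbp-56");
    static_assert(IEMNATIVE_FP_OFF_STACK_VARS == -128, "64-byte variable area + 8 alignment bytes below the pushes");
    static_assert(IEMNATIVE_FP_OFF_STACK_ARG0 == -160, "outgoing stack args sit below the variable area");
    int main() { return 0; }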
…
  * failure.
  * @param   pReNative   The native recompile state.
- * @param   off         Current instruction offset.
+ * @param   off         Current instruction offset.  Works safely for UINT32_MAX
+ *                      as well.
  * @param   cInstrReq   Number of instructions about to be added.  It's okay to
  *                      overestimate this a bit.
…
 DECL_FORCE_INLINE(PIEMNATIVEINSTR) iemNativeInstrBufEnsure(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
 {
-    if (RT_LIKELY(off + cInstrReq <= pReNative->cInstrBufAlloc))
+    if (RT_LIKELY(off + (uint64_t)cInstrReq <= pReNative->cInstrBufAlloc))
         return pReNative->pInstrBuf;
     return iemNativeInstrBufEnsureSlow(pReNative, off, cInstrReq);
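The new (uint64_t) cast is what makes the "works safely for UINT32_MAX" promise true: off doubles as the error sentinel, and in plain 32-bit arithmetic UINT32_MAX + cInstrReq wraps around and slips past the bounds check instead of taking the slow path. A minimal demonstration (my sketch, not changeset code):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint32_t const off       = UINT32_MAX;  /* error sentinel from an earlier emitter */
        uint32_t const cInstrReq = 2;
        uint32_t const cBufAlloc = 256;

        bool const fWraps  = off + cInstrReq           <= cBufAlloc;  /* wraps to 1: check passes! */
        bool const fProper = off + (uint64_t)cInstrReq <= cBufAlloc;  /* 0x100000001: check fails  */
        printf("32-bit: %s, 64-bit: %s\n", fWraps ? "passes" : "fails", fProper ? "passes" : "fails");
        return 0;
    }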
…
     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
     AssertReturn(pbCodeBuf, UINT32_MAX);
-    pbCodeBuf[off++] = 0x90;                    /* nop */
+    /* nop */
+    pbCodeBuf[off++] = 0x90;

 #elif RT_ARCH_ARM64
     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
-    pu32CodeBuf[off++] = 0xe503201f;            /* nop? */
+    AssertReturn(pu32CodeBuf, UINT32_MAX);
+    /* nop */
+    pu32CodeBuf[off++] = 0xd503201f;

 #else
…
     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
     AssertReturn(pbCodeBuf, UINT32_MAX);
-    if (iGpr >= 8)                              /* xor gpr32, gpr32 */
+    /* xor gpr32, gpr32 */
+    if (iGpr >= 8)
         pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
     pbCodeBuf[off++] = 0x33;
…

 #elif RT_ARCH_ARM64
-    RT_NOREF(pReNative, iGpr, uImm64);
-    off = UINT32_MAX;
+    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+    AssertReturn(pu32CodeBuf, UINT32_MAX);
+    /* mov gpr, #0x0 */
+    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

 #else
…

 #elif RT_ARCH_ARM64
-    RT_NOREF(pReNative, iGpr, uImm64);
-    off = UINT32_MAX;
+    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
+    AssertReturn(pu32CodeBuf, UINT32_MAX);
+
+    /*
+     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
+     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
+     *
+     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
+     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
+     * is 0x20000000 (bit 29).  So, we keep this bit in a variable and set it
+     * after the first non-zero immediate component so we switch to movk for
+     * the remainder.
+     */
+    uint32_t fMovK = 0;
+    /* mov  gpr, imm16 */
+    uint32_t uImmPart = ((uint32_t)((uImm64 >>  0) & UINT32_C(0xffff)) << 5);
+    if (uImmPart)
+    {
+        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | uImmPart | iGpr;
+        fMovK |= RT_BIT_32(29);
+    }
+    /* mov[k] gpr, imm16, lsl #16 */
+    uImmPart = ((uint32_t)((uImm64 >> 16) & UINT32_C(0xffff)) << 5);
+    if (uImmPart)
+    {
+        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(1) << 21) | uImmPart | iGpr;
+        fMovK |= RT_BIT_32(29);
+    }
+    /* mov[k] gpr, imm16, lsl #32 */
+    uImmPart = ((uint32_t)((uImm64 >> 32) & UINT32_C(0xffff)) << 5);
+    if (uImmPart)
+    {
+        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(2) << 21) | uImmPart | iGpr;
+        fMovK |= RT_BIT_32(29);
+    }
+    /* mov[k] gpr, imm16, lsl #48 */
+    uImmPart = ((uint32_t)((uImm64 >> 48) & UINT32_C(0xffff)) << 5);
+    if (uImmPart)
+        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(3) << 21) | uImmPart | iGpr;
+
+    /** @todo there is an inverted mask variant we might want to explore if it
+     *        reduces the number of instructions... */
+    /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
+     *        clang 12.x does that, only to use the 'x' version for the
+     *        addressing in the following ldr). */

 #else
…
     if (offVCpu < 128)
     {
-        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGpr & 7, X86_GREG_xBX);
+        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGpr & 7, IEMNATIVE_REG_FIXED_PVMCPU);
         pbCodeBuf[off++] = (uint8_t)offVCpu;
     }
     else
     {
-        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGpr & 7, X86_GREG_xBX);
+        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGpr & 7, IEMNATIVE_REG_FIXED_PVMCPU);
         pbCodeBuf[off++] = RT_BYTE1(offVCpu);
         pbCodeBuf[off++] = RT_BYTE2(offVCpu);
…

 #elif RT_ARCH_ARM64
-    RT_NOREF(pReNative, idxInstr);
-    off = UINT32_MAX;
+    /*
+     * There are a couple of ldr variants that take an immediate offset, so
+     * try to use those if we can, otherwise we have to use the temporary
+     * register to help with the addressing.
+     */
+    if (offVCpu < _16K)
+    {
+        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off] (imm12 is the byte offset scaled down by 4). */
+        Assert(!(offVCpu & 3));
+        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+        AssertReturn(pu32CodeBuf, UINT32_MAX);
+        pu32CodeBuf[off++] = UINT32_C(0xb9400000) | ((offVCpu >> 2) << 10) | (IEMNATIVE_REG_FIXED_PVMCPU << 5) | iGpr;
+    }
+    else
+    {
+        /* The offset is too large, so we must load it into a register and use
+           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
+        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
+        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
+        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+        AssertReturn(pu32CodeBuf, UINT32_MAX);
+        pu32CodeBuf[off++] = UINT32_C(0xb8600800) | ((uint32_t)IEMNATIVE_REG_FIXED_TMP0 << 16)
+                           | ((uint32_t)IEMNATIVE_REG_FIXED_PVMCPU << 5) | iGpr;
+    }

 #else
…

 #elif RT_ARCH_ARM64
-    RT_NOREF(pReNative, iGprDst, iGprSrc);
-    off = UINT32_MAX;
+    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+    AssertReturn(pu32CodeBuf, UINT32_MAX);
+    /* mov dst, src;   alias for: orr dst, xzr, src */
+    pu32CodeBuf[off++] = UINT32_C(0xaa000000) | ((uint32_t)iGprSrc << 16) | ((uint32_t)ARMV8_A64_REG_XZR << 5) | iGprDst;

 #else
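To watch the mov/movk selection of the imm64 loader above in action, here is a standalone sketch (mine, not changeset code) that mirrors the loop and prints the sequence for a sample constant. One edge worth noting: for uImm64 == 0 all four chunks are skipped and nothing is emitted, so callers presumably rely on the dedicated zero-GPR emitter further up.

    #include <cstdint>
    #include <cstdio>

    /* Mirrors the emitter's chunk selection: 'movz' for the first non-zero
       16-bit chunk, 'movk' (base + bit 29) for each subsequent one. */
    static void EmitLoadGprImm64(uint32_t iGpr, uint64_t uImm64)
    {
        uint32_t fMovK = 0;
        for (uint32_t iShift16 = 0; iShift16 < 4; iShift16++)
        {
            uint32_t const uImmPart = (uint32_t)((uImm64 >> (iShift16 * 16)) & UINT32_C(0xffff));
            if (uImmPart)
            {
                printf("%08x    %s x%u, #0x%04x, lsl #%u\n",
                       UINT32_C(0xd2800000) | fMovK | (iShift16 << 21) | (uImmPart << 5) | iGpr,
                       fMovK ? "movk" : "movz", iGpr, uImmPart, iShift16 * 16);
                fMovK = UINT32_C(0x20000000);   /* bit 29: turns mov(z) into movk */
            }
        }
    }

    int main()
    {
        /* Emits: d297dde0  movz x0, #0xbeef, lsl #0
                  f2dbd5a0  movk x0, #0xdead, lsl #32 */
        EmitLoadGprImm64(0, UINT64_C(0x0000dead0000beef));
        return 0;
    }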