
source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompiler.h@ 101306

Last change on this file since 101306 was 101306, checked in by vboxsync, 19 months ago

VMM/IEM: Arm build fix. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 27.4 KB
/* $Id: IEMN8veRecompiler.h 101306 2023-09-29 01:19:34Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Internals.
 */

/*
 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompiler_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompiler_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif


/** @defgroup grp_iem_n8ve_re Native Recompiler Internals.
 * @ingroup grp_iem_int
 * @{
 */

/** @name Stack Frame Layout
 *
 * @{ */
/** The size of the area for stack variables and spills and stuff. */
#define IEMNATIVE_FRAME_VAR_SIZE            0x40
#ifdef RT_ARCH_AMD64
/** Number of stack argument slots for calls made from the frame. */
# define IEMNATIVE_FRAME_STACK_ARG_COUNT    4
/** A stack alignment adjustment (between the non-volatile register pushes and
 * the stack variable area, so that the latter is better aligned). */
# define IEMNATIVE_FRAME_ALIGN_SIZE         8
/** Number of shadow argument slots (spill area) for calls we make. */
# ifdef RT_OS_WINDOWS
#  define IEMNATIVE_FRAME_SHADOW_ARG_COUNT  4
# else
#  define IEMNATIVE_FRAME_SHADOW_ARG_COUNT  0
# endif

/** Frame pointer (RBP) relative offset of the last push. */
# ifdef RT_OS_WINDOWS
#  define IEMNATIVE_FP_OFF_LAST_PUSH        (7 * -8)
# else
#  define IEMNATIVE_FP_OFF_LAST_PUSH        (5 * -8)
# endif
/** Frame pointer (RBP) relative offset of the stack variable area (the lowest
 * address for it). */
# define IEMNATIVE_FP_OFF_STACK_VARS        (IEMNATIVE_FP_OFF_LAST_PUSH - IEMNATIVE_FRAME_ALIGN_SIZE - IEMNATIVE_FRAME_VAR_SIZE)
/** Frame pointer (RBP) relative offset of the first stack argument for calls. */
# define IEMNATIVE_FP_OFF_STACK_ARG0        (IEMNATIVE_FP_OFF_STACK_VARS - IEMNATIVE_FRAME_STACK_ARG_COUNT * 8)
/** Frame pointer (RBP) relative offset of the second stack argument for calls. */
# define IEMNATIVE_FP_OFF_STACK_ARG1        (IEMNATIVE_FP_OFF_STACK_ARG0 + 8)
/** Frame pointer (RBP) relative offset of the third stack argument for calls. */
# define IEMNATIVE_FP_OFF_STACK_ARG2        (IEMNATIVE_FP_OFF_STACK_ARG0 + 16)
/** Frame pointer (RBP) relative offset of the fourth stack argument for calls. */
# define IEMNATIVE_FP_OFF_STACK_ARG3        (IEMNATIVE_FP_OFF_STACK_ARG0 + 24)

# ifdef RT_OS_WINDOWS
/** Frame pointer (RBP) relative offset of the first incoming shadow argument. */
#  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG0   (16)
/** Frame pointer (RBP) relative offset of the second incoming shadow argument. */
#  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG1   (24)
/** Frame pointer (RBP) relative offset of the third incoming shadow argument. */
#  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG2   (32)
/** Frame pointer (RBP) relative offset of the fourth incoming shadow argument. */
#  define IEMNATIVE_FP_OFF_IN_SHADOW_ARG3   (40)
# endif

#elif defined(RT_ARCH_ARM64)
/** No stack argument slots; the 8 argument registers are enough. */
# define IEMNATIVE_FRAME_STACK_ARG_COUNT    0
/** There is no argument spill area. */
# define IEMNATIVE_FRAME_SHADOW_ARG_COUNT   0

/** Number of saved registers at the top of our stack frame.
 * This includes the return address and old frame pointer, so x19 thru x30. */
# define IEMNATIVE_FRAME_SAVE_REG_COUNT     (12)
/** The size of the saved register area (IEMNATIVE_FRAME_SAVE_REG_COUNT slots of 8 bytes). */
# define IEMNATIVE_FRAME_SAVE_REG_SIZE      (IEMNATIVE_FRAME_SAVE_REG_COUNT * 8)

/** Frame pointer (BP) relative offset of the last push. */
# define IEMNATIVE_FP_OFF_LAST_PUSH         (7 * -8)

/** Frame pointer (BP) relative offset of the stack variable area (the lowest
 * address for it). */
# define IEMNATIVE_FP_OFF_STACK_VARS        (IEMNATIVE_FP_OFF_LAST_PUSH - IEMNATIVE_FRAME_ALIGN_SIZE - IEMNATIVE_FRAME_VAR_SIZE)

#else
# error "port me"
#endif
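
/*
 * Illustrative sketch only, derived from the macro values above for the
 * non-Windows AMD64 case; the exact set of registers pushed by the prologue is
 * not defined in this header, so the "five non-volatile pushes" below is an
 * assumption for the example:
 *
 *      rbp+0x10 ...           incoming stack/shadow arguments (the Windows shadow area starts here)
 *      rbp+0x08               return address
 *      rbp+0x00               saved RBP
 *      rbp-0x08 .. rbp-0x28   five non-volatile register pushes (IEMNATIVE_FP_OFF_LAST_PUSH = -0x28)
 *      rbp-0x30 .. rbp-0x29   alignment padding (IEMNATIVE_FRAME_ALIGN_SIZE = 8)
 *      rbp-0x70 .. rbp-0x31   stack variable / spill area (IEMNATIVE_FP_OFF_STACK_VARS = -0x70, 0x40 bytes)
 *      rbp-0x90 .. rbp-0x71   outgoing stack arguments 0..3 (IEMNATIVE_FP_OFF_STACK_ARG0 = -0x90)
 */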
/** @} */


/** @name Fixed Register Allocation(s)
 * @{ */
/** @def IEMNATIVE_REG_FIXED_PVMCPU
 * The number of the register holding the pVCpu pointer. */
/** @def IEMNATIVE_REG_FIXED_TMP0
 * Dedicated temporary register.
 * @todo replace this by a register allocator and content tracker. */
#ifdef RT_ARCH_AMD64
# define IEMNATIVE_REG_FIXED_PVMCPU         X86_GREG_xBX
# define IEMNATIVE_REG_FIXED_TMP0           X86_GREG_x11

#elif defined(RT_ARCH_ARM64)
# define IEMNATIVE_REG_FIXED_PVMCPU         ARMV8_A64_REG_X28
# define IEMNATIVE_REG_FIXED_TMP0           ARMV8_A64_REG_X15

#else
# error "port me"
#endif
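
/* The pVCpu register is a callee-saved register in both ABIs (RBX on AMD64,
 * X28 on ARM64), so it keeps its value across any helper calls the generated
 * code makes, while the temporary is a volatile (caller-saved) register
 * (R11 / X15).  This is an inference from the choices above; the prologue
 * that actually loads pVCpu is not part of this header. */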
/** @} */

/** @name Call related registers.
 * @{ */
/** @def IEMNATIVE_CALL_RET_GREG
 * The return value register. */
/** @def IEMNATIVE_CALL_ARG_GREG_COUNT
 * Number of arguments in registers. */
/** @def IEMNATIVE_CALL_ARG0_GREG
 * The general purpose register carrying argument \#0. */
/** @def IEMNATIVE_CALL_ARG1_GREG
 * The general purpose register carrying argument \#1. */
/** @def IEMNATIVE_CALL_ARG2_GREG
 * The general purpose register carrying argument \#2. */
/** @def IEMNATIVE_CALL_ARG3_GREG
 * The general purpose register carrying argument \#3. */
#ifdef RT_ARCH_AMD64
# define IEMNATIVE_CALL_RET_GREG            X86_GREG_xAX

# ifdef RT_OS_WINDOWS
#  define IEMNATIVE_CALL_ARG_GREG_COUNT     4
#  define IEMNATIVE_CALL_ARG0_GREG          X86_GREG_xCX
#  define IEMNATIVE_CALL_ARG1_GREG          X86_GREG_xDX
#  define IEMNATIVE_CALL_ARG2_GREG          X86_GREG_x8
#  define IEMNATIVE_CALL_ARG3_GREG          X86_GREG_x9
# else
#  define IEMNATIVE_CALL_ARG_GREG_COUNT     6
#  define IEMNATIVE_CALL_ARG0_GREG          X86_GREG_xDI
#  define IEMNATIVE_CALL_ARG1_GREG          X86_GREG_xSI
#  define IEMNATIVE_CALL_ARG2_GREG          X86_GREG_xDX
#  define IEMNATIVE_CALL_ARG3_GREG          X86_GREG_xCX
#  define IEMNATIVE_CALL_ARG4_GREG          X86_GREG_x8
#  define IEMNATIVE_CALL_ARG5_GREG          X86_GREG_x9
# endif

#elif defined(RT_ARCH_ARM64)
# define IEMNATIVE_CALL_RET_GREG            ARMV8_A64_REG_X0
# define IEMNATIVE_CALL_ARG_GREG_COUNT      8
# define IEMNATIVE_CALL_ARG0_GREG           ARMV8_A64_REG_X0
# define IEMNATIVE_CALL_ARG1_GREG           ARMV8_A64_REG_X1
# define IEMNATIVE_CALL_ARG2_GREG           ARMV8_A64_REG_X2
# define IEMNATIVE_CALL_ARG3_GREG           ARMV8_A64_REG_X3
# define IEMNATIVE_CALL_ARG4_GREG           ARMV8_A64_REG_X4
# define IEMNATIVE_CALL_ARG5_GREG           ARMV8_A64_REG_X5
# define IEMNATIVE_CALL_ARG6_GREG           ARMV8_A64_REG_X6
# define IEMNATIVE_CALL_ARG7_GREG           ARMV8_A64_REG_X7

#endif

/** @} */
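
/*
 * Illustrative sketch only (the helpers used are declared further down in this
 * header, but the call sequence itself is not part of this file): loading the
 * first two arguments for a helper call with the fixed registers above could
 * look roughly like this, where uSomeConstant is a made-up value for the
 * example:
 *
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, uSomeConstant);
 *      AssertReturn(off != UINT32_MAX, UINT32_MAX);
 *      ... emit the call and afterwards pick up the status in IEMNATIVE_CALL_RET_GREG ...
 */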

/** Native code generator label types. */
typedef enum
{
    kIemNativeLabelType_Invalid = 0,
    kIemNativeLabelType_Return,
    kIemNativeLabelType_NonZeroRetOrPassUp,
    kIemNativeLabelType_End
} IEMNATIVELABELTYPE;

/** Native code generator label definition. */
typedef struct IEMNATIVELABEL
{
    /** Code offset if defined, UINT32_MAX if it needs to be generated after/in
     * the epilog. */
    uint32_t    off;
    /** The type of label (IEMNATIVELABELTYPE). */
    uint16_t    enmType;
    /** Additional label data, type specific. */
    uint16_t    uData;
} IEMNATIVELABEL;
/** Pointer to a label. */
typedef IEMNATIVELABEL *PIEMNATIVELABEL;


/** Native code generator fixup types. */
typedef enum
{
    kIemNativeFixupType_Invalid = 0,
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    /** AMD64 fixup: PC relative 32-bit with addend in offAddend. */
    kIemNativeFixupType_Rel32,
#elif defined(RT_ARCH_ARM64)
    /** ARM64 fixup: PC relative offset at bits 23:5 (CBZ, CBNZ). */
    kIemNativeFixupType_RelImm19At5,
#endif
    kIemNativeFixupType_End
} IEMNATIVEFIXUPTYPE;

/** Native code generator fixup. */
typedef struct IEMNATIVEFIXUP
{
    /** Code offset of the fixup location. */
    uint32_t    off;
    /** The IEMNATIVELABEL this is a fixup for. */
    uint16_t    idxLabel;
    /** The fixup type (IEMNATIVEFIXUPTYPE). */
    uint8_t     enmType;
    /** Addend or other data. */
    int8_t      offAddend;
} IEMNATIVEFIXUP;
/** Pointer to a native code generator fixup. */
typedef IEMNATIVEFIXUP *PIEMNATIVEFIXUP;

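/*
 * Illustrative sketch only, not an interface declared by this header: once a
 * label's final offset is known, an AMD64 Rel32 fixup recorded against it
 * could be resolved roughly like this (pbCode and idxFixup are made-up names
 * for the example):
 *
 *      PIEMNATIVEFIXUP const pFixup = &pReNative->paFixups[idxFixup];
 *      PIEMNATIVELABEL const pLabel = &pReNative->paLabels[pFixup->idxLabel];
 *      Assert(pLabel->off != UINT32_MAX);
 *      // rel32 is relative to the end of the 4-byte displacement field:
 *      *(int32_t *)&pbCode[pFixup->off] = (int32_t)pLabel->off
 *                                       - (int32_t)(pFixup->off + 4)
 *                                       + pFixup->offAddend;
 */
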
/**
 * Native recompiler state.
 */
typedef struct IEMRECOMPILERSTATE
{
    /** Size of the buffer that pInstrBuf points to, in IEMNATIVEINSTR units. */
    uint32_t                    cInstrBufAlloc;
    uint32_t                    uPadding; /* We don't keep track of this here... */
    /** Fixed temporary code buffer for native recompilation. */
    PIEMNATIVEINSTR             pInstrBuf;

    /** Actual number of labels in paLabels. */
    uint32_t                    cLabels;
    /** Max number of entries allowed in paLabels before reallocating it. */
    uint32_t                    cLabelsAlloc;
    /** Labels defined while recompiling (referenced by fixups). */
    PIEMNATIVELABEL             paLabels;

    /** Actual number of fixups in paFixups. */
    uint32_t                    cFixups;
    /** Max number of entries allowed in paFixups before reallocating it. */
    uint32_t                    cFixupsAlloc;
    /** Buffer used by the recompiler for recording fixups when generating code. */
    PIEMNATIVEFIXUP             paFixups;
} IEMRECOMPILERSTATE;
/** Pointer to a native recompiler state. */
typedef IEMRECOMPILERSTATE *PIEMRECOMPILERSTATE;


/**
 * Native recompiler worker for a threaded function.
 *
 * @returns New code buffer offset, UINT32_MAX in case of failure.
 * @param   pReNative   The native recompiler state.
 * @param   off         The current code buffer offset.
 * @param   pCallEntry  The threaded call entry.
 *
 * @note    This is not allowed to throw anything atm.
 */
typedef DECLCALLBACKTYPE(uint32_t, FNIEMNATIVERECOMPFUNC,(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                          PCIEMTHRDEDCALLENTRY pCallEntry));
/** Pointer to a native recompiler worker for a threaded function. */
typedef FNIEMNATIVERECOMPFUNC *PFNIEMNATIVERECOMPFUNC;

/** Defines a native recompiler worker for a threaded function. */
#define IEM_DECL_IEMNATIVERECOMPFUNC_DEF(a_Name) \
    DECLCALLBACK(uint32_t) a_Name(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
/** Prototypes a native recompiler function for a threaded function. */
#define IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(a_Name) FNIEMNATIVERECOMPFUNC a_Name

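/*
 * Illustrative sketch of how a worker can be defined with the macro above; the
 * function name and the trivial body are made up for the example and do not
 * correspond to an existing worker:
 *
 *      IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_Example)
 *      {
 *          RT_NOREF(pCallEntry);
 *          off = iemNativeEmitMarker(pReNative, off);
 *          AssertReturn(off != UINT32_MAX, UINT32_MAX);
 *          return off;
 *      }
 */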

DECLHIDDEN(uint32_t) iemNativeMakeLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
                                        uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT;
DECLHIDDEN(bool) iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
                                   IEMNATIVEFIXUPTYPE enmType, int8_t offAddend = 0) RT_NOEXCEPT;
DECLHIDDEN(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                        uint32_t cInstrReq) RT_NOEXCEPT;

DECLHIDDEN(uint32_t) iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                        uint8_t idxInstr) RT_NOEXCEPT;


/**
 * Ensures that there is sufficient space in the instruction output buffer.
 *
 * This will reallocate the buffer if needed and allowed.
 *
 * @returns Pointer to the instruction output buffer on success, NULL on
 *          failure.
 * @param   pReNative   The native recompiler state.
 * @param   off         Current instruction offset.  Works safely for UINT32_MAX
 *                      as well.
 * @param   cInstrReq   Number of instructions about to be added.  It's okay to
 *                      overestimate this a bit.
 */
DECL_FORCE_INLINE(PIEMNATIVEINSTR) iemNativeInstrBufEnsure(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
{
    if (RT_LIKELY(off + (uint64_t)cInstrReq <= pReNative->cInstrBufAlloc))
        return pReNative->pInstrBuf;
    return iemNativeInstrBufEnsureSlow(pReNative, off, cInstrReq);
}
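
/*
 * Illustrative note: the emitter helpers below all follow the same pattern
 * around iemNativeInstrBufEnsure(), roughly (cMaxInstrs being whatever upper
 * estimate the caller uses):
 *
 *      uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, cMaxInstrs);
 *      AssertReturn(pbCodeBuf, UINT32_MAX);
 *      pbCodeBuf[off++] = ...;     // append instruction bytes (32-bit words on ARM64)
 *      return off;                 // hand the advanced offset back to the caller
 */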


/**
 * Emits a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECLINLINE(uint32_t) iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    AssertReturn(pbCodeBuf, UINT32_MAX);
    /* nop */
    pbCodeBuf[off++] = 0x90;

#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    AssertReturn(pu32CodeBuf, UINT32_MAX);
    /* nop */
    pu32CodeBuf[off++] = 0xd503201f;

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits setting a GPR to zero.
 */
DECLINLINE(uint32_t) iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    AssertReturn(pbCodeBuf, UINT32_MAX);
    /* xor gpr32, gpr32 */
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    AssertReturn(pu32CodeBuf, UINT32_MAX);
    /* mov gpr, #0x0 */
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    RT_NOREF(pReNative);
    return off;
}


/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
    if (!uImm64)
        return iemNativeEmitGprZero(pReNative, off, iGpr);

#ifdef RT_ARCH_AMD64
    if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
        AssertReturn(pbCodeBuf, UINT32_MAX);
        if (iGpr >= 8)
            pbCodeBuf[off++] = X86_OP_REX_B;
        pbCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pbCodeBuf[off++] = RT_BYTE1(uImm64);
        pbCodeBuf[off++] = RT_BYTE2(uImm64);
        pbCodeBuf[off++] = RT_BYTE3(uImm64);
        pbCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
        AssertReturn(pbCodeBuf, UINT32_MAX);
        if (iGpr < 8)
            pbCodeBuf[off++] = X86_OP_REX_W;
        else
            pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pbCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pbCodeBuf[off++] = RT_BYTE1(uImm64);
        pbCodeBuf[off++] = RT_BYTE2(uImm64);
        pbCodeBuf[off++] = RT_BYTE3(uImm64);
        pbCodeBuf[off++] = RT_BYTE4(uImm64);
        pbCodeBuf[off++] = RT_BYTE5(uImm64);
        pbCodeBuf[off++] = RT_BYTE6(uImm64);
        pbCodeBuf[off++] = RT_BYTE7(uImm64);
        pbCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    AssertReturn(pu32CodeBuf, UINT32_MAX);

    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29).  So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
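    /*
     * Worked example (illustrative only): uImm64 = 0xdeadbeef00001234 would emit
     *      mov  x<iGpr>, #0x1234               ; bits 15:0, fMovK gets the movk bit afterwards
     *      (nothing for bits 31:16, they are zero)
     *      movk x<iGpr>, #0xbeef, lsl #32      ; bits 47:32
     *      movk x<iGpr>, #0xdead, lsl #48      ; bits 63:48
     */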
    uint32_t fMovK = 0;
    /* mov gpr, imm16 */
    uint32_t uImmPart = ((uint32_t)((uImm64 >> 0) & UINT32_C(0xffff)) << 5);
    if (uImmPart)
    {
        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | uImmPart | iGpr;
        fMovK |= RT_BIT_32(29);
    }
    /* mov[k] gpr, imm16, lsl #16 */
    uImmPart = ((uint32_t)((uImm64 >> 16) & UINT32_C(0xffff)) << 5);
    if (uImmPart)
    {
        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(1) << 21) | uImmPart | iGpr;
        fMovK |= RT_BIT_32(29);
    }
    /* mov[k] gpr, imm16, lsl #32 */
    uImmPart = ((uint32_t)((uImm64 >> 32) & UINT32_C(0xffff)) << 5);
    if (uImmPart)
    {
        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(2) << 21) | uImmPart | iGpr;
        fMovK |= RT_BIT_32(29);
    }
    /* mov[k] gpr, imm16, lsl #48 */
    uImmPart = ((uint32_t)((uImm64 >> 48) & UINT32_C(0xffff)) << 5);
    if (uImmPart)
        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(3) << 21) | uImmPart | iGpr;

    /** @todo there is an inverted mask variant we might want to explore if it
     *        reduces the number of instructions... */
    /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
     *        clang 12.x does that, only to use the 'x' version for the
     *        addressing in the following ldr. */

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    AssertReturn(pbCodeBuf, UINT32_MAX);

    /* mov reg32, mem32 */
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGpr & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGpr & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1(offVCpu);
        pbCodeBuf[off++] = RT_BYTE2(offVCpu);
        pbCodeBuf[off++] = RT_BYTE3(offVCpu);
        pbCodeBuf[off++] = RT_BYTE4(offVCpu);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can; otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _16K && !(offVCpu & 3))
    {
        /* Use the unsigned offset variant of ldr Wt, [<Xn|SP>, #off]; the imm12 field is scaled by 4. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        AssertReturn(pu32CodeBuf, UINT32_MAX);
        pu32CodeBuf[off++] = UINT32_C(0xb9400000) | ((offVCpu >> 2) << 10) | (IEMNATIVE_REG_FIXED_PVMCPU << 5) | iGpr;
    }
    else
    {
        /* The offset is too large (or unaligned), so we must load it into a
           register and use ldr Wt, [<Xn|SP>, <Xm>]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        AssertReturn(pu32CodeBuf, UINT32_MAX);
        pu32CodeBuf[off++] = UINT32_C(0xb8606800) /* option=LSL (64-bit index) */ | ((uint32_t)IEMNATIVE_REG_FIXED_TMP0 << 16)
                           | ((uint32_t)IEMNATIVE_REG_FIXED_PVMCPU << 5) | iGpr;
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    AssertReturn(pbCodeBuf, UINT32_MAX);
    if ((iGprDst | iGprSrc) >= 8)
        pbCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_W | X86_OP_REX_B
                         : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
                         :                X86_OP_REX_W | X86_OP_REX_R;
    else
        pbCodeBuf[off++] = X86_OP_REX_W;
    pbCodeBuf[off++] = 0x8b;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    AssertReturn(pu32CodeBuf, UINT32_MAX);
    /* mov dst, src;   alias for: orr dst, xzr, src */
    pu32CodeBuf[off++] = UINT32_C(0xaa000000) | ((uint32_t)iGprSrc << 16) | ((uint32_t)ARMV8_A64_REG_XZR << 5) | iGprDst;

#else
# error "port me"
#endif
    return off;
}

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByBp and friends.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp)
{
    if (offDisp < 128 && offDisp >= -128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif


#ifdef RT_ARCH_AMD64
/**
 * Emits a 64-bit GPR load instruction with a BP-relative source address.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
{
    /* mov gprdst, qword [rbp + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    AssertReturn(pbCodeBuf, UINT32_MAX);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp);
}
#endif


#ifdef RT_ARCH_AMD64
/**
 * Emits a 32-bit GPR load instruction with a BP-relative source address.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
{
    /* mov gprdst, dword [rbp + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    AssertReturn(pbCodeBuf, UINT32_MAX);
    if (iGprDst >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp);
}
#endif


#ifdef RT_ARCH_AMD64
/**
 * Emits a load effective address to a GPR with a BP-relative source address.
 */
DECLINLINE(uint32_t) iemNativeEmitLeaGrpByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
{
    /* lea gprdst, [rbp + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    AssertReturn(pbCodeBuf, UINT32_MAX);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp);
}
#endif


/**
 * Emits a 64-bit GPR store with a BP-relative destination address.
 *
 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
 */
DECLINLINE(uint32_t) iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov qword [rbp + offDisp], gprsrc */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    AssertReturn(pbCodeBuf, UINT32_MAX);
    if (iGprSrc < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp);

#elif defined(RT_ARCH_ARM64)
    if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
    {
        /* str w/ unsigned imm12 (scaled) */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        AssertReturn(pu32CodeBuf, UINT32_MAX);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
                                                      ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
    }
    else if (offDisp >= -256 && offDisp <= 256)
    {
        /* stur w/ signed imm9 (unscaled) */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        AssertReturn(pu32CodeBuf, UINT32_MAX);
        pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
    }
    else
    {
        /* Use temporary indexing register. */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        AssertReturn(pu32CodeBuf, UINT32_MAX);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
                                                       IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
    }
    return off;

#else
# error "Port me!"
#endif
}


/**
 * Emits a 64-bit immediate store with a BP-relative destination address.
 *
 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
 */
DECLINLINE(uint32_t) iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if ((int64_t)uImm64 == (int32_t)uImm64)
    {
        /* mov qword [rbp + offDisp], imm32 - sign extended */
        uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
        AssertReturn(pbCodeBuf, UINT32_MAX);

        pbCodeBuf[off++] = X86_OP_REX_W;
        pbCodeBuf[off++] = 0xc7;
        if (offDisp < 128 && offDisp >= -128)
        {
            pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
            pbCodeBuf[off++] = (uint8_t)offDisp;
        }
        else
        {
            pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
            pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
            pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
            pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
            pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
        }
        pbCodeBuf[off++] = RT_BYTE1(uImm64);
        pbCodeBuf[off++] = RT_BYTE2(uImm64);
        pbCodeBuf[off++] = RT_BYTE3(uImm64);
        pbCodeBuf[off++] = RT_BYTE4(uImm64);
        return off;
    }
#endif

    /* Load tmp0, imm64; Store tmp to bp+disp. */
    off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
    return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
}


#ifdef RT_ARCH_AMD64
/**
 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
 */
DECLINLINE(uint32_t) iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend)
{
    /* sub gprdst, imm8/imm32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    AssertReturn(pbCodeBuf, UINT32_MAX);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
    if (iSubtrahend < 128 && iSubtrahend >= -128)
    {
        pbCodeBuf[off++] = 0x83;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
        pbCodeBuf[off++] = (uint8_t)iSubtrahend;
    }
    else
    {
        pbCodeBuf[off++] = 0x81;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
        pbCodeBuf[off++] = RT_BYTE1(iSubtrahend);
        pbCodeBuf[off++] = RT_BYTE2(iSubtrahend);
        pbCodeBuf[off++] = RT_BYTE3(iSubtrahend);
        pbCodeBuf[off++] = RT_BYTE4(iSubtrahend);
    }
    return off;
}
#endif

/** @} */

#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompiler_h */
