VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@103763

Last change on this file since 103763 was 103763, checked in by vboxsync, 11 months ago

VMM/IEM: Implement native emitter for IEM_MC_FETCH_XREG_U32(), bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 247.7 KB
/* $Id: IEMN8veRecompilerEmit.h 103763 2024-03-11 12:47:57Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = 0xd503201f;

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
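
/* Illustrative usage sketch: the emitter helpers all thread the instruction
   buffer offset 'off' through each call, e.g.
        off = iemNativeEmitMarker(pReNative, off, 0x1234);
   On AMD64 a non-zero uInfo is recoverable from the disassembly as the disp32
   of the multi-byte NOP, which makes the marker easy to spot. */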


/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
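
/* Convention note (as used throughout this file): the 'Ex' variants write
   into a caller-supplied instruction buffer and rely on the caller having
   already reserved enough space, whereas the non-'Ex' wrappers call
   iemNativeInstrBufEnsure() themselves and assert the size estimate via
   IEMNATIVE_ASSERT_INSTR_BUF_ENSURE() afterwards. */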


/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */

/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 10 instruction bytes.
 *      - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = ( uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

    /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
     *        clang 12.x does that, only to use the 'x' version for the
     *        addressing in the following ldr). */

#else
# error "port me"
#endif
    return off;
}
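
/* Worked example of the half-word heuristic above (values chosen purely for
   illustration): for uImm64 = 0x0000cafe0000beef two half-words are zero, so
   the movz path emits just
        movz  gpr, #0xbeef
        movk  gpr, #0xcafe, lsl #32
   while for uImm64 = 0xffffffffffff1234 three half-words are 0xffff, so the
   movn path covers it with a single
        movn  gpr, #0xedcb
   (movn writes the bitwise NOT of the shifted immediate). */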


/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
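
/* Usage sketch: this constant loader is a building block for many of the
   emitters below, e.g. materializing a 64-bit value (chosen for illustration)
   in the fixed temporary register:
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0,
                                        UINT64_C(0xdeadbeefcafebabe));
   The buffer sizes requested above (10 bytes / 4 words) match the worst-case
   figures documented for iemNativeEmitLoadGprImmEx. */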


/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 6 instruction bytes.
 *      - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32,                    0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16,             1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32,                   0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}
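
/* Example of the general ARM64 case above (constant chosen for illustration):
   uImm32 = 0x12345678 matches none of the single-instruction patterns, so it
   is built as
        movz  wGpr, #0x5678
        movk  wGpr, #0x1234, lsl #16
   which is why two instruction words is the documented worst case. */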


/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}
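
/* The three tiers above, in order of preference:
     1. offVCpu is suitably aligned and, scaled, fits the 12-bit unsigned
        ldr/str offset -> single access off IEMNATIVE_REG_FIXED_PVMCPU;
     2. the same but rebased on IEMNATIVE_REG_FIXED_PCPUMCTX, which extends
        the reach by RT_UOFFSETOF(VMCPU, cpum.GstCtx) bytes;
     3. otherwise materialize offVCpu in a register (iGprTmp, or iGprReg
        itself for loads) and use the register-indexed ldr/str form. */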

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}
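
/* Usage sketch: fetching a 64-bit guest register field out of the VMCPU
   structure into a host register, e.g. the guest RIP:
        off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg,
                                              RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
   (idxHstReg stands in for whatever register the allocator handed out.) */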


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: @a idxTmp1 is always required! Whether @a idxTmp2 is needed
 *       depends on whether the offset can be encoded as an immediate. The
 *       @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same
 *       from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load effective address to a GPR of a VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                         offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}
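
/* Usage sketch for the typesafe offset helpers (the counter member name is
   hypothetical, purely for illustration):
        off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
                                                iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomeCounter));
   Passing the member pointer rather than a hand-written RT_UOFFSETOF lets the
   compiler verify that it really is a STAMCOUNTER inside the VMCPU. */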


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_W | X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_W | X86_OP_REX_R;
    else
        pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst, src;   alias for: orr dst, xzr, src */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst32, src32;   alias for: orr dst32, wzr, src32 */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movzx Gv,Ew */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb7;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* and gprdst, gprsrc, #0xffff */
# if 1
    Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
# else
    Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[7:0] load.
 * @note Bits 63 thru 8 are cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movzx Gv,Eb */
    if (iGprDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    else if (iGprSrc >= 4)
        pCodeBuf[off++] = X86_OP_REX;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb6;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* and gprdst, gprsrc, #0xff */
    Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[7:0] load.
 * @note Bits 63 thru 8 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
 * @note Bits 63 thru 8 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);

    /* movzx Gv,Ew */
    if ((iGprDst | iGprSrc) >= 8)
        pbCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                         : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                         :                X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

    /* shr Ev,8 */
    if (iGprDst >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    pbCodeBuf[off++] = 0xc1;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
    pbCodeBuf[off++] = 8;

#elif defined(RT_ARCH_ARM64)
    /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
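
/* Design note: the AMD64 variant above cannot simply 'movzx' from AH/CH/DH/BH,
   because once a REX prefix is present (needed whenever iGprDst or iGprSrc is
   r8..r15) the 8-bit register encodings 4-7 select SPL/BPL/SIL/DIL instead of
   the high-byte registers. Loading all 16 bits and shifting right by 8
   sidesteps the problem for every register combination. */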


/**
 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movsxd r64, r/m32 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pbCodeBuf[off++] = 0x63;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* sxtw dst, src */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movsx r64, r/m16 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xbf;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* sxth dst, src */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movsx r32, r/m16 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    if (iGprDst >= 8 || iGprSrc >= 8)
        pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xbf;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* sxth dst32, src */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movsx r64, r/m8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xbe;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* sxtb dst, src */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movsx r32, r/m8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    if (iGprDst >= 8 || iGprSrc >= 8)
        pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    else if (iGprSrc >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xbe;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* sxtb dst32, src32 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movsx r16, r/m8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGprDst >= 8 || iGprSrc >= 8)
        pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    else if (iGprSrc >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xbe;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

    /* movzx r32, r/m16 */
    if (iGprDst >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);

#elif defined(RT_ARCH_ARM64)
    /* sxtb dst32, src32;  and dst32, dst32, #0xffff */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
    Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
    pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc + addend load.
 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
 */
#ifdef RT_ARCH_AMD64
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                      uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
{
    Assert(iAddend != 0);

    /* lea gprdst, [gprsrc + iAddend] */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#elif defined(RT_ARCH_ARM64)
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                      uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
{
    if ((uint32_t)iAddend < 4096)
    {
        /* add dst, src, uimm12 */
        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
    }
    else if ((uint32_t)-iAddend < 4096)
    {
        /* sub dst, src, uimm12 */
        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
    }
    else
    {
        Assert(iGprSrc != iGprDst);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
#else
# error "port me"
#endif

/**
 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
 */
#ifdef RT_ARCH_AMD64
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                               uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
#else
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                               uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
#endif
{
    if (iAddend != 0)
        return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
    return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
}


/**
 * Emits a gprdst = gprsrc32 + addend load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                        uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
{
    Assert(iAddend != 0);

#ifdef RT_ARCH_AMD64
    /* a32 o32 lea gprdst, [gprsrc + iAddend] */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
    if ((iGprDst | iGprSrc) >= 8)
        pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);

#elif defined(RT_ARCH_ARM64)
    if ((uint32_t)iAddend < 4096)
    {
        /* add dst, src, uimm12 */
        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
    }
    else if ((uint32_t)-iAddend < 4096)
    {
        /* sub dst, src, uimm12 */
        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
    }
    else
    {
        Assert(iGprSrc != iGprDst);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
{
    if (iAddend != 0)
        return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
    return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
}


/**
 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
 * destination.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov reg16, r/m16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (idxDst >= 8 || idxSrc >= 8)
        pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);

#elif defined(RT_ARCH_ARM64)
1688 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1689 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1690
1691#else
1692# error "Port me!"
1693#endif
1694 return off;
1695}
1696
1697
1698/**
1699 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1700 * destination.
1701 */
1702DECL_INLINE_THROW(uint32_t)
1703iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1704{
1705#ifdef RT_ARCH_AMD64
1706 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1707#elif defined(RT_ARCH_ARM64)
1708 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1709#else
1710# error "Port me!"
1711#endif
1712 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1713 return off;
1714}
1715
1716
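/*
 * Editor's sketch, not part of the original emitter set: the 16-bit merge
 * above expressed in plain C, per the contract documented for it.
 */
#if 0 /* illustrative only */
static uint64_t iemNativeExampleMergeGpr16(uint64_t uDst, uint64_t uSrc)
{
    return (uDst & ~(uint64_t)0xffff) | (uSrc & 0xffff); /* bits 63:16 of uDst kept */
}
#endif

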
1717#ifdef RT_ARCH_AMD64
1718/**
1719 * Common bit of iemNativeEmitLoadGprByBp and friends.
1720 */
1721DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1722 PIEMRECOMPILERSTATE pReNativeAssert)
1723{
1724 if (offDisp < 128 && offDisp >= -128)
1725 {
1726 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1727 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1728 }
1729 else
1730 {
1731 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1732 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1733 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1734 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1735 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1736 }
1737 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1738 return off;
1739}
1740#elif defined(RT_ARCH_ARM64)
1741/**
1742 * Common bit of iemNativeEmitLoadGprByBp and friends.
1743 */
1744DECL_FORCE_INLINE_THROW(uint32_t)
1745iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1746 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1747{
1748 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1749 {
1750 /* str/ldr w/ unsigned imm12 (scaled) */
1751 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1752 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1753 }
1754 else if (offDisp >= -256 && offDisp <= 256)
1755 {
1756 /* stur/ldur w/ signed imm9 (unscaled) */
1757 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1758 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1759 }
1760 else
1761 {
1762 /* Use temporary indexing register. */
1763 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1764 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
1766 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
1767 }
1768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1769 return off;
1770}
1771#endif
1772
1773
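/*
 * Editor's illustration, not part of the original emitter set: ModRM bytes
 * produced by iemNativeEmitGprByBpDisp for iGprReg=0 (rax/eax).  rbp as base
 * always takes a displacement, so mod is 01 (disp8) or 10 (disp32).
 */
#if 0 /* illustrative only */
static const uint8_t g_abExampleByBpDisp8[]  = { 0x45, 0xf8 };                    /* [rbp - 8] */
static const uint8_t g_abExampleByBpDisp32[] = { 0x85, 0x00, 0x01, 0x00, 0x00 };  /* [rbp + 256] */
#endif

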
1774/**
1775 * Emits a 64-bit GPR load instruction with a BP relative source address.
1776 */
1777DECL_INLINE_THROW(uint32_t)
1778iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
1779{
1780#ifdef RT_ARCH_AMD64
1781 /* mov gprdst, qword [rbp + offDisp] */
1782 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1783 if (iGprDst < 8)
1784 pbCodeBuf[off++] = X86_OP_REX_W;
1785 else
1786 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
1787 pbCodeBuf[off++] = 0x8b;
1788 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
1789
1790#elif defined(RT_ARCH_ARM64)
1791 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
1792
1793#else
1794# error "port me"
1795#endif
1796}
1797
1798
1799/**
1800 * Emits a 32-bit GPR load instruction with a BP relative source address.
1801 * @note Bits 63 thru 32 of the GPR will be cleared.
1802 */
1803DECL_INLINE_THROW(uint32_t)
1804iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
1805{
1806#ifdef RT_ARCH_AMD64
1807 /* mov gprdst, dword [rbp + offDisp] */
1808 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1809 if (iGprDst >= 8)
1810 pbCodeBuf[off++] = X86_OP_REX_R;
1811 pbCodeBuf[off++] = 0x8b;
1812 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
1813
1814#elif defined(RT_ARCH_ARM64)
1815 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
1816
1817#else
1818# error "port me"
1819#endif
1820}
1821
1822
1823/**
1824 * Emits a 16-bit GPR load instruction with a BP relative source address.
1825 * @note Bits 63 thru 16 of the GPR will be cleared.
1826 */
1827DECL_INLINE_THROW(uint32_t)
1828iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
1829{
1830#ifdef RT_ARCH_AMD64
1831 /* movzx gprdst, word [rbp + offDisp] */
1832 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1833 if (iGprDst >= 8)
1834 pbCodeBuf[off++] = X86_OP_REX_R;
1835 pbCodeBuf[off++] = 0x0f;
1836 pbCodeBuf[off++] = 0xb7;
1837 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
1838
1839#elif defined(RT_ARCH_ARM64)
1840 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
1841
1842#else
1843# error "port me"
1844#endif
1845}
1846
1847
1848/**
1849 * Emits an 8-bit GPR load instruction with a BP relative source address.
1850 * @note Bits 63 thru 8 of the GPR will be cleared.
1851 */
1852DECL_INLINE_THROW(uint32_t)
1853iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
1854{
1855#ifdef RT_ARCH_AMD64
1856 /* movzx gprdst, byte [rbp + offDisp] */
1857 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1858 if (iGprDst >= 8)
1859 pbCodeBuf[off++] = X86_OP_REX_R;
1860 pbCodeBuf[off++] = 0x0f;
1861 pbCodeBuf[off++] = 0xb6;
1862 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
1863
1864#elif defined(RT_ARCH_ARM64)
1865 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
1866
1867#else
1868# error "port me"
1869#endif
1870}
1871
1872
1873/**
1874 * Emits a load effective address to a GPR with a BP relative source address.
1875 */
1876DECL_INLINE_THROW(uint32_t)
1877iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
1878{
1879#ifdef RT_ARCH_AMD64
1880 /* lea gprdst, [rbp + offDisp] */
1881 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1882 if (iGprDst < 8)
1883 pbCodeBuf[off++] = X86_OP_REX_W;
1884 else
1885 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
1886 pbCodeBuf[off++] = 0x8d;
1887 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
1888
1889#elif defined(RT_ARCH_ARM64)
1890 if ((uint32_t)offDisp < (unsigned)_4K)
1891 {
1892 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1893 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
1894 }
1895 else if ((uint32_t)-offDisp < (unsigned)_4K)
1896 {
1897 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1898 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
1899 }
1900 else
1901 {
1902 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
1903 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
1904 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1905 if (offDisp >= 0)
1906 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
1907 else
1908 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
1909 }
1910
1911#else
1912# error "port me"
1913#endif
1914
1915 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1916 return off;
1917}
1918
1919
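/*
 * Editor's usage sketch, not part of the original emitter set: materialize
 * the address of a stack slot at rbp/x29 - 0x20 in host register 3.  The
 * register number and offset are arbitrary example values.
 */
#if 0 /* illustrative only */
static uint32_t iemNativeExampleLeaStackSlot(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    return iemNativeEmitLeaGprByBp(pReNative, off, 3 /*iGprDst*/, -0x20 /*offDisp*/);
}
#endif

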
1920/**
1921 * Emits a 64-bit GPR store with a BP relative destination address.
1922 *
1923 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
1924 */
1925DECL_INLINE_THROW(uint32_t)
1926iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
1927{
1928#ifdef RT_ARCH_AMD64
1929 /* mov qword [rbp + offDisp], gprsrc */
1930 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1931 if (iGprSrc < 8)
1932 pbCodeBuf[off++] = X86_OP_REX_W;
1933 else
1934 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
1935 pbCodeBuf[off++] = 0x89;
1936 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
1937
1938#elif defined(RT_ARCH_ARM64)
1939 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
1940 {
1941 /* str w/ unsigned imm12 (scaled) */
1942 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1943 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
1944 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
1945 }
1946 else if (offDisp >= -256 && offDisp <= 256)
1947 {
1948 /* stur w/ signed imm9 (unscaled) */
1949 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1950 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
1951 }
1952 else if ((uint32_t)-offDisp < (unsigned)_4K)
1953 {
1954 /* Use temporary indexing register w/ sub uimm12. */
1955 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1956 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
1957 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
1958 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
1959 }
1960 else
1961 {
1962 /* Use temporary indexing register. */
1963 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1964 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1965 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
1966 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
1967 }
1968 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1969 return off;
1970
1971#else
1972# error "Port me!"
1973#endif
1974}
1975
1976
1977/**
1978 * Emits a 64-bit immediate store with a BP relative destination address.
1979 *
1980 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
1981 */
1982DECL_INLINE_THROW(uint32_t)
1983iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
1984{
1985#ifdef RT_ARCH_AMD64
1986 if ((int64_t)uImm64 == (int32_t)uImm64)
1987 {
1988 /* mov qword [rbp + offDisp], imm32 - sign extended */
1989 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
1990 pbCodeBuf[off++] = X86_OP_REX_W;
1991 pbCodeBuf[off++] = 0xc7;
1992 if (offDisp < 128 && offDisp >= -128)
1993 {
1994 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
1995 pbCodeBuf[off++] = (uint8_t)offDisp;
1996 }
1997 else
1998 {
1999 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2000 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2001 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2002 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2003 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2004 }
2005 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2006 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2007 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2008 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2009 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2010 return off;
2011 }
2012#endif
2013
2014 /* Load tmp0, imm64; Store tmp to bp+disp. */
2015 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2016 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2017}
2018
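/*
 * Editor's sketch, not part of the original emitter set: the test deciding
 * whether the AMD64 path above can use the single sign-extended imm32 form.
 */
#if 0 /* illustrative only */
static bool iemNativeExampleFitsSignExtImm32(uint64_t uImm64)
{
    /* True for 0x7fffffff and 0xffffffff80000000, false for 0x100000000. */
    return (int64_t)uImm64 == (int32_t)uImm64;
}
#endif
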
2019#if defined(RT_ARCH_ARM64)
2020
2021/**
2022 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2023 *
2024 * @note Odd and large @a offDisp values require a temporary, unless it's a
2025 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2026 * caller does not heed this.
2027 *
2028 * @note DON'T try this with prefetch.
2029 */
2030DECL_FORCE_INLINE_THROW(uint32_t)
2031iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2032 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2033{
2034 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2035 {
2036 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2037 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2038 }
2039 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2040 && iGprReg != iGprBase)
2041 || iGprTmp != UINT8_MAX)
2042 {
2043 /* The offset is too large, so we must load it into a register and use
2044 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2045 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2046 if (iGprTmp == UINT8_MAX)
2047 iGprTmp = iGprReg;
2048 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2049 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2050 }
2051 else
2052# ifdef IEM_WITH_THROW_CATCH
2053 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2054# else
2055 AssertReleaseFailedStmt(off = UINT32_MAX);
2056# endif
2057 return off;
2058}
2059
2060/**
2061 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2062 */
2063DECL_FORCE_INLINE_THROW(uint32_t)
2064iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2065 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2066{
2067 /*
2068 * There are a couple of ldr variants that take an immediate offset, so
2069 * we try to use those if we can; otherwise we have to use a temporary
2070 * register to help with the addressing.
2071 */
2072 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2073 {
2074 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2075 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2076 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2077 }
2078 else
2079 {
2080 /* The offset is too large, so we must load it into a register and use
2081 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2082 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2083 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2084
2085 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2086 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2087
2088 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2089 }
2090 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2091 return off;
2092}
2093
2094#endif /* RT_ARCH_ARM64 */
2095
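/*
 * Editor's sketch, not part of the original emitter set: the scaled
 * unsigned-imm12 test used by the ARM64 helpers above.  The CPU multiplies
 * the imm12 field by the access size, so the displacement must be a multiple
 * of cbData and below 4096 units.
 */
#if 0 /* illustrative only */
static bool iemNativeExampleFitsScaledUImm12(int32_t offDisp, unsigned cbData)
{
    return (uint32_t)offDisp < 4096U * cbData       /* non-negative and in range */
        && !((uint32_t)offDisp & (cbData - 1));     /* naturally aligned */
}
#endif
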
2096/**
2097 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2098 *
2099 * @note ARM64: Misaligned @a offDisp values and values not in the
2100 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2101 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2102 * does not heed this.
2103 */
2104DECL_FORCE_INLINE_THROW(uint32_t)
2105iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2106 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2107{
2108#ifdef RT_ARCH_AMD64
2109 /* mov reg64, mem64 */
2110 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2111 pCodeBuf[off++] = 0x8b;
2112 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2113 RT_NOREF(iGprTmp);
2114
2115#elif defined(RT_ARCH_ARM64)
2116 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2117 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2118
2119#else
2120# error "port me"
2121#endif
2122 return off;
2123}
2124
2125
2126/**
2127 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2128 */
2129DECL_INLINE_THROW(uint32_t)
2130iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2131{
2132#ifdef RT_ARCH_AMD64
2133 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2134 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2135
2136#elif defined(RT_ARCH_ARM64)
2137 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2138
2139#else
2140# error "port me"
2141#endif
2142 return off;
2143}
2144
2145
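/*
 * Editor's usage sketch, not part of the original emitter set: load a 64-bit
 * field at offset 0x18 of a structure addressed by host register 4 into host
 * register 5.  Register numbers and the offset are arbitrary example values.
 */
#if 0 /* illustrative only */
static uint32_t iemNativeExampleLoadU64Field(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    return iemNativeEmitLoadGprByGprU64(pReNative, off, 5 /*iGprDst*/, 4 /*iGprBase*/, 0x18 /*offDisp*/);
}
#endif

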
2146/**
2147 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2148 *
2149 * @note ARM64: Misaligned @a offDisp values and values not in the
2150 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2151 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2152 * caller does not heed this.
2153 *
2154 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2155 */
2156DECL_FORCE_INLINE_THROW(uint32_t)
2157iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2158 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2159{
2160#ifdef RT_ARCH_AMD64
2161 /* mov reg32, mem32 */
2162 if (iGprDst >= 8 || iGprBase >= 8)
2163 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2164 pCodeBuf[off++] = 0x8b;
2165 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2166 RT_NOREF(iGprTmp);
2167
2168#elif defined(RT_ARCH_ARM64)
2169 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2170 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2171
2172#else
2173# error "port me"
2174#endif
2175 return off;
2176}
2177
2178
2179/**
2180 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2181 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2182 */
2183DECL_INLINE_THROW(uint32_t)
2184iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2185{
2186#ifdef RT_ARCH_AMD64
2187 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2188 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2189
2190#elif defined(RT_ARCH_ARM64)
2191 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2192
2193#else
2194# error "port me"
2195#endif
2196 return off;
2197}
2198
2199
2200/**
2201 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2202 * sign-extending the value to 64 bits.
2203 *
2204 * @note ARM64: Misaligned @a offDisp values and values not in the
2205 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2206 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2207 * caller does not heed this.
2208 */
2209DECL_FORCE_INLINE_THROW(uint32_t)
2210iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2211 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2212{
2213#ifdef RT_ARCH_AMD64
2214 /* movsxd reg64, mem32 */
2215 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2216 pCodeBuf[off++] = 0x63;
2217 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2218 RT_NOREF(iGprTmp);
2219
2220#elif defined(RT_ARCH_ARM64)
2221 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2222 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2223
2224#else
2225# error "port me"
2226#endif
2227 return off;
2228}
2229
2230
2231/**
2232 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2233 *
2234 * @note ARM64: Misaligned @a offDisp values and values not in the
2235 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2236 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2237 * caller does not heed this.
2238 *
2239 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2240 */
2241DECL_FORCE_INLINE_THROW(uint32_t)
2242iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2243 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2244{
2245#ifdef RT_ARCH_AMD64
2246 /* movzx reg32, mem16 */
2247 if (iGprDst >= 8 || iGprBase >= 8)
2248 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2249 pCodeBuf[off++] = 0x0f;
2250 pCodeBuf[off++] = 0xb7;
2251 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2252 RT_NOREF(iGprTmp);
2253
2254#elif defined(RT_ARCH_ARM64)
2255 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2256 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2257
2258#else
2259# error "port me"
2260#endif
2261 return off;
2262}
2263
2264
2265/**
2266 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2267 * sign-extending the value to 64 bits.
2268 *
2269 * @note ARM64: Misaligned @a offDisp values and values not in the
2270 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2271 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2272 * caller does not heed this.
2273 */
2274DECL_FORCE_INLINE_THROW(uint32_t)
2275iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2276 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2277{
2278#ifdef RT_ARCH_AMD64
2279 /* movsx reg64, mem16 */
2280 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2281 pCodeBuf[off++] = 0x0f;
2282 pCodeBuf[off++] = 0xbf;
2283 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2284 RT_NOREF(iGprTmp);
2285
2286#elif defined(RT_ARCH_ARM64)
2287 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2288 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2289
2290#else
2291# error "port me"
2292#endif
2293 return off;
2294}
2295
2296
2297/**
2298 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2299 * sign-extending the value to 32 bits.
2300 *
2301 * @note ARM64: Misaligned @a offDisp values and values not in the
2302 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2303 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2304 * caller does not heed this.
2305 *
2306 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2307 */
2308DECL_FORCE_INLINE_THROW(uint32_t)
2309iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2310 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2311{
2312#ifdef RT_ARCH_AMD64
2313 /* movsx reg32, mem16 */
2314 if (iGprDst >= 8 || iGprBase >= 8)
2315 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2316 pCodeBuf[off++] = 0x0f;
2317 pCodeBuf[off++] = 0xbf;
2318 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2319 RT_NOREF(iGprTmp);
2320
2321#elif defined(RT_ARCH_ARM64)
2322 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2323 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2324
2325#else
2326# error "port me"
2327#endif
2328 return off;
2329}
2330
2331
2332/**
2333 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2334 *
2335 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2336 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2337 * same. Will assert / throw if caller does not heed this.
2338 *
2339 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2340 */
2341DECL_FORCE_INLINE_THROW(uint32_t)
2342iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2343 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2344{
2345#ifdef RT_ARCH_AMD64
2346 /* movzx reg32, mem8 */
2347 if (iGprDst >= 8 || iGprBase >= 8)
2348 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2349 pCodeBuf[off++] = 0x0f;
2350 pCodeBuf[off++] = 0xb6;
2351 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2352 RT_NOREF(iGprTmp);
2353
2354#elif defined(RT_ARCH_ARM64)
2355 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2356 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2357
2358#else
2359# error "port me"
2360#endif
2361 return off;
2362}
2363
2364
2365/**
2366 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2367 * sign-extending the value to 64 bits.
2368 *
2369 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2370 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2371 * same. Will assert / throw if caller does not heed this.
2372 */
2373DECL_FORCE_INLINE_THROW(uint32_t)
2374iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2375 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2376{
2377#ifdef RT_ARCH_AMD64
2378 /* movsx reg64, mem8 */
2379 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2380 pCodeBuf[off++] = 0x0f;
2381 pCodeBuf[off++] = 0xbe;
2382 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2383 RT_NOREF(iGprTmp);
2384
2385#elif defined(RT_ARCH_ARM64)
2386 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2387 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2388
2389#else
2390# error "port me"
2391#endif
2392 return off;
2393}
2394
2395
2396/**
2397 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2398 * sign-extending the value to 32 bits.
2399 *
2400 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2401 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2402 * same. Will assert / throw if caller does not heed this.
2403 *
2404 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2405 */
2406DECL_FORCE_INLINE_THROW(uint32_t)
2407iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2408 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2409{
2410#ifdef RT_ARCH_AMD64
2411 /* movsx reg32, mem8 */
2412 if (iGprDst >= 8 || iGprBase >= 8)
2413 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2414 pCodeBuf[off++] = 0x0f;
2415 pCodeBuf[off++] = 0xbe;
2416 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2417 RT_NOREF(iGprTmp);
2418
2419#elif defined(RT_ARCH_ARM64)
2420 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2421 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2422
2423#else
2424# error "port me"
2425#endif
2426 return off;
2427}
2428
2429
2430/**
2431 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2432 * sign-extending the value to 16 bits.
2433 *
2434 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2435 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2436 * same. Will assert / throw if caller does not heed this.
2437 *
2438 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2439 */
2440DECL_FORCE_INLINE_THROW(uint32_t)
2441iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2442 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2443{
2444#ifdef RT_ARCH_AMD64
2445 /* movsx reg32, mem8 */
2446 if (iGprDst >= 8 || iGprBase >= 8)
2447 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2448 pCodeBuf[off++] = 0x0f;
2449 pCodeBuf[off++] = 0xbe;
2450 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2451# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2452 /* and reg32, 0xffff */
2453 if (iGprDst >= 8)
2454 pCodeBuf[off++] = X86_OP_REX_B;
2455 pCodeBuf[off++] = 0x81;
2456 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2457 pCodeBuf[off++] = 0xff;
2458 pCodeBuf[off++] = 0xff;
2459 pCodeBuf[off++] = 0;
2460 pCodeBuf[off++] = 0;
2461# else
2462 /* movzx reg32, reg16 */
2463 if (iGprDst >= 8)
2464 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2465 pCodeBuf[off++] = 0x0f;
2466 pCodeBuf[off++] = 0xb7;
2467 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2468# endif
2469 RT_NOREF(iGprTmp);
2470
2471#elif defined(RT_ARCH_ARM64)
2472 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2473 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2474 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2475 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2476
2477#else
2478# error "port me"
2479#endif
2480 return off;
2481}
2482
2483
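/*
 * Editor's sketch, not part of the original emitter set: the value left in
 * the destination by the S8->16 load above, expressed in plain C.
 */
#if 0 /* illustrative only */
static uint64_t iemNativeExampleS8To16(int8_t bValue)
{
    return (uint16_t)(int16_t)bValue;   /* bits 15:0 sign-extended, 63:16 cleared */
}
#endif

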
2484/**
2485 * Emits a 64-bit GPR store via a GPR base address with a displacement.
2486 *
2487 * @note ARM64: Misaligned @a offDisp values and values not in the
2488 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp), as
2489 * the source register cannot double as the index register. Will assert /
2490 * throw if caller does not heed this.
2491 */
2492DECL_FORCE_INLINE_THROW(uint32_t)
2493iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2494 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2495{
2496#ifdef RT_ARCH_AMD64
2497 /* mov mem64, reg64 */
2498 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2499 pCodeBuf[off++] = 0x89;
2500 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2501 RT_NOREF(iGprTmp);
2502
2503#elif defined(RT_ARCH_ARM64)
2504 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2505 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
2506
2507#else
2508# error "port me"
2509#endif
2510 return off;
2511}
2512
2513
2514/**
2515 * Emits a 32-bit GPR store via a GPR base address with a displacement.
2516 *
2517 * @note ARM64: Misaligned @a offDisp values and values not in the
2518 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp), as
2519 * the source register cannot double as the index register. Will assert /
2520 * throw if caller does not heed this.
2521 */
2522DECL_FORCE_INLINE_THROW(uint32_t)
2523iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2524 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2525{
2526#ifdef RT_ARCH_AMD64
2527 /* mov mem32, reg32 */
2528 if (iGprSrc >= 8 || iGprBase >= 8)
2529 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2530 pCodeBuf[off++] = 0x89;
2531 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2532 RT_NOREF(iGprTmp);
2533
2534#elif defined(RT_ARCH_ARM64)
2535 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2536 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
2537
2538#else
2539# error "port me"
2540#endif
2541 return off;
2542}
2543
2544
2545/**
2546 * Emits a 16-bit GPR store via a GPR base address with a displacement.
2547 *
2548 * @note ARM64: Misaligned @a offDisp values and values not in the
2549 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp), as
2550 * the source register cannot double as the index register. Will assert /
2551 * throw if caller does not heed this.
2552 */
2553DECL_FORCE_INLINE_THROW(uint32_t)
2554iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2555 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2556{
2557#ifdef RT_ARCH_AMD64
2558 /* mov mem16, reg16 */
2559 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2560 if (iGprSrc >= 8 || iGprBase >= 8)
2561 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2562 pCodeBuf[off++] = 0x89;
2563 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2564 RT_NOREF(iGprTmp);
2565
2566#elif defined(RT_ARCH_ARM64)
2567 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2568 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
2569
2570#else
2571# error "port me"
2572#endif
2573 return off;
2574}
2575
2576
2577/**
2578 * Emits an 8-bit GPR store via a GPR base address with a displacement.
2579 *
2580 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2581 * temporary register (@a iGprTmp), as the source register cannot double
2582 * as the index register. Will assert / throw if caller does not heed this.
2583 */
2584DECL_FORCE_INLINE_THROW(uint32_t)
2585iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2586 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2587{
2588#ifdef RT_ARCH_AMD64
2589 /* mov mem8, reg8 */
2590 if (iGprSrc >= 8 || iGprBase >= 8)
2591 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2592 else if (iGprSrc >= 4)
2593 pCodeBuf[off++] = X86_OP_REX;
2594 pCodeBuf[off++] = 0x88;
2595 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2596 RT_NOREF(iGprTmp);
2597
2598#elif defined(RT_ARCH_ARM64)
2599 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2600 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
2601
2602#else
2603# error "port me"
2604#endif
2605 return off;
2606}
2607
2608
2609/**
2610 * Emits a 64-bit immediate store via a GPR base address with a displacement.
2611 *
2612 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0);
2613 * on AMD64 it depends on the immediate value.
2614 *
2615 * @note ARM64: Misaligned @a offDisp values and values not in the
2616 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp), as
2617 * the immediate register cannot double as the index register. Will assert
2618 * / throw if caller does not heed this.
2619 */
2620DECL_FORCE_INLINE_THROW(uint32_t)
2621iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
2622 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2623{
2624#ifdef RT_ARCH_AMD64
2625 if ((int32_t)uImm == (int64_t)uImm)
2626 {
2627 /* mov mem64, imm32 (sign-extended) */
2628 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2629 pCodeBuf[off++] = 0xc7;
2630 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2631 pCodeBuf[off++] = RT_BYTE1(uImm);
2632 pCodeBuf[off++] = RT_BYTE2(uImm);
2633 pCodeBuf[off++] = RT_BYTE3(uImm);
2634 pCodeBuf[off++] = RT_BYTE4(uImm);
2635 }
2636 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
2637 {
2638 /* require temporary register. */
2639 if (iGprImmTmp == UINT8_MAX)
2640 iGprImmTmp = iGprTmp;
2641 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2642 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
2643 }
2644 else
2645# ifdef IEM_WITH_THROW_CATCH
2646 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2647# else
2648 AssertReleaseFailedStmt(off = UINT32_MAX);
2649# endif
2650
2651#elif defined(RT_ARCH_ARM64)
2652 if (uImm == 0)
2653 iGprImmTmp = ARMV8_A64_REG_XZR;
2654 else
2655 {
2656 Assert(iGprImmTmp < 31);
2657 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2658 }
2659 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
2660
2661#else
2662# error "port me"
2663#endif
2664 return off;
2665}
2666
2667
2668/**
2669 * Emits a 32-bit immediate store via a GPR base address with a displacement.
2670 *
2671 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2672 *
2673 * @note ARM64: Misaligned @a offDisp values and values not in the
2674 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp), as
2675 * the immediate register cannot double as the index register. Will assert
2676 * / throw if caller does not heed this.
2677 */
2678DECL_FORCE_INLINE_THROW(uint32_t)
2679iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
2680 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2681{
2682#ifdef RT_ARCH_AMD64
2683 /* mov mem32, imm32 */
2684 if (iGprBase >= 8)
2685 pCodeBuf[off++] = X86_OP_REX_B;
2686 pCodeBuf[off++] = 0xc7;
2687 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2688 pCodeBuf[off++] = RT_BYTE1(uImm);
2689 pCodeBuf[off++] = RT_BYTE2(uImm);
2690 pCodeBuf[off++] = RT_BYTE3(uImm);
2691 pCodeBuf[off++] = RT_BYTE4(uImm);
2692 RT_NOREF(iGprImmTmp, iGprTmp);
2693
2694#elif defined(RT_ARCH_ARM64)
2696 if (uImm == 0)
2697 iGprImmTmp = ARMV8_A64_REG_XZR;
2698 else
2699 {
2700 Assert(iGprImmTmp < 31);
2701 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2702 }
2703 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
2704 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
2705
2706#else
2707# error "port me"
2708#endif
2709 return off;
2710}
2711
2712
2713/**
2714 * Emits a 16-bit immediate store via a GPR base address with a displacement.
2715 *
2716 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2717 *
2718 * @note ARM64: Misaligned @a offDisp values and values not in the
2719 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp), as
2720 * the immediate register cannot double as the index register. Will assert
2721 * / throw if caller does not heed this.
2722 */
2723DECL_FORCE_INLINE_THROW(uint32_t)
2724iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
2725 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2726{
2727#ifdef RT_ARCH_AMD64
2728 /* mov mem16, imm16 */
2729 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2730 if (iGprBase >= 8)
2731 pCodeBuf[off++] = X86_OP_REX_B;
2732 pCodeBuf[off++] = 0xc7;
2733 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2734 pCodeBuf[off++] = RT_BYTE1(uImm);
2735 pCodeBuf[off++] = RT_BYTE2(uImm);
2736 RT_NOREF(iGprImmTmp, iGprTmp);
2737
2738#elif defined(RT_ARCH_ARM64)
2739 if (uImm == 0)
2740 iGprImmTmp = ARMV8_A64_REG_XZR;
2741 else
2742 {
2743 Assert(iGprImmTmp < 31);
2744 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
2745 }
2746 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
2747 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
2748
2749#else
2750# error "port me"
2751#endif
2752 return off;
2753}
2754
2755
2756/**
2757 * Emits an 8-bit immediate store via a GPR base address with a displacement.
2758 *
2759 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2760 *
2761 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2762 * temporary register (@a iGprTmp), as the immediate register cannot double
2763 * as the index register. Will assert / throw if caller does not heed this.
2764 */
2765DECL_FORCE_INLINE_THROW(uint32_t)
2766iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
2767 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2768{
2769#ifdef RT_ARCH_AMD64
2770 /* mov mem8, imm8 */
2772 if (iGprBase >= 8)
2773 pCodeBuf[off++] = X86_OP_REX_B;
2774 pCodeBuf[off++] = 0xc6;
2775 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2776 pCodeBuf[off++] = uImm;
2777 RT_NOREF(iGprImmTmp, iGprTmp);
2778
2779#elif defined(RT_ARCH_ARM64)
2780 if (uImm == 0)
2781 iGprImmTmp = ARMV8_A64_REG_XZR;
2782 else
2783 {
2784 Assert(iGprImmTmp < 31);
2785 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
2786 }
2787 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
2788 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
2789
2790#else
2791# error "port me"
2792#endif
2793 return off;
2794}
2795
2796
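/*
 * Editor's usage sketch, not part of the original emitter set: store the
 * constant 0x2a into a byte field at offset 1 of a structure addressed by
 * host register 2, with host register 6 as the ARM64 immediate temporary.
 * The caller must have ensured sufficient instruction buffer space.
 */
#if 0 /* illustrative only */
static uint32_t iemNativeExampleStoreImm8(PIEMNATIVEINSTR pCodeBuf, uint32_t off)
{
    return iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, 0x2a, 2 /*iGprBase*/, 6 /*iGprImmTmp*/, 1 /*offDisp*/);
}
#endif
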
2797
2798/*********************************************************************************************************************************
2799* Subtraction and Additions *
2800*********************************************************************************************************************************/
2801
2802/**
2803 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
2804 * @note The AMD64 version sets flags.
2805 */
2806DECL_INLINE_THROW(uint32_t)
2807iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
2808{
2809#if defined(RT_ARCH_AMD64)
2810 /* sub Gv,Ev */
2811 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2812 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
2813 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
2814 pbCodeBuf[off++] = 0x2b;
2815 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
2816
2817#elif defined(RT_ARCH_ARM64)
2818 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2819 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
2820
2821#else
2822# error "Port me"
2823#endif
2824 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2825 return off;
2826}
2827
2828
2829/**
2830 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
2831 * @note The AMD64 version sets flags.
2832 */
2833DECL_FORCE_INLINE(uint32_t)
2834iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
2835{
2836#if defined(RT_ARCH_AMD64)
2837 /* sub Gv,Ev */
2838 if (iGprDst >= 8 || iGprSubtrahend >= 8)
2839 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
2840 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
2841 pCodeBuf[off++] = 0x2b;
2842 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
2843
2844#elif defined(RT_ARCH_ARM64)
2845 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
2846
2847#else
2848# error "Port me"
2849#endif
2850 return off;
2851}
2852
2853
2854/**
2855 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
2856 * @note The AMD64 version sets flags.
2857 */
2858DECL_INLINE_THROW(uint32_t)
2859iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
2860{
2861#if defined(RT_ARCH_AMD64)
2862 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
2863#elif defined(RT_ARCH_ARM64)
2864 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
2865#else
2866# error "Port me"
2867#endif
2868 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2869 return off;
2870}
2871
2872
2873/**
2874 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
2875 *
2876 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
2877 *
2878 * @note Larger constants will require a temporary register. Failing to specify
2879 * one when needed will trigger fatal assertion / throw.
2880 */
2881DECL_FORCE_INLINE_THROW(uint32_t)
2882iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
2883 uint8_t iGprTmp = UINT8_MAX)
2884{
2885#ifdef RT_ARCH_AMD64
2886 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
2887 if (iSubtrahend == 1)
2888 {
2889 /* dec r/m64 */
2890 pCodeBuf[off++] = 0xff;
2891 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
2892 }
2893 else if (iSubtrahend == -1)
2894 {
2895 /* inc r/m64 */
2896 pCodeBuf[off++] = 0xff;
2897 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
2898 }
2899 else if ((int8_t)iSubtrahend == iSubtrahend)
2900 {
2901 /* sub r/m64, imm8 */
2902 pCodeBuf[off++] = 0x83;
2903 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
2904 pCodeBuf[off++] = (uint8_t)iSubtrahend;
2905 }
2906 else if ((int32_t)iSubtrahend == iSubtrahend)
2907 {
2908 /* sub r/m64, imm32 */
2909 pCodeBuf[off++] = 0x81;
2910 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
2911 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
2912 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
2913 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
2914 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
2915 }
2916 else if (iGprTmp != UINT8_MAX)
2917 {
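 /* Note: the 'off - 1' backs up over the REX prefix emitted at the top,
    since iemNativeEmitLoadGprImmEx and the sub below emit their own. */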
2918 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
2919 /* sub r/m64, r64 */
2920 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
2921 pCodeBuf[off++] = 0x29;
2922 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
2923 }
2924 else
2925# ifdef IEM_WITH_THROW_CATCH
2926 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2927# else
2928 AssertReleaseFailedStmt(off = UINT32_MAX);
2929# endif
2930
2931#elif defined(RT_ARCH_ARM64)
2932 uint64_t uAbsSubtrahend = (uint64_t)RT_ABS(iSubtrahend);
2933 if (uAbsSubtrahend < 4096)
2934 {
2935 if (iSubtrahend >= 0)
2936 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
2937 else
2938 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
2939 }
2940 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
2941 {
2942 if (iSubtrahend >= 0)
2943 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
2944 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
2945 else
2946 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
2947 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
2948 }
2949 else if (iGprTmp != UINT8_MAX)
2950 {
2951 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
2952 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
2953 }
2954 else
2955# ifdef IEM_WITH_THROW_CATCH
2956 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2957# else
2958 AssertReleaseFailedStmt(off = UINT32_MAX);
2959# endif
2960
2961#else
2962# error "Port me"
2963#endif
2964 return off;
2965}
2966
2967
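/*
 * Editor's sketch, not part of the original emitter set: the AMD64 encoding
 * picked by iemNativeEmitSubGprImmEx for a given subtrahend.
 */
#if 0 /* illustrative only */
static const char *iemNativeExampleSubImmForm(int64_t iSubtrahend)
{
    if (iSubtrahend == 1)                       return "dec r/m64";
    if (iSubtrahend == -1)                      return "inc r/m64";
    if ((int8_t)iSubtrahend == iSubtrahend)     return "sub r/m64, imm8";
    if ((int32_t)iSubtrahend == iSubtrahend)    return "sub r/m64, imm32";
    return "mov iGprTmp, imm64; sub r/m64, r64";  /* requires iGprTmp */
}
#endif

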
2968/**
2969 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
2970 *
2971 * @note Larger constants will require a temporary register. Failing to specify
2972 * one when needed will trigger fatal assertion / throw.
2973 */
2974DECL_INLINE_THROW(uint32_t)
2975iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
2976 uint8_t iGprTmp = UINT8_MAX)
2977
2978{
2979#ifdef RT_ARCH_AMD64
2980 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
2981#elif defined(RT_ARCH_ARM64)
2982 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
2983#else
2984# error "Port me"
2985#endif
2986 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2987 return off;
2988}
2989
2990
2991/**
2992 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
2993 *
2994 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
2995 *
2996 * @note ARM64: Larger constants will require a temporary register. Failing to
2997 * specify one when needed will trigger fatal assertion / throw.
2998 */
2999DECL_FORCE_INLINE_THROW(uint32_t)
3000iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3001 uint8_t iGprTmp = UINT8_MAX)
3002{
3003#ifdef RT_ARCH_AMD64
3004 if (iGprDst >= 8)
3005 pCodeBuf[off++] = X86_OP_REX_B;
3006 if (iSubtrahend == 1)
3007 {
3008 /* dec r/m32 */
3009 pCodeBuf[off++] = 0xff;
3010 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3011 }
3012 else if (iSubtrahend == -1)
3013 {
3014 /* inc r/m32 */
3015 pCodeBuf[off++] = 0xff;
3016 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3017 }
3018 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3019 {
3020 /* sub r/m32, imm8 */
3021 pCodeBuf[off++] = 0x83;
3022 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3023 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3024 }
3025 else
3026 {
3027 /* sub r/m32, imm32 */
3028 pCodeBuf[off++] = 0x81;
3029 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3030 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3031 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3032 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3033 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3034 }
3035 RT_NOREF(iGprTmp);
3036
3037#elif defined(RT_ARCH_ARM64)
3038 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3039 if (uAbsSubtrahend < 4096)
3040 {
3041 if (iSubtrahend >= 0)
3042 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3043 else
3044 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3045 }
3046 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3047 {
3048 if (iSubtrahend >= 0)
3049 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3050 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3051 else
3052 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3053 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3054 }
3055 else if (iGprTmp != UINT8_MAX)
3056 {
3057 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3058 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3059 }
3060 else
3061# ifdef IEM_WITH_THROW_CATCH
3062 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3063# else
3064 AssertReleaseFailedStmt(off = UINT32_MAX);
3065# endif
3066
3067#else
3068# error "Port me"
3069#endif
3070 return off;
3071}
3072
3073
3074/**
3075 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3076 *
3077 * @note ARM64: Larger constants will require a temporary register. Failing to
3078 * specify one when needed will trigger fatal assertion / throw.
3079 */
3080DECL_INLINE_THROW(uint32_t)
3081iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3082 uint8_t iGprTmp = UINT8_MAX)
3083
3084{
3085#ifdef RT_ARCH_AMD64
3086 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3087#elif defined(RT_ARCH_ARM64)
3088 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3089#else
3090# error "Port me"
3091#endif
3092 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3093 return off;
3094}
3095
3096
3097/**
3098 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3099 *
3100 * This will optimize using DEC/INC/whatever, and the ARM64 version will not
3101 * set flags, so it is not suitable as a basis for conditional jumps.
3102 *
3103 * @note AMD64: Will only update the lower 16 bits of the register.
3104 * @note ARM64: Will update the entire register.
3105 * @note ARM64: Larger constants will require a temporary register. Failing to
3106 * specify one when needed will trigger fatal assertion / throw.
3107 */
3108DECL_FORCE_INLINE_THROW(uint32_t)
3109iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3110 uint8_t iGprTmp = UINT8_MAX)
3111{
3112#ifdef RT_ARCH_AMD64
3113 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3114 if (iGprDst >= 8)
3115 pCodeBuf[off++] = X86_OP_REX_B;
3116 if (iSubtrahend == 1)
3117 {
3118 /* dec r/m16 */
3119 pCodeBuf[off++] = 0xff;
3120 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3121 }
3122 else if (iSubtrahend == -1)
3123 {
3124 /* inc r/m16 */
3125 pCodeBuf[off++] = 0xff;
3126 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3127 }
3128 else if ((int8_t)iSubtrahend == iSubtrahend)
3129 {
3130 /* sub r/m16, imm8 */
3131 pCodeBuf[off++] = 0x83;
3132 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3133 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3134 }
3135 else
3136 {
3137 /* sub r/m16, imm16 */
3138 pCodeBuf[off++] = 0x81;
3139 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3140 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3141 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3142 }
3143 RT_NOREF(iGprTmp);
3144
3145#elif defined(RT_ARCH_ARM64)
3146 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3147 if (uAbsSubtrahend < 4096)
3148 {
3149 if (iSubtrahend >= 0)
3150 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3151 else
3152 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3153 }
3154 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3155 {
3156 if (iSubtrahend >= 0)
3157 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3158 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3159 else
3160 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3161 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3162 }
3163 else if (iGprTmp != UINT8_MAX)
3164 {
3165 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3166 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3167 }
3168 else
3169# ifdef IEM_WITH_THROW_CATCH
3170 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3171# else
3172 AssertReleaseFailedStmt(off = UINT32_MAX);
3173# endif
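    /* Mask the result back down to 16 bits: immr=0/imms=15 is the bitmask
       immediate for 0x0000ffff, so the subtraction wraps like a real 16-bit
       operation and bits 16 thru 63 end up zero. */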
3174 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3175
3176#else
3177# error "Port me"
3178#endif
3179 return off;
3180}
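
/*
 * Hand-assembled sample of the AMD64 path above (worth double-checking with a
 * disassembler): iGprDst=11, iSubtrahend=5 yields 66 41 83 EB 05, i.e.
 * 'sub r11w, 5' -- operand-size prefix, REX.B, 0x83 /5, imm8.
 */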
3181
3182
3183/**
3184 * Emits adding a 64-bit GPR to another, storing the result in the first.
3185 * @note The AMD64 version sets flags.
3186 */
3187DECL_FORCE_INLINE(uint32_t)
3188iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3189{
3190#if defined(RT_ARCH_AMD64)
3191 /* add Gv,Ev */
3192 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3193 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3194 pCodeBuf[off++] = 0x03;
3195 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3196
3197#elif defined(RT_ARCH_ARM64)
3198 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3199
3200#else
3201# error "Port me"
3202#endif
3203 return off;
3204}
3205
3206
3207/**
3208 * Emits adding a 64-bit GPR to another, storing the result in the first.
3209 * @note The AMD64 version sets flags.
3210 */
3211DECL_INLINE_THROW(uint32_t)
3212iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3213{
3214#if defined(RT_ARCH_AMD64)
3215 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3216#elif defined(RT_ARCH_ARM64)
3217 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3218#else
3219# error "Port me"
3220#endif
3221 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3222 return off;
3223}
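
/*
 * Hand-assembled samples (unverified): iGprDst=3, iGprAddend=8 gives 49 03 D8,
 * i.e. 'add rbx, r8', on AMD64, while ARM64 emits the single instruction
 * 'add x3, x3, x8'.
 */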
3224
3225
3226/**
3227 * Emits adding a 32-bit GPR to another, storing the result in the first.
3228 * @note The AMD64 version sets flags. Bits 63:32 of the destination are cleared.
3229 */
3230DECL_FORCE_INLINE(uint32_t)
3231iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3232{
3233#if defined(RT_ARCH_AMD64)
3234 /* add Gv,Ev */
3235 if (iGprDst >= 8 || iGprAddend >= 8)
3236 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3237 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3238 pCodeBuf[off++] = 0x03;
3239 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3240
3241#elif defined(RT_ARCH_ARM64)
3242 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3243
3244#else
3245# error "Port me"
3246#endif
3247 return off;
3248}
3249
3250
3251/**
3252 * Emits adding a 32-bit GPR to another, storing the result in the first.
3253 * @note The AMD64 version sets flags. Bits 63:32 of the destination are cleared.
3254 */
3255DECL_INLINE_THROW(uint32_t)
3256iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3257{
3258#if defined(RT_ARCH_AMD64)
3259 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3260#elif defined(RT_ARCH_ARM64)
3261 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3262#else
3263# error "Port me"
3264#endif
3265 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3266 return off;
3267}
3268
3269
3270/**
3271 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3272 */
3273DECL_INLINE_THROW(uint32_t)
3274iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3275{
3276#if defined(RT_ARCH_AMD64)
3277 /* add or inc */
3278 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3279 if (iImm8 != 1)
3280 {
3281 pCodeBuf[off++] = 0x83;
3282 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3283 pCodeBuf[off++] = (uint8_t)iImm8;
3284 }
3285 else
3286 {
3287 pCodeBuf[off++] = 0xff;
3288 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3289 }
3290
3291#elif defined(RT_ARCH_ARM64)
3292 if (iImm8 >= 0)
3293 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3294 else
3295 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3296
3297#else
3298# error "Port me"
3299#endif
3300 return off;
3301}
3302
3303
3304/**
3305 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3306 */
3307DECL_INLINE_THROW(uint32_t)
3308iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3309{
3310#if defined(RT_ARCH_AMD64)
3311 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3312#elif defined(RT_ARCH_ARM64)
3313 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3314#else
3315# error "Port me"
3316#endif
3317 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3318 return off;
3319}
3320
3321
3322/**
3323 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3324 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3325 */
3326DECL_FORCE_INLINE(uint32_t)
3327iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3328{
3329#if defined(RT_ARCH_AMD64)
3330 /* add or inc */
3331 if (iGprDst >= 8)
3332 pCodeBuf[off++] = X86_OP_REX_B;
3333 if (iImm8 != 1)
3334 {
3335 pCodeBuf[off++] = 0x83;
3336 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3337 pCodeBuf[off++] = (uint8_t)iImm8;
3338 }
3339 else
3340 {
3341 pCodeBuf[off++] = 0xff;
3342 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3343 }
3344
3345#elif defined(RT_ARCH_ARM64)
3346 if (iImm8 >= 0)
3347 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
3348 else
3349 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
3350
3351#else
3352# error "Port me"
3353#endif
3354 return off;
3355}
3356
3357
3358/**
3359 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3360 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3361 */
3362DECL_INLINE_THROW(uint32_t)
3363iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3364{
3365#if defined(RT_ARCH_AMD64)
3366 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3367#elif defined(RT_ARCH_ARM64)
3368 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3369#else
3370# error "Port me"
3371#endif
3372 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3373 return off;
3374}
3375
3376
3377/**
3378 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3379 *
3380 * @note Will assert / throw if @a iGprTmp is not specified when needed.
3381 */
3382DECL_FORCE_INLINE_THROW(uint32_t)
3383iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
3384{
3385#if defined(RT_ARCH_AMD64)
3386 if ((int8_t)iAddend == iAddend)
3387 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3388
3389 if ((int32_t)iAddend == iAddend)
3390 {
3391        /* add gpr, imm32 */
3392 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3393 pCodeBuf[off++] = 0x81;
3394 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3395 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3396 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3397 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3398 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3399 }
3400 else if (iGprTmp != UINT8_MAX)
3401 {
3402 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3403
3404 /* add dst, tmpreg */
3405 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3406 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
3407 pCodeBuf[off++] = 0x03;
3408 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
3409 }
3410 else
3411# ifdef IEM_WITH_THROW_CATCH
3412 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3413# else
3414 AssertReleaseFailedStmt(off = UINT32_MAX);
3415# endif
3416
3417#elif defined(RT_ARCH_ARM64)
3418 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
3419 if (uAbsAddend < 4096)
3420 {
3421 if (iAddend >= 0)
3422 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3423 else
3424 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3425 }
3426 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3427 {
3428 if (iAddend >= 0)
3429 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3430 true /*f64Bit*/, true /*fShift12*/);
3431 else
3432 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3433 true /*f64Bit*/, true /*fShift12*/);
3434 }
3435 else if (iGprTmp != UINT8_MAX)
3436 {
3437 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3438 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
3439 }
3440 else
3441# ifdef IEM_WITH_THROW_CATCH
3442 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3443# else
3444 AssertReleaseFailedStmt(off = UINT32_MAX);
3445# endif
3446
3447#else
3448# error "Port me"
3449#endif
3450 return off;
3451}
3452
3453
3454/**
3455 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3456 */
3457DECL_INLINE_THROW(uint32_t)
3458iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
3459{
3460#if defined(RT_ARCH_AMD64)
3461 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3462 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
3463
3464 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
3465 {
3466        /* add gpr, imm32 */
3467 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3468 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3469 pbCodeBuf[off++] = 0x81;
3470 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3471 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3472 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3473 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3474 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3475 }
3476 else
3477 {
3478 /* Best to use a temporary register to deal with this in the simplest way: */
3479 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3480
3481 /* add dst, tmpreg */
3482 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3483 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3484 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
3485 pbCodeBuf[off++] = 0x03;
3486 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
3487
3488 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3489 }
3490
3491#elif defined(RT_ARCH_ARM64)
3492 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
3493 {
3494 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3495 if (iAddend >= 0)
3496 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
3497 else
3498 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
3499 }
3500 else
3501 {
3502 /* Use temporary register for the immediate. */
3503 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3504
3505 /* add gprdst, gprdst, tmpreg */
3506 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3507 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
3508
3509 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3510 }
3511
3512#else
3513# error "Port me"
3514#endif
3515 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3516 return off;
3517}
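
#if 0 /* Usage sketch, not compiled; the register and constant are illustrative. */
/* A 64-bit addend fitting neither imm8/imm32 (AMD64) nor uimm12 (ARM64) makes
   the emitter allocate and free a temporary immediate register internally: */
off = iemNativeEmitAddGprImm(pReNative, off, X86_GREG_xSI, INT64_C(0x123456789));
#endif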
3518
3519
3520/**
3521 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
3522 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3523 * @note For ARM64 the iAddend value must be in the range 0x000..0xfff,
3524 *       or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
3525 *       the lower 12 bits always zero). The negative ranges are also allowed,
3526 *       making it behave like a subtraction. If the constant does not conform,
3527 *       the code will assert / throw rather than emit a bogus instruction.
3528 */
3529DECL_FORCE_INLINE_THROW(uint32_t)
3530iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
3531{
3532#if defined(RT_ARCH_AMD64)
3533 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3534 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3535
3536    /* add gpr, imm32 */
3537 if (iGprDst >= 8)
3538 pCodeBuf[off++] = X86_OP_REX_B;
3539 pCodeBuf[off++] = 0x81;
3540 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3541 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3542 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3543 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3544 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3545
3546#elif defined(RT_ARCH_ARM64)
3547 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
3548 if (uAbsAddend <= 0xfff)
3549 {
3550 if (iAddend >= 0)
3551 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3552 else
3553 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3554 }
3555 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3556 {
3557 if (iAddend >= 0)
3558 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3559 false /*f64Bit*/, true /*fShift12*/);
3560 else
3561 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3562 false /*f64Bit*/, true /*fShift12*/);
3563 }
3564 else
3565# ifdef IEM_WITH_THROW_CATCH
3566 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3567# else
3568 AssertReleaseFailedStmt(off = UINT32_MAX);
3569# endif
3570
3571#else
3572# error "Port me"
3573#endif
3574 return off;
3575}
3576
3577
3578/**
3579 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
3580 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3581 */
3582DECL_INLINE_THROW(uint32_t)
3583iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
3584{
3585#if defined(RT_ARCH_AMD64)
3586 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
3587
3588#elif defined(RT_ARCH_ARM64)
3589 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
3590 {
3591 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3592 if (iAddend >= 0)
3593 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
3594 else
3595 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
3596 }
3597 else
3598 {
3599 /* Use temporary register for the immediate. */
3600 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
3601
3602 /* add gprdst, gprdst, tmpreg */
3603 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3604 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
3605
3606 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3607 }
3608
3609#else
3610# error "Port me"
3611#endif
3612 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3613 return off;
3614}
3615
3616
3617/**
3618 * Emits a 16-bit GPR add with a signed immediate addend.
3619 *
3620 * This will be optimized using INC/DEC/whatever where possible, and the ARM64
3621 * version will not set flags, so it is not suitable as a base for conditional jumps.
3622 *
3623 * @note AMD64: Will only update the lower 16 bits of the register.
3624 * @note ARM64: Will update the entire register.
3625 * @note ARM64: Larger constants will require a temporary register. Failing to
3626 * specify one when needed will trigger fatal assertion / throw.
3627 * @sa iemNativeEmitSubGpr16ImmEx
3628 */
3629DECL_FORCE_INLINE_THROW(uint32_t)
3630iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
3631 uint8_t iGprTmp = UINT8_MAX)
3632{
3633#ifdef RT_ARCH_AMD64
3634 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3635 if (iGprDst >= 8)
3636 pCodeBuf[off++] = X86_OP_REX_B;
3637 if (iAddend == 1)
3638 {
3639 /* inc r/m16 */
3640 pCodeBuf[off++] = 0xff;
3641 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3642 }
3643 else if (iAddend == -1)
3644 {
3645 /* dec r/m16 */
3646 pCodeBuf[off++] = 0xff;
3647 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3648 }
3649 else if ((int8_t)iAddend == iAddend)
3650 {
3651 /* add r/m16, imm8 */
3652 pCodeBuf[off++] = 0x83;
3653 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3654 pCodeBuf[off++] = (uint8_t)iAddend;
3655 }
3656 else
3657 {
3658 /* add r/m16, imm16 */
3659 pCodeBuf[off++] = 0x81;
3660 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3661 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
3662 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
3663 }
3664 RT_NOREF(iGprTmp);
3665
3666#elif defined(RT_ARCH_ARM64)
3667 uint32_t uAbsAddend = RT_ABS(iAddend);
3668 if (uAbsAddend < 4096)
3669 {
3670 if (iAddend >= 0)
3671 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3672 else
3673 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3674 }
3675 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3676 {
3677 if (iAddend >= 0)
3678 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
3679 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3680 else
3681 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
3682 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3683 }
3684 else if (iGprTmp != UINT8_MAX)
3685 {
3686 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
3687 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3688 }
3689 else
3690# ifdef IEM_WITH_THROW_CATCH
3691 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3692# else
3693 AssertReleaseFailedStmt(off = UINT32_MAX);
3694# endif
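    /* As in iemNativeEmitSubGpr16ImmEx: AND with the 0x0000ffff bitmask
       immediate (immr=0/imms=15) so the result wraps like a real 16-bit
       operation and bits 16 thru 63 are zeroed. */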
3695 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3696
3697#else
3698# error "Port me"
3699#endif
3700 return off;
3701}
3702
3703
3705/**
3706 * Adds two 64-bit GPRs together, storing the result in a third register.
3707 */
3708DECL_FORCE_INLINE(uint32_t)
3709iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
3710{
3711#ifdef RT_ARCH_AMD64
3712 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
3713 {
3714 /** @todo consider LEA */
3715 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
3716 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
3717 }
3718 else
3719 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
3720
3721#elif defined(RT_ARCH_ARM64)
3722 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
3723
3724#else
3725# error "Port me!"
3726#endif
3727 return off;
3728}
3729
3730
3732/**
3733 * Adds two 32-bit GPRs together, storing the result in a third register.
3734 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
3735 */
3736DECL_FORCE_INLINE(uint32_t)
3737iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
3738{
3739#ifdef RT_ARCH_AMD64
3740 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
3741 {
3742 /** @todo consider LEA */
3743 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
3744 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
3745 }
3746 else
3747 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
3748
3749#elif defined(RT_ARCH_ARM64)
3750 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
3751
3752#else
3753# error "Port me!"
3754#endif
3755 return off;
3756}
3757
3758
3759/**
3760 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
3761 * third register.
3762 *
3763 * @note The ARM64 version does not work for non-trivial constants if the
3764 * two registers are the same. Will assert / throw exception.
3765 */
3766DECL_FORCE_INLINE_THROW(uint32_t)
3767iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
3768{
3769#ifdef RT_ARCH_AMD64
3770 /** @todo consider LEA */
3771 if ((int8_t)iImmAddend == iImmAddend)
3772 {
3773 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
3774 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
3775 }
3776 else
3777 {
3778 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
3779 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
3780 }
3781
3782#elif defined(RT_ARCH_ARM64)
3783 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
3784 if (uAbsImmAddend < 4096)
3785 {
3786 if (iImmAddend >= 0)
3787 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
3788 else
3789 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
3790 }
3791 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
3792 {
3793 if (iImmAddend >= 0)
3794            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
3795        else
3796            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
3797 }
3798 else if (iGprDst != iGprAddend)
3799 {
3800 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
3801 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
3802 }
3803 else
3804# ifdef IEM_WITH_THROW_CATCH
3805 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3806# else
3807 AssertReleaseFailedStmt(off = UINT32_MAX);
3808# endif
3809
3810#else
3811# error "Port me!"
3812#endif
3813 return off;
3814}
3815
3816
3817/**
3818 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
3819 * third register.
3820 *
3821 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
3822 *
3823 * @note The ARM64 version does not work for non-trivial constants if the
3824 * two registers are the same. Will assert / throw exception.
3825 */
3826DECL_FORCE_INLINE_THROW(uint32_t)
3827iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
3828{
3829#ifdef RT_ARCH_AMD64
3830 /** @todo consider LEA */
3831 if ((int8_t)iImmAddend == iImmAddend)
3832 {
3833 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
3834 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
3835 }
3836 else
3837 {
3838 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
3839        off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
3840 }
3841
3842#elif defined(RT_ARCH_ARM64)
3843 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
3844 if (uAbsImmAddend < 4096)
3845 {
3846 if (iImmAddend >= 0)
3847 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
3848 else
3849 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
3850 }
3851 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
3852 {
3853 if (iImmAddend >= 0)
3854            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
3855        else
3856            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
3857 }
3858 else if (iGprDst != iGprAddend)
3859 {
3860 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
3861 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
3862 }
3863 else
3864# ifdef IEM_WITH_THROW_CATCH
3865 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3866# else
3867 AssertReleaseFailedStmt(off = UINT32_MAX);
3868# endif
3869
3870#else
3871# error "Port me!"
3872#endif
3873 return off;
3874}
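
#if 0 /* Sketch of what a non-Ex wrapper for the above could look like; the
         instruction buffer sizes are eyeballed worst cases (an assumption to
         re-check before lifting this anywhere). */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGpr32EqGprPlusImm(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                               uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
{
# ifdef RT_ARCH_AMD64
    off = iemNativeEmitGpr32EqGprPlusImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off,
                                           iGprDst, iGprAddend, iImmAddend);
# elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGpr32EqGprPlusImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off,
                                           iGprDst, iGprAddend, iImmAddend);
# endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
#endif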
3875
3876
3877/*********************************************************************************************************************************
3878* Unary Operations *
3879*********************************************************************************************************************************/
3880
3881/**
3882 * Emits code for two's complement negation of a 64-bit GPR.
3883 */
3884DECL_FORCE_INLINE_THROW(uint32_t)
3885iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
3886{
3887#if defined(RT_ARCH_AMD64)
3888 /* neg Ev */
3889 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3890 pCodeBuf[off++] = 0xf7;
3891 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
3892
3893#elif defined(RT_ARCH_ARM64)
3894 /* sub dst, xzr, dst */
3895 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
3896
3897#else
3898# error "Port me"
3899#endif
3900 return off;
3901}
3902
3903
3904/**
3905 * Emits code for two's complement negation of a 64-bit GPR.
3906 */
3907DECL_INLINE_THROW(uint32_t)
3908iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
3909{
3910#if defined(RT_ARCH_AMD64)
3911 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
3912#elif defined(RT_ARCH_ARM64)
3913 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
3914#else
3915# error "Port me"
3916#endif
3917 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3918 return off;
3919}
3920
3921
3922/**
3923 * Emits code for two's complement negation of a 32-bit GPR.
3924 * @note Bits 32 thru 63 are set to zero.
3925 */
3926DECL_FORCE_INLINE_THROW(uint32_t)
3927iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
3928{
3929#if defined(RT_ARCH_AMD64)
3930 /* neg Ev */
3931 if (iGprDst >= 8)
3932 pCodeBuf[off++] = X86_OP_REX_B;
3933 pCodeBuf[off++] = 0xf7;
3934 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
3935
3936#elif defined(RT_ARCH_ARM64)
3937 /* sub dst, xzr, dst */
3938 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
3939
3940#else
3941# error "Port me"
3942#endif
3943 return off;
3944}
3945
3946
3947/**
3948 * Emits code for two's complement negation of a 32-bit GPR.
3949 * @note Bits 32 thru 63 are set to zero.
3950 */
3951DECL_INLINE_THROW(uint32_t)
3952iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
3953{
3954#if defined(RT_ARCH_AMD64)
3955 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
3956#elif defined(RT_ARCH_ARM64)
3957 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
3958#else
3959# error "Port me"
3960#endif
3961 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3962 return off;
3963}
3964
3965
3966
3967/*********************************************************************************************************************************
3968* Bit Operations *
3969*********************************************************************************************************************************/
3970
3971/**
3972 * Emits code for clearing bits 16 thru 63 in the GPR.
3973 */
3974DECL_INLINE_THROW(uint32_t)
3975iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
3976{
3977#if defined(RT_ARCH_AMD64)
3978 /* movzx Gv,Ew */
3979 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
3980 if (iGprDst >= 8)
3981 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
3982 pbCodeBuf[off++] = 0x0f;
3983 pbCodeBuf[off++] = 0xb7;
3984 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
3985
3986#elif defined(RT_ARCH_ARM64)
3987 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3988# if 1
3989 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
3990# else
3991 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
3992 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
3993# endif
3994#else
3995# error "Port me"
3996#endif
3997 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3998 return off;
3999}
4000
4001
4002/**
4003 * Emits code for AND'ing two 64-bit GPRs.
4004 *
4005 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4006 * and ARM64 hosts.
4007 */
4008DECL_FORCE_INLINE(uint32_t)
4009iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4010{
4011#if defined(RT_ARCH_AMD64)
4012 /* and Gv, Ev */
4013 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4014 pCodeBuf[off++] = 0x23;
4015 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4016 RT_NOREF(fSetFlags);
4017
4018#elif defined(RT_ARCH_ARM64)
4019 if (!fSetFlags)
4020 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4021 else
4022 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4023
4024#else
4025# error "Port me"
4026#endif
4027 return off;
4028}
4029
4030
4031/**
4032 * Emits code for AND'ing two 64-bit GPRs.
4033 *
4034 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4035 * and ARM64 hosts.
4036 */
4037DECL_INLINE_THROW(uint32_t)
4038iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4039{
4040#if defined(RT_ARCH_AMD64)
4041 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4042#elif defined(RT_ARCH_ARM64)
4043 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4044#else
4045# error "Port me"
4046#endif
4047 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4048 return off;
4049}
4050
4051
4052/**
4053 * Emits code for AND'ing two 32-bit GPRs.
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4054 */
4055DECL_FORCE_INLINE(uint32_t)
4056iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4057{
4058#if defined(RT_ARCH_AMD64)
4059 /* and Gv, Ev */
4060 if (iGprDst >= 8 || iGprSrc >= 8)
4061 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4062 pCodeBuf[off++] = 0x23;
4063 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4064 RT_NOREF(fSetFlags);
4065
4066#elif defined(RT_ARCH_ARM64)
4067 if (!fSetFlags)
4068 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4069 else
4070 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4071
4072#else
4073# error "Port me"
4074#endif
4075 return off;
4076}
4077
4078
4079/**
4080 * Emits code for AND'ing two 32-bit GPRs.
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4081 */
4082DECL_INLINE_THROW(uint32_t)
4083iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4084{
4085#if defined(RT_ARCH_AMD64)
4086 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4087#elif defined(RT_ARCH_ARM64)
4088 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4089#else
4090# error "Port me"
4091#endif
4092 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4093 return off;
4094}
4095
4096
4097/**
4098 * Emits code for AND'ing a 64-bit GPR with a constant.
4099 *
4100 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4101 * and ARM64 hosts.
4102 */
4103DECL_INLINE_THROW(uint32_t)
4104iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4105{
4106#if defined(RT_ARCH_AMD64)
4107 if ((int64_t)uImm == (int8_t)uImm)
4108 {
4109 /* and Ev, imm8 */
4110 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4111 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4112 pbCodeBuf[off++] = 0x83;
4113 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4114 pbCodeBuf[off++] = (uint8_t)uImm;
4115 }
4116 else if ((int64_t)uImm == (int32_t)uImm)
4117 {
4118 /* and Ev, imm32 */
4119 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4120 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4121 pbCodeBuf[off++] = 0x81;
4122 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4123 pbCodeBuf[off++] = RT_BYTE1(uImm);
4124 pbCodeBuf[off++] = RT_BYTE2(uImm);
4125 pbCodeBuf[off++] = RT_BYTE3(uImm);
4126 pbCodeBuf[off++] = RT_BYTE4(uImm);
4127 }
4128 else
4129 {
4130 /* Use temporary register for the 64-bit immediate. */
4131 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4132 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4133 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4134 }
4135 RT_NOREF(fSetFlags);
4136
4137#elif defined(RT_ARCH_ARM64)
4138 uint32_t uImmR = 0;
4139 uint32_t uImmNandS = 0;
4140 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4141 {
4142 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4143 if (!fSetFlags)
4144 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4145 else
4146 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4147 }
4148 else
4149 {
4150 /* Use temporary register for the 64-bit immediate. */
4151 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4152 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4153 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4154 }
4155
4156#else
4157# error "Port me"
4158#endif
4159 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4160 return off;
4161}
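
/*
 * A note on the ARM64 paths above: Armv8A64ConvertMask64ToImmRImmS only
 * succeeds for the architectural bitmask immediates, i.e. a rotated run of
 * contiguous ones replicated over 2..64-bit elements.  For example 0xffffff00
 * and 0x0000ffff0000ffff should encode, whereas a scattered value like 0x12345
 * cannot and has to take the temporary register fallback.  (Examples picked by
 * hand; verify against the converter when in doubt.)
 */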
4162
4163
4164/**
4165 * Emits code for AND'ing a 32-bit GPR with a constant.
4166 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4167 * @note For ARM64 this only supports @a uImm values that can be expressed using
4168 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4169 * make sure this is possible!
4170 */
4171DECL_FORCE_INLINE_THROW(uint32_t)
4172iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4173{
4174#if defined(RT_ARCH_AMD64)
4175 /* and Ev, imm */
4176 if (iGprDst >= 8)
4177 pCodeBuf[off++] = X86_OP_REX_B;
4178 if ((int32_t)uImm == (int8_t)uImm)
4179 {
4180 pCodeBuf[off++] = 0x83;
4181 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4182 pCodeBuf[off++] = (uint8_t)uImm;
4183 }
4184 else
4185 {
4186 pCodeBuf[off++] = 0x81;
4187 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4188 pCodeBuf[off++] = RT_BYTE1(uImm);
4189 pCodeBuf[off++] = RT_BYTE2(uImm);
4190 pCodeBuf[off++] = RT_BYTE3(uImm);
4191 pCodeBuf[off++] = RT_BYTE4(uImm);
4192 }
4193 RT_NOREF(fSetFlags);
4194
4195#elif defined(RT_ARCH_ARM64)
4196 uint32_t uImmR = 0;
4197 uint32_t uImmNandS = 0;
4198 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4199 {
4200 if (!fSetFlags)
4201 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4202 else
4203 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4204 }
4205 else
4206# ifdef IEM_WITH_THROW_CATCH
4207 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4208# else
4209 AssertReleaseFailedStmt(off = UINT32_MAX);
4210# endif
4211
4212#else
4213# error "Port me"
4214#endif
4215 return off;
4216}
4217
4218
4219/**
4220 * Emits code for AND'ing a 32-bit GPR with a constant.
4221 *
4222 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4223 */
4224DECL_INLINE_THROW(uint32_t)
4225iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4226{
4227#if defined(RT_ARCH_AMD64)
4228 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4229
4230#elif defined(RT_ARCH_ARM64)
4231 uint32_t uImmR = 0;
4232 uint32_t uImmNandS = 0;
4233 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4234 {
4235 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4236 if (!fSetFlags)
4237 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4238 else
4239 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4240 }
4241 else
4242 {
4243 /* Use temporary register for the 64-bit immediate. */
4244 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4245 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4246 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4247 }
4248
4249#else
4250# error "Port me"
4251#endif
4252 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4253 return off;
4254}
4255
4256
4257/**
4258 * Emits code for AND'ing a 64-bit GPR with a constant, storing the result in @a iGprDst.
4259 *
4260 * @note For ARM64 any complicated immediate w/o an AND/ANDS compatible
4261 *       encoding will assert / throw an exception if @a iGprDst and @a iGprSrc are
4262 *       the same.
4263 */
4264DECL_FORCE_INLINE_THROW(uint32_t)
4265iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4266 bool fSetFlags = false)
4267{
4268#if defined(RT_ARCH_AMD64)
4269 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4270 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4271 RT_NOREF(fSetFlags);
4272
4273#elif defined(RT_ARCH_ARM64)
4274 uint32_t uImmR = 0;
4275 uint32_t uImmNandS = 0;
4276 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4277 {
4278 if (!fSetFlags)
4279 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4280 else
4281 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4282 }
4283 else if (iGprDst != iGprSrc)
4284 {
4285 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4286 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4287 }
4288 else
4289# ifdef IEM_WITH_THROW_CATCH
4290 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4291# else
4292 AssertReleaseFailedStmt(off = UINT32_MAX);
4293# endif
4294
4295#else
4296# error "Port me"
4297#endif
4298 return off;
4299}
4300
4301/**
4302 * Emits code for AND'ing a 32-bit GPR with a constant, storing the result in @a iGprDst.
4303 *
4304 * @note For ARM64 any complicated immediate w/o an AND/ANDS compatible
4305 *       encoding will assert / throw an exception if @a iGprDst and @a iGprSrc are
4306 * the same.
4307 *
4308 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4309 */
4310DECL_FORCE_INLINE_THROW(uint32_t)
4311iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4312 bool fSetFlags = false)
4313{
4314#if defined(RT_ARCH_AMD64)
4315 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4316 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4317 RT_NOREF(fSetFlags);
4318
4319#elif defined(RT_ARCH_ARM64)
4320 uint32_t uImmR = 0;
4321 uint32_t uImmNandS = 0;
4322 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4323 {
4324 if (!fSetFlags)
4325 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4326 else
4327 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4328 }
4329 else if (iGprDst != iGprSrc)
4330 {
4331 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4332 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4333 }
4334 else
4335# ifdef IEM_WITH_THROW_CATCH
4336 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4337# else
4338 AssertReleaseFailedStmt(off = UINT32_MAX);
4339# endif
4340
4341#else
4342# error "Port me"
4343#endif
4344 return off;
4345}
4346
4347
4348/**
4349 * Emits code for OR'ing two 64-bit GPRs.
4350 */
4351DECL_FORCE_INLINE(uint32_t)
4352iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4353{
4354#if defined(RT_ARCH_AMD64)
4355 /* or Gv, Ev */
4356 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4357 pCodeBuf[off++] = 0x0b;
4358 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4359
4360#elif defined(RT_ARCH_ARM64)
4361 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
4362
4363#else
4364# error "Port me"
4365#endif
4366 return off;
4367}
4368
4369
4370/**
4371 * Emits code for OR'ing two 64-bit GPRs.
4372 */
4373DECL_INLINE_THROW(uint32_t)
4374iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4375{
4376#if defined(RT_ARCH_AMD64)
4377 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4378#elif defined(RT_ARCH_ARM64)
4379 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4380#else
4381# error "Port me"
4382#endif
4383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4384 return off;
4385}
4386
4387
4388/**
4389 * Emits code for OR'ing two 32-bit GPRs.
4390 * @note Bits 63:32 of the destination GPR will be cleared.
4391 */
4392DECL_FORCE_INLINE(uint32_t)
4393iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4394{
4395#if defined(RT_ARCH_AMD64)
4396 /* or Gv, Ev */
4397 if (iGprDst >= 8 || iGprSrc >= 8)
4398 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4399 pCodeBuf[off++] = 0x0b;
4400 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4401
4402#elif defined(RT_ARCH_ARM64)
4403 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4404
4405#else
4406# error "Port me"
4407#endif
4408 return off;
4409}
4410
4411
4412/**
4413 * Emits code for OR'ing two 32-bit GPRs.
4414 * @note Bits 63:32 of the destination GPR will be cleared.
4415 */
4416DECL_INLINE_THROW(uint32_t)
4417iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4418{
4419#if defined(RT_ARCH_AMD64)
4420 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4421#elif defined(RT_ARCH_ARM64)
4422 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4423#else
4424# error "Port me"
4425#endif
4426 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4427 return off;
4428}
4429
4430
4431/**
4432 * Emits code for OR'ing a 64-bit GPR with a constant.
4433 */
4434DECL_INLINE_THROW(uint32_t)
4435iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
4436{
4437#if defined(RT_ARCH_AMD64)
4438 if ((int64_t)uImm == (int8_t)uImm)
4439 {
4440 /* or Ev, imm8 */
4441 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4442 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4443 pbCodeBuf[off++] = 0x83;
4444 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4445 pbCodeBuf[off++] = (uint8_t)uImm;
4446 }
4447 else if ((int64_t)uImm == (int32_t)uImm)
4448 {
4449 /* or Ev, imm32 */
4450 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4451 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4452 pbCodeBuf[off++] = 0x81;
4453 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4454 pbCodeBuf[off++] = RT_BYTE1(uImm);
4455 pbCodeBuf[off++] = RT_BYTE2(uImm);
4456 pbCodeBuf[off++] = RT_BYTE3(uImm);
4457 pbCodeBuf[off++] = RT_BYTE4(uImm);
4458 }
4459 else
4460 {
4461 /* Use temporary register for the 64-bit immediate. */
4462 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4463 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
4464 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4465 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4466 }
4467
4468#elif defined(RT_ARCH_ARM64)
4469 uint32_t uImmR = 0;
4470 uint32_t uImmNandS = 0;
4471 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4472 {
4473 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4474 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
4475 }
4476 else
4477 {
4478 /* Use temporary register for the 64-bit immediate. */
4479 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4480 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
4481 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4482 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4483 }
4484
4485#else
4486# error "Port me"
4487#endif
4488 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4489 return off;
4490}
4491
4492
4493/**
4494 * Emits code for OR'ing a 32-bit GPR with a constant.
4495 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4496 * @note For ARM64 this only supports @a uImm values that can be expressed using
4497 * the two 6-bit immediates of the OR instructions. The caller must make
4498 * sure this is possible!
4499 */
4500DECL_FORCE_INLINE_THROW(uint32_t)
4501iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4502{
4503#if defined(RT_ARCH_AMD64)
4504 /* or Ev, imm */
4505 if (iGprDst >= 8)
4506 pCodeBuf[off++] = X86_OP_REX_B;
4507 if ((int32_t)uImm == (int8_t)uImm)
4508 {
4509 pCodeBuf[off++] = 0x83;
4510 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4511 pCodeBuf[off++] = (uint8_t)uImm;
4512 }
4513 else
4514 {
4515 pCodeBuf[off++] = 0x81;
4516 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4517 pCodeBuf[off++] = RT_BYTE1(uImm);
4518 pCodeBuf[off++] = RT_BYTE2(uImm);
4519 pCodeBuf[off++] = RT_BYTE3(uImm);
4520 pCodeBuf[off++] = RT_BYTE4(uImm);
4521 }
4522
4523#elif defined(RT_ARCH_ARM64)
4524 uint32_t uImmR = 0;
4525 uint32_t uImmNandS = 0;
4526 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4527 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4528 else
4529# ifdef IEM_WITH_THROW_CATCH
4530 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4531# else
4532 AssertReleaseFailedStmt(off = UINT32_MAX);
4533# endif
4534
4535#else
4536# error "Port me"
4537#endif
4538 return off;
4539}
4540
4541
4542/**
4543 * Emits code for OR'ing a 32-bit GPR with a constant.
4544 *
4545 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4546 */
4547DECL_INLINE_THROW(uint32_t)
4548iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4549{
4550#if defined(RT_ARCH_AMD64)
4551 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
4552
4553#elif defined(RT_ARCH_ARM64)
4554 uint32_t uImmR = 0;
4555 uint32_t uImmNandS = 0;
4556 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4557 {
4558 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4559 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4560 }
4561 else
4562 {
4563 /* Use temporary register for the 64-bit immediate. */
4564 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4565 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
4566 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4567 }
4568
4569#else
4570# error "Port me"
4571#endif
4572 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4573 return off;
4574}
4575
4576
4577/**
4578 * Emits code for XOR'ing two 64-bit GPRs.
4579 */
4580DECL_INLINE_THROW(uint32_t)
4581iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4582{
4583#if defined(RT_ARCH_AMD64)
4584    /* xor Gv, Ev */
4585 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4586 pCodeBuf[off++] = 0x33;
4587 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4588
4589#elif defined(RT_ARCH_ARM64)
4590 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
4591
4592#else
4593# error "Port me"
4594#endif
4595 return off;
4596}
4597
4598
4599/**
4600 * Emits code for XOR'ing two 64-bit GPRs.
4601 */
4602DECL_INLINE_THROW(uint32_t)
4603iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4604{
4605#if defined(RT_ARCH_AMD64)
4606 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4607#elif defined(RT_ARCH_ARM64)
4608 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4609#else
4610# error "Port me"
4611#endif
4612 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4613 return off;
4614}
4615
4616
4617/**
4618 * Emits code for XOR'ing two 32-bit GPRs.
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4619 */
4620DECL_INLINE_THROW(uint32_t)
4621iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4622{
4623#if defined(RT_ARCH_AMD64)
4624    /* xor Gv, Ev */
4625 if (iGprDst >= 8 || iGprSrc >= 8)
4626 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4627 pCodeBuf[off++] = 0x33;
4628 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4629
4630#elif defined(RT_ARCH_ARM64)
4631 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4632
4633#else
4634# error "Port me"
4635#endif
4636 return off;
4637}
4638
4639
4640/**
4641 * Emits code for XOR'ing two 32-bit GPRs.
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4642 */
4643DECL_INLINE_THROW(uint32_t)
4644iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4645{
4646#if defined(RT_ARCH_AMD64)
4647 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4648#elif defined(RT_ARCH_ARM64)
4649 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4650#else
4651# error "Port me"
4652#endif
4653 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4654 return off;
4655}
4656
4657
4658/**
4659 * Emits code for XOR'ing a 32-bit GPR with a constant.
4660 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4661 * @note For ARM64 this only supports @a uImm values that can be expressed using
4662 * the two 6-bit immediates of the EOR instructions. The caller must make
4663 * sure this is possible!
4664 */
4665DECL_FORCE_INLINE_THROW(uint32_t)
4666iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4667{
4668#if defined(RT_ARCH_AMD64)
4669    /* xor Ev, imm */
4670 if (iGprDst >= 8)
4671 pCodeBuf[off++] = X86_OP_REX_B;
4672 if ((int32_t)uImm == (int8_t)uImm)
4673 {
4674 pCodeBuf[off++] = 0x83;
4675 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
4676 pCodeBuf[off++] = (uint8_t)uImm;
4677 }
4678 else
4679 {
4680 pCodeBuf[off++] = 0x81;
4681 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
4682 pCodeBuf[off++] = RT_BYTE1(uImm);
4683 pCodeBuf[off++] = RT_BYTE2(uImm);
4684 pCodeBuf[off++] = RT_BYTE3(uImm);
4685 pCodeBuf[off++] = RT_BYTE4(uImm);
4686 }
4687
4688#elif defined(RT_ARCH_ARM64)
4689 uint32_t uImmR = 0;
4690 uint32_t uImmNandS = 0;
4691 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4692 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4693 else
4694# ifdef IEM_WITH_THROW_CATCH
4695 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4696# else
4697 AssertReleaseFailedStmt(off = UINT32_MAX);
4698# endif
4699
4700#else
4701# error "Port me"
4702#endif
4703 return off;
4704}
4705
4706
4707/*********************************************************************************************************************************
4708* Shifting *
4709*********************************************************************************************************************************/
4710
4711/**
4712 * Emits code for shifting a GPR a fixed number of bits to the left.
4713 */
4714DECL_FORCE_INLINE(uint32_t)
4715iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
4716{
4717 Assert(cShift > 0 && cShift < 64);
4718
4719#if defined(RT_ARCH_AMD64)
4720 /* shl dst, cShift */
4721 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4722 if (cShift != 1)
4723 {
4724 pCodeBuf[off++] = 0xc1;
4725 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4726 pCodeBuf[off++] = cShift;
4727 }
4728 else
4729 {
4730 pCodeBuf[off++] = 0xd1;
4731 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4732 }
4733
4734#elif defined(RT_ARCH_ARM64)
4735 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
4736
4737#else
4738# error "Port me"
4739#endif
4740 return off;
4741}
4742
4743
4744/**
4745 * Emits code for shifting a GPR a fixed number of bits to the left.
4746 */
4747DECL_INLINE_THROW(uint32_t)
4748iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
4749{
4750#if defined(RT_ARCH_AMD64)
4751 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
4752#elif defined(RT_ARCH_ARM64)
4753 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
4754#else
4755# error "Port me"
4756#endif
4757 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4758 return off;
4759}
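
/*
 * Hand-assembled samples (unverified): iGprDst=0, cShift=4 gives 48 C1 E0 04,
 * i.e. 'shl rax, 4', while cShift=1 takes the short form 48 D1 E0.  The ARM64
 * LSL is an alias of UBFM, which Armv8A64MkInstrLslImm encodes for us.
 */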
4760
4761
4762/**
4763 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4764 */
4765DECL_FORCE_INLINE(uint32_t)
4766iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
4767{
4768 Assert(cShift > 0 && cShift < 32);
4769
4770#if defined(RT_ARCH_AMD64)
4771 /* shl dst, cShift */
4772 if (iGprDst >= 8)
4773 pCodeBuf[off++] = X86_OP_REX_B;
4774 if (cShift != 1)
4775 {
4776 pCodeBuf[off++] = 0xc1;
4777 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4778 pCodeBuf[off++] = cShift;
4779 }
4780 else
4781 {
4782 pCodeBuf[off++] = 0xd1;
4783 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4784 }
4785
4786#elif defined(RT_ARCH_ARM64)
4787 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
4788
4789#else
4790# error "Port me"
4791#endif
4792 return off;
4793}
4794
4795
4796/**
4797 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
4798 */
4799DECL_INLINE_THROW(uint32_t)
4800iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
4801{
4802#if defined(RT_ARCH_AMD64)
4803 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
4804#elif defined(RT_ARCH_ARM64)
4805 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
4806#else
4807# error "Port me"
4808#endif
4809 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4810 return off;
4811}
4812
4813
4814/**
4815 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
4816 */
4817DECL_FORCE_INLINE(uint32_t)
4818iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
4819{
4820 Assert(cShift > 0 && cShift < 64);
4821
4822#if defined(RT_ARCH_AMD64)
4823 /* shr dst, cShift */
4824 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4825 if (cShift != 1)
4826 {
4827 pCodeBuf[off++] = 0xc1;
4828 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4829 pCodeBuf[off++] = cShift;
4830 }
4831 else
4832 {
4833 pCodeBuf[off++] = 0xd1;
4834 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4835 }
4836
4837#elif defined(RT_ARCH_ARM64)
4838 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
4839
4840#else
4841# error "Port me"
4842#endif
4843 return off;
4844}
4845
4846
4847/**
4848 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
4849 */
4850DECL_INLINE_THROW(uint32_t)
4851iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
4852{
4853#if defined(RT_ARCH_AMD64)
4854 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
4855#elif defined(RT_ARCH_ARM64)
4856 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
4857#else
4858# error "Port me"
4859#endif
4860 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4861 return off;
4862}
4863
4864
4865/**
4866 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
4867 * right.
4868 */
4869DECL_FORCE_INLINE(uint32_t)
4870iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
4871{
4872 Assert(cShift > 0 && cShift < 32);
4873
4874#if defined(RT_ARCH_AMD64)
4875 /* shr dst, cShift */
4876 if (iGprDst >= 8)
4877 pCodeBuf[off++] = X86_OP_REX_B;
4878 if (cShift != 1)
4879 {
4880 pCodeBuf[off++] = 0xc1;
4881 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4882 pCodeBuf[off++] = cShift;
4883 }
4884 else
4885 {
4886 pCodeBuf[off++] = 0xd1;
4887 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4888 }
4889
4890#elif defined(RT_ARCH_ARM64)
4891 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
4892
4893#else
4894# error "Port me"
4895#endif
4896 return off;
4897}
4898
4899
4900/**
4901 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
4902 * right.
4903 */
4904DECL_INLINE_THROW(uint32_t)
4905iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
4906{
4907#if defined(RT_ARCH_AMD64)
4908 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
4909#elif defined(RT_ARCH_ARM64)
4910 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
4911#else
4912# error "Port me"
4913#endif
4914 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4915 return off;
4916}
4917
4918
4919/**
4920 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
4921 * right and assigning it to a different GPR.
4922 */
4923DECL_INLINE_THROW(uint32_t)
4924iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
4925{
4926 Assert(cShift > 0); Assert(cShift < 32);
4927#if defined(RT_ARCH_AMD64)
4928 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4929 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
4930
4931#elif defined(RT_ARCH_ARM64)
4932 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
4933
4934#else
4935# error "Port me"
4936#endif
4937 return off;
4938}
4939

/**
 * Emits code for rotating a GPR a fixed number of bits to the left.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
    Assert(cShift > 0 && cShift < 64);

#if defined(RT_ARCH_AMD64)
    /* rol dst, cShift */
    pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
    if (cShift != 1)
    {
        pCodeBuf[off++] = 0xc1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
        pCodeBuf[off++] = cShift;
    }
    else
    {
        pCodeBuf[off++] = 0xd1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
    }

#elif defined(RT_ARCH_ARM64)
    /* ARM64 only has rotate-right, so rotate right by the complemented count. */
    pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, 64 - cShift);

#else
# error "Port me"
#endif
    return off;
}

/**
 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
 * @note On ARM64, bits 63:32 of the destination GPR are cleared; on AMD64,
 *       bits 63:16 are left unchanged.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#if defined(RT_ARCH_AMD64)
    /*
     * There is no bswap r16 on x86 (the encoding exists but does not work).
     * So just use a rol (gcc -O2 is doing that).
     *
     *    rol r16, 0x8
     */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    pbCodeBuf[off++] = 0xc1;
    pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
    pbCodeBuf[off++] = 0x08;
#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);

    pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
#else
# error "Port me"
#endif

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
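/*
 * For reference, the 'rol r16, 8' trick above produces the same low 16 bits as
 * a plain C byte swap would - a minimal sanity sketch (the helper name is made
 * up, it is not part of the emitter API):
 *
 * @code
 *    static uint16_t iemNativeRefBswapU16(uint16_t u16) // hypothetical reference helper
 *    {
 *        return (uint16_t)((u16 << 8) | (u16 >> 8));
 *    }
 * @endcode
 */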


/**
 * Emits code for reversing the byte order in a 32-bit GPR.
 * @note Bits 63:32 of the destination GPR will be cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#if defined(RT_ARCH_AMD64)
    /* bswap r32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);

    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);

    pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
#else
# error "Port me"
#endif

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for reversing the byte order in a 64-bit GPR.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#if defined(RT_ARCH_AMD64)
    /* bswap r64 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);

    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
    else
        pbCodeBuf[off++] = X86_OP_REX_W;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);

    pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
#else
# error "Port me"
#endif

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/*********************************************************************************************************************************
*   Compare and Testing                                                                                                          *
*********************************************************************************************************************************/


#ifdef RT_ARCH_ARM64
/**
 * Emits an ARM64 compare instruction.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
                      bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
{
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
                                                  f64Bit, true /*fSetFlags*/, cShift, enmShift);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
#endif


/**
 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
 * with conditional instruction.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
{
#ifdef RT_ARCH_AMD64
    /* cmp Gv, Ev */
    pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
    pCodeBuf[off++] = 0x3b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
 * with conditional instruction.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
 * with conditional instruction.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
{
#ifdef RT_ARCH_AMD64
    /* cmp Gv, Ev */
    if (iGprLeft >= 8 || iGprRight >= 8)
        pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
    pCodeBuf[off++] = 0x3b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
 * with conditional instruction.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a compare of a 64-bit GPR with a constant value, setting status
 * flags/whatever for use with conditional instruction.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
{
#ifdef RT_ARCH_AMD64
    if (uImm <= UINT32_C(0x7f)) /* the imm8 of cmp Ev, Ib is sign-extended */
    {
        /* cmp Ev, Ib */
        uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
        pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
        pbCodeBuf[off++] = 0x83;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        pbCodeBuf[off++] = (uint8_t)uImm;
    }
    else if ((int64_t)uImm == (int32_t)uImm)
    {
        /* cmp Ev, imm */
        uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
        pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
        pbCodeBuf[off++] = 0x81;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
        pbCodeBuf[off++] = RT_BYTE1(uImm);
        pbCodeBuf[off++] = RT_BYTE2(uImm);
        pbCodeBuf[off++] = RT_BYTE3(uImm);
        pbCodeBuf[off++] = RT_BYTE4(uImm);
    }
    else
    {
        /* Use temporary register for the immediate. */
        uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are clever things we can do here... */
    if (uImm < _4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
                                                         true /*64Bit*/, true /*fSetFlags*/);
    }
    else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
                                                         true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
    }
    else
    {
        /* Use temporary register for the immediate. */
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#else
# error "Port me!"
#endif

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
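/*
 * The two ARM64 fast paths above correspond to the CMP (immediate) encodings:
 * a 12-bit unsigned immediate, optionally shifted left by 12.  A predicate for
 * "can this constant be compared without a temporary register" thus looks as
 * follows - an illustrative sketch, the helper name is made up:
 *
 * @code
 *    static bool iemNativeIsCmpImmEncodable(uint64_t uImm) // hypothetical
 *    {
 *        return uImm < _4K                                              // imm12
 *            || (uImm < RT_BIT_32(12 + 12) && (uImm & (_4K - 1)) == 0); // imm12 << 12
 *    }
 * @endcode
 */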


/**
 * Emits a compare of a 32-bit GPR with a constant value, setting status
 * flags/whatever for use with conditional instruction.
 *
 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
 *       shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
 *       bits all zero).  Will release assert or throw exception if the caller
 *       violates this restriction.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
{
#ifdef RT_ARCH_AMD64
    if (iGprLeft >= 8)
        pCodeBuf[off++] = X86_OP_REX_B;
    if (uImm <= UINT32_C(0x7f))
    {
        /* cmp Ev, Ib */
        pCodeBuf[off++] = 0x83;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        pCodeBuf[off++] = (uint8_t)uImm;
    }
    else
    {
        /* cmp Ev, imm */
        pCodeBuf[off++] = 0x81;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm);
        pCodeBuf[off++] = RT_BYTE2(uImm);
        pCodeBuf[off++] = RT_BYTE3(uImm);
        pCodeBuf[off++] = RT_BYTE4(uImm);
    }

#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are clever things we can do here... */
    if (uImm < _4K)
        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
                                                      false /*64Bit*/, true /*fSetFlags*/);
    else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
                                                      false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a compare of a 32-bit GPR with a constant value, setting status
 * flags/whatever for use with conditional instruction.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);

#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are clever things we can do here... */
    if (uImm < _4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
                                                         false /*64Bit*/, true /*fSetFlags*/);
    }
    else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
                                                         false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
    }
    else
    {
        /* Use temporary register for the immediate. */
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#else
# error "Port me!"
#endif

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a compare of a 16-bit GPR with a constant value, setting status
 * flags/whatever for use with conditional instruction.
 *
 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
 *       16-bit value from @a iGprLeft.
 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
 *       shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
 *       bits all zero).  Will release assert or throw exception if the caller
 *       violates this restriction.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
                               uint8_t idxTmpReg = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGprLeft >= 8)
        pCodeBuf[off++] = X86_OP_REX_B;
    if (uImm <= UINT32_C(0x7f))
    {
        /* cmp Ev, Ib */
        pCodeBuf[off++] = 0x83;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        pCodeBuf[off++] = (uint8_t)uImm;
    }
    else
    {
        /* cmp Ev, imm */
        pCodeBuf[off++] = 0x81;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm);
        pCodeBuf[off++] = RT_BYTE2(uImm);
    }
    RT_NOREF(idxTmpReg);

#elif defined(RT_ARCH_ARM64)
# ifdef IEM_WITH_THROW_CATCH
    AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
    AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
# endif
    Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
    off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a compare of a 16-bit GPR with a constant value, setting status
 * flags/whatever for use with conditional instruction.
 *
 * @note ARM64: Helper register is required (idxTmpReg).
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
                             uint8_t idxTmpReg = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}



/*********************************************************************************************************************************
*   Branching                                                                                                                    *
*********************************************************************************************************************************/

/**
 * Emits a JMP rel32 / B imm26 to the given label.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
{
    Assert(idxLabel < pReNative->cLabels);

#ifdef RT_ARCH_AMD64
    if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
    {
        uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
        if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
        {
            pCodeBuf[off++] = 0xeb; /* jmp rel8 */
            pCodeBuf[off++] = (uint8_t)offRel;
        }
        else
        {
            offRel -= 3;
            pCodeBuf[off++] = 0xe9; /* jmp rel32 */
            pCodeBuf[off++] = RT_BYTE1(offRel);
            pCodeBuf[off++] = RT_BYTE2(offRel);
            pCodeBuf[off++] = RT_BYTE3(offRel);
            pCodeBuf[off++] = RT_BYTE4(offRel);
        }
    }
    else
    {
        pCodeBuf[off++] = 0xe9; /* jmp rel32 */
        iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
        pCodeBuf[off++] = 0xfe;
        pCodeBuf[off++] = 0xff;
        pCodeBuf[off++] = 0xff;
        pCodeBuf[off++] = 0xff;
    }
    pCodeBuf[off++] = 0xcc; /* int3 poison */

#elif defined(RT_ARCH_ARM64)
    if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
        pCodeBuf[off++] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
    else
    {
        iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
        pCodeBuf[off++] = Armv8A64MkInstrB(-1);
    }

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a JMP rel32 / B imm26 to the given label.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a JMP rel32 / B imm26 to a new undefined label.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
{
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
    return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
}
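/*
 * Typical forward-branch flow with the label based jump emitters: create an
 * undefined label, jump to it, emit the code being skipped, then define the
 * label at the target so the recorded fixups get resolved.  A minimal sketch;
 * iemNativeLabelDefine() and the label type name are assumptions based on the
 * iemNativeLabelCreate() convention used above:
 *
 * @code
 *    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX, 0);
 *    off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
 *    // ... emit the instructions being jumped over ...
 *    iemNativeLabelDefine(pReNative, idxLabel, off); // resolves the recorded fixups
 * @endcode
 */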

/** Condition type. */
#ifdef RT_ARCH_AMD64
typedef enum IEMNATIVEINSTRCOND : uint8_t
{
    kIemNativeInstrCond_o = 0,
    kIemNativeInstrCond_no,
    kIemNativeInstrCond_c,
    kIemNativeInstrCond_nc,
    kIemNativeInstrCond_e,
    kIemNativeInstrCond_ne,
    kIemNativeInstrCond_be,
    kIemNativeInstrCond_nbe,
    kIemNativeInstrCond_s,
    kIemNativeInstrCond_ns,
    kIemNativeInstrCond_p,
    kIemNativeInstrCond_np,
    kIemNativeInstrCond_l,
    kIemNativeInstrCond_nl,
    kIemNativeInstrCond_le,
    kIemNativeInstrCond_nle
} IEMNATIVEINSTRCOND;
#elif defined(RT_ARCH_ARM64)
typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
# define kIemNativeInstrCond_o      todo_conditional_codes
# define kIemNativeInstrCond_no     todo_conditional_codes
# define kIemNativeInstrCond_c      todo_conditional_codes
# define kIemNativeInstrCond_nc     todo_conditional_codes
# define kIemNativeInstrCond_e      kArmv8InstrCond_Eq
# define kIemNativeInstrCond_ne     kArmv8InstrCond_Ne
# define kIemNativeInstrCond_be     kArmv8InstrCond_Ls
# define kIemNativeInstrCond_nbe    kArmv8InstrCond_Hi
# define kIemNativeInstrCond_s      todo_conditional_codes
# define kIemNativeInstrCond_ns     todo_conditional_codes
# define kIemNativeInstrCond_p      todo_conditional_codes
# define kIemNativeInstrCond_np     todo_conditional_codes
# define kIemNativeInstrCond_l      kArmv8InstrCond_Lt
# define kIemNativeInstrCond_nl     kArmv8InstrCond_Ge
# define kIemNativeInstrCond_le     kArmv8InstrCond_Le
# define kIemNativeInstrCond_nle    kArmv8InstrCond_Gt
#else
# error "Port me!"
#endif


/**
 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
                          uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
{
    Assert(idxLabel < pReNative->cLabels);

    uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
#ifdef RT_ARCH_AMD64
    if (offLabel >= off)
    {
        /* jcc rel32 */
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
        iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
        pCodeBuf[off++] = 0x00;
        pCodeBuf[off++] = 0x00;
        pCodeBuf[off++] = 0x00;
        pCodeBuf[off++] = 0x00;
    }
    else
    {
        int32_t offDisp = offLabel - (off + 2);
        if ((int8_t)offDisp == offDisp)
        {
            /* jcc rel8 */
            pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
            pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        }
        else
        {
            /* jcc rel32 */
            offDisp -= 4;
            pCodeBuf[off++] = 0x0f;
            pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
            pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
            pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
            pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
            pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
        }
    }

#elif defined(RT_ARCH_ARM64)
    if (offLabel >= off)
    {
        iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
        pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
    }
    else
    {
        Assert(off - offLabel <= 0x3ffffU);
        pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
    }

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a Jcc rel32 / B.cc imm19 to a new label.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
{
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
    return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
}


/**
 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
#else
# error "Port me!"
#endif
}

/**
 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                      IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
#else
# error "Port me!"
#endif
}


/**
 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
#else
# error "Port me!"
#endif
}

/**
 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                       IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
#else
# error "Port me!"
#endif
}


/**
 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
#else
# error "Port me!"
#endif
}

/**
 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                       IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
#else
# error "Port me!"
#endif
}


/**
 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
#else
# error "Port me!"
#endif
}

/**
 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                      IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
#else
# error "Port me!"
#endif
}


/**
 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
#else
# error "Port me!"
#endif
}

/**
 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                      IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
#else
# error "Port me!"
#endif
}


/**
 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
 *
 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
 *
 *       Only use hardcoded jumps forward when emitting for exactly one
 *       platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
 *       the right target address on all platforms!
 *
 *       Please also note that on x86 it is necessary to pass off + 256 or
 *       higher for @a offTarget if one believes the intervening code is more
 *       than 127 bytes long.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
{
#ifdef RT_ARCH_AMD64
    /* jcc rel8 / rel32 */
    int32_t offDisp = (int32_t)(offTarget - (off + 2));
    if (offDisp < 128 && offDisp >= -128)
    {
        pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
        pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
    }
    else
    {
        offDisp -= 4;
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
        pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
 *
 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
 *
 *       Only use hardcoded jumps forward when emitting for exactly one
 *       platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
 *       the right target address on all platforms!
 *
 *       Please also note that on x86 it is necessary to pass off + 256 or
 *       higher for @a offTarget if one believes the intervening code is more
 *       than 127 bytes long.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
 *
 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
#else
# error "Port me!"
#endif
}


/**
 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
 *
 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
#else
# error "Port me!"
#endif
}


/**
 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
 *
 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
#else
# error "Port me!"
#endif
}


/**
 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
 *
 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
{
#ifdef RT_ARCH_AMD64
    return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
#elif defined(RT_ARCH_ARM64)
    return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
#else
# error "Port me!"
#endif
}


/**
 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
 *
 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
{
#ifdef RT_ARCH_AMD64
    /* jmp rel8 or rel32 */
    int32_t offDisp = offTarget - (off + 2);
    if (offDisp < 128 && offDisp >= -128)
    {
        pCodeBuf[off++] = 0xeb;
        pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
    }
    else
    {
        offDisp -= 3;
        pCodeBuf[off++] = 0xe9;
        pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrB((int32_t)(offTarget - off));

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
 *
 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Fixes up a conditional jump to a fixed label.
 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
 *      iemNativeEmitJzToFixed, ...
 */
DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
{
#ifdef RT_ARCH_AMD64
    uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
    uint8_t const   bOpcode   = pbCodeBuf[offFixup];
    if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
    {
        pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
        AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
                   IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
    }
    else
    {
        if (bOpcode != 0x0f)
            Assert(bOpcode == 0xe9);
        else
        {
            offFixup += 1;
            Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
        }
        uint32_t const offRel32 = offTarget - (offFixup + 5);
        pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
        pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
        pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
        pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
    }

#elif defined(RT_ARCH_ARM64)
    uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
    if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
    {
        /* B.COND + BC.COND */
        int32_t const offDisp = offTarget - offFixup;
        Assert(offDisp >= -262144 && offDisp < 262144);
        pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
                              | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
    }
    else
    {
        /* B imm26 */
        Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
        int32_t const offDisp = offTarget - offFixup;
        Assert(offDisp >= -33554432 && offDisp < 33554432);
        pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
                              | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
    }

#else
# error "Port me!"
#endif
}
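/*
 * The fixed-displacement emitters pair with iemNativeFixupFixedJump() for
 * forward branches whose target is not yet known: emit the jump against a
 * dummy target, remember the instruction offset, and patch it afterwards.
 * Illustrative sketch (off + 256 forces the rel32 form on x86, as noted in
 * the iemNativeEmitJccToFixed() documentation above):
 *
 * @code
 *    uint32_t const offFixup = off;
 *    off = iemNativeEmitJccToFixed(pReNative, off, off + 256, kIemNativeInstrCond_e);
 *    // ... emit the instructions being jumped over ...
 *    iemNativeFixupFixedJump(pReNative, offFixup, off); // patch in the real target
 * @endcode
 */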


/**
 * Internal helper, don't call directly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
                                           uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
{
    Assert(iBitNo < 64);
#ifdef RT_ARCH_AMD64
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    if (iBitNo < 8)
    {
        /* test Eb, imm8 */
        if (iGprSrc >= 4)
            pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
        pbCodeBuf[off++] = 0xf6;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
        pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
        off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
    }
    else
    {
        /* bt Ev, imm8 */
        if (iBitNo >= 32)
            pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
        else if (iGprSrc >= 8)
            pbCodeBuf[off++] = X86_OP_REX_B;
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0xba;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
        pbCodeBuf[off++] = iBitNo;
        off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
    }

#elif defined(RT_ARCH_ARM64)
    /* Use the TBZ/TBNZ instructions here. */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
    pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);

#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
 * @a iGprSrc.
 *
 * @note On ARM64 the range is only +/-8191 instructions.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                                        uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
{
    return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
}


/**
 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
 * _set_ in @a iGprSrc.
 *
 * @note On ARM64 the range is only +/-8191 instructions.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                                           uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
{
    return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
}
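/*
 * Usage sketch for the single-bit test branches above, e.g. dispatching on one
 * EFLAGS bit (the register and label indexes are illustrative; ZF really is
 * bit 6):
 *
 * @code
 *    // if (uEFlags & X86_EFL_ZF) goto label;
 *    off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxRegEFlags, 6 /*iBitNo=ZF*/, idxLabel);
 * @endcode
 *
 * On ARM64 this becomes a single TBZ/TBNZ, hence the +/-8191 instruction range
 * limit noted above; on AMD64 it is a test/bt followed by a Jcc.
 */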


/**
 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
 * flags accordingly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
{
    Assert(fBits != 0);
#ifdef RT_ARCH_AMD64

    if (fBits >= UINT32_MAX)
    {
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);

        /* test Ev,Gv */
        uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
        pbCodeBuf[off++] = 0x85;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);

        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }
    else if (fBits <= UINT32_MAX)
    {
        /* test Eb, imm8 or test Ev, imm32 */
        uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
        if (fBits <= UINT8_MAX)
        {
            if (iGprSrc >= 4)
                pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
            pbCodeBuf[off++] = 0xf6;
            pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
            pbCodeBuf[off++] = (uint8_t)fBits;
        }
        else
        {
            if (iGprSrc >= 8)
                pbCodeBuf[off++] = X86_OP_REX_B;
            pbCodeBuf[off++] = 0xf7;
            pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
            pbCodeBuf[off++] = RT_BYTE1(fBits);
            pbCodeBuf[off++] = RT_BYTE2(fBits);
            pbCodeBuf[off++] = RT_BYTE3(fBits);
            pbCodeBuf[off++] = RT_BYTE4(fBits);
        }
    }
    /** @todo implement me. */
    else
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));

#elif defined(RT_ARCH_ARM64)
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
    {
        /* ands xzr, iGprSrc, #fBits */
        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
    }
    else
    {
        /* ands xzr, iGprSrc, iTmpReg */
        uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
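/*
 * Whether the single-instruction ANDS immediate form is usable on ARM64 can be
 * probed up front with the same conversion helper the emitter uses - a small
 * sketch:
 *
 * @code
 *    uint32_t uImmR = 0, uImmNandS = 0;
 *    bool const fEncodable = Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR);
 *    // fEncodable: a single 'ands xzr, reg, #fBits' suffices; otherwise the
 *    // mask must be materialized in a temporary register first, as done above.
 * @endcode
 */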


/**
 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
 * @a iGprSrc, setting CPU flags accordingly.
 *
 * @note For ARM64 this only supports @a fBits values that can be expressed
 *       using the two 6-bit immediates of the ANDS instruction.  The caller
 *       must make sure this is possible!
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
{
    Assert(fBits != 0);

#ifdef RT_ARCH_AMD64
    if (fBits <= UINT8_MAX)
    {
        /* test Eb, imm8 */
        if (iGprSrc >= 4)
            pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
        pCodeBuf[off++] = 0xf6;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
        pCodeBuf[off++] = (uint8_t)fBits;
    }
    else
    {
        /* test Ev, imm32 */
        if (iGprSrc >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xf7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
        pCodeBuf[off++] = RT_BYTE1(fBits);
        pCodeBuf[off++] = RT_BYTE2(fBits);
        pCodeBuf[off++] = RT_BYTE3(fBits);
        pCodeBuf[off++] = RT_BYTE4(fBits);
    }

#elif defined(RT_ARCH_ARM64)
    /* ands xzr, src, #fBits */
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
        pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "Port me!"
#endif
    return off;
}



/**
 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
 * @a iGprSrc, setting CPU flags accordingly.
 *
 * @note For ARM64 this only supports @a fBits values that can be expressed
 *       using the two 6-bit immediates of the ANDS instruction.  The caller
 *       must make sure this is possible!
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
{
    Assert(fBits != 0);

#ifdef RT_ARCH_AMD64
    /* test Eb, imm8 */
    if (iGprSrc >= 4)
        pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
    pCodeBuf[off++] = 0xf6;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
    pCodeBuf[off++] = fBits;

#elif defined(RT_ARCH_ARM64)
    /* ands xzr, src, #fBits */
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
        pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
 * @a iGprSrc, setting CPU flags accordingly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
{
    Assert(fBits != 0);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);

#elif defined(RT_ARCH_ARM64)
    /* ands xzr, src, [tmp|#imm] */
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
    }
    else
    {
        /* Use temporary register for the 64-bit immediate. */
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
 * are set in @a iGprSrc.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                   uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
{
    Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));

    off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
    off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);

    return off;
}


/**
 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
 * are set in @a iGprSrc.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                    uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
{
    Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));

    off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
    off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);

    return off;
}


/**
 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as
 * specified by @a fJmpIfNotZero.
 *
 * The operand size is given by @a f64Bit.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
                                                     uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
{
    Assert(idxLabel < pReNative->cLabels);

#ifdef RT_ARCH_AMD64
    /* test reg32,reg32  / test reg64,reg64 */
    if (f64Bit)
        pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
    else if (iGprSrc >= 8)
        pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pCodeBuf[off++] = 0x85;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);

    /* jnz/jz idxLabel */
    off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
                                    fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);

#elif defined(RT_ARCH_ARM64)
    if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
        pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
                                                 iGprSrc, f64Bit);
    else
    {
        iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
        pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
    }

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as
 * specified by @a fJmpIfNotZero.
 *
 * The operand size is given by @a f64Bit.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
                                                   bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
                                                               off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
                                                               off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
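/*
 * Usage sketch for the zero-test branch above: guard against a NULL pointer in
 * a host register (the register and label indexes are illustrative):
 *
 * @code
 *    // if (pvPage == NULL) goto miss label;
 *    off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, idxRegPtr, true /*f64Bit*/,
 *                                                             false /*fJmpIfNotZero*/, idxLabelMiss);
 * @endcode
 *
 * On AMD64 this is a test reg,reg + jz pair; on ARM64 a single CBZ with an
 * imm19 displacement (roughly +/-1MB of code).
 */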
6421
6422
6423/* if (Grp1 == 0) Jmp idxLabel; */
6424
6425/**
6426 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6427 *
6428 * The operand size is given by @a f64Bit.
6429 */
6430DECL_FORCE_INLINE_THROW(uint32_t)
6431iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6432 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6433{
6434 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
6435 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
6436}
6437
6438
6439/**
6440 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6441 *
6442 * The operand size is given by @a f64Bit.
6443 */
6444DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6445 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6446{
6447 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
6448}
6449
6450
6451/**
6452 * Emits code that jumps to a new label if @a iGprSrc is zero.
6453 *
6454 * The operand size is given by @a f64Bit.
6455 */
6456DECL_INLINE_THROW(uint32_t)
6457iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
6458 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6459{
6460 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6461 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
6462}
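
/*
 * Illustrative sketch (register index assumed; kIemNativeLabelType_RaiseGp0
 * is one of the existing label types): raise \#GP(0) when a selector value
 * turned out to be zero.  The fourth parameter selects 32-bit operand size:
 *
 *    off = iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(pReNative, off, idxRegSel,
 *                                                       false, kIemNativeLabelType_RaiseGp0);
 */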
6463
6464
6465/* if (Gpr1 != 0) Jmp idxLabel; */
6466
6467/**
6468 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6469 *
6470 * The operand size is given by @a f64Bit.
6471 */
6472DECL_FORCE_INLINE_THROW(uint32_t)
6473iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6474 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6475{
6476 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
6477 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
6478}
6479
6480
6481/**
6482 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6483 *
6484 * The operand size is given by @a f64Bit.
6485 */
6486DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6487 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6488{
6489 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
6490}
6491
6492
6493/**
6494 * Emits code that jumps to a new label if @a iGprSrc is not zero.
6495 *
6496 * The operand size is given by @a f64Bit.
6497 */
6498DECL_INLINE_THROW(uint32_t)
6499iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
6500 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6501{
6502 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6503 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
6504}
6505
6506
6507/* if (Gpr1 != Gpr2) Jmp idxLabel; */
6508
6509/**
6510 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
6511 * differs.
6512 */
6513DECL_INLINE_THROW(uint32_t)
6514iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6515 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
6516{
6517 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
6518 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6519 return off;
6520}
6521
6522
6523/**
6524 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
6525 */
6526DECL_INLINE_THROW(uint32_t)
6527iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6528 uint8_t iGprLeft, uint8_t iGprRight,
6529 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6530{
6531 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6532 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
6533}
6534
6535
6536/* if (Gpr != Imm) Jmp idxLabel; */
6537
6538/**
6539 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
6540 */
6541DECL_INLINE_THROW(uint32_t)
6542iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6543 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
6544{
6545 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
6546 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6547 return off;
6548}
6549
6550
6551/**
6552 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
6553 */
6554DECL_INLINE_THROW(uint32_t)
6555iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6556 uint8_t iGprSrc, uint64_t uImm,
6557 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6558{
6559 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6560 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6561}
6562
6563
6564/**
6565 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
6566 * @a uImm.
6567 */
6568DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6569 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
6570{
6571 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
6572 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6573 return off;
6574}
6575
6576
6577/**
6578 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
6579 * @a uImm.
6580 */
6581DECL_INLINE_THROW(uint32_t)
6582iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6583 uint8_t iGprSrc, uint32_t uImm,
6584 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6585{
6586 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6587 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6588}
6589
6590
6591/**
6592 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
6593 * @a uImm.
6594 */
6595DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6596 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
6597{
6598 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
6599 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6600 return off;
6601}
6602
6603
6604/**
6605 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
6606 * @a uImm.
6607 */
6608DECL_INLINE_THROW(uint32_t)
6609iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6610 uint8_t iGprSrc, uint16_t uImm,
6611 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6612{
6613 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6614 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6615}
6616
6617
6618/* if (Gpr == Imm) Jmp idxLabel; */
6619
6620/**
6621 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
6622 */
6623DECL_INLINE_THROW(uint32_t)
6624iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6625 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
6626{
6627 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
6628 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6629 return off;
6630}
6631
6632
6633/**
6634 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
6635 */
6636DECL_INLINE_THROW(uint32_t)
6637iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
6638 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6639{
6640 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6641 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6642}
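
/*
 * Illustrative sketch (register, immediate and label type assumed): the
 * equals-immediate variant follows the same pattern, e.g. branching to a
 * raise label when a particular value is seen:
 *
 *    off = iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(pReNative, off, idxRegTmp,
 *                                                          UINT64_C(0xdead),
 *                                                          kIemNativeLabelType_RaiseGp0);
 */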
6643
6644
6645/**
6646 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
6647 */
6648DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6649 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
6650{
6651 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
6652 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6653 return off;
6654}
6655
6656
6657/**
6658 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
6659 */
6660DECL_INLINE_THROW(uint32_t)
6661iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
6662 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6663{
6664 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6665 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6666}
6667
6668
6669/**
6670 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
6671 *
6672 * @note ARM64: Helper register is required (idxTmpReg).
6673 */
6674DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6675 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
6676 uint8_t idxTmpReg = UINT8_MAX)
6677{
6678 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
6679 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6680 return off;
6681}
6682
6683
6684/**
6685 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
6686 *
6687 * @note ARM64: Helper register is required (idxTmpReg).
6688 */
6689DECL_INLINE_THROW(uint32_t)
6690iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
6691 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
6692 uint8_t idxTmpReg = UINT8_MAX)
6693{
6694 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6695 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
6696}
6697
6698
6699/*********************************************************************************************************************************
6700* Calls. *
6701*********************************************************************************************************************************/
6702
6703/**
6704 * Emits a call to a 64-bit address.
6705 */
6706DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
6707{
6708#ifdef RT_ARCH_AMD64
6709 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
6710
6711 /* call rax */
6712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6713 pbCodeBuf[off++] = 0xff;
6714 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
6715
6716#elif defined(RT_ARCH_ARM64)
6717 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
6718
6719 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6720 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
6721
6722#else
6723# error "port me"
6724#endif
6725 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6726 return off;
6727}
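
/*
 * Illustrative sketch; iemSomeHelperFunc is hypothetical, while the fixed
 * argument registers are the ones used throughout this file.  A typical
 * helper call loads pVCpu into the first argument register before emitting
 * the call:
 *
 *    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
 *                                      IEMNATIVE_REG_FIXED_PVMCPU);
 *    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemSomeHelperFunc);
 *
 * Note that the emitter itself clobbers xAX on AMD64 and
 * IEMNATIVE_REG_FIXED_TMP0 on ARM64, on top of the usual volatile call
 * registers.
 */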
6728
6729
6730/**
6731 * Emits code to load a stack variable into an argument GPR.
6732 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
6733 */
6734DECL_FORCE_INLINE_THROW(uint32_t)
6735iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
6736 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
6737 bool fSpilledVarsInVolatileRegs = false)
6738{
6739 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6740 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6741 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6742
6743 uint8_t const idxRegVar = pVar->idxReg;
6744 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
6745 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
6746 || !fSpilledVarsInVolatileRegs ))
6747 {
6748 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
6749 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
6750 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
6751 if (!offAddend)
6752 {
6753 if (idxRegArg != idxRegVar)
6754 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
6755 }
6756 else
6757 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
6758 }
6759 else
6760 {
6761 uint8_t const idxStackSlot = pVar->idxStackSlot;
6762 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6763 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
6764 if (offAddend)
6765 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
6766 }
6767 return off;
6768}
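
/*
 * Minimal usage sketch (argument register and variable index assumed):
 *
 *    off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
 *                                               idxVarMyArg);
 *
 * The fHstVolatileRegsAllowed and fSpilledVarsInVolatileRegs parameters only
 * come into play when the variable currently lives in a volatile
 * (call-clobbered) host register.
 */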
6769
6770
6771/**
6772 * Emits code to load a stack or immediate variable value into an argument GPR,
6773 * optionally with an addend.
6774 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
6775 */
6776DECL_FORCE_INLINE_THROW(uint32_t)
6777iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
6778 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
6779 bool fSpilledVarsInVolatileRegs = false)
6780{
6781 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6782 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6783 if (pVar->enmKind == kIemNativeVarKind_Immediate)
6784 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
6785 else
6786 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
6787 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
6788 return off;
6789}
6790
6791
6792/**
6793 * Emits code to load the variable address into an argument GPR.
6794 *
6795 * This only works for uninitialized and stack variables.
6796 */
6797DECL_FORCE_INLINE_THROW(uint32_t)
6798iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
6799 bool fFlushShadows)
6800{
6801 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6802 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6803 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
6804 || pVar->enmKind == kIemNativeVarKind_Stack,
6805 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6806
6807 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
6808 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
6809
6810 uint8_t const idxRegVar = pVar->idxReg;
6811 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
6812 {
6813 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
6814 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
6815 Assert(pVar->idxReg == UINT8_MAX);
6816 }
6817 Assert( pVar->idxStackSlot != UINT8_MAX
6818 && pVar->idxReg == UINT8_MAX);
6819
6820 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
6821}
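
/*
 * Brief sketch (argument register and variable index assumed): taking the
 * address forces the variable into its stack slot, writing back and freeing
 * any cached host register copy first, so the emitted lea always refers to
 * the frame.  The final argument requests flushing of guest shadow copies:
 *
 *    off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
 *                                              idxVarMem, true);
 */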
6822
6823
6824#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6825
6826/**
6827 * Emits a 128-bit vector register store to a VCpu value.
6828 */
6829DECL_FORCE_INLINE_THROW(uint32_t)
6830iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6831{
6832#ifdef RT_ARCH_AMD64
6833 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
6834 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6835 if (iVecReg >= 8)
6836 pCodeBuf[off++] = X86_OP_REX_R;
6837 pCodeBuf[off++] = 0x0f;
6838 pCodeBuf[off++] = 0x7f;
6839 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
6840#elif defined(RT_ARCH_ARM64)
6841 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
6842
6843#else
6844# error "port me"
6845#endif
6846 return off;
6847}
6848
6849
6850/**
6851 * Emits a 128-bit vector register store to a VCpu value.
6852 */
6853DECL_INLINE_THROW(uint32_t)
6854iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6855{
6856#ifdef RT_ARCH_AMD64
6857 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
6858#elif defined(RT_ARCH_ARM64)
6859 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
6860#else
6861# error "port me"
6862#endif
6863 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6864 return off;
6865}
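
/*
 * Illustrative sketch (the exact VCpu field is an assumption for the
 * example): flushing a shadowed guest XMM register back to the CPU context
 * could look like
 *
 *    off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg,
 *                                                    RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.aXMM[iXReg]));
 *
 * Since the AMD64 path uses movdqa, the target field must be 16-byte aligned.
 */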
6866
6867
6868/**
6869 * Emits a high 128-bit vector register store to a VCpu value.
6870 */
6871DECL_FORCE_INLINE_THROW(uint32_t)
6872iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6873{
6874#ifdef RT_ARCH_AMD64
6875 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
6876 pCodeBuf[off++] = X86_OP_VEX3;
6877 if (iVecReg >= 8)
6878 pCodeBuf[off++] = 0x63;
6879 else
6880 pCodeBuf[off++] = 0xe3;
6881 pCodeBuf[off++] = 0x7d;
6882 pCodeBuf[off++] = 0x39;
6883 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
6884 pCodeBuf[off++] = 0x01; /* Immediate */
6885#elif defined(RT_ARCH_ARM64)
6886 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
6887#else
6888# error "port me"
6889#endif
6890 return off;
6891}
6892
6893
6894/**
6895 * Emits a high 128-bit vector register store to a VCpu value.
6896 */
6897DECL_INLINE_THROW(uint32_t)
6898iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6899{
6900#ifdef RT_ARCH_AMD64
6901 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
6902#elif defined(RT_ARCH_ARM64)
6903 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
6904 Assert(!(iVecReg & 0x1));
6905 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
6906#else
6907# error "port me"
6908#endif
6909 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6910 return off;
6911}
6912
6913
6914/**
6915 * Emits a 128-bit vector register load of a VCpu value.
6916 */
6917DECL_FORCE_INLINE_THROW(uint32_t)
6918iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6919{
6920#ifdef RT_ARCH_AMD64
6921 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
6922 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6923 if (iVecReg >= 8)
6924 pCodeBuf[off++] = X86_OP_REX_R;
6925 pCodeBuf[off++] = 0x0f;
6926 pCodeBuf[off++] = 0x6f;
6927 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
6928#elif defined(RT_ARCH_ARM64)
6929 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
6930
6931#else
6932# error "port me"
6933#endif
6934 return off;
6935}
6936
6937
6938/**
6939 * Emits a 128-bit vector register load of a VCpu value.
6940 */
6941DECL_INLINE_THROW(uint32_t)
6942iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6943{
6944#ifdef RT_ARCH_AMD64
6945 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
6946#elif defined(RT_ARCH_ARM64)
6947 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
6948#else
6949# error "port me"
6950#endif
6951 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6952 return off;
6953}
6954
6955
6956/**
6957 * Emits a 128-bit vector register load of a VCpu value.
6958 */
6959DECL_FORCE_INLINE_THROW(uint32_t)
6960iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6961{
6962#ifdef RT_ARCH_AMD64
6963 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
6964 pCodeBuf[off++] = X86_OP_VEX3;
6965 if (iVecReg >= 8)
6966 pCodeBuf[off++] = 0x63;
6967 else
6968 pCodeBuf[off++] = 0xe3;
6969 pCodeBuf[off++] = X86_OP_VEX3_BYTE3_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE3_P_066H);
6970 pCodeBuf[off++] = 0x38;
6971 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
6972 pCodeBuf[off++] = 0x01; /* Immediate */
6973#elif defined(RT_ARCH_ARM64)
6974 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
6975#else
6976# error "port me"
6977#endif
6978 return off;
6979}
6980
6981
6982/**
6983 * Emits a high 128-bit vector register load of a VCpu value.
6984 */
6985DECL_INLINE_THROW(uint32_t)
6986iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6987{
6988#ifdef RT_ARCH_AMD64
6989 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
6990#elif defined(RT_ARCH_ARM64)
6991 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
6992 Assert(!(iVecReg & 0x1));
6993 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
6994#else
6995# error "port me"
6996#endif
6997 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6998 return off;
6999}
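
/*
 * Note on 256-bit usage of the emitters above: a full YMM value is handled
 * as a low/high pair of 128-bit accesses, e.g. (offsets assumed):
 *
 *    off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128( pReNative, off, idxHstSimdReg, offYmmLo);
 *    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, offYmmHi);
 *
 * On ARM64 the high half lives in the odd register of an adjacent even/odd
 * pair, which is why these emitters assert an even iVecReg.
 */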
7000
7001
7002/**
7003 * Emits a vecdst = vecsrc load.
7004 */
7005DECL_FORCE_INLINE(uint32_t)
7006iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7007{
7008#ifdef RT_ARCH_AMD64
7009 /* movdqu vecdst, vecsrc */
7010 pCodeBuf[off++] = 0xf3;
7011
7012 if ((iVecRegDst | iVecRegSrc) >= 8)
7013 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
7014 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
7015 : X86_OP_REX_R;
7016 pCodeBuf[off++] = 0x0f;
7017 pCodeBuf[off++] = 0x6f;
7018 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7019
7020#elif defined(RT_ARCH_ARM64)
7021 /* mov dst, src; alias for: orr dst, src, src */
7022 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
7023
7024#else
7025# error "port me"
7026#endif
7027 return off;
7028}
7029
7030
7031/**
7032 * Emits a vecdst = vecsrc load, 128-bit.
7033 */
7034DECL_INLINE_THROW(uint32_t)
7035iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7036{
7037#ifdef RT_ARCH_AMD64
7038 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
7039#elif defined(RT_ARCH_ARM64)
7040 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
7041#else
7042# error "port me"
7043#endif
7044 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7045 return off;
7046}
7047
7048
7049/**
7050 * Emits a vecdst = vecsrc load, 256-bit.
7051 */
7052DECL_INLINE_THROW(uint32_t)
7053iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7054{
7055#ifdef RT_ARCH_AMD64
7056 /* vmovdqa ymm, ymm */
7057 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7058 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
7059 {
7060 pbCodeBuf[off++] = X86_OP_VEX3;
7061 pbCodeBuf[off++] = 0x41;
7062 pbCodeBuf[off++] = 0x7d;
7063 pbCodeBuf[off++] = 0x6f;
7064 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7065 }
7066 else
7067 {
7068 pbCodeBuf[off++] = X86_OP_VEX2;
7069 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
7070 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
7071 pbCodeBuf[off++] = iVecRegSrc >= 8
7072 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
7073 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7074 }
7075#elif defined(RT_ARCH_ARM64)
7076 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7077 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
7078 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
7079 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
7080#else
7081# error "port me"
7082#endif
7083 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7084 return off;
7085}
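
/*
 * Usage sketch (register indices assumed):
 *
 *    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst,
 *                                                    idxHstSimdRegSrc);
 *
 * On AMD64 this is a single vmovdqa ymm,ymm; on ARM64 it expands to two
 * 128-bit register moves over the adjacent register pairs.
 */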
7086
7087
7088/**
7089 * Emits a gprdst = vecsrc[x] load, 64-bit.
7090 */
7091DECL_FORCE_INLINE(uint32_t)
7092iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7093{
7094#ifdef RT_ARCH_AMD64
7095 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
7096 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7097 pCodeBuf[off++] = X86_OP_REX_W
7098 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7099 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7100 pCodeBuf[off++] = 0x0f;
7101 pCodeBuf[off++] = 0x3a;
7102 pCodeBuf[off++] = 0x16;
7103 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7104 pCodeBuf[off++] = iQWord;
7105#elif defined(RT_ARCH_ARM64)
7106 /* umov gprdst, vecsrc[iQWord] */
7107 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovSz_U64);
7108#else
7109# error "port me"
7110#endif
7111 return off;
7112}
7113
7114
7115/**
7116 * Emits a gprdst = vecsrc[x] load, 64-bit.
7117 */
7118DECL_INLINE_THROW(uint32_t)
7119iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7120{
7121 Assert(iQWord <= 1);
7122
7123#ifdef RT_ARCH_AMD64
7124 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
7125#elif defined(RT_ARCH_ARM64)
7126 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
7127#else
7128# error "port me"
7129#endif
7130 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7131 return off;
7132}
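
/*
 * Illustrative sketch (register indices assumed): fetching the upper qword
 * of a guest XMM value into a GPR; iQWord selects the 64-bit element (0 or
 * 1), and the AMD64 path requires SSE4.1 for pextrq:
 *
 *    off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegDst,
 *                                                idxHstSimdReg, 1);
 */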
7133
7134
7135/**
7136 * Emits a gprdst = vecsrc[x] load, 32-bit.
7137 */
7138DECL_FORCE_INLINE(uint32_t)
7139iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7140{
7141#ifdef RT_ARCH_AMD64
7142 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
7143 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7144 if (iGprDst >= 8 || iVecRegSrc >= 8)
7145 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7146 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7147 pCodeBuf[off++] = 0x0f;
7148 pCodeBuf[off++] = 0x3a;
7149 pCodeBuf[off++] = 0x16;
7150 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7151 pCodeBuf[off++] = iDWord;
7152#elif defined(RT_ARCH_ARM64)
7153 /* umov gprdst, vecsrc[iDWord] */
7154 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovSz_U32, false /*fDst64Bit*/);
7155#else
7156# error "port me"
7157#endif
7158 return off;
7159}
7160
7161
7162/**
7163 * Emits a gprdst = vecsrc[x] load, 32-bit.
7164 */
7165DECL_INLINE_THROW(uint32_t)
7166iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7167{
7168 Assert(iDWord <= 3);
7169
7170#ifdef RT_ARCH_AMD64
7171 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
7172#elif defined(RT_ARCH_ARM64)
7173 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
7174#else
7175# error "port me"
7176#endif
7177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7178 return off;
7179}
7180
7181
7182/**
7183 * Emits a vecdst[128:255] = 0 store.
7184 */
7185DECL_FORCE_INLINE(uint32_t)
7186iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
7187{
7188#ifdef RT_ARCH_AMD64
7189 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
7190 if (iVecReg < 8)
7191 {
7192 pCodeBuf[off++] = X86_OP_VEX2;
7193 pCodeBuf[off++] = 0xf9;
7194 }
7195 else
7196 {
7197 pCodeBuf[off++] = X86_OP_VEX3;
7198 pCodeBuf[off++] = 0x41;
7199 pCodeBuf[off++] = 0x79;
7200 }
7201 pCodeBuf[off++] = 0x6f;
7202 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
7203#elif defined(RT_ARCH_ARM64)
7204 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7205 Assert(!(iVecReg & 0x1));
7206 /* eor vecreg, vecreg, vecreg */
7207 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
7208#else
7209# error "port me"
7210#endif
7211 return off;
7212}
7213
7214
7215/**
7216 * Emits a vecdst[128:255] = 0 store.
7217 */
7218DECL_INLINE_THROW(uint32_t)
7219iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
7220{
7221#ifdef RT_ARCH_AMD64
7222 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
7223#elif defined(RT_ARCH_ARM64)
7224 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
7225#else
7226# error "port me"
7227#endif
7228 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7229 return off;
7230}
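
/*
 * Usage sketch (register index assumed): clearing the upper YMM half after a
 * 128-bit operation, mirroring the implicit zeroing done by VEX-encoded
 * instructions:
 *
 *    off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxHstSimdReg);
 */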
7231
7232#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
7233
7234/** @} */
7235
7236#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
7237