VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@ 104058

Last change on this file since 104058 was 104058, checked in by vboxsync, 10 months ago

VMM/IEM: Make IEM_MC_FETCH_YREG_U128() take an iDQWord parameter to select whether to fetch the low or high half of the 256-bit register, bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 317.3 KB
1/* $Id: IEMN8veRecompilerEmit.h 104058 2024-03-26 13:41:59Z vboxsync $ */
2/** @file
3 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34#include "IEMN8veRecompiler.h"
35
36
37/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
38 * @ingroup grp_iem_n8ve_re
39 * @{
40 */
41
42/**
43 * Emit a simple marker instruction to more easily tell where something starts
44 * in the disassembly.
45 */
46DECL_INLINE_THROW(uint32_t)
47iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
48{
49#ifdef RT_ARCH_AMD64
50 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
51 if (uInfo == 0)
52 {
53 /* nop */
54 pbCodeBuf[off++] = 0x90;
55 }
56 else
57 {
58 /* nop [disp32] */
59 pbCodeBuf[off++] = 0x0f;
60 pbCodeBuf[off++] = 0x1f;
61 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
62 pbCodeBuf[off++] = RT_BYTE1(uInfo);
63 pbCodeBuf[off++] = RT_BYTE2(uInfo);
64 pbCodeBuf[off++] = RT_BYTE3(uInfo);
65 pbCodeBuf[off++] = RT_BYTE4(uInfo);
66 }
67#elif defined(RT_ARCH_ARM64)
68 /* nop */
69 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
70 pu32CodeBuf[off++] = 0xd503201f;
71
72 RT_NOREF(uInfo);
73#else
74# error "port me"
75#endif
76 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
77 return off;
78}
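
/* Editorial usage sketch (not part of the original source): a caller tags the
   start of an emitted sequence so it can be located in the disassembly later:
   @code
        off = iemNativeEmitMarker(pReNative, off, 0x1234);
   @endcode
   On AMD64 the info value ends up in the displacement of a multi-byte NOP;
   the ARM64 variant currently ignores it and emits a plain NOP. */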
79
80
81/**
82 * Emit a breakpoint instruction.
83 */
84DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
85{
86#ifdef RT_ARCH_AMD64
87 pCodeBuf[off++] = 0xcc;
88 RT_NOREF(uInfo); /** @todo use multibyte nop for info? */
89
90#elif defined(RT_ARCH_ARM64)
91 pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));
92
93#else
94# error "port me"
95#endif
96 return off;
97}
98
99
100/**
101 * Emit a breakpoint instruction.
102 */
103DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
104{
105#ifdef RT_ARCH_AMD64
106 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
107#elif defined(RT_ARCH_ARM64)
108 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
109#else
110# error "port me"
111#endif
112 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
113 return off;
114}
115
116
117/*********************************************************************************************************************************
118* Loads, Stores and Related Stuff. *
119*********************************************************************************************************************************/
120
121#ifdef RT_ARCH_AMD64
122/**
123 * Common bit of iemNativeEmitLoadGprByGpr and friends.
124 */
125DECL_FORCE_INLINE(uint32_t)
126iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
127{
128 if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
129 {
130 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
131 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
132 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
133 }
134 else if (offDisp == (int8_t)offDisp)
135 {
136 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
137 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
138 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
139 pbCodeBuf[off++] = (uint8_t)offDisp;
140 }
141 else
142 {
143 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
144 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
145 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
146 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
147 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
148 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
149 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
150 }
151 return off;
152}
153#endif /* RT_ARCH_AMD64 */
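
/* Editorial worked example (assumed encoding, not from the original source):
   for a base of RSP/R12 the SIB byte is mandatory, so e.g. iGprReg=RAX,
   iGprBase=RSP, offDisp=8 makes this helper append ModRM 0x44 (mod=01,
   reg=000, rm=100), SIB 0x24 (no index, base=RSP) and the disp8 0x08 after
   the caller's opcode byte(s), i.e. the tail of 'mov rax, [rsp+8]'. */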
154
155/**
156 * Emits setting a GPR to zero.
157 */
158DECL_INLINE_THROW(uint32_t)
159iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
160{
161#ifdef RT_ARCH_AMD64
162 /* xor gpr32, gpr32 */
163 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
164 if (iGpr >= 8)
165 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
166 pbCodeBuf[off++] = 0x33;
167 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
168
169#elif defined(RT_ARCH_ARM64)
170 /* mov gpr, #0x0 */
171 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
172 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;
173
174#else
175# error "port me"
176#endif
177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
178 return off;
179}
180
181
182/**
183 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
184 * buffer space.
185 *
186 * Max buffer consumption:
187 * - AMD64: 10 instruction bytes.
188 * - ARM64: 4 instruction words (16 bytes).
189 */
190DECL_FORCE_INLINE(uint32_t)
191iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
192{
193#ifdef RT_ARCH_AMD64
194 if (uImm64 == 0)
195 {
196 /* xor gpr, gpr */
197 if (iGpr >= 8)
198 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
199 pCodeBuf[off++] = 0x33;
200 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
201 }
202 else if (uImm64 <= UINT32_MAX)
203 {
204 /* mov gpr, imm32 */
205 if (iGpr >= 8)
206 pCodeBuf[off++] = X86_OP_REX_B;
207 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
208 pCodeBuf[off++] = RT_BYTE1(uImm64);
209 pCodeBuf[off++] = RT_BYTE2(uImm64);
210 pCodeBuf[off++] = RT_BYTE3(uImm64);
211 pCodeBuf[off++] = RT_BYTE4(uImm64);
212 }
213 else if (uImm64 == (uint64_t)(int32_t)uImm64)
214 {
215 /* mov gpr, sx(imm32) */
216 if (iGpr < 8)
217 pCodeBuf[off++] = X86_OP_REX_W;
218 else
219 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
220 pCodeBuf[off++] = 0xc7;
221 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
222 pCodeBuf[off++] = RT_BYTE1(uImm64);
223 pCodeBuf[off++] = RT_BYTE2(uImm64);
224 pCodeBuf[off++] = RT_BYTE3(uImm64);
225 pCodeBuf[off++] = RT_BYTE4(uImm64);
226 }
227 else
228 {
229 /* mov gpr, imm64 */
230 if (iGpr < 8)
231 pCodeBuf[off++] = X86_OP_REX_W;
232 else
233 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
234 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
235 pCodeBuf[off++] = RT_BYTE1(uImm64);
236 pCodeBuf[off++] = RT_BYTE2(uImm64);
237 pCodeBuf[off++] = RT_BYTE3(uImm64);
238 pCodeBuf[off++] = RT_BYTE4(uImm64);
239 pCodeBuf[off++] = RT_BYTE5(uImm64);
240 pCodeBuf[off++] = RT_BYTE6(uImm64);
241 pCodeBuf[off++] = RT_BYTE7(uImm64);
242 pCodeBuf[off++] = RT_BYTE8(uImm64);
243 }
244
245#elif defined(RT_ARCH_ARM64)
246 /*
247 * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
248 * supply remaining bits using 'movk gpr, imm16, lsl #x'.
249 *
250 * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
251 * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
252 * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
253 * after the first non-zero immediate component so we switch to movk for
254 * the remainder.
255 */
256 unsigned cZeroHalfWords = !( uImm64 & UINT16_MAX)
257 + !((uImm64 >> 16) & UINT16_MAX)
258 + !((uImm64 >> 32) & UINT16_MAX)
259 + !((uImm64 >> 48) & UINT16_MAX);
260 unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
261 : ( (uImm64 & UINT16_MAX) == UINT16_MAX)
262 + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
263 + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
264 + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
265 if (cFfffHalfWords <= cZeroHalfWords)
266 {
267 uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
268
269 /* movz gpr, imm16 */
270 uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
271 if (uImmPart || cZeroHalfWords == 4)
272 {
273 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
274 fMovBase |= RT_BIT_32(29);
275 }
276 /* mov[z/k] gpr, imm16, lsl #16 */
277 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
278 if (uImmPart)
279 {
280 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
281 fMovBase |= RT_BIT_32(29);
282 }
283 /* mov[z/k] gpr, imm16, lsl #32 */
284 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
285 if (uImmPart)
286 {
287 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
288 fMovBase |= RT_BIT_32(29);
289 }
290 /* mov[z/k] gpr, imm16, lsl #48 */
291 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
292 if (uImmPart)
293 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
294 }
295 else
296 {
297 uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
298
299 /* find the first half-word that isn't UINT16_MAX. */
300 uint32_t const iHwNotFfff = (uImm64 & UINT16_MAX) != UINT16_MAX ? 0
301 : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
302 : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
303
304 /* movn gpr, imm16, lsl #iHwNotFfff*16 */
305 uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
306 pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
307 fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
308 /* movk gpr, imm16 */
309 if (iHwNotFfff != 0)
310 {
311 uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
312 if (uImmPart != UINT32_C(0xffff))
313 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
314 }
315 /* movk gpr, imm16, lsl #16 */
316 if (iHwNotFfff != 1)
317 {
318 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
319 if (uImmPart != UINT32_C(0xffff))
320 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
321 }
322 /* movk gpr, imm16, lsl #32 */
323 if (iHwNotFfff != 2)
324 {
325 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
326 if (uImmPart != UINT32_C(0xffff))
327 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
328 }
329 /* movk gpr, imm16, lsl #48 */
330 if (iHwNotFfff != 3)
331 {
332 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
333 if (uImmPart != UINT32_C(0xffff))
334 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
335 }
336 }
337
338 /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
339 * clang 12.x does that, only to use the 'x' version for the
340 * addressing in the following ldr. */
341
342#else
343# error "port me"
344#endif
345 return off;
346}
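
/* Editorial worked example (register choice illustrative, not from the original
   source): UINT64_C(0x0000123400005678) has two zero half-words, so the movz
   path wins and only two instructions are emitted:
   @code
        movz x0, #0x5678                ; half-word 0
        movk x0, #0x1234, lsl #32       ; half-word 2; the zero half-words are skipped
   @endcode
   A mostly-ones value such as UINT64_C(0xffffffffffff1234) instead takes the
   movn path and needs just 'movn x0, #0xedcb' (the inverted low half-word). */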
347
348
349/**
350 * Emits loading a constant into a 64-bit GPR
351 */
352DECL_INLINE_THROW(uint32_t)
353iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
354{
355#ifdef RT_ARCH_AMD64
356 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
357#elif defined(RT_ARCH_ARM64)
358 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
359#else
360# error "port me"
361#endif
362 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
363 return off;
364}
365
366
367/**
368 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
369 * buffer space.
370 *
371 * Max buffer consumption:
372 * - AMD64: 6 instruction bytes.
373 * - ARM64: 2 instruction words (8 bytes).
374 *
375 * @note The top 32 bits will be cleared.
376 */
377DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
378{
379#ifdef RT_ARCH_AMD64
380 if (uImm32 == 0)
381 {
382 /* xor gpr, gpr */
383 if (iGpr >= 8)
384 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
385 pCodeBuf[off++] = 0x33;
386 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
387 }
388 else
389 {
390 /* mov gpr, imm32 */
391 if (iGpr >= 8)
392 pCodeBuf[off++] = X86_OP_REX_B;
393 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
394 pCodeBuf[off++] = RT_BYTE1(uImm32);
395 pCodeBuf[off++] = RT_BYTE2(uImm32);
396 pCodeBuf[off++] = RT_BYTE3(uImm32);
397 pCodeBuf[off++] = RT_BYTE4(uImm32);
398 }
399
400#elif defined(RT_ARCH_ARM64)
401 if ((uImm32 >> 16) == 0)
402 /* movz gpr, imm16 */
403 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
404 else if ((uImm32 & UINT32_C(0xffff)) == 0)
405 /* movz gpr, imm16, lsl #16 */
406 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
407 else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
408 /* movn gpr, imm16, lsl #16 */
409 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
410 else if ((uImm32 >> 16) == UINT32_C(0xffff))
411 /* movn gpr, imm16 */
412 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
413 else
414 {
415 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
416 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
417 }
418
419#else
420# error "port me"
421#endif
422 return off;
423}
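
/* Editorial sketch (assumed examples, register choice illustrative): the
   32-bit variant needs at most two instructions:
   @code
        0x00001234  ->  movz w0, #0x1234
        0xffff1234  ->  movn w0, #0xedcb                        ; ~0xffff1234 == 0x0000edcb
        0x12345678  ->  movz w0, #0x5678 + movk w0, #0x1234, lsl #16
   @endcode */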
424
425
426/**
427 * Emits loading a constant into a 32-bit GPR.
428 * @note The top 32 bits will be cleared.
429 */
430DECL_INLINE_THROW(uint32_t)
431iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
432{
433#ifdef RT_ARCH_AMD64
434 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
435#elif defined(RT_ARCH_ARM64)
436 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
437#else
438# error "port me"
439#endif
440 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
441 return off;
442}
443
444
445/**
446 * Emits loading a constant into an 8-bit GPR.
447 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
448 * only the ARM64 version does that.
449 */
450DECL_INLINE_THROW(uint32_t)
451iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
452{
453#ifdef RT_ARCH_AMD64
454 /* mov gpr, imm8 */
455 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
456 if (iGpr >= 8)
457 pbCodeBuf[off++] = X86_OP_REX_B;
458 else if (iGpr >= 4)
459 pbCodeBuf[off++] = X86_OP_REX;
460 pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
461 pbCodeBuf[off++] = RT_BYTE1(uImm8);
462
463#elif defined(RT_ARCH_ARM64)
464 /* movz gpr, imm16, lsl #0 */
465 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
466 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;
467
468#else
469# error "port me"
470#endif
471 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
472 return off;
473}
474
475
476#ifdef RT_ARCH_AMD64
477/**
478 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
479 */
480DECL_FORCE_INLINE(uint32_t)
481iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
482{
483 if (offVCpu < 128)
484 {
485 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
486 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
487 }
488 else
489 {
490 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
491 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
492 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
493 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
494 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
495 }
496 return off;
497}
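
/* Editorial sketch (assumed encoding): with RBX as the fixed pVCpu register a
   64-bit load of a member at offVCpu 0x40 ends up as 'mov rax, [rbx+0x40]',
   where this helper contributes the two-byte ModRM 0x43 + disp8 tail; offsets
   of 128 and up take the five-byte ModRM + disp32 form instead. */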
498
499#elif defined(RT_ARCH_ARM64)
500
501/**
502 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
503 *
504 * @note Loads can use @a iGprReg for large offsets, stores require a temporary
505 * register (@a iGprTmp).
506 * @note DON'T try this with prefetch.
507 */
508DECL_FORCE_INLINE_THROW(uint32_t)
509iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
510 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
511{
512 /*
513 * There are a couple of ldr variants that take an immediate offset, so
514 * try to use those if we can; otherwise we have to use the temporary
515 * register to help with the addressing.
516 */
517 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
518 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
519 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
520 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
521 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
522 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
523 else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
524 {
525 /* The offset is too large, so we must load it into a register and use
526 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
527 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
528 if (iGprTmp == UINT8_MAX)
529 iGprTmp = iGprReg;
530 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
531 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
532 }
533 else
534# ifdef IEM_WITH_THROW_CATCH
535 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
536# else
537 AssertReleaseFailedStmt(off = UINT32_MAX);
538# endif
539
540 return off;
541}
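
/* Editorial sketch of the three strategies above for a dword (cbData == 4),
   offsets purely illustrative:
   @code
        offVCpu 0x2ffc           ->  ldr w0, [<pVCpu>, #0x2ffc]         ; scaled uimm12
        small offset into GstCtx ->  ldr w0, [<pGstCtx>, #imm]          ; rebased on the CPUMCTX register
        anything else            ->  mov <tmp>, #offVCpu + ldr w0, [<pVCpu>, <tmp>]
   @endcode
   Loads may clobber @a iGprReg as the index in the last case; stores need a
   real @a iGprTmp or the helper asserts/longjmps. */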
542
543/**
544 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
545 */
546DECL_FORCE_INLINE_THROW(uint32_t)
547iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
548 uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
549{
550 /*
551 * There are a couple of ldr variants that take an immediate offset, so
552 * try to use those if we can; otherwise we have to use the temporary
553 * register to help with the addressing.
554 */
555 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
556 {
557 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
558 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
559 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
560 }
561 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
562 {
563 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
564 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
565 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
566 }
567 else
568 {
569 /* The offset is too large, so we must load it into a register and use
570 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
571 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
572 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
573 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
574 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
575 IEMNATIVE_REG_FIXED_TMP0);
576 }
577 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
578 return off;
579}
580
581#endif /* RT_ARCH_ARM64 */
582
583
584/**
585 * Emits a 64-bit GPR load of a VCpu value.
586 */
587DECL_FORCE_INLINE_THROW(uint32_t)
588iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
589{
590#ifdef RT_ARCH_AMD64
591 /* mov reg64, mem64 */
592 if (iGpr < 8)
593 pCodeBuf[off++] = X86_OP_REX_W;
594 else
595 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
596 pCodeBuf[off++] = 0x8b;
597 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
598
599#elif defined(RT_ARCH_ARM64)
600 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
601
602#else
603# error "port me"
604#endif
605 return off;
606}
607
608
609/**
610 * Emits a 64-bit GPR load of a VCpu value.
611 */
612DECL_INLINE_THROW(uint32_t)
613iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
614{
615#ifdef RT_ARCH_AMD64
616 off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
617 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
618
619#elif defined(RT_ARCH_ARM64)
620 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
621
622#else
623# error "port me"
624#endif
625 return off;
626}
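
/* Editorial usage sketch (the member name is purely illustrative): fetching a
   64-bit VMCPU field into a host register typically looks like
   @code
        off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg,
                                              RT_UOFFSETOF(VMCPU, iem.s.uSomeMember));
   @endcode
   with idxHstReg obtained from the register allocator beforehand. */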
627
628
629/**
630 * Emits a 32-bit GPR load of a VCpu value.
631 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
632 */
633DECL_INLINE_THROW(uint32_t)
634iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
635{
636#ifdef RT_ARCH_AMD64
637 /* mov reg32, mem32 */
638 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
639 if (iGpr >= 8)
640 pbCodeBuf[off++] = X86_OP_REX_R;
641 pbCodeBuf[off++] = 0x8b;
642 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
644
645#elif defined(RT_ARCH_ARM64)
646 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
647
648#else
649# error "port me"
650#endif
651 return off;
652}
653
654
655/**
656 * Emits a 16-bit GPR load of a VCpu value.
657 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
658 */
659DECL_INLINE_THROW(uint32_t)
660iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
661{
662#ifdef RT_ARCH_AMD64
663 /* movzx reg32, mem16 */
664 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
665 if (iGpr >= 8)
666 pbCodeBuf[off++] = X86_OP_REX_R;
667 pbCodeBuf[off++] = 0x0f;
668 pbCodeBuf[off++] = 0xb7;
669 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
670 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
671
672#elif defined(RT_ARCH_ARM64)
673 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
674
675#else
676# error "port me"
677#endif
678 return off;
679}
680
681
682/**
683 * Emits an 8-bit GPR load of a VCpu value.
684 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
685 */
686DECL_INLINE_THROW(uint32_t)
687iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
688{
689#ifdef RT_ARCH_AMD64
690 /* movzx reg32, mem8 */
691 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
692 if (iGpr >= 8)
693 pbCodeBuf[off++] = X86_OP_REX_R;
694 pbCodeBuf[off++] = 0x0f;
695 pbCodeBuf[off++] = 0xb6;
696 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
697 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
698
699#elif defined(RT_ARCH_ARM64)
700 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
701
702#else
703# error "port me"
704#endif
705 return off;
706}
707
708
709/**
710 * Emits a store of a GPR value to a 64-bit VCpu field.
711 */
712DECL_FORCE_INLINE_THROW(uint32_t)
713iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
714 uint8_t iGprTmp = UINT8_MAX)
715{
716#ifdef RT_ARCH_AMD64
717 /* mov mem64, reg64 */
718 if (iGpr < 8)
719 pCodeBuf[off++] = X86_OP_REX_W;
720 else
721 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
722 pCodeBuf[off++] = 0x89;
723 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
724 RT_NOREF(iGprTmp);
725
726#elif defined(RT_ARCH_ARM64)
727 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
728
729#else
730# error "port me"
731#endif
732 return off;
733}
734
735
736/**
737 * Emits a store of a GPR value to a 64-bit VCpu field.
738 */
739DECL_INLINE_THROW(uint32_t)
740iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
741{
742#ifdef RT_ARCH_AMD64
743 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
744#elif defined(RT_ARCH_ARM64)
745 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
746 IEMNATIVE_REG_FIXED_TMP0);
747#else
748# error "port me"
749#endif
750 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
751 return off;
752}
753
754
755/**
756 * Emits a store of a GPR value to a 32-bit VCpu field.
757 */
758DECL_INLINE_THROW(uint32_t)
759iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
760{
761#ifdef RT_ARCH_AMD64
762 /* mov mem32, reg32 */
763 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
764 if (iGpr >= 8)
765 pbCodeBuf[off++] = X86_OP_REX_R;
766 pbCodeBuf[off++] = 0x89;
767 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
769
770#elif defined(RT_ARCH_ARM64)
771 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
772
773#else
774# error "port me"
775#endif
776 return off;
777}
778
779
780/**
781 * Emits a store of a GPR value to a 16-bit VCpu field.
782 */
783DECL_INLINE_THROW(uint32_t)
784iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
785{
786#ifdef RT_ARCH_AMD64
787 /* mov mem16, reg16 */
788 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
789 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
790 if (iGpr >= 8)
791 pbCodeBuf[off++] = X86_OP_REX_R;
792 pbCodeBuf[off++] = 0x89;
793 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
794 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
795
796#elif defined(RT_ARCH_ARM64)
797 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));
798
799#else
800# error "port me"
801#endif
802 return off;
803}
804
805
806/**
807 * Emits a store of a GPR value to an 8-bit VCpu field.
808 */
809DECL_INLINE_THROW(uint32_t)
810iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
811{
812#ifdef RT_ARCH_AMD64
813 /* mov mem8, reg8 */
814 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
815 if (iGpr >= 8)
816 pbCodeBuf[off++] = X86_OP_REX_R;
817 pbCodeBuf[off++] = 0x88;
818 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
820
821#elif defined(RT_ARCH_ARM64)
822 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
823
824#else
825# error "port me"
826#endif
827 return off;
828}
829
830
831/**
832 * Emits a store of an immediate value to a 32-bit VCpu field.
833 *
834 * @note ARM64: Will allocate temporary registers.
835 */
836DECL_FORCE_INLINE_THROW(uint32_t)
837iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
838{
839#ifdef RT_ARCH_AMD64
840 /* mov mem32, imm32 */
841 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
842 pCodeBuf[off++] = 0xc7;
843 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
844 pCodeBuf[off++] = RT_BYTE1(uImm);
845 pCodeBuf[off++] = RT_BYTE2(uImm);
846 pCodeBuf[off++] = RT_BYTE3(uImm);
847 pCodeBuf[off++] = RT_BYTE4(uImm);
848 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
849
850#elif defined(RT_ARCH_ARM64)
851 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
852 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
853 if (idxRegImm != ARMV8_A64_REG_XZR)
854 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
855
856#else
857# error "port me"
858#endif
859 return off;
860}
861
862
863
864/**
865 * Emits a store of an immediate value to a 16-bit VCpu field.
866 *
867 * @note ARM64: @a idxTmp1 is always required! Whether @a idxTmp2 is needed depends
868 * on whether the offset can be encoded as an immediate or not. The @a offVCpu immediate
869 * range is 0..8190 bytes from VMCPU and the same from CPUMCPU.
870 */
871DECL_FORCE_INLINE_THROW(uint32_t)
872iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
873 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
874{
875#ifdef RT_ARCH_AMD64
876 /* mov mem16, imm16 */
877 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
878 pCodeBuf[off++] = 0xc7;
879 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
880 pCodeBuf[off++] = RT_BYTE1(uImm);
881 pCodeBuf[off++] = RT_BYTE2(uImm);
882 RT_NOREF(idxTmp1, idxTmp2);
883
884#elif defined(RT_ARCH_ARM64)
885 if (idxTmp1 != UINT8_MAX)
886 {
887 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
888 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
889 sizeof(uint16_t), idxTmp2);
890 }
891 else
892# ifdef IEM_WITH_THROW_CATCH
893 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
894# else
895 AssertReleaseFailedStmt(off = UINT32_MAX);
896# endif
897
898#else
899# error "port me"
900#endif
901 return off;
902}
903
904
905/**
906 * Emits a store of an immediate value to a 8-bit VCpu field.
907 */
908DECL_INLINE_THROW(uint32_t)
909iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
910{
911#ifdef RT_ARCH_AMD64
912 /* mov mem8, imm8 */
913 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
914 pbCodeBuf[off++] = 0xc6;
915 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
916 pbCodeBuf[off++] = bImm;
917 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
918
919#elif defined(RT_ARCH_ARM64)
920 /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
921 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
922 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
923 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
924
925#else
926# error "port me"
927#endif
928 return off;
929}
930
931
932/**
933 * Emits loading the effective address of a VCpu field into a GPR.
934 */
935DECL_INLINE_THROW(uint32_t)
936iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
937{
938#ifdef RT_ARCH_AMD64
939 /* lea gprdst, [rbx + offDisp] */
940 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
941 if (iGprDst < 8)
942 pbCodeBuf[off++] = X86_OP_REX_W;
943 else
944 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
945 pbCodeBuf[off++] = 0x8d;
946 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);
947
948#elif defined(RT_ARCH_ARM64)
949 if (offVCpu < (unsigned)_4K)
950 {
951 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
952 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
953 }
954 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
955 {
956 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
957 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
958 offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
959 }
960 else
961 {
962 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
963 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
964 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
965 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, iGprDst);
966 }
967
968#else
969# error "port me"
970#endif
971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
972 return off;
973}
974
975
976/** This is just a typesafe alternative to RT_UOFFSETOF. */
977DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
978{
979 uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
980 Assert(off < sizeof(VMCPU));
981 return off;
982}
983
984
985/** This is just a typesafe alternative to RT_UOFFSETOF. */
986DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
987{
988 uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
989 Assert(off < sizeof(VMCPU));
990 return off;
991}
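
/* Editorial usage sketch (the counter name is purely illustrative): these
   helpers pair with the counter emitters further down, keeping the offset
   computation typesafe:
   @code
        off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
                                                iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomething));
   @endcode
   where idxTmp1/idxTmp2 are temporary registers from the allocator (only
   consumed on ARM64). */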
992
993
994/**
995 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
996 *
997 * @note The two temp registers are not required for AMD64. ARM64 always
998 * requires the first, and the 2nd is needed if the offset cannot be
999 * encoded as an immediate.
1000 */
1001DECL_FORCE_INLINE(uint32_t)
1002iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1003{
1004#ifdef RT_ARCH_AMD64
1005 /* inc qword [pVCpu + off] */
1006 pCodeBuf[off++] = X86_OP_REX_W;
1007 pCodeBuf[off++] = 0xff;
1008 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1009 RT_NOREF(idxTmp1, idxTmp2);
1010
1011#elif defined(RT_ARCH_ARM64)
1012 /* Determine how we're to access pVCpu first. */
1013 uint32_t const cbData = sizeof(STAMCOUNTER);
1014 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
1015 {
1016 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1017 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1018 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1019 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1020 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1021 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1022 }
1023 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
1024 {
1025 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1026 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1027 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1028 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1029 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1030 }
1031 else
1032 {
1033 /* The offset is too large, so we must load it into a register and use
1034 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
1035 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1036 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1037 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1038 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1039 }
1040
1041#else
1042# error "port me"
1043#endif
1044 return off;
1045}
1046
1047
1048/**
1049 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1050 *
1051 * @note The two temp registers are not required for AMD64. ARM64 always
1052 * requires the first, and the 2nd is needed if the offset cannot be
1053 * encoded as an immediate.
1054 */
1055DECL_FORCE_INLINE(uint32_t)
1056iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1057{
1058#ifdef RT_ARCH_AMD64
1059 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
1060#elif defined(RT_ARCH_ARM64)
1061 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1062#else
1063# error "port me"
1064#endif
1065 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1066 return off;
1067}
1068
1069
1070/**
1071 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1072 *
1073 * @note The two temp registers are not required for AMD64. ARM64 always
1074 * requires the first, and the 2nd is needed if the offset cannot be
1075 * encoded as an immediate.
1076 */
1077DECL_FORCE_INLINE(uint32_t)
1078iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1079{
1080 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1081#ifdef RT_ARCH_AMD64
1082 /* inc dword [pVCpu + offVCpu] */
1083 pCodeBuf[off++] = 0xff;
1084 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1085 RT_NOREF(idxTmp1, idxTmp2);
1086
1087#elif defined(RT_ARCH_ARM64)
1088 /* Determine how we're to access pVCpu first. */
1089 uint32_t const cbData = sizeof(uint32_t);
1090 if (offVCpu < (unsigned)(_4K * cbData))
1091 {
1092 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1093 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1094 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1095 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1096 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1097 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1098 }
1099 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1100 {
1101 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1102 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1103 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1104 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1105 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1106 }
1107 else
1108 {
1109 /* The offset is too large, so we must load it into a register and use
1110 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1111 of the instruction if that'll reduce the constant to 16-bits. */
1112 if (offVCpu / cbData < (unsigned)UINT16_MAX)
1113 {
1114 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
1115 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1116 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1117 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1118 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1119 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1120 }
1121 else
1122 {
1123 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1124 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1125 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1126 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1127 }
1128 }
1129
1130#else
1131# error "port me"
1132#endif
1133 return off;
1134}
1135
1136
1137/**
1138 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1139 *
1140 * @note The two temp registers are not required for AMD64. ARM64 always
1141 * requires the first, and the 2nd is needed if the offset cannot be
1142 * encoded as an immediate.
1143 */
1144DECL_FORCE_INLINE(uint32_t)
1145iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1146{
1147#ifdef RT_ARCH_AMD64
1148 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
1149#elif defined(RT_ARCH_ARM64)
1150 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1151#else
1152# error "port me"
1153#endif
1154 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1155 return off;
1156}
1157
1158
1159/**
1160 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
1161 *
1162 * @note May allocate temporary registers (not AMD64).
1163 */
1164DECL_FORCE_INLINE(uint32_t)
1165iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1166{
1167 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1168#ifdef RT_ARCH_AMD64
1169 /* or dword [pVCpu + offVCpu], imm8/32 */
1170 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1171 if (fMask < 0x80)
1172 {
1173 pCodeBuf[off++] = 0x83;
1174 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1175 pCodeBuf[off++] = (uint8_t)fMask;
1176 }
1177 else
1178 {
1179 pCodeBuf[off++] = 0x81;
1180 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1181 pCodeBuf[off++] = RT_BYTE1(fMask);
1182 pCodeBuf[off++] = RT_BYTE2(fMask);
1183 pCodeBuf[off++] = RT_BYTE3(fMask);
1184 pCodeBuf[off++] = RT_BYTE4(fMask);
1185 }
1186
1187#elif defined(RT_ARCH_ARM64)
1188 /* If the constant is unwieldy we'll need a register to hold it as well. */
1189 uint32_t uImmSizeLen, uImmRotate;
1190 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1191 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1192
1193 /* We need a temp register for holding the member value we're modifying. */
1194 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1195
1196 /* Determine how we're to access pVCpu first. */
1197 uint32_t const cbData = sizeof(uint32_t);
1198 if (offVCpu < (unsigned)(_4K * cbData))
1199 {
1200 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1201 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1202 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1203 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1204 if (idxTmpMask == UINT8_MAX)
1205 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1206 else
1207 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1208 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1209 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1210 }
1211 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1212 {
1213 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1214 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1215 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1216 if (idxTmpMask == UINT8_MAX)
1217 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1218 else
1219 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1220 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1221 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1222 }
1223 else
1224 {
1225 /* The offset is too large, so we must load it into a register and use
1226 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1227 of the instruction if that'll reduce the constant to 16-bits. */
1228 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1229 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1230 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1231 if (fShifted)
1232 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1233 else
1234 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1235
1236 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1237 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1238
1239 if (idxTmpMask == UINT8_MAX)
1240 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1241 else
1242 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1243
1244 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1245 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1246 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1247 }
1248 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1249 if (idxTmpMask != UINT8_MAX)
1250 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1251
1252#else
1253# error "port me"
1254#endif
1255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1256 return off;
1257}
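
/* Editorial sketch (assumed encoding): on AMD64 the OR above is a single
   read-modify-write on the VMCPU member, e.g. for a small mask
   @code
        or dword [rbx+disp], 0x04       ; 0x83 /1 ib form since the mask fits in 7 bits
   @endcode
   while the ARM64 path loads the member into a temp, ORs it (using a bitmask
   immediate when the mask is encodable) and stores it back. */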
1258
1259
1260/**
1261 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1262 *
1263 * @note May allocate temporary registers (not AMD64).
1264 */
1265DECL_FORCE_INLINE(uint32_t)
1266iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1267{
1268 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1269#ifdef RT_ARCH_AMD64
1270 /* and dword [pVCpu + offVCpu], imm8/32 */
1271 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1272 if (fMask < 0x80)
1273 {
1274 pCodeBuf[off++] = 0x83;
1275 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1276 pCodeBuf[off++] = (uint8_t)fMask;
1277 }
1278 else
1279 {
1280 pCodeBuf[off++] = 0x81;
1281 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1282 pCodeBuf[off++] = RT_BYTE1(fMask);
1283 pCodeBuf[off++] = RT_BYTE2(fMask);
1284 pCodeBuf[off++] = RT_BYTE3(fMask);
1285 pCodeBuf[off++] = RT_BYTE4(fMask);
1286 }
1287
1288#elif defined(RT_ARCH_ARM64)
1289 /* If the constant is unwieldy we'll need a register to hold it as well. */
1290 uint32_t uImmSizeLen, uImmRotate;
1291 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1292 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1293
1294 /* We need a temp register for holding the member value we're modifying. */
1295 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1296
1297 /* Determine how we're to access pVCpu first. */
1298 uint32_t const cbData = sizeof(uint32_t);
1299 if (offVCpu < (unsigned)(_4K * cbData))
1300 {
1301 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1302 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1303 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1304 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1305 if (idxTmpMask == UINT8_MAX)
1306 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1307 else
1308 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1309 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1310 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1311 }
1312 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1313 {
1314 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1315 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1316 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1317 if (idxTmpMask == UINT8_MAX)
1318 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1319 else
1320 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1321 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1322 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1323 }
1324 else
1325 {
1326 /* The offset is too large, so we must load it into a register and use
1327 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1328 of the instruction if that'll reduce the constant to 16-bits. */
1329 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1330 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1331 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1332 if (fShifted)
1333 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1334 else
1335 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1336
1337 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1338 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1339
1340 if (idxTmpMask == UINT8_MAX)
1341 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1342 else
1343 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1344
1345 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1346 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1347 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1348 }
1349 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1350 if (idxTmpMask != UINT8_MAX)
1351 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1352
1353#else
1354# error "port me"
1355#endif
1356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1357 return off;
1358}
1359
1360
1361/**
1362 * Emits a gprdst = gprsrc load.
1363 */
1364DECL_FORCE_INLINE(uint32_t)
1365iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1366{
1367#ifdef RT_ARCH_AMD64
1368 /* mov gprdst, gprsrc */
1369 if ((iGprDst | iGprSrc) >= 8)
1370 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1371 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1372 : X86_OP_REX_W | X86_OP_REX_R;
1373 else
1374 pCodeBuf[off++] = X86_OP_REX_W;
1375 pCodeBuf[off++] = 0x8b;
1376 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1377
1378#elif defined(RT_ARCH_ARM64)
1379 /* mov dst, src; alias for: orr dst, xzr, src */
1380 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1381
1382#else
1383# error "port me"
1384#endif
1385 return off;
1386}
1387
1388
1389/**
1390 * Emits a gprdst = gprsrc load.
1391 */
1392DECL_INLINE_THROW(uint32_t)
1393iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1394{
1395#ifdef RT_ARCH_AMD64
1396 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1397#elif defined(RT_ARCH_ARM64)
1398 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1399#else
1400# error "port me"
1401#endif
1402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1403 return off;
1404}
1405
1406
1407/**
1408 * Emits a gprdst = gprsrc[31:0] load.
1409 * @note Bits 63 thru 32 are cleared.
1410 */
1411DECL_FORCE_INLINE(uint32_t)
1412iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1413{
1414#ifdef RT_ARCH_AMD64
1415 /* mov gprdst, gprsrc */
1416 if ((iGprDst | iGprSrc) >= 8)
1417 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1418 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1419 : X86_OP_REX_R;
1420 pCodeBuf[off++] = 0x8b;
1421 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1422
1423#elif defined(RT_ARCH_ARM64)
1424 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1425 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1426
1427#else
1428# error "port me"
1429#endif
1430 return off;
1431}
1432
1433
1434/**
1435 * Emits a gprdst = gprsrc[31:0] load.
1436 * @note Bits 63 thru 32 are cleared.
1437 */
1438DECL_INLINE_THROW(uint32_t)
1439iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1440{
1441#ifdef RT_ARCH_AMD64
1442 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1443#elif defined(RT_ARCH_ARM64)
1444 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1445#else
1446# error "port me"
1447#endif
1448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1449 return off;
1450}
1451
1452
1453/**
1454 * Emits a gprdst = gprsrc[15:0] load.
1455 * @note Bits 63 thru 16 are cleared.
1456 */
1457DECL_INLINE_THROW(uint32_t)
1458iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1459{
1460#ifdef RT_ARCH_AMD64
1461 /* movzx Gv,Ew */
1462 if ((iGprDst | iGprSrc) >= 8)
1463 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1464 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1465 : X86_OP_REX_R;
1466 pCodeBuf[off++] = 0x0f;
1467 pCodeBuf[off++] = 0xb7;
1468 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1469
1470#elif defined(RT_ARCH_ARM64)
1471 /* and gprdst, gprsrc, #0xffff */
1472# if 1
1473 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1474 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1475# else
1476 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1477 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1478# endif
1479
1480#else
1481# error "port me"
1482#endif
1483 return off;
1484}
1485
1486
1487/**
1488 * Emits a gprdst = gprsrc[15:0] load.
1489 * @note Bits 63 thru 16 are cleared.
1490 */
1491DECL_INLINE_THROW(uint32_t)
1492iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1493{
1494#ifdef RT_ARCH_AMD64
1495 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1496#elif defined(RT_ARCH_ARM64)
1497 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1498#else
1499# error "port me"
1500#endif
1501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1502 return off;
1503}
1504
1505
1506/**
1507 * Emits a gprdst = gprsrc[7:0] load.
1508 * @note Bits 63 thru 8 are cleared.
1509 */
1510DECL_FORCE_INLINE(uint32_t)
1511iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1512{
1513#ifdef RT_ARCH_AMD64
1514 /* movzx Gv,Eb */
1515 if (iGprDst >= 8 || iGprSrc >= 8)
1516 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1517 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1518 : X86_OP_REX_R;
1519 else if (iGprSrc >= 4)
1520 pCodeBuf[off++] = X86_OP_REX;
1521 pCodeBuf[off++] = 0x0f;
1522 pCodeBuf[off++] = 0xb6;
1523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1524
1525#elif defined(RT_ARCH_ARM64)
1526 /* and gprdst, gprsrc, #0xff */
1527 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1528 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1529
1530#else
1531# error "port me"
1532#endif
1533 return off;
1534}
1535
1536
1537/**
1538 * Emits a gprdst = gprsrc[7:0] load.
1539 * @note Bits 63 thru 8 are cleared.
1540 */
1541DECL_INLINE_THROW(uint32_t)
1542iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1543{
1544#ifdef RT_ARCH_AMD64
1545 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1546#elif defined(RT_ARCH_ARM64)
1547 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1548#else
1549# error "port me"
1550#endif
1551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1552 return off;
1553}
1554
1555
1556/**
1557 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1558 * @note Bits 63 thru 8 are cleared.
1559 */
1560DECL_INLINE_THROW(uint32_t)
1561iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1562{
1563#ifdef RT_ARCH_AMD64
1564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1565
1566 /* movzx Gv,Ew */
1567 if ((iGprDst | iGprSrc) >= 8)
1568 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1569 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1570 : X86_OP_REX_R;
1571 pbCodeBuf[off++] = 0x0f;
1572 pbCodeBuf[off++] = 0xb7;
1573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1574
1575 /* shr Ev,8 */
1576 if (iGprDst >= 8)
1577 pbCodeBuf[off++] = X86_OP_REX_B;
1578 pbCodeBuf[off++] = 0xc1;
1579 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1580 pbCodeBuf[off++] = 8;
1581
1582#elif defined(RT_ARCH_ARM64)
1583 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1585 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1586
1587#else
1588# error "port me"
1589#endif
1590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1591 return off;
1592}
1593
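/*
 * Example: fetching AH (bits 15:8 of the first GPR) into a zero-extended
 * destination uses the movzx + shr pair above on AMD64 (e.g. "movzx ecx, ax"
 * followed by "shr ecx, 8"), while ARM64 needs just the single bitfield
 * extract "ubfx w1, w0, #8, #8".  Either way bits 63 thru 8 of the
 * destination end up cleared.
 */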
1594
1595/**
1596 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1597 */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1600{
1601#ifdef RT_ARCH_AMD64
1602 /* movsxd r64, r/m32 */
1603 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1604 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1605 pbCodeBuf[off++] = 0x63;
1606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1607
1608#elif defined(RT_ARCH_ARM64)
1609 /* sxtw dst, src */
1610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1611 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1612
1613#else
1614# error "port me"
1615#endif
1616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1617 return off;
1618}
1619
1620
1621/**
1622 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1623 */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1626{
1627#ifdef RT_ARCH_AMD64
1628 /* movsx r64, r/m16 */
1629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xbf;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635#elif defined(RT_ARCH_ARM64)
1636 /* sxth dst, src */
1637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1638 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1639
1640#else
1641# error "port me"
1642#endif
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644 return off;
1645}
1646
1647
1648/**
1649 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1650 */
1651DECL_INLINE_THROW(uint32_t)
1652iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1653{
1654#ifdef RT_ARCH_AMD64
1655 /* movsx r32, r/m16 */
1656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1657 if (iGprDst >= 8 || iGprSrc >= 8)
1658 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1659 pbCodeBuf[off++] = 0x0f;
1660 pbCodeBuf[off++] = 0xbf;
1661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 /* sxth dst32, src */
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1666 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1667
1668#else
1669# error "port me"
1670#endif
1671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1672 return off;
1673}
1674
1675
1676/**
1677 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1678 */
1679DECL_INLINE_THROW(uint32_t)
1680iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /* movsx r64, r/m8 */
1684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1686 pbCodeBuf[off++] = 0x0f;
1687 pbCodeBuf[off++] = 0xbe;
1688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1689
1690#elif defined(RT_ARCH_ARM64)
1691 /* sxtb dst, src */
1692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1693 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1694
1695#else
1696# error "port me"
1697#endif
1698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1699 return off;
1700}
1701
1702
1703/**
1704 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1705 * @note Bits 63 thru 32 are cleared.
1706 */
1707DECL_INLINE_THROW(uint32_t)
1708iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1709{
1710#ifdef RT_ARCH_AMD64
1711 /* movsx r32, r/m8 */
1712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1713 if (iGprDst >= 8 || iGprSrc >= 8)
1714 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1715 else if (iGprSrc >= 4)
1716 pbCodeBuf[off++] = X86_OP_REX;
1717 pbCodeBuf[off++] = 0x0f;
1718 pbCodeBuf[off++] = 0xbe;
1719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 /* sxtb dst32, src32 */
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1724 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1725
1726#else
1727# error "port me"
1728#endif
1729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1730 return off;
1731}
1732
1733
1734/**
1735 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1736 * @note Bits 63 thru 16 are cleared.
1737 */
1738DECL_INLINE_THROW(uint32_t)
1739iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1740{
1741#ifdef RT_ARCH_AMD64
1742 /* movsx r16, r/m8 */
1743 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1744 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1745 if (iGprDst >= 8 || iGprSrc >= 8)
1746 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1747 else if (iGprSrc >= 4)
1748 pbCodeBuf[off++] = X86_OP_REX;
1749 pbCodeBuf[off++] = 0x0f;
1750 pbCodeBuf[off++] = 0xbe;
1751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1752
1753 /* movzx r32, r/m16 */
1754 if (iGprDst >= 8)
1755 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1756 pbCodeBuf[off++] = 0x0f;
1757 pbCodeBuf[off++] = 0xb7;
1758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1759
1760#elif defined(RT_ARCH_ARM64)
1761 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1763 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1764 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1766
1767#else
1768# error "port me"
1769#endif
1770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1771 return off;
1772}
1773
1774
1775/**
1776 * Emits a gprdst = gprsrc + addend load.
1777 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1778 */
1779#ifdef RT_ARCH_AMD64
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1782 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1783{
1784 Assert(iAddend != 0);
1785
1786 /* lea gprdst, [gprsrc + iAddend] */
1787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1789 pbCodeBuf[off++] = 0x8d;
1790 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1792 return off;
1793}
1794
1795#elif defined(RT_ARCH_ARM64)
1796DECL_INLINE_THROW(uint32_t)
1797iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1798 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1799{
1800 if ((uint32_t)iAddend < 4096)
1801 {
1802 /* add dst, src, uimm12 */
1803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1805 }
1806 else if ((uint32_t)-iAddend < 4096)
1807 {
1808 /* sub dst, src, uimm12 */
1809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1811 }
1812 else
1813 {
1814 Assert(iGprSrc != iGprDst);
1815 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1818 }
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820 return off;
1821}
1822#else
1823# error "port me"
1824#endif
1825
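/*
 * Illustration of the addend strategy above (ARM64): an addend of 0x20 becomes
 * a single "add x1, x0, #0x20" and -0x20 a single "sub x1, x0, #0x20", both
 * fitting the 12-bit unsigned immediate, whereas something like 0x12345 does
 * not and is therefore materialized in the destination register first and
 * added with "add x1, x0, x1"; that is why this path asserts that source and
 * destination differ.
 */
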
1826/**
1827 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1828 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1829 */
1830#ifdef RT_ARCH_AMD64
1831DECL_INLINE_THROW(uint32_t)
1832iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1833 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1834#else
1835DECL_INLINE_THROW(uint32_t)
1836iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1837 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1838#endif
1839{
1840 if (iAddend != 0)
1841 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1842 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1843}
1844
1845
1846/**
1847 * Emits a gprdst = gprsrc32 + addend load.
1848 * @note Bits 63 thru 32 are cleared.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1852 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1853{
1854 Assert(iAddend != 0);
1855
1856#ifdef RT_ARCH_AMD64
1857 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1860 if ((iGprDst | iGprSrc) >= 8)
1861 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1862 pbCodeBuf[off++] = 0x8d;
1863 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1864
1865#elif defined(RT_ARCH_ARM64)
1866 if ((uint32_t)iAddend < 4096)
1867 {
1868 /* add dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1871 }
1872 else if ((uint32_t)-iAddend < 4096)
1873 {
1874 /* sub dst, src, uimm12 */
1875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1877 }
1878 else
1879 {
1880 Assert(iGprSrc != iGprDst);
1881 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1884 }
1885
1886#else
1887# error "port me"
1888#endif
1889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1890 return off;
1891}
1892
1893
1894/**
1895 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1896 */
1897DECL_INLINE_THROW(uint32_t)
1898iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1899 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1900{
1901 if (iAddend != 0)
1902 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1903 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1904}
1905
1906
1907/**
1908 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1909 * destination.
1910 */
1911DECL_FORCE_INLINE(uint32_t)
1912iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1913{
1914#ifdef RT_ARCH_AMD64
1915 /* mov reg16, r/m16 */
1916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1917 if (idxDst >= 8 || idxSrc >= 8)
1918 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1919 pCodeBuf[off++] = 0x8b;
1920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1921
1922#elif defined(RT_ARCH_ARM64)
1923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1924 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1925
1926#else
1927# error "Port me!"
1928#endif
1929 return off;
1930}
1931
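/*
 * Example of the 16-bit merge semantics with illustrative values: given
 * idxDst = 0xfeedfacecafe1234 and idxSrc holding 0x5678 in its low word, the
 * sequences above leave idxDst = 0xfeedfacecafe5678.  Only bits 15:0 are
 * replaced; everything above is preserved, unlike the zero-extending
 * gprsrc[15:0] loads earlier in this file.
 */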
1932
1933/**
1934 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1935 * destination.
1936 */
1937DECL_INLINE_THROW(uint32_t)
1938iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1939{
1940#ifdef RT_ARCH_AMD64
1941 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1942#elif defined(RT_ARCH_ARM64)
1943 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1944#else
1945# error "Port me!"
1946#endif
1947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1948 return off;
1949}
1950
1951
1952#ifdef RT_ARCH_AMD64
1953/**
1954 * Common bit of iemNativeEmitLoadGprByBp and friends.
1955 */
1956DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1957 PIEMRECOMPILERSTATE pReNativeAssert)
1958{
1959 if (offDisp < 128 && offDisp >= -128)
1960 {
1961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1962 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1963 }
1964 else
1965 {
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1967 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1968 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1969 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1970 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1971 }
1972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1973 return off;
1974}
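
/*
 * Illustration of the two ModRM forms chosen above: with RBP as base register,
 * mod=1 carries an 8-bit displacement and mod=2 a 32-bit one.  A caller
 * emitting the equivalent of "mov rax, [rbp - 8]" thus ends up with
 *
 *      48 8b 45 f8     ; REX.W, opcode 8Bh, ModRM 45h (mod=1, reg=rax, rm=rbp), disp8 -8
 *
 * while any displacement outside -128..127 switches to the 4-byte form.
 */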
1975#elif defined(RT_ARCH_ARM64)
1976/**
1977 * Common bit of iemNativeEmitLoadGprByBp and friends.
1978 */
1979DECL_FORCE_INLINE_THROW(uint32_t)
1980iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1981 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1982{
1983 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1984 {
1985 /* str w/ unsigned imm12 (scaled) */
1986 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1988 }
1989 else if (offDisp >= -256 && offDisp <= 256)
1990 {
1991 /* stur w/ signed imm9 (unscaled) */
1992 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1993 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1994 }
1995 else
1996 {
1997 /* Use temporary indexing register. */
1998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2001 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2002 }
2003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2004 return off;
2005}
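
/*
 * Illustration of the three addressing forms above: a non-negative, naturally
 * aligned displacement uses the scaled unsigned form, e.g. offDisp=0x40 with
 * cbData=8 becomes "ldr x0, [x29, #0x40]" (imm12 = 0x40 / 8 = 8); the typical
 * negative stack-frame offsets fall back to the unscaled LDUR/STUR form while
 * they are small (e.g. offDisp=-0x20 gives "ldur x0, [x29, #-0x20]"); anything
 * else loads the displacement into IEMNATIVE_REG_FIXED_TMP0 and uses the
 * register-indexed form with sign-extension.
 */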
2006#endif
2007
2008
2009/**
2010 * Emits a 64-bit GPR load instruction with a BP relative source address.
2011 */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2014{
2015#ifdef RT_ARCH_AMD64
2016 /* mov gprdst, qword [rbp + offDisp] */
2017 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2018 if (iGprDst < 8)
2019 pbCodeBuf[off++] = X86_OP_REX_W;
2020 else
2021 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2022 pbCodeBuf[off++] = 0x8b;
2023 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2024
2025#elif defined(RT_ARCH_ARM64)
2026 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2027
2028#else
2029# error "port me"
2030#endif
2031}
2032
2033
2034/**
2035 * Emits a 32-bit GPR load instruction with a BP relative source address.
2036 * @note Bits 63 thru 32 of the GPR will be cleared.
2037 */
2038DECL_INLINE_THROW(uint32_t)
2039iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2040{
2041#ifdef RT_ARCH_AMD64
2042 /* mov gprdst, dword [rbp + offDisp] */
2043 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2044 if (iGprDst >= 8)
2045 pbCodeBuf[off++] = X86_OP_REX_R;
2046 pbCodeBuf[off++] = 0x8b;
2047 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2048
2049#elif defined(RT_ARCH_ARM64)
2050 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2051
2052#else
2053# error "port me"
2054#endif
2055}
2056
2057
2058/**
2059 * Emits a 16-bit GPR load instruction with a BP relative source address.
2060 * @note Bits 63 thru 16 of the GPR will be cleared.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2064{
2065#ifdef RT_ARCH_AMD64
2066 /* movzx gprdst, word [rbp + offDisp] */
2067 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2068 if (iGprDst >= 8)
2069 pbCodeBuf[off++] = X86_OP_REX_R;
2070 pbCodeBuf[off++] = 0x0f;
2071 pbCodeBuf[off++] = 0xb7;
2072 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2073
2074#elif defined(RT_ARCH_ARM64)
2075 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2076
2077#else
2078# error "port me"
2079#endif
2080}
2081
2082
2083/**
2084 * Emits an 8-bit GPR load instruction with a BP relative source address.
2085 * @note Bits 63 thru 8 of the GPR will be cleared.
2086 */
2087DECL_INLINE_THROW(uint32_t)
2088iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2089{
2090#ifdef RT_ARCH_AMD64
2091 /* movzx gprdst, byte [rbp + offDisp] */
2092 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2093 if (iGprDst >= 8)
2094 pbCodeBuf[off++] = X86_OP_REX_R;
2095 pbCodeBuf[off++] = 0x0f;
2096 pbCodeBuf[off++] = 0xb6;
2097 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2098
2099#elif defined(RT_ARCH_ARM64)
2100 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2101
2102#else
2103# error "port me"
2104#endif
2105}
2106
2107
2108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2109/**
2110 * Emits a 128-bit vector register load instruction with a BP relative source address.
2111 */
2112DECL_FORCE_INLINE_THROW(uint32_t)
2113iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2114{
2115#ifdef RT_ARCH_AMD64
2116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2117
2118 /* movdqu reg128, mem128 */
2119 pbCodeBuf[off++] = 0xf3;
2120 if (iVecRegDst >= 8)
2121 pbCodeBuf[off++] = X86_OP_REX_R;
2122 pbCodeBuf[off++] = 0x0f;
2123 pbCodeBuf[off++] = 0x6f;
2124 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2125#elif defined(RT_ARCH_ARM64)
2126 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2127#else
2128# error "port me"
2129#endif
2130}
2131
2132
2133/**
2134 * Emits a 256-bit vector register load instruction with a BP relative source address.
2135 */
2136DECL_FORCE_INLINE_THROW(uint32_t)
2137iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2138{
2139#ifdef RT_ARCH_AMD64
2140 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2141
2142 /* vmovdqu reg256, mem256 */
2143 pbCodeBuf[off++] = X86_OP_VEX2;
2144 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2145 pbCodeBuf[off++] = 0x6f;
2146 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2147#elif defined(RT_ARCH_ARM64)
2148 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2149 Assert(!(iVecRegDst & 0x1));
2150 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2151 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2152#else
2153# error "port me"
2154#endif
2155}
2156
2157#endif
2158
2159
2160/**
2161 * Emits a load effective address to a GPR with a BP relative source address.
2162 */
2163DECL_INLINE_THROW(uint32_t)
2164iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2165{
2166#ifdef RT_ARCH_AMD64
2167 /* lea gprdst, [rbp + offDisp] */
2168 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2169 if (iGprDst < 8)
2170 pbCodeBuf[off++] = X86_OP_REX_W;
2171 else
2172 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2173 pbCodeBuf[off++] = 0x8d;
2174 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2175
2176#elif defined(RT_ARCH_ARM64)
2177 if ((uint32_t)offDisp < (unsigned)_4K)
2178 {
2179 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2181 }
2182 else if ((uint32_t)-offDisp < (unsigned)_4K)
2183 {
2184 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2185 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2186 }
2187 else
2188 {
2189 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2190 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2191 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2192 if (offDisp >= 0)
2193 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2194 else
2195 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2196 }
2197
2198#else
2199# error "port me"
2200#endif
2201
2202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2203 return off;
2204}
2205
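/*
 * Example: producing the address of a stack slot at BP - 0x30 yields
 * "lea rax, [rbp - 0x30]" on AMD64 and "sub x0, x29, #0x30" on ARM64 (the
 * second branch above, since the displacement is negative but within 4K).
 */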
2206
2207/**
2208 * Emits a 64-bit GPR store with a BP relative destination address.
2209 *
2210 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2211 */
2212DECL_INLINE_THROW(uint32_t)
2213iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2214{
2215#ifdef RT_ARCH_AMD64
2216 /* mov qword [rbp + offDisp], gprdst */
2217 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2218 if (iGprSrc < 8)
2219 pbCodeBuf[off++] = X86_OP_REX_W;
2220 else
2221 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2222 pbCodeBuf[off++] = 0x89;
2223 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2224
2225#elif defined(RT_ARCH_ARM64)
2226 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2227 {
2228 /* str w/ unsigned imm12 (scaled) */
2229 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2230 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2231 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2232 }
2233 else if (offDisp >= -256 && offDisp <= 256)
2234 {
2235 /* stur w/ signed imm9 (unscaled) */
2236 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2237 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2238 }
2239 else if ((uint32_t)-offDisp < (unsigned)_4K)
2240 {
2241 /* Use temporary indexing register w/ sub uimm12. */
2242 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2243 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2244 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2245 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2246 }
2247 else
2248 {
2249 /* Use temporary indexing register. */
2250 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2251 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2252 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2253 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2254 }
2255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2256 return off;
2257
2258#else
2259# error "Port me!"
2260#endif
2261}
2262
2263
2264/**
2265 * Emits a 64-bit immediate store with a BP relative destination address.
2266 *
2267 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2268 */
2269DECL_INLINE_THROW(uint32_t)
2270iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2271{
2272#ifdef RT_ARCH_AMD64
2273 if ((int64_t)uImm64 == (int32_t)uImm64)
2274 {
2275 /* mov qword [rbp + offDisp], imm32 - sign extended */
2276 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2277 pbCodeBuf[off++] = X86_OP_REX_W;
2278 pbCodeBuf[off++] = 0xc7;
2279 if (offDisp < 128 && offDisp >= -128)
2280 {
2281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2282 pbCodeBuf[off++] = (uint8_t)offDisp;
2283 }
2284 else
2285 {
2286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2287 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2288 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2289 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2290 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2291 }
2292 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2293 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2294 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2295 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2297 return off;
2298 }
2299#endif
2300
2301 /* Load tmp0, imm64; Store tmp to bp+disp. */
2302 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2303 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2304}
2305
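/*
 * Example of the sign-extension test above: a constant qualifies for the
 * single "mov qword [rbp + disp], imm32" form when casting it to int32_t and
 * back is lossless, e.g.
 *
 *      0x0000000012345678  -> yes
 *      0xffffffff80000000  -> yes (sign-extension of 80000000h)
 *      0x0000000080000000  -> no  (would be sign-extended to ffffffff80000000h)
 *
 * everything else takes the IEMNATIVE_REG_FIXED_TMP0 fallback below.
 */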
2306
2307#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2308/**
2309 * Emits a 128-bit vector register store with a BP relative destination address.
2310 *
2311 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2312 */
2313DECL_INLINE_THROW(uint32_t)
2314iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2315{
2316#ifdef RT_ARCH_AMD64
2317 /* movdqu [rbp + offDisp], vecsrc */
2318 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2319 pbCodeBuf[off++] = 0xf3;
2320 if (iVecRegSrc >= 8)
2321 pbCodeBuf[off++] = X86_OP_REX_R;
2322 pbCodeBuf[off++] = 0x0f;
2323 pbCodeBuf[off++] = 0x7f;
2324 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2325
2326#elif defined(RT_ARCH_ARM64)
2327 if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2328 {
2329 /* str w/ unsigned imm12 (scaled) */
2330 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2331 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2332 ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2333 }
2334 else if (offDisp >= -256 && offDisp <= 256)
2335 {
2336 /* stur w/ signed imm9 (unscaled) */
2337 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2338 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2339 }
2340 else if ((uint32_t)-offDisp < (unsigned)_4K)
2341 {
2342 /* Use temporary indexing register w/ sub uimm12. */
2343 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2344 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2345 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2346 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2347 }
2348 else
2349 {
2350 /* Use temporary indexing register. */
2351 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2352 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2353 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2354 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2355 }
2356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2357 return off;
2358
2359#else
2360# error "Port me!"
2361#endif
2362}
2363
2364
2365/**
2366 * Emits a 256-bit vector register store with a BP relative destination address.
2367 *
2368 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2369 */
2370DECL_INLINE_THROW(uint32_t)
2371iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2372{
2373#ifdef RT_ARCH_AMD64
2374 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2375
2376 /* vmovdqu mem256, reg256 */
2377 pbCodeBuf[off++] = X86_OP_VEX2;
2378 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2379 pbCodeBuf[off++] = 0x7f;
2380 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2381#elif defined(RT_ARCH_ARM64)
2382 Assert(!(iVecRegSrc & 0x1));
2383 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2384 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2385#else
2386# error "Port me!"
2387#endif
2388}
2389#endif
2390
2391#if defined(RT_ARCH_ARM64)
2392
2393/**
2394 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2395 *
2396 * @note Odd and large @a offDisp values require a temporary, unless it's a
2397 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2398 * caller does not heed this.
2399 *
2400 * @note DON'T try this with prefetch.
2401 */
2402DECL_FORCE_INLINE_THROW(uint32_t)
2403iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2404 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2405{
2406 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2407 {
2408 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2409 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2410 }
2411 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2412 && iGprReg != iGprBase)
2413 || iGprTmp != UINT8_MAX)
2414 {
2415 /* The offset is too large, so we must load it into a register and use
2416 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2417 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2418 if (iGprTmp == UINT8_MAX)
2419 iGprTmp = iGprReg;
2420 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2421 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2422 }
2423 else
2424# ifdef IEM_WITH_THROW_CATCH
2425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2426# else
2427 AssertReleaseFailedStmt(off = UINT32_MAX);
2428# endif
2429 return off;
2430}
2431
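/*
 * Usage sketch (hypothetical call, idxReg standing in for whatever register is
 * being loaded): when the destination doubles as the base register and the
 * displacement cannot be encoded directly, the caller must supply a scratch
 * register and have ensured enough buffer space for the immediate load:
 *
 *      off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, idxReg, idxReg, 0x12340,
 *                                        kArmv8A64InstrLdStType_Ld_Dword,
 *                                        sizeof(uint64_t), IEMNATIVE_REG_FIXED_TMP0);
 *
 * Stores always need the scratch once the offset leaves the scaled unsigned
 * range, since the register holding the value cannot be clobbered.
 */
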
2432/**
2433 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2434 */
2435DECL_FORCE_INLINE_THROW(uint32_t)
2436iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2437 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2438{
2439 /*
2440 * There are a couple of ldr variants that take an immediate offset, so
2441 * try to use those if we can; otherwise we have to use the temporary
2442 * register to help with the addressing.
2443 */
2444 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2445 {
2446 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2447 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2448 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2449 }
2450 else
2451 {
2452 /* The offset is too large, so we must load it into a register and use
2453 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2454 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2455 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2456
2457 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2458 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2459
2460 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2461 }
2462 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2463 return off;
2464}
2465
2466# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2467/**
2468 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2469 *
2470 * @note Odd and large @a offDisp values require a temporary, unless it's a
2471 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2472 * caller does not heed this.
2473 *
2474 * @note DON'T try this with prefetch.
2475 */
2476DECL_FORCE_INLINE_THROW(uint32_t)
2477iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2478 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2479{
2480 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2481 {
2482 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2483 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2484 }
2485 else if ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2486 || iGprTmp != UINT8_MAX)
2487 {
2488 /* The offset is too large, so we must load it into a register and use
2489 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2490 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2491 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2492 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2493 }
2494 else
2495# ifdef IEM_WITH_THROW_CATCH
2496 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2497# else
2498 AssertReleaseFailedStmt(off = UINT32_MAX);
2499# endif
2500 return off;
2501}
2502# endif
2503
2504
2505/**
2506 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2507 */
2508DECL_FORCE_INLINE_THROW(uint32_t)
2509iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2510 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2511{
2512 /*
2513 * There are a couple of ldr variants that take an immediate offset, so
2514 * try to use those if we can; otherwise we have to use the temporary
2515 * register to help with the addressing.
2516 */
2517 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2518 {
2519 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2520 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2521 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2522 }
2523 else
2524 {
2525 /* The offset is too large, so we must load it into a register and use
2526 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2527 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2528 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2529
2530 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2531 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2532
2533 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2534 }
2535 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2536 return off;
2537}
2538#endif /* RT_ARCH_ARM64 */
2539
2540/**
2541 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2542 *
2543 * @note ARM64: Misaligned @a offDisp values and values not in the
2544 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2545 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2546 * does not heed this.
2547 */
2548DECL_FORCE_INLINE_THROW(uint32_t)
2549iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2550 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2551{
2552#ifdef RT_ARCH_AMD64
2553 /* mov reg64, mem64 */
2554 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2555 pCodeBuf[off++] = 0x8b;
2556 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2557 RT_NOREF(iGprTmp);
2558
2559#elif defined(RT_ARCH_ARM64)
2560 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2561 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2562
2563#else
2564# error "port me"
2565#endif
2566 return off;
2567}
2568
2569
2570/**
2571 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2572 */
2573DECL_INLINE_THROW(uint32_t)
2574iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2575{
2576#ifdef RT_ARCH_AMD64
2577 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2578 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2579
2580#elif defined(RT_ARCH_ARM64)
2581 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2582
2583#else
2584# error "port me"
2585#endif
2586 return off;
2587}
2588
2589
2590/**
2591 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2592 *
2593 * @note ARM64: Misaligned @a offDisp values and values not in the
2594 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2595 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2596 * caller does not heed this.
2597 *
2598 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2599 */
2600DECL_FORCE_INLINE_THROW(uint32_t)
2601iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2602 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2603{
2604#ifdef RT_ARCH_AMD64
2605 /* mov reg32, mem32 */
2606 if (iGprDst >= 8 || iGprBase >= 8)
2607 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2608 pCodeBuf[off++] = 0x8b;
2609 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2610 RT_NOREF(iGprTmp);
2611
2612#elif defined(RT_ARCH_ARM64)
2613 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2614 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2615
2616#else
2617# error "port me"
2618#endif
2619 return off;
2620}
2621
2622
2623/**
2624 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2625 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2626 */
2627DECL_INLINE_THROW(uint32_t)
2628iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2629{
2630#ifdef RT_ARCH_AMD64
2631 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2633
2634#elif defined(RT_ARCH_ARM64)
2635 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2636
2637#else
2638# error "port me"
2639#endif
2640 return off;
2641}
2642
2643
2644/**
2645 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2646 * sign-extending the value to 64 bits.
2647 *
2648 * @note ARM64: Misaligned @a offDisp values and values not in the
2649 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2650 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2651 * caller does not heed this.
2652 */
2653DECL_FORCE_INLINE_THROW(uint32_t)
2654iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2655 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2656{
2657#ifdef RT_ARCH_AMD64
2658 /* movsxd reg64, mem32 */
2659 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2660 pCodeBuf[off++] = 0x63;
2661 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2662 RT_NOREF(iGprTmp);
2663
2664#elif defined(RT_ARCH_ARM64)
2665 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2666 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2667
2668#else
2669# error "port me"
2670#endif
2671 return off;
2672}
2673
2674
2675/**
2676 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2677 *
2678 * @note ARM64: Misaligned @a offDisp values and values not in the
2679 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2680 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2681 * caller does not heed this.
2682 *
2683 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2684 */
2685DECL_FORCE_INLINE_THROW(uint32_t)
2686iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2687 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2688{
2689#ifdef RT_ARCH_AMD64
2690 /* movzx reg32, mem16 */
2691 if (iGprDst >= 8 || iGprBase >= 8)
2692 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2693 pCodeBuf[off++] = 0x0f;
2694 pCodeBuf[off++] = 0xb7;
2695 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2696 RT_NOREF(iGprTmp);
2697
2698#elif defined(RT_ARCH_ARM64)
2699 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2700 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2701
2702#else
2703# error "port me"
2704#endif
2705 return off;
2706}
2707
2708
2709/**
2710 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2711 * sign-extending the value to 64 bits.
2712 *
2713 * @note ARM64: Misaligned @a offDisp values and values not in the
2714 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2715 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2716 * caller does not heed this.
2717 */
2718DECL_FORCE_INLINE_THROW(uint32_t)
2719iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2720 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2721{
2722#ifdef RT_ARCH_AMD64
2723 /* movsx reg64, mem16 */
2724 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2725 pCodeBuf[off++] = 0x0f;
2726 pCodeBuf[off++] = 0xbf;
2727 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2728 RT_NOREF(iGprTmp);
2729
2730#elif defined(RT_ARCH_ARM64)
2731 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2732 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2733
2734#else
2735# error "port me"
2736#endif
2737 return off;
2738}
2739
2740
2741/**
2742 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2743 * sign-extending the value to 32 bits.
2744 *
2745 * @note ARM64: Misaligned @a offDisp values and values not in the
2746 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2747 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2748 * caller does not heed this.
2749 *
2750 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2751 */
2752DECL_FORCE_INLINE_THROW(uint32_t)
2753iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2754 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2755{
2756#ifdef RT_ARCH_AMD64
2757 /* movsx reg32, mem16 */
2758 if (iGprDst >= 8 || iGprBase >= 8)
2759 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2760 pCodeBuf[off++] = 0x0f;
2761 pCodeBuf[off++] = 0xbf;
2762 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2763 RT_NOREF(iGprTmp);
2764
2765#elif defined(RT_ARCH_ARM64)
2766 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2767 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2768
2769#else
2770# error "port me"
2771#endif
2772 return off;
2773}
2774
2775
2776/**
2777 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2778 *
2779 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2780 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2781 * same. Will assert / throw if caller does not heed this.
2782 *
2783 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2784 */
2785DECL_FORCE_INLINE_THROW(uint32_t)
2786iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2787 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2788{
2789#ifdef RT_ARCH_AMD64
2790 /* movzx reg32, mem8 */
2791 if (iGprDst >= 8 || iGprBase >= 8)
2792 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2793 pCodeBuf[off++] = 0x0f;
2794 pCodeBuf[off++] = 0xb6;
2795 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2796 RT_NOREF(iGprTmp);
2797
2798#elif defined(RT_ARCH_ARM64)
2799 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2800 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2801
2802#else
2803# error "port me"
2804#endif
2805 return off;
2806}
2807
2808
2809/**
2810 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2811 * sign-extending the value to 64 bits.
2812 *
2813 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2814 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2815 * same. Will assert / throw if caller does not heed this.
2816 */
2817DECL_FORCE_INLINE_THROW(uint32_t)
2818iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2819 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2820{
2821#ifdef RT_ARCH_AMD64
2822 /* movsx reg64, mem8 */
2823 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2824 pCodeBuf[off++] = 0x0f;
2825 pCodeBuf[off++] = 0xbe;
2826 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2827 RT_NOREF(iGprTmp);
2828
2829#elif defined(RT_ARCH_ARM64)
2830 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2831 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2832
2833#else
2834# error "port me"
2835#endif
2836 return off;
2837}
2838
2839
2840/**
2841 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2842 * sign-extending the value to 32 bits.
2843 *
2844 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2845 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2846 * same. Will assert / throw if caller does not heed this.
2847 *
2848 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2849 */
2850DECL_FORCE_INLINE_THROW(uint32_t)
2851iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2852 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2853{
2854#ifdef RT_ARCH_AMD64
2855 /* movsx reg32, mem8 */
2856 if (iGprDst >= 8 || iGprBase >= 8)
2857 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2858 pCodeBuf[off++] = 0x0f;
2859 pCodeBuf[off++] = 0xbe;
2860 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2861 RT_NOREF(iGprTmp);
2862
2863#elif defined(RT_ARCH_ARM64)
2864 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2865 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2866
2867#else
2868# error "port me"
2869#endif
2870 return off;
2871}
2872
2873
2874/**
2875 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2876 * sign-extending the value to 16 bits.
2877 *
2878 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2879 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2880 * same. Will assert / throw if caller does not heed this.
2881 *
2882 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2883 */
2884DECL_FORCE_INLINE_THROW(uint32_t)
2885iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2886 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2887{
2888#ifdef RT_ARCH_AMD64
2889 /* movsx reg32, mem8 */
2890 if (iGprDst >= 8 || iGprBase >= 8)
2891 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2892 pCodeBuf[off++] = 0x0f;
2893 pCodeBuf[off++] = 0xbe;
2894 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2895# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2896 /* and reg32, 0xffffh */
2897 if (iGprDst >= 8)
2898 pCodeBuf[off++] = X86_OP_REX_B;
2899 pCodeBuf[off++] = 0x81;
2900 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2901 pCodeBuf[off++] = 0xff;
2902 pCodeBuf[off++] = 0xff;
2903 pCodeBuf[off++] = 0;
2904 pCodeBuf[off++] = 0;
2905# else
2906 /* movzx reg32, reg16 */
2907 if (iGprDst >= 8)
2908 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2909 pCodeBuf[off++] = 0x0f;
2910 pCodeBuf[off++] = 0xb7;
2911 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2912# endif
2913 RT_NOREF(iGprTmp);
2914
2915#elif defined(RT_ARCH_ARM64)
2916 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2917 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2918 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2919 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
2920
2921#else
2922# error "port me"
2923#endif
2924 return off;
2925}
2926
2927
2928#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2929/**
2930 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2931 *
2932 * @note ARM64: Misaligned @a offDisp values and values not in the
2933 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2934 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2935 * does not heed this.
2936 */
2937DECL_FORCE_INLINE_THROW(uint32_t)
2938iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2939 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2940{
2941#ifdef RT_ARCH_AMD64
2942 /* movdqu reg128, mem128 */
2943 pCodeBuf[off++] = 0xf3;
2944 if (iVecRegDst >= 8 || iGprBase >= 8)
2945 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2946 pCodeBuf[off++] = 0x0f;
2947 pCodeBuf[off++] = 0x6f;
2948 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
2949 RT_NOREF(iGprTmp);
2950
2951#elif defined(RT_ARCH_ARM64)
2952 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
2953 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
2954
2955#else
2956# error "port me"
2957#endif
2958 return off;
2959}
2960
2961
2962/**
2963 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2964 */
2965DECL_INLINE_THROW(uint32_t)
2966iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
2967{
2968#ifdef RT_ARCH_AMD64
2969 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
2970 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2971
2972#elif defined(RT_ARCH_ARM64)
2973 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2974
2975#else
2976# error "port me"
2977#endif
2978 return off;
2979}
2980
2981
2982/**
2983 * Emits a 256-bit vector register load via a GPR base address with a displacement.
2984 *
2985 * @note ARM64: Misaligned @a offDisp values and values not in the
2986 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2987 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2988 * does not heed this.
2989 */
2990DECL_FORCE_INLINE_THROW(uint32_t)
2991iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2992 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2993{
2994#ifdef RT_ARCH_AMD64
2995 /* vmovdqu reg256, mem256 */
2996 pCodeBuf[off++] = X86_OP_VEX3;
2997 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
2998 | X86_OP_VEX3_BYTE1_X
2999 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3000 | UINT8_C(0x01);
3001 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3002 pCodeBuf[off++] = 0x6f;
3003 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3004 RT_NOREF(iGprTmp);
3005
3006#elif defined(RT_ARCH_ARM64)
3007 Assert(!(iVecRegDst & 0x1));
3008 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3009 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3010 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3011 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3012#else
3013# error "port me"
3014#endif
3015 return off;
3016}
3017
3018
3019/**
3020 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3021 */
3022DECL_INLINE_THROW(uint32_t)
3023iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3024{
3025#ifdef RT_ARCH_AMD64
3026    off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3028
3029#elif defined(RT_ARCH_ARM64)
3030 Assert(!(iVecRegDst & 0x1));
3031 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3032 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3033 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3034 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3035
3036#else
3037# error "port me"
3038#endif
3039 return off;
3040}
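/* Usage sketch (not from the original source; register indexes are placeholders):
   loading a full 256-bit value through a host GPR that already holds the address.
   On ARM64 the destination must be an even vector register index because the
   value is spread over two neighbouring 128-bit registers:
       off = iemNativeEmitLoadVecRegByGprU256(pReNative, off, idxVecRegDst, idxGprAddr, 0);
*/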
3041#endif
3042
3043
3044/**
3045 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3046 *
3047 * @note ARM64: Misaligned @a offDisp values and values not in the
3048 *          -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3049 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3050 * does not heed this.
3051 */
3052DECL_FORCE_INLINE_THROW(uint32_t)
3053iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3054 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3055{
3056#ifdef RT_ARCH_AMD64
3057 /* mov mem64, reg64 */
3058 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3059 pCodeBuf[off++] = 0x89;
3060 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3061 RT_NOREF(iGprTmp);
3062
3063#elif defined(RT_ARCH_ARM64)
3064 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3065 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3066
3067#else
3068# error "port me"
3069#endif
3070 return off;
3071}
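/* Usage sketch (not from the original source; register indexes are placeholders):
   a store with a displacement outside the ARM64 range noted above needs a scratch
   GPR (the last argument), otherwise the emitter asserts / throws:
       off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegBase,
                                            0x12340, idxRegScratch);
*/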
3072
3073
3074/**
3075 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3076 *
3077 * @note ARM64: Misaligned @a offDisp values and values not in the
3078 *          -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3079 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3080 * does not heed this.
3081 */
3082DECL_FORCE_INLINE_THROW(uint32_t)
3083iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3084 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3085{
3086#ifdef RT_ARCH_AMD64
3087 /* mov mem32, reg32 */
3088 if (iGprSrc >= 8 || iGprBase >= 8)
3089 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3090 pCodeBuf[off++] = 0x89;
3091 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3092 RT_NOREF(iGprTmp);
3093
3094#elif defined(RT_ARCH_ARM64)
3095 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3096 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3097
3098#else
3099# error "port me"
3100#endif
3101 return off;
3102}
3103
3104
3105/**
3106 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3107 *
3108 * @note ARM64: Misaligned @a offDisp values and values not in the
3109 *          -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3110 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3111 * does not heed this.
3112 */
3113DECL_FORCE_INLINE_THROW(uint32_t)
3114iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3115 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3116{
3117#ifdef RT_ARCH_AMD64
3118 /* mov mem16, reg16 */
3119 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3120 if (iGprSrc >= 8 || iGprBase >= 8)
3121 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3122 pCodeBuf[off++] = 0x89;
3123 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3124 RT_NOREF(iGprTmp);
3125
3126#elif defined(RT_ARCH_ARM64)
3127 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3128 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3129
3130#else
3131# error "port me"
3132#endif
3133 return off;
3134}
3135
3136
3137/**
3138 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3139 *
3140 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3141 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3142 * same. Will assert / throw if caller does not heed this.
3143 */
3144DECL_FORCE_INLINE_THROW(uint32_t)
3145iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3146 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3147{
3148#ifdef RT_ARCH_AMD64
3149 /* mov mem8, reg8 */
3150 if (iGprSrc >= 8 || iGprBase >= 8)
3151 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3152 else if (iGprSrc >= 4)
3153 pCodeBuf[off++] = X86_OP_REX;
3154 pCodeBuf[off++] = 0x88;
3155 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3156 RT_NOREF(iGprTmp);
3157
3158#elif defined(RT_ARCH_ARM64)
3159 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3160 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3161
3162#else
3163# error "port me"
3164#endif
3165 return off;
3166}
3167
3168
3169/**
3170 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3171 *
3172 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0); on
3173 * AMD64 it depends on the immediate value.
3174 *
3175 * @note ARM64: Misaligned @a offDisp values and values not in the
3176 *          -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3177 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3178 * does not heed this.
3179 */
3180DECL_FORCE_INLINE_THROW(uint32_t)
3181iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3182 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3183{
3184#ifdef RT_ARCH_AMD64
3185 if ((int32_t)uImm == (int64_t)uImm)
3186 {
3187 /* mov mem64, imm32 (sign-extended) */
3188 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3189 pCodeBuf[off++] = 0xc7;
3190 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3191 pCodeBuf[off++] = RT_BYTE1(uImm);
3192 pCodeBuf[off++] = RT_BYTE2(uImm);
3193 pCodeBuf[off++] = RT_BYTE3(uImm);
3194 pCodeBuf[off++] = RT_BYTE4(uImm);
3195 }
3196 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3197 {
3198 /* require temporary register. */
3199 if (iGprImmTmp == UINT8_MAX)
3200 iGprImmTmp = iGprTmp;
3201 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3202 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3203 }
3204 else
3205# ifdef IEM_WITH_THROW_CATCH
3206 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3207# else
3208 AssertReleaseFailedStmt(off = UINT32_MAX);
3209# endif
3210
3211#elif defined(RT_ARCH_ARM64)
3212 if (uImm == 0)
3213 iGprImmTmp = ARMV8_A64_REG_XZR;
3214 else
3215 {
3216 Assert(iGprImmTmp < 31);
3217 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3218 }
3219 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3220
3221#else
3222# error "port me"
3223#endif
3224 return off;
3225}
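/* Usage sketch (not from the original source; register indexes are placeholders):
   an immediate that does not fit a sign-extended 32-bit value needs @a iGprImmTmp
   on AMD64 too, while ARM64 needs it for any non-zero immediate:
       off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x1234567890ab),
                                            idxRegBase, idxRegImmTmp);
*/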
3226
3227
3228/**
3229 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3230 *
3231 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3232 *
3233 * @note ARM64: Misaligned @a offDisp values and values not in the
3234 *          -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3235 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3236 * does not heed this.
3237 */
3238DECL_FORCE_INLINE_THROW(uint32_t)
3239iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3240 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3241{
3242#ifdef RT_ARCH_AMD64
3243 /* mov mem32, imm32 */
3244 if (iGprBase >= 8)
3245 pCodeBuf[off++] = X86_OP_REX_B;
3246 pCodeBuf[off++] = 0xc7;
3247 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3248 pCodeBuf[off++] = RT_BYTE1(uImm);
3249 pCodeBuf[off++] = RT_BYTE2(uImm);
3250 pCodeBuf[off++] = RT_BYTE3(uImm);
3251 pCodeBuf[off++] = RT_BYTE4(uImm);
3252 RT_NOREF(iGprImmTmp, iGprTmp);
3253
3254#elif defined(RT_ARCH_ARM64)
3256 if (uImm == 0)
3257 iGprImmTmp = ARMV8_A64_REG_XZR;
3258 else
3259 {
3260 Assert(iGprImmTmp < 31);
3261 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3262 }
3263 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3264 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3265
3266#else
3267# error "port me"
3268#endif
3269 return off;
3270}
3271
3272
3273/**
3274 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3275 *
3276 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3277 *
3278 * @note ARM64: Misaligned @a offDisp values and values not in the
3279 *          -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3280 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3281 * does not heed this.
3282 */
3283DECL_FORCE_INLINE_THROW(uint32_t)
3284iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3285 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3286{
3287#ifdef RT_ARCH_AMD64
3288 /* mov mem16, imm16 */
3289 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3290 if (iGprBase >= 8)
3291 pCodeBuf[off++] = X86_OP_REX_B;
3292 pCodeBuf[off++] = 0xc7;
3293 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3294 pCodeBuf[off++] = RT_BYTE1(uImm);
3295 pCodeBuf[off++] = RT_BYTE2(uImm);
3296 RT_NOREF(iGprImmTmp, iGprTmp);
3297
3298#elif defined(RT_ARCH_ARM64)
3299 if (uImm == 0)
3300 iGprImmTmp = ARMV8_A64_REG_XZR;
3301 else
3302 {
3303 Assert(iGprImmTmp < 31);
3304 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3305 }
3306 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3307 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3308
3309#else
3310# error "port me"
3311#endif
3312 return off;
3313}
3314
3315
3316/**
3317 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3318 *
3319 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3320 *
3321 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3322 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3323 * same. Will assert / throw if caller does not heed this.
3324 */
3325DECL_FORCE_INLINE_THROW(uint32_t)
3326iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3327 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3328{
3329#ifdef RT_ARCH_AMD64
3330 /* mov mem8, imm8 */
3332 if (iGprBase >= 8)
3333 pCodeBuf[off++] = X86_OP_REX_B;
3334 pCodeBuf[off++] = 0xc6;
3335 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3336 pCodeBuf[off++] = uImm;
3337 RT_NOREF(iGprImmTmp, iGprTmp);
3338
3339#elif defined(RT_ARCH_ARM64)
3340 if (uImm == 0)
3341 iGprImmTmp = ARMV8_A64_REG_XZR;
3342 else
3343 {
3344 Assert(iGprImmTmp < 31);
3345 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3346 }
3347 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3348 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3349
3350#else
3351# error "port me"
3352#endif
3353 return off;
3354}
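/* Usage sketch (not from the original source): the immediate store emitters above
   special-case a zero immediate on ARM64 by storing from the XZR register, so no
   temporary register is needed for that case (idxRegBase is a placeholder):
       off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, 0, idxRegBase);
*/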
3355
3356
3357#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3358/**
3359 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3360 *
3361 * @note ARM64: Misaligned @a offDisp values and values not in the
3362 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3363 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3364 * does not heed this.
3365 */
3366DECL_FORCE_INLINE_THROW(uint32_t)
3367iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3368 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3369{
3370#ifdef RT_ARCH_AMD64
3371 /* movdqu mem128, reg128 */
3372 pCodeBuf[off++] = 0xf3;
3373 if (iVecRegDst >= 8 || iGprBase >= 8)
3374 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3375 pCodeBuf[off++] = 0x0f;
3376 pCodeBuf[off++] = 0x7f;
3377 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3378 RT_NOREF(iGprTmp);
3379
3380#elif defined(RT_ARCH_ARM64)
3381 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3382 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3383
3384#else
3385# error "port me"
3386#endif
3387 return off;
3388}
3389
3390
3391/**
3392 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3393 */
3394DECL_INLINE_THROW(uint32_t)
3395iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3396{
3397#ifdef RT_ARCH_AMD64
3398 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3400
3401#elif defined(RT_ARCH_ARM64)
3402 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3403
3404#else
3405# error "port me"
3406#endif
3407 return off;
3408}
3409
3410
3411/**
3412 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3413 *
3414 * @note ARM64: Misaligned @a offDisp values and values not in the
3415 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3416 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3417 * does not heed this.
3418 */
3419DECL_FORCE_INLINE_THROW(uint32_t)
3420iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3421 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3422{
3423#ifdef RT_ARCH_AMD64
3424 /* vmovdqu mem256, reg256 */
3425 pCodeBuf[off++] = X86_OP_VEX3;
3426 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3427 | X86_OP_VEX3_BYTE1_X
3428 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3429 | UINT8_C(0x01);
3430 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3431 pCodeBuf[off++] = 0x7f;
3432 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3433 RT_NOREF(iGprTmp);
3434
3435#elif defined(RT_ARCH_ARM64)
3436 Assert(!(iVecRegDst & 0x1));
3437 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3438 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3439 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3440 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3441#else
3442# error "port me"
3443#endif
3444 return off;
3445}
3446
3447
3448/**
3449 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3450 */
3451DECL_INLINE_THROW(uint32_t)
3452iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3453{
3454#ifdef RT_ARCH_AMD64
3455 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3457
3458#elif defined(RT_ARCH_ARM64)
3459 Assert(!(iVecRegDst & 0x1));
3460 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3461 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3462 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3463 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3464
3465#else
3466# error "port me"
3467#endif
3468 return off;
3469}
3470#endif
3471
3472
3473
3474/*********************************************************************************************************************************
3475* Subtraction and Additions *
3476*********************************************************************************************************************************/
3477
3478/**
3479 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3480 * @note The AMD64 version sets flags.
3481 */
3482DECL_INLINE_THROW(uint32_t)
3483iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3484{
3485#if defined(RT_ARCH_AMD64)
3486 /* sub Gv,Ev */
3487 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3488 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3489 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3490 pbCodeBuf[off++] = 0x2b;
3491 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3492
3493#elif defined(RT_ARCH_ARM64)
3494 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3495 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3496
3497#else
3498# error "Port me"
3499#endif
3500 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3501 return off;
3502}
3503
3504
3505/**
3506 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3507 * @note The AMD64 version sets flags.
3508 */
3509DECL_FORCE_INLINE(uint32_t)
3510iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3511{
3512#if defined(RT_ARCH_AMD64)
3513 /* sub Gv,Ev */
3514 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3515 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3516 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3517 pCodeBuf[off++] = 0x2b;
3518 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3519
3520#elif defined(RT_ARCH_ARM64)
3521 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3522
3523#else
3524# error "Port me"
3525#endif
3526 return off;
3527}
3528
3529
3530/**
3531 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3532 * @note The AMD64 version sets flags.
3533 */
3534DECL_INLINE_THROW(uint32_t)
3535iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3536{
3537#if defined(RT_ARCH_AMD64)
3538 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3539#elif defined(RT_ARCH_ARM64)
3540 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3541#else
3542# error "Port me"
3543#endif
3544 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3545 return off;
3546}
3547
3548
3549/**
3550 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3551 *
3552 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3553 *
3554 * @note Larger constants will require a temporary register. Failing to specify
3555 * one when needed will trigger fatal assertion / throw.
3556 */
3557DECL_FORCE_INLINE_THROW(uint32_t)
3558iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3559 uint8_t iGprTmp = UINT8_MAX)
3560{
3561#ifdef RT_ARCH_AMD64
3562 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3563 if (iSubtrahend == 1)
3564 {
3565 /* dec r/m64 */
3566 pCodeBuf[off++] = 0xff;
3567 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3568 }
3569 else if (iSubtrahend == -1)
3570 {
3571 /* inc r/m64 */
3572 pCodeBuf[off++] = 0xff;
3573 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3574 }
3575 else if ((int8_t)iSubtrahend == iSubtrahend)
3576 {
3577 /* sub r/m64, imm8 */
3578 pCodeBuf[off++] = 0x83;
3579 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3580 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3581 }
3582 else if ((int32_t)iSubtrahend == iSubtrahend)
3583 {
3584 /* sub r/m64, imm32 */
3585 pCodeBuf[off++] = 0x81;
3586 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3587 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3588 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3589 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3590 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3591 }
3592 else if (iGprTmp != UINT8_MAX)
3593 {
3594 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3595 /* sub r/m64, r64 */
3596 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3597 pCodeBuf[off++] = 0x29;
3598 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3599 }
3600 else
3601# ifdef IEM_WITH_THROW_CATCH
3602 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3603# else
3604 AssertReleaseFailedStmt(off = UINT32_MAX);
3605# endif
3606
3607#elif defined(RT_ARCH_ARM64)
3608 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3609 if (uAbsSubtrahend < 4096)
3610 {
3611 if (iSubtrahend >= 0)
3612 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3613 else
3614 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3615 }
3616 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3617 {
3618 if (iSubtrahend >= 0)
3619 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3620 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3621 else
3622 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3623 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3624 }
3625 else if (iGprTmp != UINT8_MAX)
3626 {
3627 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3628 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3629 }
3630 else
3631# ifdef IEM_WITH_THROW_CATCH
3632 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3633# else
3634 AssertReleaseFailedStmt(off = UINT32_MAX);
3635# endif
3636
3637#else
3638# error "Port me"
3639#endif
3640 return off;
3641}
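/* Worked example (not from the original source): the ARM64 immediate forms above
   only encode a 12-bit unsigned value, optionally shifted left by 12 bits.  Thus
   iSubtrahend = 0x2000 is emitted as a single shifted SUB, while 0x2001 falls back
   to loading the constant into @a iGprTmp followed by a register SUB (and asserts
   / throws if no temporary register was given). */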
3642
3643
3644/**
3645 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3646 *
3647 * @note Larger constants will require a temporary register. Failing to specify
3648 * one when needed will trigger fatal assertion / throw.
3649 */
3650DECL_INLINE_THROW(uint32_t)
3651iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3652 uint8_t iGprTmp = UINT8_MAX)
3653
3654{
3655#ifdef RT_ARCH_AMD64
3656 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3657#elif defined(RT_ARCH_ARM64)
3658 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3659#else
3660# error "Port me"
3661#endif
3662 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3663 return off;
3664}
3665
3666
3667/**
3668 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3669 *
3670 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3671 *
3672 * @note ARM64: Larger constants will require a temporary register. Failing to
3673 * specify one when needed will trigger fatal assertion / throw.
3674 */
3675DECL_FORCE_INLINE_THROW(uint32_t)
3676iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3677 uint8_t iGprTmp = UINT8_MAX)
3678{
3679#ifdef RT_ARCH_AMD64
3680 if (iGprDst >= 8)
3681 pCodeBuf[off++] = X86_OP_REX_B;
3682 if (iSubtrahend == 1)
3683 {
3684 /* dec r/m32 */
3685 pCodeBuf[off++] = 0xff;
3686 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3687 }
3688 else if (iSubtrahend == -1)
3689 {
3690 /* inc r/m32 */
3691 pCodeBuf[off++] = 0xff;
3692 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3693 }
3694 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3695 {
3696 /* sub r/m32, imm8 */
3697 pCodeBuf[off++] = 0x83;
3698 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3699 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3700 }
3701 else
3702 {
3703 /* sub r/m32, imm32 */
3704 pCodeBuf[off++] = 0x81;
3705 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3706 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3707 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3708 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3709 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3710 }
3711 RT_NOREF(iGprTmp);
3712
3713#elif defined(RT_ARCH_ARM64)
3714 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3715 if (uAbsSubtrahend < 4096)
3716 {
3717 if (iSubtrahend >= 0)
3718 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3719 else
3720 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3721 }
3722 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3723 {
3724 if (iSubtrahend >= 0)
3725 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3726 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3727 else
3728 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3729 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3730 }
3731 else if (iGprTmp != UINT8_MAX)
3732 {
3733 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3734 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3735 }
3736 else
3737# ifdef IEM_WITH_THROW_CATCH
3738 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3739# else
3740 AssertReleaseFailedStmt(off = UINT32_MAX);
3741# endif
3742
3743#else
3744# error "Port me"
3745#endif
3746 return off;
3747}
3748
3749
3750/**
3751 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3752 *
3753 * @note ARM64: Larger constants will require a temporary register. Failing to
3754 * specify one when needed will trigger fatal assertion / throw.
3755 */
3756DECL_INLINE_THROW(uint32_t)
3757iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3758 uint8_t iGprTmp = UINT8_MAX)
3759
3760{
3761#ifdef RT_ARCH_AMD64
3762    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3763#elif defined(RT_ARCH_ARM64)
3764    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3765#else
3766# error "Port me"
3767#endif
3768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3769 return off;
3770}
3771
3772
3773/**
3774 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3775 *
3776 * This will optimize using DEC/INC/whatever and ARM64 will not set flags,
3777 * so not suitable as a base for conditional jumps.
3778 *
3779 * @note AMD64: Will only update the lower 16 bits of the register.
3780 * @note ARM64: Will update the entire register.
3781 * @note ARM64: Larger constants will require a temporary register. Failing to
3782 * specify one when needed will trigger fatal assertion / throw.
3783 */
3784DECL_FORCE_INLINE_THROW(uint32_t)
3785iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3786 uint8_t iGprTmp = UINT8_MAX)
3787{
3788#ifdef RT_ARCH_AMD64
3789 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3790 if (iGprDst >= 8)
3791 pCodeBuf[off++] = X86_OP_REX_B;
3792 if (iSubtrahend == 1)
3793 {
3794 /* dec r/m16 */
3795 pCodeBuf[off++] = 0xff;
3796 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3797 }
3798 else if (iSubtrahend == -1)
3799 {
3800 /* inc r/m16 */
3801 pCodeBuf[off++] = 0xff;
3802 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3803 }
3804 else if ((int8_t)iSubtrahend == iSubtrahend)
3805 {
3806 /* sub r/m16, imm8 */
3807 pCodeBuf[off++] = 0x83;
3808 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3809 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3810 }
3811 else
3812 {
3813 /* sub r/m16, imm16 */
3814 pCodeBuf[off++] = 0x81;
3815 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3816 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3817 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3818 }
3819 RT_NOREF(iGprTmp);
3820
3821#elif defined(RT_ARCH_ARM64)
3822 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3823 if (uAbsSubtrahend < 4096)
3824 {
3825 if (iSubtrahend >= 0)
3826 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3827 else
3828 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3829 }
3830 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3831 {
3832 if (iSubtrahend >= 0)
3833 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3834 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3835 else
3836 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3837 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3838 }
3839 else if (iGprTmp != UINT8_MAX)
3840 {
3841 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3842 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3843 }
3844 else
3845# ifdef IEM_WITH_THROW_CATCH
3846 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3847# else
3848 AssertReleaseFailedStmt(off = UINT32_MAX);
3849# endif
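    /* Mask the result back down to 16 bits; the ARM64 arithmetic above works on
       the full 32-bit register, unlike the AMD64 encoding which only touches the
       low word. */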
3850 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3851
3852#else
3853# error "Port me"
3854#endif
3855 return off;
3856}
3857
3858
3859/**
3860 * Emits adding a 64-bit GPR to another, storing the result in the first.
3861 * @note The AMD64 version sets flags.
3862 */
3863DECL_FORCE_INLINE(uint32_t)
3864iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3865{
3866#if defined(RT_ARCH_AMD64)
3867 /* add Gv,Ev */
3868 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3869 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3870 pCodeBuf[off++] = 0x03;
3871 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3872
3873#elif defined(RT_ARCH_ARM64)
3874 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3875
3876#else
3877# error "Port me"
3878#endif
3879 return off;
3880}
3881
3882
3883/**
3884 * Emits adding a 64-bit GPR to another, storing the result in the first.
3885 * @note The AMD64 version sets flags.
3886 */
3887DECL_INLINE_THROW(uint32_t)
3888iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3889{
3890#if defined(RT_ARCH_AMD64)
3891 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3892#elif defined(RT_ARCH_ARM64)
3893 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3894#else
3895# error "Port me"
3896#endif
3897 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3898 return off;
3899}
3900
3901
3902/**
3903 * Emits adding a 32-bit GPR to another, storing the result in the first.
3904 * @note The AMD64 version sets flags.
3905 */
3906DECL_FORCE_INLINE(uint32_t)
3907iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3908{
3909#if defined(RT_ARCH_AMD64)
3910 /* add Gv,Ev */
3911 if (iGprDst >= 8 || iGprAddend >= 8)
3912 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3913 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3914 pCodeBuf[off++] = 0x03;
3915 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3916
3917#elif defined(RT_ARCH_ARM64)
3918 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3919
3920#else
3921# error "Port me"
3922#endif
3923 return off;
3924}
3925
3926
3927/**
3928 * Emits adding a 32-bit GPR to another, storing the result in the first.
3929 * @note The AMD64 version sets flags.
3930 */
3931DECL_INLINE_THROW(uint32_t)
3932iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3933{
3934#if defined(RT_ARCH_AMD64)
3935 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3936#elif defined(RT_ARCH_ARM64)
3937 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3938#else
3939# error "Port me"
3940#endif
3941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3942 return off;
3943}
3944
3945
3946/**
3947 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3948 */
3949DECL_INLINE_THROW(uint32_t)
3950iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3951{
3952#if defined(RT_ARCH_AMD64)
3953 /* add or inc */
3954 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3955 if (iImm8 != 1)
3956 {
3957 pCodeBuf[off++] = 0x83;
3958 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3959 pCodeBuf[off++] = (uint8_t)iImm8;
3960 }
3961 else
3962 {
3963 pCodeBuf[off++] = 0xff;
3964 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3965 }
3966
3967#elif defined(RT_ARCH_ARM64)
3968 if (iImm8 >= 0)
3969 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3970 else
3971 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3972
3973#else
3974# error "Port me"
3975#endif
3976 return off;
3977}
3978
3979
3980/**
3981 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3982 */
3983DECL_INLINE_THROW(uint32_t)
3984iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3985{
3986#if defined(RT_ARCH_AMD64)
3987 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3988#elif defined(RT_ARCH_ARM64)
3989 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3990#else
3991# error "Port me"
3992#endif
3993 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3994 return off;
3995}
3996
3997
3998/**
3999 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4000 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4001 */
4002DECL_FORCE_INLINE(uint32_t)
4003iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4004{
4005#if defined(RT_ARCH_AMD64)
4006 /* add or inc */
4007 if (iGprDst >= 8)
4008 pCodeBuf[off++] = X86_OP_REX_B;
4009 if (iImm8 != 1)
4010 {
4011 pCodeBuf[off++] = 0x83;
4012 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4013 pCodeBuf[off++] = (uint8_t)iImm8;
4014 }
4015 else
4016 {
4017 pCodeBuf[off++] = 0xff;
4018 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4019 }
4020
4021#elif defined(RT_ARCH_ARM64)
4022 if (iImm8 >= 0)
4023 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4024 else
4025 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4026
4027#else
4028# error "Port me"
4029#endif
4030 return off;
4031}
4032
4033
4034/**
4035 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4036 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4037 */
4038DECL_INLINE_THROW(uint32_t)
4039iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4040{
4041#if defined(RT_ARCH_AMD64)
4042 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4043#elif defined(RT_ARCH_ARM64)
4044 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4045#else
4046# error "Port me"
4047#endif
4048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4049 return off;
4050}
4051
4052
4053/**
4054 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4055 *
4056 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4057 */
4058DECL_FORCE_INLINE_THROW(uint32_t)
4059iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4060{
4061#if defined(RT_ARCH_AMD64)
4062 if ((int8_t)iAddend == iAddend)
4063 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4064
4065 if ((int32_t)iAddend == iAddend)
4066 {
4067 /* add grp, imm32 */
4068 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4069 pCodeBuf[off++] = 0x81;
4070 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4071 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4072 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4073 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4074 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4075 }
4076 else if (iGprTmp != UINT8_MAX)
4077 {
4078 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4079
4080 /* add dst, tmpreg */
4081 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4082 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4083 pCodeBuf[off++] = 0x03;
4084 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4085 }
4086 else
4087# ifdef IEM_WITH_THROW_CATCH
4088 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4089# else
4090 AssertReleaseFailedStmt(off = UINT32_MAX);
4091# endif
4092
4093#elif defined(RT_ARCH_ARM64)
4094 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4095 if (uAbsAddend < 4096)
4096 {
4097 if (iAddend >= 0)
4098 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4099 else
4100 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4101 }
4102 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4103 {
4104 if (iAddend >= 0)
4105 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4106 true /*f64Bit*/, true /*fShift12*/);
4107 else
4108 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4109 true /*f64Bit*/, true /*fShift12*/);
4110 }
4111 else if (iGprTmp != UINT8_MAX)
4112 {
4113 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4114 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4115 }
4116 else
4117# ifdef IEM_WITH_THROW_CATCH
4118 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4119# else
4120 AssertReleaseFailedStmt(off = UINT32_MAX);
4121# endif
4122
4123#else
4124# error "Port me"
4125#endif
4126 return off;
4127}
4128
4129
4130/**
4131 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4132 */
4133DECL_INLINE_THROW(uint32_t)
4134iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4135{
4136#if defined(RT_ARCH_AMD64)
4137 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4138 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4139
4140 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4141 {
4142 /* add grp, imm32 */
4143 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4144 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4145 pbCodeBuf[off++] = 0x81;
4146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4147 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4148 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4149 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4150 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4151 }
4152 else
4153 {
4154 /* Best to use a temporary register to deal with this in the simplest way: */
4155 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4156
4157 /* add dst, tmpreg */
4158 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4159 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4160 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4161 pbCodeBuf[off++] = 0x03;
4162 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4163
4164 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4165 }
4166
4167#elif defined(RT_ARCH_ARM64)
4168 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4169 {
4170 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4171 if (iAddend >= 0)
4172 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
4173 else
4174 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
4175 }
4176 else
4177 {
4178 /* Use temporary register for the immediate. */
4179 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4180
4181 /* add gprdst, gprdst, tmpreg */
4182 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4183 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
4184
4185 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4186 }
4187
4188#else
4189# error "Port me"
4190#endif
4191 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4192 return off;
4193}
4194
4195
4196/**
4197 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4198 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4199 * @note For ARM64 the iAddend value must be in the range 0x000..0xfff,
4200 * or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
4201 * the lower 12 bits always zero). The negative ranges are also allowed,
4202 *       making it behave like a subtraction. If the constant does not conform,
4203 *       this will assert / throw.
4204 */
4205DECL_FORCE_INLINE_THROW(uint32_t)
4206iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4207{
4208#if defined(RT_ARCH_AMD64)
4209 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4210 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4211
4212 /* add grp, imm32 */
4213 if (iGprDst >= 8)
4214 pCodeBuf[off++] = X86_OP_REX_B;
4215 pCodeBuf[off++] = 0x81;
4216 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4217 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4218 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4219 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4220 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4221
4222#elif defined(RT_ARCH_ARM64)
4223 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4224 if (uAbsAddend <= 0xfff)
4225 {
4226 if (iAddend >= 0)
4227 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4228 else
4229 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4230 }
4231 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4232 {
4233 if (iAddend >= 0)
4234 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4235 false /*f64Bit*/, true /*fShift12*/);
4236 else
4237 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4238 false /*f64Bit*/, true /*fShift12*/);
4239 }
4240 else
4241# ifdef IEM_WITH_THROW_CATCH
4242 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4243# else
4244 AssertReleaseFailedStmt(off = UINT32_MAX);
4245# endif
4246
4247#else
4248# error "Port me"
4249#endif
4250 return off;
4251}
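/* Worked example (not from the original source): iAddend = 0x123 and
   iAddend = 0x123000 both conform to the ARM64 constraint described above (plain
   and shift-12 encodings respectively), whereas iAddend = 0x123456 does not and
   will assert / throw. */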
4252
4253
4254/**
4255 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4256 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4257 */
4258DECL_INLINE_THROW(uint32_t)
4259iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4260{
4261#if defined(RT_ARCH_AMD64)
4262 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4263
4264#elif defined(RT_ARCH_ARM64)
4265 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4266 {
4267 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4268 if (iAddend >= 0)
4269 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
4270 else
4271 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
4272 }
4273 else
4274 {
4275 /* Use temporary register for the immediate. */
4276 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
4277
4278 /* add gprdst, gprdst, tmpreg */
4279 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4280 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4281
4282 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4283 }
4284
4285#else
4286# error "Port me"
4287#endif
4288 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4289 return off;
4290}
4291
4292
4293/**
4294 * Emits a 16-bit GPR add with a signed immediate addend.
4295 *
4296 * This will optimize using INC/DEC/whatever and ARM64 will not set flags,
4297 * so not suitable as a base for conditional jumps.
4298 *
4299 * @note AMD64: Will only update the lower 16 bits of the register.
4300 * @note ARM64: Will update the entire register.
4301 * @note ARM64: Larger constants will require a temporary register. Failing to
4302 * specify one when needed will trigger fatal assertion / throw.
4303 * @sa iemNativeEmitSubGpr16ImmEx
4304 */
4305DECL_FORCE_INLINE_THROW(uint32_t)
4306iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
4307 uint8_t iGprTmp = UINT8_MAX)
4308{
4309#ifdef RT_ARCH_AMD64
4310 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4311 if (iGprDst >= 8)
4312 pCodeBuf[off++] = X86_OP_REX_B;
4313 if (iAddend == 1)
4314 {
4315 /* inc r/m16 */
4316 pCodeBuf[off++] = 0xff;
4317 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4318 }
4319 else if (iAddend == -1)
4320 {
4321 /* dec r/m16 */
4322 pCodeBuf[off++] = 0xff;
4323 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4324 }
4325 else if ((int8_t)iAddend == iAddend)
4326 {
4327 /* add r/m16, imm8 */
4328 pCodeBuf[off++] = 0x83;
4329 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4330 pCodeBuf[off++] = (uint8_t)iAddend;
4331 }
4332 else
4333 {
4334 /* add r/m16, imm16 */
4335 pCodeBuf[off++] = 0x81;
4336 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4337 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4338 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4339 }
4340 RT_NOREF(iGprTmp);
4341
4342#elif defined(RT_ARCH_ARM64)
4343 uint32_t uAbsAddend = RT_ABS(iAddend);
4344 if (uAbsAddend < 4096)
4345 {
4346 if (iAddend >= 0)
4347 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4348 else
4349 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4350 }
4351 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4352 {
4353 if (iAddend >= 0)
4354 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4355 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4356 else
4357 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4358 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4359 }
4360 else if (iGprTmp != UINT8_MAX)
4361 {
4362 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
4363 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4364 }
4365 else
4366# ifdef IEM_WITH_THROW_CATCH
4367 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4368# else
4369 AssertReleaseFailedStmt(off = UINT32_MAX);
4370# endif
4371 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4372
4373#else
4374# error "Port me"
4375#endif
4376 return off;
4377}
4378
4379
4380
4381/**
4382 * Adds two 64-bit GPRs together, storing the result in a third register.
4383 */
4384DECL_FORCE_INLINE(uint32_t)
4385iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4386{
4387#ifdef RT_ARCH_AMD64
4388 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4389 {
4390 /** @todo consider LEA */
4391 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4392 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4393 }
4394 else
4395 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4396
4397#elif defined(RT_ARCH_ARM64)
4398 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4399
4400#else
4401# error "Port me!"
4402#endif
4403 return off;
4404}
4405
4406
4407
4408/**
4409 * Adds two 32-bit GPRs together, storing the result in a third register.
4410 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4411 */
4412DECL_FORCE_INLINE(uint32_t)
4413iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4414{
4415#ifdef RT_ARCH_AMD64
4416 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4417 {
4418 /** @todo consider LEA */
4419 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4420 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4421 }
4422 else
4423 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4424
4425#elif defined(RT_ARCH_ARM64)
4426 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4427
4428#else
4429# error "Port me!"
4430#endif
4431 return off;
4432}
4433
4434
4435/**
4436 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4437 * third register.
4438 *
4439 * @note The ARM64 version does not work for non-trivial constants if the
4440 * two registers are the same. Will assert / throw exception.
4441 */
4442DECL_FORCE_INLINE_THROW(uint32_t)
4443iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4444{
4445#ifdef RT_ARCH_AMD64
4446 /** @todo consider LEA */
4447 if ((int8_t)iImmAddend == iImmAddend)
4448 {
4449 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4450 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4451 }
4452 else
4453 {
4454 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4455 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4456 }
4457
4458#elif defined(RT_ARCH_ARM64)
4459 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4460 if (uAbsImmAddend < 4096)
4461 {
4462 if (iImmAddend >= 0)
4463 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4464 else
4465 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4466 }
4467 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4468 {
4469 if (iImmAddend >= 0)
4470            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4471 else
4472            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4473 }
4474 else if (iGprDst != iGprAddend)
4475 {
4476 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4477 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4478 }
4479 else
4480# ifdef IEM_WITH_THROW_CATCH
4481 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4482# else
4483 AssertReleaseFailedStmt(off = UINT32_MAX);
4484# endif
4485
4486#else
4487# error "Port me!"
4488#endif
4489 return off;
4490}
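/* Usage sketch (not from the original source; register indexes are placeholders):
   picking a destination distinct from the addend register sidesteps the ARM64
   restriction on non-trivial constants noted above:
       off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegDst, idxRegAddend,
                                            INT64_C(0x123456));
*/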
4491
4492
4493/**
4494 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4495 * third register.
4496 *
4497 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4498 *
4499 * @note The ARM64 version does not work for non-trivial constants if the
4500 * two registers are the same. Will assert / throw exception.
4501 */
4502DECL_FORCE_INLINE_THROW(uint32_t)
4503iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4504{
4505#ifdef RT_ARCH_AMD64
4506 /** @todo consider LEA */
4507 if ((int8_t)iImmAddend == iImmAddend)
4508 {
4509 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4510 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4511 }
4512 else
4513 {
4514 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4515        off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4516 }
4517
4518#elif defined(RT_ARCH_ARM64)
4519 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4520 if (uAbsImmAddend < 4096)
4521 {
4522 if (iImmAddend >= 0)
4523 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4524 else
4525 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4526 }
4527 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4528 {
4529 if (iImmAddend >= 0)
4530            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4531 else
4532            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4533 }
4534 else if (iGprDst != iGprAddend)
4535 {
4536 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4537 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4538 }
4539 else
4540# ifdef IEM_WITH_THROW_CATCH
4541 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4542# else
4543 AssertReleaseFailedStmt(off = UINT32_MAX);
4544# endif
4545
4546#else
4547# error "Port me!"
4548#endif
4549 return off;
4550}
4551
4552
4553/*********************************************************************************************************************************
4554* Unary Operations *
4555*********************************************************************************************************************************/
4556
4557/**
4558 * Emits code for two's complement negation of a 64-bit GPR.
4559 */
4560DECL_FORCE_INLINE_THROW(uint32_t)
4561iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4562{
4563#if defined(RT_ARCH_AMD64)
4564 /* neg Ev */
4565 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4566 pCodeBuf[off++] = 0xf7;
4567 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4568
4569#elif defined(RT_ARCH_ARM64)
4570 /* sub dst, xzr, dst */
4571 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4572
4573#else
4574# error "Port me"
4575#endif
4576 return off;
4577}
4578
4579
4580/**
4581 * Emits code for two's complement negation of a 64-bit GPR.
4582 */
4583DECL_INLINE_THROW(uint32_t)
4584iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4585{
4586#if defined(RT_ARCH_AMD64)
4587 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4588#elif defined(RT_ARCH_ARM64)
4589 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4590#else
4591# error "Port me"
4592#endif
4593 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4594 return off;
4595}
4596
4597
4598/**
4599 * Emits code for two's complement negation of a 32-bit GPR.
4600 * @note Bits 32 thru 63 are set to zero.
4601 */
4602DECL_FORCE_INLINE_THROW(uint32_t)
4603iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4604{
4605#if defined(RT_ARCH_AMD64)
4606 /* neg Ev */
4607 if (iGprDst >= 8)
4608 pCodeBuf[off++] = X86_OP_REX_B;
4609 pCodeBuf[off++] = 0xf7;
4610 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4611
4612#elif defined(RT_ARCH_ARM64)
4613 /* sub dst, xzr, dst */
4614 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4615
4616#else
4617# error "Port me"
4618#endif
4619 return off;
4620}
4621
4622
4623/**
4624 * Emits code for two's complement negation of a 32-bit GPR.
4625 * @note Bits 32 thru 63 are set to zero.
4626 */
4627DECL_INLINE_THROW(uint32_t)
4628iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4629{
4630#if defined(RT_ARCH_AMD64)
4631 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4632#elif defined(RT_ARCH_ARM64)
4633 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4634#else
4635# error "Port me"
4636#endif
4637 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4638 return off;
4639}
4640
4641
4642
4643/*********************************************************************************************************************************
4644* Bit Operations *
4645*********************************************************************************************************************************/
4646
4647/**
4648 * Emits code for clearing bits 16 thru 63 in the GPR.
4649 */
4650DECL_INLINE_THROW(uint32_t)
4651iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4652{
4653#if defined(RT_ARCH_AMD64)
4654 /* movzx Gv,Ew */
4655 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4656 if (iGprDst >= 8)
4657 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4658 pbCodeBuf[off++] = 0x0f;
4659 pbCodeBuf[off++] = 0xb7;
4660 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4661
4662#elif defined(RT_ARCH_ARM64)
4663 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4664# if 1
4665 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4666# else
4667 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4668 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4669# endif
4670#else
4671# error "Port me"
4672#endif
4673 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4674 return off;
4675}
4676
4677
4678/**
4679 * Emits code for AND'ing two 64-bit GPRs.
4680 *
4681 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4682 * and ARM64 hosts.
4683 */
4684DECL_FORCE_INLINE(uint32_t)
4685iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4686{
4687#if defined(RT_ARCH_AMD64)
4688 /* and Gv, Ev */
4689 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4690 pCodeBuf[off++] = 0x23;
4691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4692 RT_NOREF(fSetFlags);
4693
4694#elif defined(RT_ARCH_ARM64)
4695 if (!fSetFlags)
4696 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4697 else
4698 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4699
4700#else
4701# error "Port me"
4702#endif
4703 return off;
4704}
4705
4706
4707/**
4708 * Emits code for AND'ing two 64-bit GPRs.
4709 *
4710 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4711 * and ARM64 hosts.
4712 */
4713DECL_INLINE_THROW(uint32_t)
4714iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4715{
4716#if defined(RT_ARCH_AMD64)
4717 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4718#elif defined(RT_ARCH_ARM64)
4719 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4720#else
4721# error "Port me"
4722#endif
4723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4724 return off;
4725}
4726
4727
4728/**
4729 * Emits code for AND'ing two 32-bit GPRs.
4730 */
4731DECL_FORCE_INLINE(uint32_t)
4732iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4733{
4734#if defined(RT_ARCH_AMD64)
4735 /* and Gv, Ev */
4736 if (iGprDst >= 8 || iGprSrc >= 8)
4737 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4738 pCodeBuf[off++] = 0x23;
4739 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4740 RT_NOREF(fSetFlags);
4741
4742#elif defined(RT_ARCH_ARM64)
4743 if (!fSetFlags)
4744 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4745 else
4746 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4747
4748#else
4749# error "Port me"
4750#endif
4751 return off;
4752}
4753
4754
4755/**
4756 * Emits code for AND'ing two 32-bit GPRs.
4757 */
4758DECL_INLINE_THROW(uint32_t)
4759iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4760{
4761#if defined(RT_ARCH_AMD64)
4762 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4763#elif defined(RT_ARCH_ARM64)
4764 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4765#else
4766# error "Port me"
4767#endif
4768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4769 return off;
4770}
4771
4772
4773/**
4774 * Emits code for AND'ing a 64-bit GPR with a constant.
4775 *
4776 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4777 * and ARM64 hosts.
4778 */
4779DECL_INLINE_THROW(uint32_t)
4780iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4781{
4782#if defined(RT_ARCH_AMD64)
4783 if ((int64_t)uImm == (int8_t)uImm)
4784 {
4785 /* and Ev, imm8 */
4786 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4787 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4788 pbCodeBuf[off++] = 0x83;
4789 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4790 pbCodeBuf[off++] = (uint8_t)uImm;
4791 }
4792 else if ((int64_t)uImm == (int32_t)uImm)
4793 {
4794 /* and Ev, imm32 */
4795 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4796 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4797 pbCodeBuf[off++] = 0x81;
4798 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4799 pbCodeBuf[off++] = RT_BYTE1(uImm);
4800 pbCodeBuf[off++] = RT_BYTE2(uImm);
4801 pbCodeBuf[off++] = RT_BYTE3(uImm);
4802 pbCodeBuf[off++] = RT_BYTE4(uImm);
4803 }
4804 else
4805 {
4806 /* Use temporary register for the 64-bit immediate. */
4807 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4808 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4809 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4810 }
4811 RT_NOREF(fSetFlags);
4812
4813#elif defined(RT_ARCH_ARM64)
4814 uint32_t uImmR = 0;
4815 uint32_t uImmNandS = 0;
4816 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4817 {
4818 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4819 if (!fSetFlags)
4820 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4821 else
4822 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4823 }
4824 else
4825 {
4826 /* Use temporary register for the 64-bit immediate. */
4827 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4828 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4829 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4830 }
4831
4832#else
4833# error "Port me"
4834#endif
4835 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4836 return off;
4837}
4838
4839
4840/**
4841 * Emits code for AND'ing a 32-bit GPR with a constant.
4842 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4843 * @note For ARM64 this only supports @a uImm values that can be expressed using
4844 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4845 * make sure this is possible!
4846 */
4847DECL_FORCE_INLINE_THROW(uint32_t)
4848iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4849{
4850#if defined(RT_ARCH_AMD64)
4851 /* and Ev, imm */
4852 if (iGprDst >= 8)
4853 pCodeBuf[off++] = X86_OP_REX_B;
4854 if ((int32_t)uImm == (int8_t)uImm)
4855 {
4856 pCodeBuf[off++] = 0x83;
4857 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4858 pCodeBuf[off++] = (uint8_t)uImm;
4859 }
4860 else
4861 {
4862 pCodeBuf[off++] = 0x81;
4863 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4864 pCodeBuf[off++] = RT_BYTE1(uImm);
4865 pCodeBuf[off++] = RT_BYTE2(uImm);
4866 pCodeBuf[off++] = RT_BYTE3(uImm);
4867 pCodeBuf[off++] = RT_BYTE4(uImm);
4868 }
4869 RT_NOREF(fSetFlags);
4870
4871#elif defined(RT_ARCH_ARM64)
4872 uint32_t uImmR = 0;
4873 uint32_t uImmNandS = 0;
4874 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4875 {
4876 if (!fSetFlags)
4877 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4878 else
4879 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4880 }
4881 else
4882# ifdef IEM_WITH_THROW_CATCH
4883 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4884# else
4885 AssertReleaseFailedStmt(off = UINT32_MAX);
4886# endif
4887
4888#else
4889# error "Port me"
4890#endif
4891 return off;
4892}
4893
4894
4895/**
4896 * Emits code for AND'ing a 32-bit GPR with a constant.
4897 *
4898 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4899 */
4900DECL_INLINE_THROW(uint32_t)
4901iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4902{
4903#if defined(RT_ARCH_AMD64)
4904 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4905
4906#elif defined(RT_ARCH_ARM64)
4907 uint32_t uImmR = 0;
4908 uint32_t uImmNandS = 0;
4909 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4910 {
4911 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4912 if (!fSetFlags)
4913 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4914 else
4915 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4916 }
4917 else
4918 {
4919 /* Use temporary register for the 64-bit immediate. */
4920 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4921 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4922 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4923 }
4924
4925#else
4926# error "Port me"
4927#endif
4928 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4929 return off;
4930}
4931
4932
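/*
 * The AND-by-immediate emitters above (and the ORR/EOR immediate variants further
 * down) rely on the ARM64 bitmask immediate encoding: only masks consisting of a
 * rotated, repeating run of contiguous set bits encode directly (e.g. 0xffff or
 * 0x0ff0); anything else (e.g. 0x1234) falls back to a temporary register in the
 * non-Ex variants or asserts in the Ex variants. A usage sketch with a hypothetical
 * helper (illustration only, not compiled):
 */
#if 0 /* hypothetical example, illustration only */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleAndMasks(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
{
    /* 0xffff is a contiguous mask: single AND (immediate) on ARM64, and Ev,imm32 on AMD64. */
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxReg, UINT32_C(0xffff));
    /* 0x1234 is not encodable as a bitmask immediate; still works, but may allocate a temporary register. */
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxReg, UINT32_C(0x1234));
    return off;
}
#endif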
4933/**
4934 * Emits code for AND'ing a 64-bit GPR with a constant.
4935 *
4936 * @note For ARM64, any complicated immediate without an AND/ANDS compatible
4937 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc are
4938 * the same.
4939 */
4940DECL_FORCE_INLINE_THROW(uint32_t)
4941iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4942 bool fSetFlags = false)
4943{
4944#if defined(RT_ARCH_AMD64)
4945 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4946 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4947 RT_NOREF(fSetFlags);
4948
4949#elif defined(RT_ARCH_ARM64)
4950 uint32_t uImmR = 0;
4951 uint32_t uImmNandS = 0;
4952 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4953 {
4954 if (!fSetFlags)
4955 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4956 else
4957 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4958 }
4959 else if (iGprDst != iGprSrc)
4960 {
4961 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4962 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4963 }
4964 else
4965# ifdef IEM_WITH_THROW_CATCH
4966 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4967# else
4968 AssertReleaseFailedStmt(off = UINT32_MAX);
4969# endif
4970
4971#else
4972# error "Port me"
4973#endif
4974 return off;
4975}
4976
4977/**
4978 * Emits code for AND'ing a 32-bit GPR with a constant.
4979 *
4980 * @note For ARM64, any complicated immediate without an AND/ANDS compatible
4981 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc are
4982 * the same.
4983 *
4984 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4985 */
4986DECL_FORCE_INLINE_THROW(uint32_t)
4987iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4988 bool fSetFlags = false)
4989{
4990#if defined(RT_ARCH_AMD64)
4991 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4992 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4993 RT_NOREF(fSetFlags);
4994
4995#elif defined(RT_ARCH_ARM64)
4996 uint32_t uImmR = 0;
4997 uint32_t uImmNandS = 0;
4998 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4999 {
5000 if (!fSetFlags)
5001 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5002 else
5003 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5004 }
5005 else if (iGprDst != iGprSrc)
5006 {
5007 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5008 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5009 }
5010 else
5011# ifdef IEM_WITH_THROW_CATCH
5012 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5013# else
5014 AssertReleaseFailedStmt(off = UINT32_MAX);
5015# endif
5016
5017#else
5018# error "Port me"
5019#endif
5020 return off;
5021}
5022
5023
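/*
 * Concrete illustration of the iGprDst != iGprSrc restriction noted above, assuming
 * host register indexes 0 and 1 and a code buffer with enough room already ensured
 * (illustration only, not compiled):
 */
#if 0 /* hypothetical example, illustration only */
/* 0xff00 is a bitmask immediate, so ARM64 emits a single AND (immediate) and any register combination works. */
off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, 0 /*iGprDst*/, 0 /*iGprSrc*/, UINT32_C(0xff00));
/* 0x1234 is not encodable; this only works because iGprDst != iGprSrc (the immediate is built in iGprDst first). */
off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, 0 /*iGprDst*/, 1 /*iGprSrc*/, UINT32_C(0x1234));
#endif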
5024/**
5025 * Emits code for OR'ing two 64-bit GPRs.
5026 */
5027DECL_FORCE_INLINE(uint32_t)
5028iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5029{
5030#if defined(RT_ARCH_AMD64)
5031 /* or Gv, Ev */
5032 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5033 pCodeBuf[off++] = 0x0b;
5034 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5035
5036#elif defined(RT_ARCH_ARM64)
5037 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5038
5039#else
5040# error "Port me"
5041#endif
5042 return off;
5043}
5044
5045
5046/**
5047 * Emits code for OR'ing two 64-bit GPRs.
5048 */
5049DECL_INLINE_THROW(uint32_t)
5050iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5051{
5052#if defined(RT_ARCH_AMD64)
5053 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5054#elif defined(RT_ARCH_ARM64)
5055 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5056#else
5057# error "Port me"
5058#endif
5059 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5060 return off;
5061}
5062
5063
5064/**
5065 * Emits code for OR'ing two 32-bit GPRs.
5066 * @note Bits 63:32 of the destination GPR will be cleared.
5067 */
5068DECL_FORCE_INLINE(uint32_t)
5069iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5070{
5071#if defined(RT_ARCH_AMD64)
5072 /* or Gv, Ev */
5073 if (iGprDst >= 8 || iGprSrc >= 8)
5074 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5075 pCodeBuf[off++] = 0x0b;
5076 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5077
5078#elif defined(RT_ARCH_ARM64)
5079 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5080
5081#else
5082# error "Port me"
5083#endif
5084 return off;
5085}
5086
5087
5088/**
5089 * Emits code for OR'ing two 32-bit GPRs.
5090 * @note Bits 63:32 of the destination GPR will be cleared.
5091 */
5092DECL_INLINE_THROW(uint32_t)
5093iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5094{
5095#if defined(RT_ARCH_AMD64)
5096 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5097#elif defined(RT_ARCH_ARM64)
5098 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5099#else
5100# error "Port me"
5101#endif
5102 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5103 return off;
5104}
5105
5106
5107/**
5108 * Emits code for OR'ing a 64-bit GPR with a constant.
5109 */
5110DECL_INLINE_THROW(uint32_t)
5111iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5112{
5113#if defined(RT_ARCH_AMD64)
5114 if ((int64_t)uImm == (int8_t)uImm)
5115 {
5116 /* or Ev, imm8 */
5117 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5118 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5119 pbCodeBuf[off++] = 0x83;
5120 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5121 pbCodeBuf[off++] = (uint8_t)uImm;
5122 }
5123 else if ((int64_t)uImm == (int32_t)uImm)
5124 {
5125 /* or Ev, imm32 */
5126 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5127 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5128 pbCodeBuf[off++] = 0x81;
5129 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5130 pbCodeBuf[off++] = RT_BYTE1(uImm);
5131 pbCodeBuf[off++] = RT_BYTE2(uImm);
5132 pbCodeBuf[off++] = RT_BYTE3(uImm);
5133 pbCodeBuf[off++] = RT_BYTE4(uImm);
5134 }
5135 else
5136 {
5137 /* Use temporary register for the 64-bit immediate. */
5138 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5139 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5140 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5141 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5142 }
5143
5144#elif defined(RT_ARCH_ARM64)
5145 uint32_t uImmR = 0;
5146 uint32_t uImmNandS = 0;
5147 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5148 {
5149 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5150 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5151 }
5152 else
5153 {
5154 /* Use temporary register for the 64-bit immediate. */
5155 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5156 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5157 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5158 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5159 }
5160
5161#else
5162# error "Port me"
5163#endif
5164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5165 return off;
5166}
5167
5168
5169/**
5170 * Emits code for OR'ing a 32-bit GPR with a constant.
5171 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5172 * @note For ARM64 this only supports @a uImm values that can be expressed using
5173 * the two 6-bit immediates of the OR instructions. The caller must make
5174 * sure this is possible!
5175 */
5176DECL_FORCE_INLINE_THROW(uint32_t)
5177iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5178{
5179#if defined(RT_ARCH_AMD64)
5180 /* or Ev, imm */
5181 if (iGprDst >= 8)
5182 pCodeBuf[off++] = X86_OP_REX_B;
5183 if ((int32_t)uImm == (int8_t)uImm)
5184 {
5185 pCodeBuf[off++] = 0x83;
5186 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5187 pCodeBuf[off++] = (uint8_t)uImm;
5188 }
5189 else
5190 {
5191 pCodeBuf[off++] = 0x81;
5192 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5193 pCodeBuf[off++] = RT_BYTE1(uImm);
5194 pCodeBuf[off++] = RT_BYTE2(uImm);
5195 pCodeBuf[off++] = RT_BYTE3(uImm);
5196 pCodeBuf[off++] = RT_BYTE4(uImm);
5197 }
5198
5199#elif defined(RT_ARCH_ARM64)
5200 uint32_t uImmR = 0;
5201 uint32_t uImmNandS = 0;
5202 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5203 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5204 else
5205# ifdef IEM_WITH_THROW_CATCH
5206 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5207# else
5208 AssertReleaseFailedStmt(off = UINT32_MAX);
5209# endif
5210
5211#else
5212# error "Port me"
5213#endif
5214 return off;
5215}
5216
5217
5218/**
5219 * Emits code for OR'ing a 32-bit GPR with a constant.
5220 *
5221 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5222 */
5223DECL_INLINE_THROW(uint32_t)
5224iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5225{
5226#if defined(RT_ARCH_AMD64)
5227 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5228
5229#elif defined(RT_ARCH_ARM64)
5230 uint32_t uImmR = 0;
5231 uint32_t uImmNandS = 0;
5232 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5233 {
5234 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5235 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5236 }
5237 else
5238 {
5239 /* Use temporary register for the 64-bit immediate. */
5240 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5241 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5242 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5243 }
5244
5245#else
5246# error "Port me"
5247#endif
5248 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5249 return off;
5250}
5251
5252
5253
5254/**
5255 * ORs two 64-bit GPRs together, storing the result in a third register.
5256 */
5257DECL_FORCE_INLINE(uint32_t)
5258iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5259{
5260#ifdef RT_ARCH_AMD64
5261 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5262 {
5263 /** @todo consider LEA */
5264 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5265 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5266 }
5267 else
5268 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5269
5270#elif defined(RT_ARCH_ARM64)
5271 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5272
5273#else
5274# error "Port me!"
5275#endif
5276 return off;
5277}
5278
5279
5280
5281/**
5282 * ORs two 32-bit GPRs together, storing the result in a third register.
5283 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5284 */
5285DECL_FORCE_INLINE(uint32_t)
5286iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5287{
5288#ifdef RT_ARCH_AMD64
5289 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5290 {
5291 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5292 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5293 }
5294 else
5295 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5296
5297#elif defined(RT_ARCH_ARM64)
5298 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5299
5300#else
5301# error "Port me!"
5302#endif
5303 return off;
5304}
5305
5306
5307/**
5308 * Emits code for XOR'ing two 64-bit GPRs.
5309 */
5310DECL_INLINE_THROW(uint32_t)
5311iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5312{
5313#if defined(RT_ARCH_AMD64)
5314 /* xor Gv, Ev */
5315 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5316 pCodeBuf[off++] = 0x33;
5317 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5318
5319#elif defined(RT_ARCH_ARM64)
5320 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5321
5322#else
5323# error "Port me"
5324#endif
5325 return off;
5326}
5327
5328
5329/**
5330 * Emits code for XOR'ing two 64-bit GPRs.
5331 */
5332DECL_INLINE_THROW(uint32_t)
5333iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5334{
5335#if defined(RT_ARCH_AMD64)
5336 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5337#elif defined(RT_ARCH_ARM64)
5338 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5339#else
5340# error "Port me"
5341#endif
5342 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5343 return off;
5344}
5345
5346
5347/**
5348 * Emits code for XOR'ing two 32-bit GPRs.
5349 */
5350DECL_INLINE_THROW(uint32_t)
5351iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5352{
5353#if defined(RT_ARCH_AMD64)
5354 /* xor Gv, Ev */
5355 if (iGprDst >= 8 || iGprSrc >= 8)
5356 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5357 pCodeBuf[off++] = 0x33;
5358 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5359
5360#elif defined(RT_ARCH_ARM64)
5361 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5362
5363#else
5364# error "Port me"
5365#endif
5366 return off;
5367}
5368
5369
5370/**
5371 * Emits code for XOR'ing two 32-bit GPRs.
5372 */
5373DECL_INLINE_THROW(uint32_t)
5374iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5375{
5376#if defined(RT_ARCH_AMD64)
5377 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5378#elif defined(RT_ARCH_ARM64)
5379 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5380#else
5381# error "Port me"
5382#endif
5383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5384 return off;
5385}
5386
5387
5388/**
5389 * Emits code for XOR'ing a 32-bit GPR with a constant.
5390 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5391 * @note For ARM64 this only supports @a uImm values that can be expressed using
5392 * the two 6-bit immediates of the EOR instructions. The caller must make
5393 * sure this is possible!
5394 */
5395DECL_FORCE_INLINE_THROW(uint32_t)
5396iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5397{
5398#if defined(RT_ARCH_AMD64)
5399 /* xor Ev, imm */
5400 if (iGprDst >= 8)
5401 pCodeBuf[off++] = X86_OP_REX_B;
5402 if ((int32_t)uImm == (int8_t)uImm)
5403 {
5404 pCodeBuf[off++] = 0x83;
5405 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5406 pCodeBuf[off++] = (uint8_t)uImm;
5407 }
5408 else
5409 {
5410 pCodeBuf[off++] = 0x81;
5411 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5412 pCodeBuf[off++] = RT_BYTE1(uImm);
5413 pCodeBuf[off++] = RT_BYTE2(uImm);
5414 pCodeBuf[off++] = RT_BYTE3(uImm);
5415 pCodeBuf[off++] = RT_BYTE4(uImm);
5416 }
5417
5418#elif defined(RT_ARCH_ARM64)
5419 uint32_t uImmR = 0;
5420 uint32_t uImmNandS = 0;
5421 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5422 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5423 else
5424# ifdef IEM_WITH_THROW_CATCH
5425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5426# else
5427 AssertReleaseFailedStmt(off = UINT32_MAX);
5428# endif
5429
5430#else
5431# error "Port me"
5432#endif
5433 return off;
5434}
5435
5436
5437/*********************************************************************************************************************************
5438* Shifting *
5439*********************************************************************************************************************************/
5440
5441/**
5442 * Emits code for shifting a GPR a fixed number of bits to the left.
5443 */
5444DECL_FORCE_INLINE(uint32_t)
5445iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5446{
5447 Assert(cShift > 0 && cShift < 64);
5448
5449#if defined(RT_ARCH_AMD64)
5450 /* shl dst, cShift */
5451 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5452 if (cShift != 1)
5453 {
5454 pCodeBuf[off++] = 0xc1;
5455 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5456 pCodeBuf[off++] = cShift;
5457 }
5458 else
5459 {
5460 pCodeBuf[off++] = 0xd1;
5461 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5462 }
5463
5464#elif defined(RT_ARCH_ARM64)
5465 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5466
5467#else
5468# error "Port me"
5469#endif
5470 return off;
5471}
5472
5473
5474/**
5475 * Emits code for shifting a GPR a fixed number of bits to the left.
5476 */
5477DECL_INLINE_THROW(uint32_t)
5478iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5479{
5480#if defined(RT_ARCH_AMD64)
5481 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5482#elif defined(RT_ARCH_ARM64)
5483 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5484#else
5485# error "Port me"
5486#endif
5487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5488 return off;
5489}
5490
5491
5492/**
5493 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5494 */
5495DECL_FORCE_INLINE(uint32_t)
5496iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5497{
5498 Assert(cShift > 0 && cShift < 32);
5499
5500#if defined(RT_ARCH_AMD64)
5501 /* shl dst, cShift */
5502 if (iGprDst >= 8)
5503 pCodeBuf[off++] = X86_OP_REX_B;
5504 if (cShift != 1)
5505 {
5506 pCodeBuf[off++] = 0xc1;
5507 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5508 pCodeBuf[off++] = cShift;
5509 }
5510 else
5511 {
5512 pCodeBuf[off++] = 0xd1;
5513 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5514 }
5515
5516#elif defined(RT_ARCH_ARM64)
5517 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5518
5519#else
5520# error "Port me"
5521#endif
5522 return off;
5523}
5524
5525
5526/**
5527 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5528 */
5529DECL_INLINE_THROW(uint32_t)
5530iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5531{
5532#if defined(RT_ARCH_AMD64)
5533 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5534#elif defined(RT_ARCH_ARM64)
5535 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5536#else
5537# error "Port me"
5538#endif
5539 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5540 return off;
5541}
5542
5543
5544/**
5545 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5546 */
5547DECL_FORCE_INLINE(uint32_t)
5548iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5549{
5550 Assert(cShift > 0 && cShift < 64);
5551
5552#if defined(RT_ARCH_AMD64)
5553 /* shr dst, cShift */
5554 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5555 if (cShift != 1)
5556 {
5557 pCodeBuf[off++] = 0xc1;
5558 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5559 pCodeBuf[off++] = cShift;
5560 }
5561 else
5562 {
5563 pCodeBuf[off++] = 0xd1;
5564 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5565 }
5566
5567#elif defined(RT_ARCH_ARM64)
5568 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5569
5570#else
5571# error "Port me"
5572#endif
5573 return off;
5574}
5575
5576
5577/**
5578 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5579 */
5580DECL_INLINE_THROW(uint32_t)
5581iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5582{
5583#if defined(RT_ARCH_AMD64)
5584 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5585#elif defined(RT_ARCH_ARM64)
5586 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5587#else
5588# error "Port me"
5589#endif
5590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5591 return off;
5592}
5593
5594
5595/**
5596 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5597 * right.
5598 */
5599DECL_FORCE_INLINE(uint32_t)
5600iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5601{
5602 Assert(cShift > 0 && cShift < 32);
5603
5604#if defined(RT_ARCH_AMD64)
5605 /* shr dst, cShift */
5606 if (iGprDst >= 8)
5607 pCodeBuf[off++] = X86_OP_REX_B;
5608 if (cShift != 1)
5609 {
5610 pCodeBuf[off++] = 0xc1;
5611 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5612 pCodeBuf[off++] = cShift;
5613 }
5614 else
5615 {
5616 pCodeBuf[off++] = 0xd1;
5617 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5618 }
5619
5620#elif defined(RT_ARCH_ARM64)
5621 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5622
5623#else
5624# error "Port me"
5625#endif
5626 return off;
5627}
5628
5629
5630/**
5631 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5632 * right.
5633 */
5634DECL_INLINE_THROW(uint32_t)
5635iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5636{
5637#if defined(RT_ARCH_AMD64)
5638 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5639#elif defined(RT_ARCH_ARM64)
5640 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5641#else
5642# error "Port me"
5643#endif
5644 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5645 return off;
5646}
5647
5648
5649/**
5650 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5651 * right and assigning it to a different GPR.
5652 */
5653DECL_INLINE_THROW(uint32_t)
5654iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5655{
5656 Assert(cShift > 0); Assert(cShift < 32);
5657#if defined(RT_ARCH_AMD64)
5658 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5659 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5660
5661#elif defined(RT_ARCH_ARM64)
5662 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5663
5664#else
5665# error "Port me"
5666#endif
5667 return off;
5668}
5669
5670
5671/**
5672 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5673 */
5674DECL_FORCE_INLINE(uint32_t)
5675iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5676{
5677 Assert(cShift > 0 && cShift < 64);
5678
5679#if defined(RT_ARCH_AMD64)
5680 /* sar dst, cShift */
5681 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5682 if (cShift != 1)
5683 {
5684 pCodeBuf[off++] = 0xc1;
5685 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5686 pCodeBuf[off++] = cShift;
5687 }
5688 else
5689 {
5690 pCodeBuf[off++] = 0xd1;
5691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5692 }
5693
5694#elif defined(RT_ARCH_ARM64)
5695 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);
5696
5697#else
5698# error "Port me"
5699#endif
5700 return off;
5701}
5702
5703
5704/**
5705 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5706 */
5707DECL_INLINE_THROW(uint32_t)
5708iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5709{
5710#if defined(RT_ARCH_AMD64)
5711 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5712#elif defined(RT_ARCH_ARM64)
5713 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5714#else
5715# error "Port me"
5716#endif
5717 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5718 return off;
5719}
5720
5721
5722/**
5723 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5724 */
5725DECL_FORCE_INLINE(uint32_t)
5726iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5727{
5728 Assert(cShift > 0 && cShift < 32);
5729
5730#if defined(RT_ARCH_AMD64)
5731 /* sar dst, cShift */
5732 if (iGprDst >= 8)
5733 pCodeBuf[off++] = X86_OP_REX_B;
5734 if (cShift != 1)
5735 {
5736 pCodeBuf[off++] = 0xc1;
5737 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5738 pCodeBuf[off++] = cShift;
5739 }
5740 else
5741 {
5742 pCodeBuf[off++] = 0xd1;
5743 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5744 }
5745
5746#elif defined(RT_ARCH_ARM64)
5747 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5748
5749#else
5750# error "Port me"
5751#endif
5752 return off;
5753}
5754
5755
5756/**
5757 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5758 */
5759DECL_INLINE_THROW(uint32_t)
5760iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5761{
5762#if defined(RT_ARCH_AMD64)
5763 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5764#elif defined(RT_ARCH_ARM64)
5765 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5766#else
5767# error "Port me"
5768#endif
5769 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5770 return off;
5771}
5772
5773
5774/**
5775 * Emits code for rotating a GPR a fixed number of bits to the left.
5776 */
5777DECL_FORCE_INLINE(uint32_t)
5778iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5779{
5780 Assert(cShift > 0 && cShift < 64);
5781
5782#if defined(RT_ARCH_AMD64)
5783 /* rol dst, cShift */
5784 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5785 if (cShift != 1)
5786 {
5787 pCodeBuf[off++] = 0xc1;
5788 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5789 pCodeBuf[off++] = cShift;
5790 }
5791 else
5792 {
5793 pCodeBuf[off++] = 0xd1;
5794 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5795 }
5796
5797#elif defined(RT_ARCH_ARM64)
5798 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5799
5800#else
5801# error "Port me"
5802#endif
5803 return off;
5804}
5805
5806
5807#if defined(RT_ARCH_AMD64)
5808/**
5809 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5810 */
5811DECL_FORCE_INLINE(uint32_t)
5812iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5813{
5814 Assert(cShift > 0 && cShift < 32);
5815
5816 /* rcl dst, cShift */
5817 if (iGprDst >= 8)
5818 pCodeBuf[off++] = X86_OP_REX_B;
5819 if (cShift != 1)
5820 {
5821 pCodeBuf[off++] = 0xc1;
5822 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5823 pCodeBuf[off++] = cShift;
5824 }
5825 else
5826 {
5827 pCodeBuf[off++] = 0xd1;
5828 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5829 }
5830
5831 return off;
5832}
5833#endif /* RT_ARCH_AMD64 */
5834
5835
5836
5837/**
5838 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5839 * @note Bits 63:32 of the destination GPR will be cleared.
5840 */
5841DECL_FORCE_INLINE(uint32_t)
5842iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5843{
5844#if defined(RT_ARCH_AMD64)
5845 /*
5846 * There is no bswap r16 on x86 (the encoding exists but does not work).
5847 * So just use a rol (gcc -O2 is doing that).
5848 *
5849 * rol r16, 0x8
5850 */
5851 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5852 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5853 if (iGpr >= 8)
5854 pbCodeBuf[off++] = X86_OP_REX_B;
5855 pbCodeBuf[off++] = 0xc1;
5856 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5857 pbCodeBuf[off++] = 0x08;
5858#elif defined(RT_ARCH_ARM64)
5859 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5860
5861 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5862#else
5863# error "Port me"
5864#endif
5865
5866 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5867 return off;
5868}
5869
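/*
 * The rol-by-8 / REV16 pair above simply swaps the two bytes of the 16-bit value;
 * in plain C the transformation is (illustration only, not compiled):
 */
#if 0 /* hypothetical example, illustration only */
uint16_t const uValue   = UINT16_C(0x1234);
uint16_t const uSwapped = (uint16_t)((uValue << 8) | (uValue >> 8)); /* 0x3412 */
#endif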
5870
5871/**
5872 * Emits code for reversing the byte order in a 32-bit GPR.
5873 * @note Bits 63:32 of the destination GPR will be cleared.
5874 */
5875DECL_FORCE_INLINE(uint32_t)
5876iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5877{
5878#if defined(RT_ARCH_AMD64)
5879 /* bswap r32 */
5880 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5881
5882 if (iGpr >= 8)
5883 pbCodeBuf[off++] = X86_OP_REX_B;
5884 pbCodeBuf[off++] = 0x0f;
5885 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5886#elif defined(RT_ARCH_ARM64)
5887 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5888
5889 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5890#else
5891# error "Port me"
5892#endif
5893
5894 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5895 return off;
5896}
5897
5898
5899/**
5900 * Emits code for reversing the byte order in a 64-bit GPR.
5901 */
5902DECL_FORCE_INLINE(uint32_t)
5903iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5904{
5905#if defined(RT_ARCH_AMD64)
5906 /* bswap r64 */
5907 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5908
5909 if (iGpr >= 8)
5910 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5911 else
5912 pbCodeBuf[off++] = X86_OP_REX_W;
5913 pbCodeBuf[off++] = 0x0f;
5914 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5915#elif defined(RT_ARCH_ARM64)
5916 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5917
5918 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5919#else
5920# error "Port me"
5921#endif
5922
5923 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5924 return off;
5925}
5926
5927
5928/*********************************************************************************************************************************
5929* Compare and Testing *
5930*********************************************************************************************************************************/
5931
5932
5933#ifdef RT_ARCH_ARM64
5934/**
5935 * Emits an ARM64 compare instruction.
5936 */
5937DECL_INLINE_THROW(uint32_t)
5938iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5939 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5940{
5941 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5942 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5943 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5944 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5945 return off;
5946}
5947#endif
5948
5949
5950/**
5951 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5952 * with conditional instruction.
5953 */
5954DECL_FORCE_INLINE(uint32_t)
5955iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5956{
5957#ifdef RT_ARCH_AMD64
5958 /* cmp Gv, Ev */
5959 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5960 pCodeBuf[off++] = 0x3b;
5961 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5962
5963#elif defined(RT_ARCH_ARM64)
5964 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
5965
5966#else
5967# error "Port me!"
5968#endif
5969 return off;
5970}
5971
5972
5973/**
5974 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5975 * with conditional instruction.
5976 */
5977DECL_INLINE_THROW(uint32_t)
5978iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5979{
5980#ifdef RT_ARCH_AMD64
5981 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5982#elif defined(RT_ARCH_ARM64)
5983 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5984#else
5985# error "Port me!"
5986#endif
5987 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5988 return off;
5989}
5990
5991
5992/**
5993 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5994 * with conditional instruction.
5995 */
5996DECL_FORCE_INLINE(uint32_t)
5997iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5998{
5999#ifdef RT_ARCH_AMD64
6000 /* cmp Gv, Ev */
6001 if (iGprLeft >= 8 || iGprRight >= 8)
6002 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6003 pCodeBuf[off++] = 0x3b;
6004 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6005
6006#elif defined(RT_ARCH_ARM64)
6007 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
6008
6009#else
6010# error "Port me!"
6011#endif
6012 return off;
6013}
6014
6015
6016/**
6017 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6018 * with conditional instruction.
6019 */
6020DECL_INLINE_THROW(uint32_t)
6021iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6022{
6023#ifdef RT_ARCH_AMD64
6024 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6025#elif defined(RT_ARCH_ARM64)
6026 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6027#else
6028# error "Port me!"
6029#endif
6030 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6031 return off;
6032}
6033
6034
6035/**
6036 * Emits a compare of a 64-bit GPR with a constant value, setting status
6037 * flags/whatever for use with conditional instruction.
6038 */
6039DECL_INLINE_THROW(uint32_t)
6040iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
6041{
6042#ifdef RT_ARCH_AMD64
6043 if (uImm <= UINT32_C(0x7f))
6044 {
6045 /* cmp Ev, Ib */
6046 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6047 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6048 pbCodeBuf[off++] = 0x83;
6049 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6050 pbCodeBuf[off++] = (uint8_t)uImm;
6051 }
6052 else if ((int64_t)uImm == (int32_t)uImm)
6053 {
6054 /* cmp Ev, imm */
6055 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6056 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6057 pbCodeBuf[off++] = 0x81;
6058 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6059 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6060 pbCodeBuf[off++] = RT_BYTE1(uImm);
6061 pbCodeBuf[off++] = RT_BYTE2(uImm);
6062 pbCodeBuf[off++] = RT_BYTE3(uImm);
6063 pbCodeBuf[off++] = RT_BYTE4(uImm);
6064 }
6065 else
6066 {
6067 /* Use temporary register for the immediate. */
6068 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6069 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6070 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6071 }
6072
6073#elif defined(RT_ARCH_ARM64)
6074 /** @todo guess there are clever things we can do here... */
6075 if (uImm < _4K)
6076 {
6077 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6078 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6079 true /*64Bit*/, true /*fSetFlags*/);
6080 }
6081 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6082 {
6083 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6084 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6085 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6086 }
6087 else
6088 {
6089 /* Use temporary register for the immediate. */
6090 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6091 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6092 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6093 }
6094
6095#else
6096# error "Port me!"
6097#endif
6098
6099 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6100 return off;
6101}
6102
6103
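/*
 * On ARM64 the emitter above encodes the compare directly for immediates in
 * 0..0xfff and for multiples of 0x1000 up to 0xfff000 (the shift-12 form of SUBS);
 * everything else goes through a temporary register. A usage sketch with a
 * hypothetical helper (illustration only, not compiled):
 */
#if 0 /* hypothetical example, illustration only */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleCompares(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegLeft)
{
    /* Single instruction on both hosts: SUBS xzr, xN, #3, LSL #12 resp. cmp rN, 0x3000. */
    off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegLeft, UINT64_C(0x3000));
    /* Not encodable as an immediate on either host; a temporary register is allocated for it. */
    off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegLeft, UINT64_C(0x123456789));
    return off;
}
#endif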
6104/**
6105 * Emits a compare of a 32-bit GPR with a constant value, setting status
6106 * flags/whatever for use with conditional instruction.
6107 *
6108 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6109 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
6110 * bits all zero). Will release assert or throw exception if the caller
6111 * violates this restriction.
6112 */
6113DECL_FORCE_INLINE_THROW(uint32_t)
6114iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6115{
6116#ifdef RT_ARCH_AMD64
6117 if (iGprLeft >= 8)
6118 pCodeBuf[off++] = X86_OP_REX_B;
6119 if (uImm <= UINT32_C(0x7f))
6120 {
6121 /* cmp Ev, Ib */
6122 pCodeBuf[off++] = 0x83;
6123 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6124 pCodeBuf[off++] = (uint8_t)uImm;
6125 }
6126 else
6127 {
6128 /* cmp Ev, imm */
6129 pCodeBuf[off++] = 0x81;
6130 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6131 pCodeBuf[off++] = RT_BYTE1(uImm);
6132 pCodeBuf[off++] = RT_BYTE2(uImm);
6133 pCodeBuf[off++] = RT_BYTE3(uImm);
6134 pCodeBuf[off++] = RT_BYTE4(uImm);
6135 }
6136
6137#elif defined(RT_ARCH_ARM64)
6138 /** @todo guess there are clever things we can do here... */
6139 if (uImm < _4K)
6140 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6141 false /*64Bit*/, true /*fSetFlags*/);
6142 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6143 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6144 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6145 else
6146# ifdef IEM_WITH_THROW_CATCH
6147 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6148# else
6149 AssertReleaseFailedStmt(off = UINT32_MAX);
6150# endif
6151
6152#else
6153# error "Port me!"
6154#endif
6155 return off;
6156}
6157
6158
6159/**
6160 * Emits a compare of a 32-bit GPR with a constant value, setting status
6161 * flags/whatever for use with conditional instruction.
6162 */
6163DECL_INLINE_THROW(uint32_t)
6164iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6165{
6166#ifdef RT_ARCH_AMD64
6167 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6168
6169#elif defined(RT_ARCH_ARM64)
6170 /** @todo guess there are clever things we can do here... */
6171 if (uImm < _4K)
6172 {
6173 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6174 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6175 false /*64Bit*/, true /*fSetFlags*/);
6176 }
6177 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6178 {
6179 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6181 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6182 }
6183 else
6184 {
6185 /* Use temporary register for the immediate. */
6186 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6187 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6188 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6189 }
6190
6191#else
6192# error "Port me!"
6193#endif
6194
6195 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6196 return off;
6197}
6198
6199
6200/**
6201 * Emits a compare of a 16-bit GPR with a constant value, setting status
6202 * flags/whatever for use with conditional instruction.
6203 *
6204 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
6205 * 16-bit value from @a iGprLeft.
6206 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6207 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
6208 * bits all zero). Will release assert or throw exception if the caller
6209 * violates this restriction.
6210 */
6211DECL_FORCE_INLINE_THROW(uint32_t)
6212iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6213 uint8_t idxTmpReg = UINT8_MAX)
6214{
6215#ifdef RT_ARCH_AMD64
6216 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6217 if (iGprLeft >= 8)
6218 pCodeBuf[off++] = X86_OP_REX_B;
6219 if (uImm <= UINT32_C(0x7f))
6220 {
6221 /* cmp Ev, Ib */
6222 pCodeBuf[off++] = 0x83;
6223 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6224 pCodeBuf[off++] = (uint8_t)uImm;
6225 }
6226 else
6227 {
6228 /* cmp Ev, imm */
6229 pCodeBuf[off++] = 0x81;
6230 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6231 pCodeBuf[off++] = RT_BYTE1(uImm);
6232 pCodeBuf[off++] = RT_BYTE2(uImm);
6233 }
6234 RT_NOREF(idxTmpReg);
6235
6236#elif defined(RT_ARCH_ARM64)
6237# ifdef IEM_WITH_THROW_CATCH
6238 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6239# else
6240 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6241# endif
6242 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6243 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6244 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6245
6246#else
6247# error "Port me!"
6248#endif
6249 return off;
6250}
6251
6252
6253/**
6254 * Emits a compare of a 16-bit GPR with a constant value, setting status
6255 * flags/whatever for use with conditional instruction.
6256 *
6257 * @note ARM64: Helper register is required (idxTmpReg).
6258 */
6259DECL_INLINE_THROW(uint32_t)
6260iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6261 uint8_t idxTmpReg = UINT8_MAX)
6262{
6263#ifdef RT_ARCH_AMD64
6264 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6265#elif defined(RT_ARCH_ARM64)
6266 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6267#else
6268# error "Port me!"
6269#endif
6270 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6271 return off;
6272}
6273
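/*
 * ARM64 has no 16-bit compare, so the emitters above first isolate the low 16 bits
 * of iGprLeft in idxTmpReg with an AND (immediate) and then do a 32-bit compare;
 * AMD64 simply uses the operand-size prefix and ignores the helper register. A
 * usage sketch with a hypothetical helper (illustration only, not compiled):
 */
#if 0 /* hypothetical example, illustration only */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleCmp16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegValue, uint8_t idxRegTmp)
{
    /* Compare the low 16 bits of idxRegValue against 0x123; idxRegTmp is only used by the ARM64 path. */
    off = iemNativeEmitCmpGpr16WithImm(pReNative, off, idxRegValue, UINT16_C(0x123), idxRegTmp);
    return off;
}
#endif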
6274
6275
6276/*********************************************************************************************************************************
6277* Branching *
6278*********************************************************************************************************************************/
6279
6280/**
6281 * Emits a JMP rel32 / B imm26 to the given label.
6282 */
6283DECL_FORCE_INLINE_THROW(uint32_t)
6284iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6285{
6286 Assert(idxLabel < pReNative->cLabels);
6287
6288#ifdef RT_ARCH_AMD64
6289 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6290 {
6291 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6292 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6293 {
6294 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6295 pCodeBuf[off++] = (uint8_t)offRel;
6296 }
6297 else
6298 {
6299 offRel -= 3;
6300 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6301 pCodeBuf[off++] = RT_BYTE1(offRel);
6302 pCodeBuf[off++] = RT_BYTE2(offRel);
6303 pCodeBuf[off++] = RT_BYTE3(offRel);
6304 pCodeBuf[off++] = RT_BYTE4(offRel);
6305 }
6306 }
6307 else
6308 {
6309 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6310 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6311 pCodeBuf[off++] = 0xfe;
6312 pCodeBuf[off++] = 0xff;
6313 pCodeBuf[off++] = 0xff;
6314 pCodeBuf[off++] = 0xff;
6315 }
6316 pCodeBuf[off++] = 0xcc; /* int3 poison */
6317
6318#elif defined(RT_ARCH_ARM64)
6319 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6320 {
6321 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6322 off++;
6323 }
6324 else
6325 {
6326 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6327 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6328 }
6329
6330#else
6331# error "Port me!"
6332#endif
6333 return off;
6334}
6335
6336
6337/**
6338 * Emits a JMP rel32 / B imm26 to the given label.
6339 */
6340DECL_INLINE_THROW(uint32_t)
6341iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6342{
6343#ifdef RT_ARCH_AMD64
6344 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6345#elif defined(RT_ARCH_ARM64)
6346 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6347#else
6348# error "Port me!"
6349#endif
6350 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6351 return off;
6352}
6353
6354
6355/**
6356 * Emits a JMP rel32 / B imm26 to a new undefined label.
6357 */
6358DECL_INLINE_THROW(uint32_t)
6359iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6360{
6361 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6362 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6363}
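/*
 * Usage sketch (illustrative only): a forward jump to code that has not been
 * emitted yet typically creates an undefined label, branches to it and then
 * defines the label once the target offset is known.  This assumes the
 * iemNativeLabelCreate/iemNativeLabelDefine helpers from IEMN8veRecompiler.h;
 * the label type is just a placeholder.
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_<Whatever>, UINT32_MAX /*offWhere*/, 0 /*uData*/);
 *      off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
 *      ... emit the code being jumped over ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */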
6364
6365/** Condition type. */
6366#ifdef RT_ARCH_AMD64
6367typedef enum IEMNATIVEINSTRCOND : uint8_t
6368{
6369 kIemNativeInstrCond_o = 0,
6370 kIemNativeInstrCond_no,
6371 kIemNativeInstrCond_c,
6372 kIemNativeInstrCond_nc,
6373 kIemNativeInstrCond_e,
6374 kIemNativeInstrCond_ne,
6375 kIemNativeInstrCond_be,
6376 kIemNativeInstrCond_nbe,
6377 kIemNativeInstrCond_s,
6378 kIemNativeInstrCond_ns,
6379 kIemNativeInstrCond_p,
6380 kIemNativeInstrCond_np,
6381 kIemNativeInstrCond_l,
6382 kIemNativeInstrCond_nl,
6383 kIemNativeInstrCond_le,
6384 kIemNativeInstrCond_nle
6385} IEMNATIVEINSTRCOND;
6386#elif defined(RT_ARCH_ARM64)
6387typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6388# define kIemNativeInstrCond_o todo_conditional_codes
6389# define kIemNativeInstrCond_no todo_conditional_codes
6390# define kIemNativeInstrCond_c todo_conditional_codes
6391# define kIemNativeInstrCond_nc todo_conditional_codes
6392# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6393# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6394# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6395# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6396# define kIemNativeInstrCond_s todo_conditional_codes
6397# define kIemNativeInstrCond_ns todo_conditional_codes
6398# define kIemNativeInstrCond_p todo_conditional_codes
6399# define kIemNativeInstrCond_np todo_conditional_codes
6400# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6401# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6402# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6403# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6404#else
6405# error "Port me!"
6406#endif
6407
6408
6409/**
6410 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6411 */
6412DECL_FORCE_INLINE_THROW(uint32_t)
6413iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6414 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6415{
6416 Assert(idxLabel < pReNative->cLabels);
6417
6418 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6419#ifdef RT_ARCH_AMD64
6420 if (offLabel >= off)
6421 {
6422 /* jcc rel32 */
6423 pCodeBuf[off++] = 0x0f;
6424 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6425 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6426 pCodeBuf[off++] = 0x00;
6427 pCodeBuf[off++] = 0x00;
6428 pCodeBuf[off++] = 0x00;
6429 pCodeBuf[off++] = 0x00;
6430 }
6431 else
6432 {
6433 int32_t offDisp = offLabel - (off + 2);
6434 if ((int8_t)offDisp == offDisp)
6435 {
6436 /* jcc rel8 */
6437 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6438 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6439 }
6440 else
6441 {
6442 /* jcc rel32 */
6443 offDisp -= 4;
6444 pCodeBuf[off++] = 0x0f;
6445 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6446 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6447 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6448 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6449 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6450 }
6451 }
6452
6453#elif defined(RT_ARCH_ARM64)
6454 if (offLabel >= off)
6455 {
6456 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6457 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6458 }
6459 else
6460 {
6461 Assert(off - offLabel <= 0x3ffffU);
6462 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6463 off++;
6464 }
6465
6466#else
6467# error "Port me!"
6468#endif
6469 return off;
6470}
6471
6472
6473/**
6474 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6475 */
6476DECL_INLINE_THROW(uint32_t)
6477iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6478{
6479#ifdef RT_ARCH_AMD64
6480 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6481#elif defined(RT_ARCH_ARM64)
6482 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6483#else
6484# error "Port me!"
6485#endif
6486 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6487 return off;
6488}
6489
6490
6491/**
6492 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6493 */
6494DECL_INLINE_THROW(uint32_t)
6495iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6496 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6497{
6498 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6499 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6500}
6501
6502
6503/**
6504 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6505 */
6506DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6507{
6508#ifdef RT_ARCH_AMD64
6509 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6510#elif defined(RT_ARCH_ARM64)
6511 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6512#else
6513# error "Port me!"
6514#endif
6515}
6516
6517/**
6518 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6519 */
6520DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6521 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6522{
6523#ifdef RT_ARCH_AMD64
6524 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6525#elif defined(RT_ARCH_ARM64)
6526 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6527#else
6528# error "Port me!"
6529#endif
6530}
6531
6532
6533/**
6534 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6535 */
6536DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6537{
6538#ifdef RT_ARCH_AMD64
6539 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6540#elif defined(RT_ARCH_ARM64)
6541 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6542#else
6543# error "Port me!"
6544#endif
6545}
6546
6547/**
6548 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6549 */
6550DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6551 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6552{
6553#ifdef RT_ARCH_AMD64
6554 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6555#elif defined(RT_ARCH_ARM64)
6556 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6557#else
6558# error "Port me!"
6559#endif
6560}
6561
6562
6563/**
6564 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6565 */
6566DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6567{
6568#ifdef RT_ARCH_AMD64
6569 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6570#elif defined(RT_ARCH_ARM64)
6571 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6572#else
6573# error "Port me!"
6574#endif
6575}
6576
6577/**
6578 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6579 */
6580DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6581 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6582{
6583#ifdef RT_ARCH_AMD64
6584 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6585#elif defined(RT_ARCH_ARM64)
6586 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6587#else
6588# error "Port me!"
6589#endif
6590}
6591
6592
6593/**
6594 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6595 */
6596DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6597{
6598#ifdef RT_ARCH_AMD64
6599 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6600#elif defined(RT_ARCH_ARM64)
6601 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6602#else
6603# error "Port me!"
6604#endif
6605}
6606
6607/**
6608 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6609 */
6610DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6611 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6612{
6613#ifdef RT_ARCH_AMD64
6614 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6615#elif defined(RT_ARCH_ARM64)
6616 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6617#else
6618# error "Port me!"
6619#endif
6620}
6621
6622
6623/**
6624 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6625 */
6626DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6627{
6628#ifdef RT_ARCH_AMD64
6629 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6630#elif defined(RT_ARCH_ARM64)
6631 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6632#else
6633# error "Port me!"
6634#endif
6635}
6636
6637/**
6638 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6639 */
6640DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6641 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6642{
6643#ifdef RT_ARCH_AMD64
6644 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6645#elif defined(RT_ARCH_ARM64)
6646 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6647#else
6648# error "Port me!"
6649#endif
6650}
6651
6652
6653/**
6654 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6655 *
6656 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6657 *
6658 * Only use hardcoded jumps forward when emitting for exactly one
6659 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6660 * the right target address on all platforms!
6661 *
6662 * Please also note that on x86 it is necessary to pass off + 256 or higher
6663 * for @a offTarget if one believes the intervening code is more than 127
6664 * bytes long.
6665 */
6666DECL_FORCE_INLINE(uint32_t)
6667iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6668{
6669#ifdef RT_ARCH_AMD64
6670 /* jcc rel8 / rel32 */
6671 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6672 if (offDisp < 128 && offDisp >= -128)
6673 {
6674 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6675 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6676 }
6677 else
6678 {
6679 offDisp -= 4;
6680 pCodeBuf[off++] = 0x0f;
6681 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6682 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6683 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6684 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6685 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6686 }
6687
6688#elif defined(RT_ARCH_ARM64)
6689 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6690 off++;
6691#else
6692# error "Port me!"
6693#endif
6694 return off;
6695}
6696
6697
6698/**
6699 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6700 *
6701 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6702 *
6703 * Only use hardcoded jumps forward when emitting for exactly one
6704 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6705 * the right target address on all platforms!
6706 *
6707 * Please also note that on x86 it is necessary to pass off + 256 or higher
6708 * for @a offTarget if one believes the intervening code is more than 127
6709 * bytes long.
6710 */
6711DECL_INLINE_THROW(uint32_t)
6712iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6713{
6714#ifdef RT_ARCH_AMD64
6715 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6716#elif defined(RT_ARCH_ARM64)
6717 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6718#else
6719# error "Port me!"
6720#endif
6721 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6722 return off;
6723}
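/*
 * Usage sketch (illustrative only): a hardcoded forward Jcc whose target is
 * not known yet is emitted against a dummy target and patched afterwards via
 * iemNativeFixupFixedJump().  Passing off + 256 forces the rel32 form on
 * AMD64 so the later fixup is guaranteed to fit.
 *
 *      uint32_t const offFixup = off;
 *      off = iemNativeEmitJccToFixed(pReNative, off, off + 256, kIemNativeInstrCond_e);
 *      ... emit the code that is skipped when the condition holds ...
 *      iemNativeFixupFixedJump(pReNative, offFixup, off);
 */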
6724
6725
6726/**
6727 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6728 *
6729 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6730 */
6731DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6732{
6733#ifdef RT_ARCH_AMD64
6734 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6735#elif defined(RT_ARCH_ARM64)
6736 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6737#else
6738# error "Port me!"
6739#endif
6740}
6741
6742
6743/**
6744 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6745 *
6746 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6747 */
6748DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6749{
6750#ifdef RT_ARCH_AMD64
6751 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6752#elif defined(RT_ARCH_ARM64)
6753 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6754#else
6755# error "Port me!"
6756#endif
6757}
6758
6759
6760/**
6761 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6762 *
6763 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6764 */
6765DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6766{
6767#ifdef RT_ARCH_AMD64
6768 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6769#elif defined(RT_ARCH_ARM64)
6770 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6771#else
6772# error "Port me!"
6773#endif
6774}
6775
6776
6777/**
6778 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6779 *
6780 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6781 */
6782DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6783{
6784#ifdef RT_ARCH_AMD64
6785 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6786#elif defined(RT_ARCH_ARM64)
6787 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6788#else
6789# error "Port me!"
6790#endif
6791}
6792
6793
6794/**
6795 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6796 *
6797 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6798 */
6799DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6800{
6801#ifdef RT_ARCH_AMD64
6802 /* jmp rel8 or rel32 */
6803 int32_t offDisp = offTarget - (off + 2);
6804 if (offDisp < 128 && offDisp >= -128)
6805 {
6806 pCodeBuf[off++] = 0xeb;
6807 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6808 }
6809 else
6810 {
6811 offDisp -= 3;
6812 pCodeBuf[off++] = 0xe9;
6813 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6814 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6815 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6816 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6817 }
6818
6819#elif defined(RT_ARCH_ARM64)
6820 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6821 off++;
6822
6823#else
6824# error "Port me!"
6825#endif
6826 return off;
6827}
6828
6829
6830/**
6831 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6832 *
6833 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6834 */
6835DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6836{
6837#ifdef RT_ARCH_AMD64
6838 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6839#elif defined(RT_ARCH_ARM64)
6840 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6841#else
6842# error "Port me!"
6843#endif
6844 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6845 return off;
6846}
6847
6848
6849/**
6850 * Fixes up a conditional jump to a fixed label.
6851 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6852 * iemNativeEmitJzToFixed, ...
6853 */
6854DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6855{
6856#ifdef RT_ARCH_AMD64
6857 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6858 uint8_t const bOpcode = pbCodeBuf[offFixup];
6859 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6860 {
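        /* Short forms: Jcc rel8 (0x70..0x7f) or JMP rel8 (0xeb); the 8-bit displacement follows the opcode byte. */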
6861 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6862 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6863 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6864 }
6865 else
6866 {
6867 if (bOpcode != 0x0f)
6868 Assert(bOpcode == 0xe9);
6869 else
6870 {
6871 offFixup += 1;
6872 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
6873 }
6874 uint32_t const offRel32 = offTarget - (offFixup + 5);
6875 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6876 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6877 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6878 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6879 }
6880
6881#elif defined(RT_ARCH_ARM64)
6882 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6883 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6884 {
6885 /* B.COND + BC.COND */
6886 int32_t const offDisp = offTarget - offFixup;
6887 Assert(offDisp >= -262144 && offDisp < 262144);
6888 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6889 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6890 }
6891 else
6892 {
6893 /* B imm26 */
6894 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6895 int32_t const offDisp = offTarget - offFixup;
6896 Assert(offDisp >= -33554432 && offDisp < 33554432);
6897 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6898 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6899 }
6900
6901#else
6902# error "Port me!"
6903#endif
6904}
6905
6906
6907#ifdef RT_ARCH_AMD64
6908/**
6909 * For doing a bt (bit test) on a register, i.e. setting CF to bit @a iBitNo of @a iGprSrc.
6910 */
6911DECL_INLINE_THROW(uint32_t)
6912iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6913{
6914 Assert(iBitNo < 64);
6915 /* bt Ev, imm8 */
6916 if (iBitNo >= 32)
6917 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6918 else if (iGprSrc >= 8)
6919 pCodeBuf[off++] = X86_OP_REX_B;
6920 pCodeBuf[off++] = 0x0f;
6921 pCodeBuf[off++] = 0xba;
6922 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6923 pCodeBuf[off++] = iBitNo;
6924 return off;
6925}
6926#endif /* RT_ARCH_AMD64 */
6927
6928
6929/**
6930 * Internal helper, don't call directly.
6931 */
6932DECL_INLINE_THROW(uint32_t)
6933iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6934 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6935{
6936 Assert(iBitNo < 64);
6937#ifdef RT_ARCH_AMD64
6938 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6939 if (iBitNo < 8)
6940 {
6941 /* test Eb, imm8 */
6942 if (iGprSrc >= 4)
6943 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6944 pbCodeBuf[off++] = 0xf6;
6945 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6946 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6947 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6948 }
6949 else
6950 {
6951 /* bt Ev, imm8 */
6952 if (iBitNo >= 32)
6953 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6954 else if (iGprSrc >= 8)
6955 pbCodeBuf[off++] = X86_OP_REX_B;
6956 pbCodeBuf[off++] = 0x0f;
6957 pbCodeBuf[off++] = 0xba;
6958 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6959 pbCodeBuf[off++] = iBitNo;
6960 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6961 }
6962
6963#elif defined(RT_ARCH_ARM64)
6964 /* Use the TBNZ instruction here. */
6965 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6966 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
6967 {
6968 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
6969 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
6970 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
6971 //if (offLabel == UINT32_MAX)
6972 {
6973 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6974 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6975 }
6976 //else
6977 //{
6978 // RT_BREAKPOINT();
6979 // Assert(off - offLabel <= 0x1fffU);
6980 // pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
6981 //
6982 //}
6983 }
6984 else
6985 {
6986 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
6987 pu32CodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
6988 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6989 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
6990 }
6991
6992#else
6993# error "Port me!"
6994#endif
6995 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6996 return off;
6997}
6998
6999
7000/**
7001 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7002 * @a iGprSrc.
7003 *
7004 * @note On ARM64 the range is only +/-8191 instructions.
7005 */
7006DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7007 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7008{
7009 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7010}
7011
7012
7013/**
7014 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7015 * _set_ in @a iGprSrc.
7016 *
7017 * @note On ARM64 the range is only +/-8191 instructions.
7018 */
7019DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7020 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7021{
7022 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7023}
7024
7025
7026/**
7027 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7028 * flags accordingly.
7029 */
7030DECL_INLINE_THROW(uint32_t)
7031iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7032{
7033 Assert(fBits != 0);
7034#ifdef RT_ARCH_AMD64
7035
7036 if (fBits >= UINT32_MAX)
7037 {
7038 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7039
7040 /* test Ev,Gv */
7041 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7042 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7043 pbCodeBuf[off++] = 0x85;
7044 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7045
7046 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7047 }
7048 else if (fBits <= UINT32_MAX)
7049 {
7050 /* test Eb, imm8 or test Ev, imm32 */
7051 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7052 if (fBits <= UINT8_MAX)
7053 {
7054 if (iGprSrc >= 4)
7055 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7056 pbCodeBuf[off++] = 0xf6;
7057 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7058 pbCodeBuf[off++] = (uint8_t)fBits;
7059 }
7060 else
7061 {
7062 if (iGprSrc >= 8)
7063 pbCodeBuf[off++] = X86_OP_REX_B;
7064 pbCodeBuf[off++] = 0xf7;
7065 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7066 pbCodeBuf[off++] = RT_BYTE1(fBits);
7067 pbCodeBuf[off++] = RT_BYTE2(fBits);
7068 pbCodeBuf[off++] = RT_BYTE3(fBits);
7069 pbCodeBuf[off++] = RT_BYTE4(fBits);
7070 }
7071 }
7072 /** @todo implement me. */
7073 else
7074 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7075
7076#elif defined(RT_ARCH_ARM64)
7077 uint32_t uImmR = 0;
7078 uint32_t uImmNandS = 0;
7079 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7080 {
7081 /* ands xzr, iGprSrc, #fBits */
7082 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7083 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7084 }
7085 else
7086 {
7087 /* ands xzr, iGprSrc, iTmpReg */
7088 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7089 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7090 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7091 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7092 }
7093
7094#else
7095# error "Port me!"
7096#endif
7097 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7098 return off;
7099}
7100
7101
7102/**
7103 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7104 * @a iGprSrc, setting CPU flags accordingly.
7105 *
7106 * @note For ARM64 this only supports @a fBits values that can be expressed
7107 * using the two 6-bit immediates of the ANDS instruction. The caller
7108 * must make sure this is possible!
7109 */
7110DECL_FORCE_INLINE_THROW(uint32_t)
7111iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
7112{
7113 Assert(fBits != 0);
7114
7115#ifdef RT_ARCH_AMD64
7116 if (fBits <= UINT8_MAX)
7117 {
7118 /* test Eb, imm8 */
7119 if (iGprSrc >= 4)
7120 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7121 pCodeBuf[off++] = 0xf6;
7122 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7123 pCodeBuf[off++] = (uint8_t)fBits;
7124 }
7125 else
7126 {
7127 /* test Ev, imm32 */
7128 if (iGprSrc >= 8)
7129 pCodeBuf[off++] = X86_OP_REX_B;
7130 pCodeBuf[off++] = 0xf7;
7131 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7132 pCodeBuf[off++] = RT_BYTE1(fBits);
7133 pCodeBuf[off++] = RT_BYTE2(fBits);
7134 pCodeBuf[off++] = RT_BYTE3(fBits);
7135 pCodeBuf[off++] = RT_BYTE4(fBits);
7136 }
7137
7138#elif defined(RT_ARCH_ARM64)
7139 /* ands xzr, src, #fBits */
7140 uint32_t uImmR = 0;
7141 uint32_t uImmNandS = 0;
7142 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7143 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7144 else
7145# ifdef IEM_WITH_THROW_CATCH
7146 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7147# else
7148 AssertReleaseFailedStmt(off = UINT32_MAX);
7149# endif
7150
7151#else
7152# error "Port me!"
7153#endif
7154 return off;
7155}
7156
7157
7158
7159/**
7160 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7161 * @a iGprSrc, setting CPU flags accordingly.
7162 *
7163 * @note For ARM64 this only supports @a fBits values that can be expressed
7164 * using the two 6-bit immediates of the ANDS instruction. The caller
7165 * must make sure this is possible!
7166 */
7167DECL_FORCE_INLINE_THROW(uint32_t)
7168iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7169{
7170 Assert(fBits != 0);
7171
7172#ifdef RT_ARCH_AMD64
7173 /* test Eb, imm8 */
7174 if (iGprSrc >= 4)
7175 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7176 pCodeBuf[off++] = 0xf6;
7177 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7178 pCodeBuf[off++] = fBits;
7179
7180#elif defined(RT_ARCH_ARM64)
7181 /* ands xzr, src, #fBits */
7182 uint32_t uImmR = 0;
7183 uint32_t uImmNandS = 0;
7184 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7185 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7186 else
7187# ifdef IEM_WITH_THROW_CATCH
7188 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7189# else
7190 AssertReleaseFailedStmt(off = UINT32_MAX);
7191# endif
7192
7193#else
7194# error "Port me!"
7195#endif
7196 return off;
7197}
7198
7199
7200/**
7201 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7202 * @a iGprSrc, setting CPU flags accordingly.
7203 */
7204DECL_INLINE_THROW(uint32_t)
7205iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7206{
7207 Assert(fBits != 0);
7208
7209#ifdef RT_ARCH_AMD64
7210 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7211
7212#elif defined(RT_ARCH_ARM64)
7213 /* ands xzr, src, [tmp|#imm] */
7214 uint32_t uImmR = 0;
7215 uint32_t uImmNandS = 0;
7216 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7217 {
7218 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7219 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7220 }
7221 else
7222 {
7223 /* Use temporary register for the 64-bit immediate. */
7224 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7225 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7226 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7227 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7228 }
7229
7230#else
7231# error "Port me!"
7232#endif
7233 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7234 return off;
7235}
7236
7237
7238/**
7239 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
7240 * are set in @a iGprSrc.
7241 */
7242DECL_INLINE_THROW(uint32_t)
7243iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7244 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7245{
7246 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7247
7248 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7249 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7250
7251 return off;
7252}
7253
7254
7255/**
7256 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
7257 * are set in @a iGprSrc.
7258 */
7259DECL_INLINE_THROW(uint32_t)
7260iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7261 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7262{
7263 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7264
7265 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7266 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7267
7268 return off;
7269}
7270
7271
7272/**
7273 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7274 *
7275 * The operand size is given by @a f64Bit.
7276 */
7277DECL_FORCE_INLINE_THROW(uint32_t)
7278iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7279 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7280{
7281 Assert(idxLabel < pReNative->cLabels);
7282
7283#ifdef RT_ARCH_AMD64
7284 /* test reg32,reg32 / test reg64,reg64 */
7285 if (f64Bit)
7286 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7287 else if (iGprSrc >= 8)
7288 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7289 pCodeBuf[off++] = 0x85;
7290 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7291
7292 /* jnz idxLabel */
7293 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7294 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7295
7296#elif defined(RT_ARCH_ARM64)
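    /* Use cbz/cbnz, which folds the zero test and the branch into a single instruction. */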
7297 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7298 {
7299 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7300 iGprSrc, f64Bit);
7301 off++;
7302 }
7303 else
7304 {
7305 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7306 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7307 }
7308
7309#else
7310# error "Port me!"
7311#endif
7312 return off;
7313}
7314
7315
7316/**
7317 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7318 *
7319 * The operand size is given by @a f64Bit.
7320 */
7321DECL_FORCE_INLINE_THROW(uint32_t)
7322iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7323 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7324{
7325#ifdef RT_ARCH_AMD64
7326 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7327 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7328#elif defined(RT_ARCH_ARM64)
7329 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7330 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7331#else
7332# error "Port me!"
7333#endif
7334 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7335 return off;
7336}
7337
7338
7339/* if (Gpr1 == 0) Jmp idxLabel; */
7340
7341/**
7342 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7343 *
7344 * The operand size is given by @a f64Bit.
7345 */
7346DECL_FORCE_INLINE_THROW(uint32_t)
7347iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7348 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7349{
7350 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7351 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7352}
7353
7354
7355/**
7356 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7357 *
7358 * The operand size is given by @a f64Bit.
7359 */
7360DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7361 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7362{
7363 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7364}
7365
7366
7367/**
7368 * Emits code that jumps to a new label if @a iGprSrc is zero.
7369 *
7370 * The operand size is given by @a f64Bit.
7371 */
7372DECL_INLINE_THROW(uint32_t)
7373iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7374 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7375{
7376 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7377 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7378}
7379
7380
7381/* if (Gpr1 != 0) Jmp idxLabel; */
7382
7383/**
7384 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7385 *
7386 * The operand size is given by @a f64Bit.
7387 */
7388DECL_FORCE_INLINE_THROW(uint32_t)
7389iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7390 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7391{
7392 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7393 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7394}
7395
7396
7397/**
7398 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7399 *
7400 * The operand size is given by @a f64Bit.
7401 */
7402DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7403 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7404{
7405 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7406}
7407
7408
7409/**
7410 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7411 *
7412 * The operand size is given by @a f64Bit.
7413 */
7414DECL_INLINE_THROW(uint32_t)
7415iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7416 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7417{
7418 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7419 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7420}
7421
7422
7423/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7424
7425/**
7426 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7427 * differ.
7428 */
7429DECL_INLINE_THROW(uint32_t)
7430iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7431 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7432{
7433 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7434 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7435 return off;
7436}
7437
7438
7439/**
7440 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
7441 */
7442DECL_INLINE_THROW(uint32_t)
7443iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7444 uint8_t iGprLeft, uint8_t iGprRight,
7445 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7446{
7447 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7448 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7449}
7450
7451
7452/* if (Gpr != Imm) Jmp idxLabel; */
7453
7454/**
7455 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7456 */
7457DECL_INLINE_THROW(uint32_t)
7458iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7459 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7460{
7461 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7462 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7463 return off;
7464}
7465
7466
7467/**
7468 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7469 */
7470DECL_INLINE_THROW(uint32_t)
7471iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7472 uint8_t iGprSrc, uint64_t uImm,
7473 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7474{
7475 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7476 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7477}
7478
7479
7480/**
7481 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7482 * @a uImm.
7483 */
7484DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7485 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7486{
7487 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7488 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7489 return off;
7490}
7491
7492
7493/**
7494 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7495 * @a uImm.
7496 */
7497DECL_INLINE_THROW(uint32_t)
7498iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7499 uint8_t iGprSrc, uint32_t uImm,
7500 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7501{
7502 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7503 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7504}
7505
7506
7507/**
7508 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7509 * @a uImm.
7510 */
7511DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7512 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7513{
7514 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7515 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7516 return off;
7517}
7518
7519
7520/**
7521 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7522 * @a uImm.
7523 */
7524DECL_INLINE_THROW(uint32_t)
7525iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7526 uint8_t iGprSrc, uint16_t uImm,
7527 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7528{
7529 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7530 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7531}
7532
7533
7534/* if (Gpr == Imm) Jmp idxLabel; */
7535
7536/**
7537 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7538 */
7539DECL_INLINE_THROW(uint32_t)
7540iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7541 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7542{
7543 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7544 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7545 return off;
7546}
7547
7548
7549/**
7550 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7551 */
7552DECL_INLINE_THROW(uint32_t)
7553iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7554 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7555{
7556 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7557 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7558}
7559
7560
7561/**
7562 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7563 */
7564DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7565 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7566{
7567 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7568 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7569 return off;
7570}
7571
7572
7573/**
7574 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7575 */
7576DECL_INLINE_THROW(uint32_t)
7577iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7578 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7579{
7580 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7581 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7582}
7583
7584
7585/**
7586 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7587 *
7588 * @note ARM64: Helper register is required (idxTmpReg).
7589 */
7590DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7591 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7592 uint8_t idxTmpReg = UINT8_MAX)
7593{
7594 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7595 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7596 return off;
7597}
7598
7599
7600/**
7601 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7602 *
7603 * @note ARM64: Helper register is required (idxTmpReg).
7604 */
7605DECL_INLINE_THROW(uint32_t)
7606iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7607 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7608 uint8_t idxTmpReg = UINT8_MAX)
7609{
7610 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7611 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7612}
7613
7614
7615/*********************************************************************************************************************************
7616* Calls. *
7617*********************************************************************************************************************************/
7618
7619/**
7620 * Emits a call to a 64-bit address.
7621 */
7622DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7623{
7624#ifdef RT_ARCH_AMD64
7625 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7626
7627 /* call rax */
7628 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7629 pbCodeBuf[off++] = 0xff;
7630 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7631
7632#elif defined(RT_ARCH_ARM64)
7633 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7634
7635 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7636 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7637
7638#else
7639# error "port me"
7640#endif
7641 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7642 return off;
7643}
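/*
 * Usage sketch (illustrative only): arguments are loaded into the calling
 * convention registers before the call is emitted.  IEMNATIVE_CALL_ARG0_GREG,
 * IEMNATIVE_REG_FIXED_PVMCPU and pfnSomeHelper are assumptions here (the
 * first two being the usual IEMN8veRecompiler.h conventions, the helper
 * purely hypothetical).
 *
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);
 */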
7644
7645
7646/**
7647 * Emits code to load a stack variable into an argument GPR.
7648 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7649 */
7650DECL_FORCE_INLINE_THROW(uint32_t)
7651iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7652 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7653 bool fSpilledVarsInVolatileRegs = false)
7654{
7655 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7656 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7657 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7658
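    /* If the variable lives in a host register that either survives the call (non-volatile
       or explicitly allowed) or that the caller has not spilled, copy straight from that
       register; otherwise load it from its stack slot below. */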
7659 uint8_t const idxRegVar = pVar->idxReg;
7660 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7661 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7662 || !fSpilledVarsInVolatileRegs ))
7663 {
7664 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7665 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7666 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7667 if (!offAddend)
7668 {
7669 if (idxRegArg != idxRegVar)
7670 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7671 }
7672 else
7673 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7674 }
7675 else
7676 {
7677 uint8_t const idxStackSlot = pVar->idxStackSlot;
7678 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7679 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7680 if (offAddend)
7681 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7682 }
7683 return off;
7684}
7685
7686
7687/**
7688 * Emits code to load a stack or immediate variable value into an argument GPR,
7689 * optionally with an addend.
7690 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7691 */
7692DECL_FORCE_INLINE_THROW(uint32_t)
7693iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7694 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7695 bool fSpilledVarsInVolatileRegs = false)
7696{
7697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7698 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7699 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7700 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7701 else
7702 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7703 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7704 return off;
7705}
7706
7707
7708/**
7709 * Emits code to load the variable address into an argument GPR.
7710 *
7711 * This only works for uninitialized and stack variables.
7712 */
7713DECL_FORCE_INLINE_THROW(uint32_t)
7714iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7715 bool fFlushShadows)
7716{
7717 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7718 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7719 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7720 || pVar->enmKind == kIemNativeVarKind_Stack,
7721 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7722 AssertStmt(!pVar->fSimdReg,
7723 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7724
7725 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7726 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7727
7728 uint8_t const idxRegVar = pVar->idxReg;
7729 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7730 {
7731 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7732 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7733 Assert(pVar->idxReg == UINT8_MAX);
7734 }
7735 Assert( pVar->idxStackSlot != UINT8_MAX
7736 && pVar->idxReg == UINT8_MAX);
7737
7738 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7739}
7740
7741
7742#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7743/**
7744 * Emits code to load the variable address into an argument GPR.
7745 *
7746 * This is a special variant intended for SIMD variables only; it is only called
7747 * by the TLB miss path in the memory fetch/store code, where the value is passed
7748 * by reference and both the register and the stack copy are needed, depending on
7749 * which path is taken (TLB hit vs. miss).
7750 */
7751DECL_FORCE_INLINE_THROW(uint32_t)
7752iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7753 bool fSyncRegWithStack = true)
7754{
7755 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7756 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7757 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7758 || pVar->enmKind == kIemNativeVarKind_Stack,
7759 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7760 AssertStmt(pVar->fSimdReg,
7761 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7762 Assert( pVar->idxStackSlot != UINT8_MAX
7763 && pVar->idxReg != UINT8_MAX);
7764
7765 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7766 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7767
7768 uint8_t const idxRegVar = pVar->idxReg;
7769 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7770 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7771
7772 if (fSyncRegWithStack)
7773 {
7774 if (pVar->cbVar == sizeof(RTUINT128U))
7775 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
7776 else
7777 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
7778 }
7779
7780 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7781}
7782
7783
7784/**
7785 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
7786 *
7787 * This is a special helper that is only called by the TLB miss path in the
7788 * memory fetch/store code: the value is passed by reference there, and after
7789 * a TLB miss it ends up on the stack, so the assigned host register has to be
7790 * reloaded from the stack copy.
7791 */
7792DECL_FORCE_INLINE_THROW(uint32_t)
7793iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
7794{
7795 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7796 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7797 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7798 || pVar->enmKind == kIemNativeVarKind_Stack,
7799 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7800 AssertStmt(pVar->fSimdReg,
7801 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7802 Assert( pVar->idxStackSlot != UINT8_MAX
7803 && pVar->idxReg != UINT8_MAX);
7804
7805 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7806 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7807
7808 uint8_t const idxRegVar = pVar->idxReg;
7809 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7810 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7811
7812 if (pVar->cbVar == sizeof(RTUINT128U))
7813 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
7814 else
7815 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
7816
7817 return off;
7818}
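/*
 * Usage sketch (illustrative only): in the TLB miss path of a SIMD memory
 * access the variable is passed by reference, so the register copy is flushed
 * to the stack before the call and reloaded afterwards.  The names
 * IEMNATIVE_CALL_ARG1_GREG, idxVarValue and pfnTlbMissHelper are assumptions
 * here (argument register convention resp. purely hypothetical identifiers).
 *
 *      off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarValue);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnTlbMissHelper);
 *      off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
 */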
7819
7820
7821/**
7822 * Emits a gprdst = ~gprsrc store.
7823 */
7824DECL_FORCE_INLINE_THROW(uint32_t)
7825iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7826{
7827#ifdef RT_ARCH_AMD64
7828 if (iGprDst != iGprSrc)
7829 {
7830 /* mov gprdst, gprsrc. */
7831 if (f64Bit)
7832 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7833 else
7834 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7835 }
7836
7837 /* not gprdst */
7838 if (f64Bit || iGprDst >= 8)
7839 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7840 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7841 pCodeBuf[off++] = 0xf7;
7842 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7843#elif defined(RT_ARCH_ARM64)
7844 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7845#else
7846# error "port me"
7847#endif
7848 return off;
7849}
7850
7851
7852/**
7853 * Emits a gprdst = ~gprsrc store.
7854 */
7855DECL_INLINE_THROW(uint32_t)
7856iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7857{
7858#ifdef RT_ARCH_AMD64
7859 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7860#elif defined(RT_ARCH_ARM64)
7861 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7862#else
7863# error "port me"
7864#endif
7865 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7866 return off;
7867}
7868
7869
7870/**
7871 * Emits a 128-bit vector register store to a VCpu value.
7872 */
7873DECL_FORCE_INLINE_THROW(uint32_t)
7874iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7875{
7876#ifdef RT_ARCH_AMD64
7877 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7878 pCodeBuf[off++] = 0x66;
7879 if (iVecReg >= 8)
7880 pCodeBuf[off++] = X86_OP_REX_R;
7881 pCodeBuf[off++] = 0x0f;
7882 pCodeBuf[off++] = 0x7f;
7883 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7884#elif defined(RT_ARCH_ARM64)
7885 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7886
7887#else
7888# error "port me"
7889#endif
7890 return off;
7891}
7892
7893
7894/**
7895 * Emits a 128-bit vector register store to a VCpu value.
7896 */
7897DECL_INLINE_THROW(uint32_t)
7898iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7899{
7900#ifdef RT_ARCH_AMD64
7901 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7902#elif defined(RT_ARCH_ARM64)
7903 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7904#else
7905# error "port me"
7906#endif
7907 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7908 return off;
7909}
7910
7911
7912/**
7913 * Emits a high 128-bit vector register store to a VCpu value.
7914 */
7915DECL_FORCE_INLINE_THROW(uint32_t)
7916iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7917{
7918#ifdef RT_ARCH_AMD64
7919 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7920 pCodeBuf[off++] = X86_OP_VEX3;
7921 if (iVecReg >= 8)
7922 pCodeBuf[off++] = 0x63;
7923 else
7924 pCodeBuf[off++] = 0xe3;
7925 pCodeBuf[off++] = 0x7d;
7926 pCodeBuf[off++] = 0x39;
7927 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7928 pCodeBuf[off++] = 0x01; /* Immediate */
7929#elif defined(RT_ARCH_ARM64)
7930 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7931#else
7932# error "port me"
7933#endif
7934 return off;
7935}
7936
7937
7938/**
7939 * Emits a high 128-bit vector register store to a VCpu value.
7940 */
7941DECL_INLINE_THROW(uint32_t)
7942iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7943{
7944#ifdef RT_ARCH_AMD64
7945 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7946#elif defined(RT_ARCH_ARM64)
7947 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7948 Assert(!(iVecReg & 0x1));
7949 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7950#else
7951# error "port me"
7952#endif
7953 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7954 return off;
7955}
7956
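#if 0  /* Usage sketch, for illustration only; the wrapper name is made up for the example:
        * storing a full 256-bit value by combining the low and high 128-bit VCpu store
        * emitters above, offVCpuLow/offVCpuHigh being the offsets of the two halves. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleStoreVecRegToVCpuU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
                                          uint32_t offVCpuLow, uint32_t offVCpuHigh)
{
    off = iemNativeEmitSimdStoreVecRegToVCpuLowU128( pReNative, off, iVecReg, offVCpuLow);
    off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, iVecReg, offVCpuHigh);
    return off;
}
#endif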
7957
7958/**
7959 * Emits a 128-bit vector register load of a VCpu value.
7960 */
7961DECL_FORCE_INLINE_THROW(uint32_t)
7962iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7963{
7964#ifdef RT_ARCH_AMD64
7965 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
7966 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7967 if (iVecReg >= 8)
7968 pCodeBuf[off++] = X86_OP_REX_R;
7969 pCodeBuf[off++] = 0x0f;
7970 pCodeBuf[off++] = 0x6f;
7971 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7972#elif defined(RT_ARCH_ARM64)
7973 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7974
7975#else
7976# error "port me"
7977#endif
7978 return off;
7979}
7980
7981
7982/**
7983 * Emits a 128-bit vector register load of a VCpu value.
7984 */
7985DECL_INLINE_THROW(uint32_t)
7986iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7987{
7988#ifdef RT_ARCH_AMD64
7989 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7990#elif defined(RT_ARCH_ARM64)
7991 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7992#else
7993# error "port me"
7994#endif
7995 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7996 return off;
7997}
7998
7999
8000/**
8001 * Emits a high 128-bit vector register load of a VCpu value.
8002 */
8003DECL_FORCE_INLINE_THROW(uint32_t)
8004iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8005{
8006#ifdef RT_ARCH_AMD64
8007 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
8008 pCodeBuf[off++] = X86_OP_VEX3;
8009 if (iVecReg >= 8)
8010 pCodeBuf[off++] = 0x63;
8011 else
8012 pCodeBuf[off++] = 0xe3;
8013 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8014 pCodeBuf[off++] = 0x38;
8015 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8016 pCodeBuf[off++] = 0x01; /* Immediate */
8017#elif defined(RT_ARCH_ARM64)
8018 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8019#else
8020# error "port me"
8021#endif
8022 return off;
8023}
8024
8025
8026/**
8027 * Emits a high 128-bit vector register load of a VCpu value.
8028 */
8029DECL_INLINE_THROW(uint32_t)
8030iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8031{
8032#ifdef RT_ARCH_AMD64
8033 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8034#elif defined(RT_ARCH_ARM64)
8035 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8036 Assert(!(iVecReg & 0x1));
8037 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8038#else
8039# error "port me"
8040#endif
8041 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8042 return off;
8043}
8044
8045
8046/**
8047 * Emits a vecdst = vecsrc load.
8048 */
8049DECL_FORCE_INLINE(uint32_t)
8050iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8051{
8052#ifdef RT_ARCH_AMD64
8053 /* movdqu vecdst, vecsrc */
8054 pCodeBuf[off++] = 0xf3;
8055
8056 if ((iVecRegDst | iVecRegSrc) >= 8)
8057 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
8058 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
8059 : X86_OP_REX_R;
8060 pCodeBuf[off++] = 0x0f;
8061 pCodeBuf[off++] = 0x6f;
8062 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8063
8064#elif defined(RT_ARCH_ARM64)
8065 /* mov dst, src; alias for: orr dst, src, src */
8066 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
8067
8068#else
8069# error "port me"
8070#endif
8071 return off;
8072}
8073
8074
8075/**
8076 * Emits a vecdst = vecsrc load, 128-bit.
8077 */
8078DECL_INLINE_THROW(uint32_t)
8079iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8080{
8081#ifdef RT_ARCH_AMD64
8082 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8083#elif defined(RT_ARCH_ARM64)
8084 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8085#else
8086# error "port me"
8087#endif
8088 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8089 return off;
8090}
8091
8092
8093/**
8094 * Emits a vecdst[128:255] = vecsrc[128:255] load.
8095 */
8096DECL_FORCE_INLINE_THROW(uint32_t)
8097iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8098{
8099#ifdef RT_ARCH_AMD64
8100 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
8101 pCodeBuf[off++] = X86_OP_VEX3;
8102 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8103 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8104 pCodeBuf[off++] = 0x46;
8105 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8106 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
8107
8108#elif defined(RT_ARCH_ARM64)
8109 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8110
8111 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
8112# ifdef IEM_WITH_THROW_CATCH
8113 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8114# else
8115 AssertReleaseFailedStmt(off = UINT32_MAX);
8116# endif
8117#else
8118# error "port me"
8119#endif
8120 return off;
8121}
8122
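/* Note on the vperm2i128 immediate used above: the imm8 selects the destination halves in
 * 2-bit fields - bits [1:0] pick the source of dst[0:127] and bits [5:4] the source of
 * dst[128:255] (0/1 = first operand low/high, 2/3 = second operand low/high).  Hence 0x30
 * keeps the destination's own low half and copies the source's high half. */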
8123
8124/**
8125 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
8126 */
8127DECL_INLINE_THROW(uint32_t)
8128iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8129{
8130#ifdef RT_ARCH_AMD64
8131 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8132#elif defined(RT_ARCH_ARM64)
8133 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8134 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
8135#else
8136# error "port me"
8137#endif
8138 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8139 return off;
8140}
8141
8142
8143/**
8144 * Emits a vecdst[0:127] = vecsrc[128:255] load.
8145 */
8146DECL_FORCE_INLINE_THROW(uint32_t)
8147iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8148{
8149#ifdef RT_ARCH_AMD64
8150 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
8151 pCodeBuf[off++] = X86_OP_VEX3;
8152 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
8153 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8154 pCodeBuf[off++] = 0x39;
8155 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
8156 pCodeBuf[off++] = 0x1;
8157
8158#elif defined(RT_ARCH_ARM64)
8159 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8160
8161 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
8162# ifdef IEM_WITH_THROW_CATCH
8163 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8164# else
8165 AssertReleaseFailedStmt(off = UINT32_MAX);
8166# endif
8167#else
8168# error "port me"
8169#endif
8170 return off;
8171}
8172
8173
8174/**
8175 * Emits a vecdst[0:127] = vecsrc[128:255] load, high 128-bit.
8176 */
8177DECL_INLINE_THROW(uint32_t)
8178iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8179{
8180#ifdef RT_ARCH_AMD64
8181 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8182#elif defined(RT_ARCH_ARM64)
8183 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8184 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
8185#else
8186# error "port me"
8187#endif
8188 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8189 return off;
8190}
8191
8192
8193/**
8194 * Emits a vecdst = vecsrc load, 256-bit.
8195 */
8196DECL_INLINE_THROW(uint32_t)
8197iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8198{
8199#ifdef RT_ARCH_AMD64
8200 /* vmovdqa ymm, ymm */
8201 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8202 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
8203 {
8204 pbCodeBuf[off++] = X86_OP_VEX3;
8205 pbCodeBuf[off++] = 0x41;
8206 pbCodeBuf[off++] = 0x7d;
8207 pbCodeBuf[off++] = 0x6f;
8208 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8209 }
8210 else
8211 {
8212 pbCodeBuf[off++] = X86_OP_VEX2;
8213 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
8214 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
8215 pbCodeBuf[off++] = iVecRegSrc >= 8
8216 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
8217 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8218 }
8219#elif defined(RT_ARCH_ARM64)
8220 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8221 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
8222 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
8223 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
8224#else
8225# error "port me"
8226#endif
8227 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8228 return off;
8229}
8230
8231
8232/**
8233 * Emits a vecdst[128:255] = vecsrc[0:127] load.
8234 */
8235DECL_FORCE_INLINE(uint32_t)
8236iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8237{
8238#ifdef RT_ARCH_AMD64
8239 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
8240 pCodeBuf[off++] = X86_OP_VEX3;
8241 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8242 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8243 pCodeBuf[off++] = 0x38;
8244 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8245 pCodeBuf[off++] = 0x01; /* Immediate */
8246
8247#elif defined(RT_ARCH_ARM64)
8248 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8249 /* mov dst, src; alias for: orr dst, src, src */
8250 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
8251
8252#else
8253# error "port me"
8254#endif
8255 return off;
8256}
8257
8258
8259/**
8260 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
8261 */
8262DECL_INLINE_THROW(uint32_t)
8263iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8264{
8265#ifdef RT_ARCH_AMD64
8266 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8267#elif defined(RT_ARCH_ARM64)
8268 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8269#else
8270# error "port me"
8271#endif
8272 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8273 return off;
8274}
8275
8276
8277/**
8278 * Emits a gprdst = vecsrc[x] load, 64-bit.
8279 */
8280DECL_FORCE_INLINE(uint32_t)
8281iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8282{
8283#ifdef RT_ARCH_AMD64
8284 if (iQWord >= 2)
8285 {
8286 /** @todo Currently not used. */
8287 AssertReleaseFailed();
8288 }
8289 else
8290 {
8291 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
8292 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8293 pCodeBuf[off++] = X86_OP_REX_W
8294 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8295 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8296 pCodeBuf[off++] = 0x0f;
8297 pCodeBuf[off++] = 0x3a;
8298 pCodeBuf[off++] = 0x16;
8299 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8300 pCodeBuf[off++] = iQWord;
8301 }
8302#elif defined(RT_ARCH_ARM64)
8303 /* umov gprdst, vecsrc[iQWord] */
8304 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8305#else
8306# error "port me"
8307#endif
8308 return off;
8309}
8310
8311
8312/**
8313 * Emits a gprdst = vecsrc[x] load, 64-bit.
8314 */
8315DECL_INLINE_THROW(uint32_t)
8316iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8317{
8318 Assert(iQWord <= 3);
8319
8320#ifdef RT_ARCH_AMD64
8321 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
8322#elif defined(RT_ARCH_ARM64)
8323 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8324 Assert(!(iVecRegSrc & 0x1));
8325 /* Need to access the "high" 128-bit vector register. */
8326 if (iQWord >= 2)
8327 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
8328 else
8329 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
8330#else
8331# error "port me"
8332#endif
8333 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8334 return off;
8335}
8336
8337
8338/**
8339 * Emits a gprdst = vecsrc[x] load, 32-bit.
8340 */
8341DECL_FORCE_INLINE(uint32_t)
8342iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8343{
8344#ifdef RT_ARCH_AMD64
8345 if (iDWord >= 4)
8346 {
8347 /** @todo Currently not used. */
8348 AssertReleaseFailed();
8349 }
8350 else
8351 {
8352 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
8353 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8354 if (iGprDst >= 8 || iVecRegSrc >= 8)
8355 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8356 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8357 pCodeBuf[off++] = 0x0f;
8358 pCodeBuf[off++] = 0x3a;
8359 pCodeBuf[off++] = 0x16;
8360 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8361 pCodeBuf[off++] = iDWord;
8362 }
8363#elif defined(RT_ARCH_ARM64)
8364 /* umov gprdst, vecsrc[iDWord] */
8365 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
8366#else
8367# error "port me"
8368#endif
8369 return off;
8370}
8371
8372
8373/**
8374 * Emits a gprdst = vecsrc[x] load, 32-bit.
8375 */
8376DECL_INLINE_THROW(uint32_t)
8377iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8378{
8379 Assert(iDWord <= 7);
8380
8381#ifdef RT_ARCH_AMD64
8382 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
8383#elif defined(RT_ARCH_ARM64)
8384 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8385 Assert(!(iVecRegSrc & 0x1));
8386 /* Need to access the "high" 128-bit vector register. */
8387 if (iDWord >= 4)
8388 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
8389 else
8390 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
8391#else
8392# error "port me"
8393#endif
8394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8395 return off;
8396}
8397
8398
8399/**
8400 * Emits a gprdst = vecsrc[x] load, 16-bit.
8401 */
8402DECL_FORCE_INLINE(uint32_t)
8403iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8404{
8405#ifdef RT_ARCH_AMD64
8406 if (iWord >= 8)
8407 {
8408 /** @todo Currently not used. */
8409 AssertReleaseFailed();
8410 }
8411 else
8412 {
8413 /* pextrw gpr, vecsrc, #iWord */
8414 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8415 if (iGprDst >= 8 || iVecRegSrc >= 8)
8416 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
8417 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
8418 pCodeBuf[off++] = 0x0f;
8419 pCodeBuf[off++] = 0xc5;
8420 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
8421 pCodeBuf[off++] = iWord;
8422 }
8423#elif defined(RT_ARCH_ARM64)
8424 /* umov gprdst, vecsrc[iWord] */
8425 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
8426#else
8427# error "port me"
8428#endif
8429 return off;
8430}
8431
8432
8433/**
8434 * Emits a gprdst = vecsrc[x] load, 16-bit.
8435 */
8436DECL_INLINE_THROW(uint32_t)
8437iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8438{
8439    Assert(iWord <= 15);
8440
8441#ifdef RT_ARCH_AMD64
8442 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
8443#elif defined(RT_ARCH_ARM64)
8444 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8445 Assert(!(iVecRegSrc & 0x1));
8446 /* Need to access the "high" 128-bit vector register. */
8447 if (iWord >= 8)
8448 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
8449 else
8450 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
8451#else
8452# error "port me"
8453#endif
8454 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8455 return off;
8456}
8457
8458
8459/**
8460 * Emits a gprdst = vecsrc[x] load, 8-bit.
8461 */
8462DECL_FORCE_INLINE(uint32_t)
8463iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8464{
8465#ifdef RT_ARCH_AMD64
8466 if (iByte >= 16)
8467 {
8468 /** @todo Currently not used. */
8469 AssertReleaseFailed();
8470 }
8471 else
8472 {
8473 /* pextrb gpr, vecsrc, #iByte */
8474 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8475 if (iGprDst >= 8 || iVecRegSrc >= 8)
8476 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8477 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8478 pCodeBuf[off++] = 0x0f;
8479 pCodeBuf[off++] = 0x3a;
8480 pCodeBuf[off++] = 0x14;
8481 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8482 pCodeBuf[off++] = iByte;
8483 }
8484#elif defined(RT_ARCH_ARM64)
8485 /* umov gprdst, vecsrc[iByte] */
8486 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
8487#else
8488# error "port me"
8489#endif
8490 return off;
8491}
8492
8493
8494/**
8495 * Emits a gprdst = vecsrc[x] load, 8-bit.
8496 */
8497DECL_INLINE_THROW(uint32_t)
8498iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8499{
8500    Assert(iByte <= 31);
8501
8502#ifdef RT_ARCH_AMD64
8503 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
8504#elif defined(RT_ARCH_ARM64)
8505 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8506 Assert(!(iVecRegSrc & 0x1));
8507 /* Need to access the "high" 128-bit vector register. */
8508 if (iByte >= 16)
8509 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
8510 else
8511 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
8512#else
8513# error "port me"
8514#endif
8515 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8516 return off;
8517}
8518
8519
8520/**
8521 * Emits a vecdst[x] = gprsrc store, 64-bit.
8522 */
8523DECL_FORCE_INLINE(uint32_t)
8524iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8525{
8526#ifdef RT_ARCH_AMD64
8527 /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */
8528 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8529 pCodeBuf[off++] = X86_OP_REX_W
8530 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8531 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8532 pCodeBuf[off++] = 0x0f;
8533 pCodeBuf[off++] = 0x3a;
8534 pCodeBuf[off++] = 0x22;
8535 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8536 pCodeBuf[off++] = iQWord;
8537#elif defined(RT_ARCH_ARM64)
8538 /* ins vecsrc[iQWord], gpr */
8539 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8540#else
8541# error "port me"
8542#endif
8543 return off;
8544}
8545
8546
8547/**
8548 * Emits a vecdst[x] = gprsrc store, 64-bit.
8549 */
8550DECL_INLINE_THROW(uint32_t)
8551iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8552{
8553 Assert(iQWord <= 1);
8554
8555#ifdef RT_ARCH_AMD64
8556 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
8557#elif defined(RT_ARCH_ARM64)
8558 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
8559#else
8560# error "port me"
8561#endif
8562 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8563 return off;
8564}
8565
8566
8567/**
8568 * Emits a vecdst[x] = gprsrc store, 32-bit.
8569 */
8570DECL_FORCE_INLINE(uint32_t)
8571iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8572{
8573#ifdef RT_ARCH_AMD64
8574 /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */
8575 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8576 if (iVecRegDst >= 8 || iGprSrc >= 8)
8577 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8578 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8579 pCodeBuf[off++] = 0x0f;
8580 pCodeBuf[off++] = 0x3a;
8581 pCodeBuf[off++] = 0x22;
8582 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8583 pCodeBuf[off++] = iDWord;
8584#elif defined(RT_ARCH_ARM64)
8585 /* ins vecsrc[iDWord], gpr */
8586 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
8587#else
8588# error "port me"
8589#endif
8590 return off;
8591}
8592
8593
8594/**
8595 * Emits a vecdst[x] = gprsrc store, 32-bit.
8596 */
8597DECL_INLINE_THROW(uint32_t)
8598iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8599{
8600 Assert(iDWord <= 3);
8601
8602#ifdef RT_ARCH_AMD64
8603 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord);
8604#elif defined(RT_ARCH_ARM64)
8605 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
8606#else
8607# error "port me"
8608#endif
8609 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8610 return off;
8611}
8612
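#if 0  /* Usage sketch, for illustration only; the wrapper name is made up for the example:
        * moving dword #2 of one SIMD register into dword #0 of another by going through
        * the fixed temporary GPR. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleMoveDword2To0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
{
    off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, iVecRegSrc, 2 /*iDWord*/);
    off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, iVecRegDst, IEMNATIVE_REG_FIXED_TMP0, 0 /*iDWord*/);
    return off;
}
#endif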
8613
8614/**
8615 * Emits a vecdst.au32[iDWord] = 0 store.
8616 */
8617DECL_FORCE_INLINE(uint32_t)
8618iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8619{
8620 Assert(iDWord <= 7);
8621
8622#ifdef RT_ARCH_AMD64
8623 /*
8624 * xor tmp0, tmp0
8625 * pinsrd xmm, tmp0, iDword
8626 */
8627 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
8628 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8629 pCodeBuf[off++] = 0x33;
8630 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
8631 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(&pCodeBuf[off], off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
8632#elif defined(RT_ARCH_ARM64)
8633 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8634 Assert(!(iVecReg & 0x1));
8635 /* ins vecsrc[iDWord], wzr */
8636 if (iDWord >= 4)
8637 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
8638 else
8639 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
8640#else
8641# error "port me"
8642#endif
8643 return off;
8644}
8645
8646
8647/**
8648 * Emits a vecdst.au32[iDWord] = 0 store.
8649 */
8650DECL_INLINE_THROW(uint32_t)
8651iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8652{
8653
8654#ifdef RT_ARCH_AMD64
8655 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
8656#elif defined(RT_ARCH_ARM64)
8657 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
8658#else
8659# error "port me"
8660#endif
8661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8662 return off;
8663}
8664
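#if 0  /* Usage sketch, for illustration only: clearing the top dword of a 128-bit register,
        * e.g. when only the lower 96 bits of a result are defined.  idxRegSimd is a
        * placeholder host SIMD register index. */
    off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxRegSimd, 3 /*iDWord*/);
#endif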
8665
8666/**
8667 * Emits a vecdst[0:127] = 0 store.
8668 */
8669DECL_FORCE_INLINE(uint32_t)
8670iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8671{
8672#ifdef RT_ARCH_AMD64
8673 /* pxor xmm, xmm */
8674 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8675 if (iVecReg >= 8)
8676 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
8677 pCodeBuf[off++] = 0x0f;
8678 pCodeBuf[off++] = 0xef;
8679 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8680#elif defined(RT_ARCH_ARM64)
8681 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8682 Assert(!(iVecReg & 0x1));
8683 /* eor vecreg, vecreg, vecreg */
8684 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8685#else
8686# error "port me"
8687#endif
8688 return off;
8689}
8690
8691
8692/**
8693 * Emits a vecdst[0:127] = 0 store.
8694 */
8695DECL_INLINE_THROW(uint32_t)
8696iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8697{
8698#ifdef RT_ARCH_AMD64
8699 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8700#elif defined(RT_ARCH_ARM64)
8701 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8702#else
8703# error "port me"
8704#endif
8705 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8706 return off;
8707}
8708
8709
8710/**
8711 * Emits a vecdst[128:255] = 0 store.
8712 */
8713DECL_FORCE_INLINE(uint32_t)
8714iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8715{
8716#ifdef RT_ARCH_AMD64
8717 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
8718 if (iVecReg < 8)
8719 {
8720 pCodeBuf[off++] = X86_OP_VEX2;
8721 pCodeBuf[off++] = 0xf9;
8722 }
8723 else
8724 {
8725 pCodeBuf[off++] = X86_OP_VEX3;
8726 pCodeBuf[off++] = 0x41;
8727 pCodeBuf[off++] = 0x79;
8728 }
8729 pCodeBuf[off++] = 0x6f;
8730 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8731#elif defined(RT_ARCH_ARM64)
8732 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8733 Assert(!(iVecReg & 0x1));
8734 /* eor vecreg, vecreg, vecreg */
8735 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8736#else
8737# error "port me"
8738#endif
8739 return off;
8740}
8741
8742
8743/**
8744 * Emits a vecdst[128:255] = 0 store.
8745 */
8746DECL_INLINE_THROW(uint32_t)
8747iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8748{
8749#ifdef RT_ARCH_AMD64
8750 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
8751#elif defined(RT_ARCH_ARM64)
8752 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8753#else
8754# error "port me"
8755#endif
8756 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8757 return off;
8758}
8759
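#if 0  /* Usage sketch, for illustration only; the wrapper name is made up for the example:
        * mimicking the VEX.128 behaviour of zeroing bits 255:128 after writing a 128-bit
        * result into a guest register. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleVex128Result(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegResult)
{
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegResult);
    off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, iVecRegDst);
    return off;
}
#endif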
8760
8761/**
8762 * Emits a vecdst[0:255] = 0 store.
8763 */
8764DECL_FORCE_INLINE(uint32_t)
8765iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8766{
8767#ifdef RT_ARCH_AMD64
8768 /* vpxor ymm, ymm, ymm */
8769 if (iVecReg < 8)
8770 {
8771 pCodeBuf[off++] = X86_OP_VEX2;
8772 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8773 }
8774 else
8775 {
8776 pCodeBuf[off++] = X86_OP_VEX3;
8777 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
8778 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8779 }
8780 pCodeBuf[off++] = 0xef;
8781 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8782#elif defined(RT_ARCH_ARM64)
8783 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8784 Assert(!(iVecReg & 0x1));
8785 /* eor vecreg, vecreg, vecreg */
8786 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8787 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8788#else
8789# error "port me"
8790#endif
8791 return off;
8792}
8793
8794
8795/**
8796 * Emits a vecdst[0:255] = 0 store.
8797 */
8798DECL_INLINE_THROW(uint32_t)
8799iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8800{
8801#ifdef RT_ARCH_AMD64
8802 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8803#elif defined(RT_ARCH_ARM64)
8804 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
8805#else
8806# error "port me"
8807#endif
8808 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8809 return off;
8810}
8811
8812
8813/**
8814 * Emits a vecdst = gprsrc broadcast, 8-bit.
8815 */
8816DECL_FORCE_INLINE(uint32_t)
8817iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8818{
8819#ifdef RT_ARCH_AMD64
8820 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE 4.1) */
8821 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8822 if (iVecRegDst >= 8 || iGprSrc >= 8)
8823 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8824 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8825 pCodeBuf[off++] = 0x0f;
8826 pCodeBuf[off++] = 0x3a;
8827 pCodeBuf[off++] = 0x20;
8828 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8829 pCodeBuf[off++] = 0x00;
8830
8831 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
8832 pCodeBuf[off++] = X86_OP_VEX3;
8833 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8834 | 0x02 /* opcode map. */
8835 | ( iVecRegDst >= 8
8836 ? 0
8837 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8838 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8839 pCodeBuf[off++] = 0x78;
8840 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8841#elif defined(RT_ARCH_ARM64)
8842 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8843 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8844
8845 /* dup vecsrc, gpr */
8846 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
8847 if (f256Bit)
8848 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
8849#else
8850# error "port me"
8851#endif
8852 return off;
8853}
8854
8855
8856/**
8857 * Emits a vecdst = gprsrc broadcast, 8-bit.
8858 */
8859DECL_INLINE_THROW(uint32_t)
8860iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8861{
8862#ifdef RT_ARCH_AMD64
8863 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8864#elif defined(RT_ARCH_ARM64)
8865 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8866#else
8867# error "port me"
8868#endif
8869 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8870 return off;
8871}
8872
8873
8874/**
8875 * Emits a vecdst = gprsrc broadcast, 16-bit.
8876 */
8877DECL_FORCE_INLINE(uint32_t)
8878iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8879{
8880#ifdef RT_ARCH_AMD64
8881 /* pinsrw vecdst, gpr, #0 */
8882 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8883 if (iVecRegDst >= 8 || iGprSrc >= 8)
8884 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8885 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8886 pCodeBuf[off++] = 0x0f;
8887 pCodeBuf[off++] = 0xc4;
8888 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8889 pCodeBuf[off++] = 0x00;
8890
8891    /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
8892 pCodeBuf[off++] = X86_OP_VEX3;
8893 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8894 | 0x02 /* opcode map. */
8895 | ( iVecRegDst >= 8
8896 ? 0
8897 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8898 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8899 pCodeBuf[off++] = 0x79;
8900 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8901#elif defined(RT_ARCH_ARM64)
8902 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8903 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8904
8905 /* dup vecsrc, gpr */
8906 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
8907 if (f256Bit)
8908 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
8909#else
8910# error "port me"
8911#endif
8912 return off;
8913}
8914
8915
8916/**
8917 * Emits a vecdst = gprsrc broadcast, 16-bit.
8918 */
8919DECL_INLINE_THROW(uint32_t)
8920iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8921{
8922#ifdef RT_ARCH_AMD64
8923 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8924#elif defined(RT_ARCH_ARM64)
8925 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8926#else
8927# error "port me"
8928#endif
8929 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8930 return off;
8931}
8932
8933
8934/**
8935 * Emits a vecdst = gprsrc broadcast, 32-bit.
8936 */
8937DECL_FORCE_INLINE(uint32_t)
8938iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8939{
8940#ifdef RT_ARCH_AMD64
8941 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
8942 * vbroadcast needs a memory operand or another xmm register to work... */
8943
8944 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
8945 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8946 if (iVecRegDst >= 8 || iGprSrc >= 8)
8947 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8948 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8949 pCodeBuf[off++] = 0x0f;
8950 pCodeBuf[off++] = 0x3a;
8951 pCodeBuf[off++] = 0x22;
8952 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8953 pCodeBuf[off++] = 0x00;
8954
8955 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
8956 pCodeBuf[off++] = X86_OP_VEX3;
8957 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8958 | 0x02 /* opcode map. */
8959 | ( iVecRegDst >= 8
8960 ? 0
8961 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8962 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8963 pCodeBuf[off++] = 0x58;
8964 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8965#elif defined(RT_ARCH_ARM64)
8966 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8967 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8968
8969 /* dup vecsrc, gpr */
8970 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
8971 if (f256Bit)
8972 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
8973#else
8974# error "port me"
8975#endif
8976 return off;
8977}
8978
8979
8980/**
8981 * Emits a vecdst = gprsrc broadcast, 32-bit.
8982 */
8983DECL_INLINE_THROW(uint32_t)
8984iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8985{
8986#ifdef RT_ARCH_AMD64
8987 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8988#elif defined(RT_ARCH_ARM64)
8989 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8990#else
8991# error "port me"
8992#endif
8993 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8994 return off;
8995}
8996
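#if 0  /* Usage sketch, for illustration only: splatting a 32-bit GPR value across all
        * eight dwords of a 256-bit register.  idxRegSimd and idxRegValue are placeholder
        * host register indices. */
    off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxRegSimd, idxRegValue, true /*f256Bit*/);
#endif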
8997
8998/**
8999 * Emits a vecdst = gprsrc broadcast, 64-bit.
9000 */
9001DECL_FORCE_INLINE(uint32_t)
9002iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9003{
9004#ifdef RT_ARCH_AMD64
9005 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
9006 * vbroadcast needs a memory operand or another xmm register to work... */
9007
9008 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
9009 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9010 pCodeBuf[off++] = X86_OP_REX_W
9011 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9012 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9013 pCodeBuf[off++] = 0x0f;
9014 pCodeBuf[off++] = 0x3a;
9015 pCodeBuf[off++] = 0x22;
9016 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9017 pCodeBuf[off++] = 0x00;
9018
9019 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
9020 pCodeBuf[off++] = X86_OP_VEX3;
9021 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9022 | 0x02 /* opcode map. */
9023 | ( iVecRegDst >= 8
9024 ? 0
9025 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9026 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9027 pCodeBuf[off++] = 0x59;
9028 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9029#elif defined(RT_ARCH_ARM64)
9030 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9031 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9032
9033 /* dup vecsrc, gpr */
9034 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
9035 if (f256Bit)
9036 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
9037#else
9038# error "port me"
9039#endif
9040 return off;
9041}
9042
9043
9044/**
9045 * Emits a vecdst = gprsrc broadcast, 64-bit.
9046 */
9047DECL_INLINE_THROW(uint32_t)
9048iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9049{
9050#ifdef RT_ARCH_AMD64
9051 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
9052#elif defined(RT_ARCH_ARM64)
9053 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9054#else
9055# error "port me"
9056#endif
9057 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9058 return off;
9059}
9060
9061
9062/**
9063 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9064 */
9065DECL_FORCE_INLINE(uint32_t)
9066iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9067{
9068#ifdef RT_ARCH_AMD64
9069 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
9070
9071 /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
9072 pCodeBuf[off++] = X86_OP_VEX3;
9073 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9074 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9075 pCodeBuf[off++] = 0x38;
9076 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9077 pCodeBuf[off++] = 0x01; /* Immediate */
9078#elif defined(RT_ARCH_ARM64)
9079 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9080 Assert(!(iVecRegDst & 0x1));
9081
9082 /* mov dst, src; alias for: orr dst, src, src */
9083 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9084 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
9085#else
9086# error "port me"
9087#endif
9088 return off;
9089}
9090
9091
9092/**
9093 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9094 */
9095DECL_INLINE_THROW(uint32_t)
9096iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9097{
9098#ifdef RT_ARCH_AMD64
9099 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
9100#elif defined(RT_ARCH_ARM64)
9101 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
9102#else
9103# error "port me"
9104#endif
9105 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9106 return off;
9107}
9108
9109#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
9110
9111/** @} */
9112
9113#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
9114