VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@ 103894

Last change on this file since 103894 was 103894, checked in by vboxsync, 11 months ago

VMM/IEM: Add SIMD local variable support and implement native emitters for IEM_MC_FETCH_YREG_U256() and IEM_MC_STORE_YREG_U256_ZX_VLMAX(), bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 292.5 KB
/* $Id: IEMN8veRecompilerEmit.h 103894 2024-03-18 13:48:31Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline    Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = 0xd503201f;

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
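
/*
 * Illustrative usage sketch (not from the original source): every emitter in
 * this file takes the current instruction buffer offset and returns the
 * updated one, so calls chain naturally.  The uInfo value below is just an
 * assumed example tag:
 * @code
 *     off = iemNativeEmitMarker(pReNative, off, 0x1234);  // tag block start in disassembly
 *     off = iemNativeEmitGprZero(pReNative, off, 0);      // gpr0 = 0
 * @endcode
 */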


/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */
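
/*
 * Worked example for the displacement encoder above (a sketch; the byte
 * values follow from the standard x86 ModR/M rules): the ModR/M part of
 * 'mov rax, [rsp+0x10]' means iGprReg=0 (RAX), iGprBase=4 (RSP), offDisp=0x10.
 * The displacement fits in 8 bits, so mod=01, and base RSP forces a SIB byte
 * with index=100b (none):
 *
 *      0x44    ModR/M: mod=01, reg=000 (RAX), rm=100 (SIB follows)
 *      0x24    SIB:    scale=00, index=100 (none), base=100 (RSP)
 *      0x10    disp8
 */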

/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 10 instruction bytes.
 *      - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
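    /*
     * Decomposition sketch (constants assumed for illustration): for
     * uImm64 = 0x0000cafe00001234 the half-words are 0x1234, 0x0000, 0xcafe,
     * 0x0000, so two instructions suffice:
     *      movz gpr, #0x1234
     *      movk gpr, #0xcafe, lsl #32
     * For uImm64 = 0xffffffffffff1234 the movn path below wins instead:
     *      movn gpr, #0xedcb       ; gpr = ~0xedcb = 0xffffffffffff1234
     */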
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = ( uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

    /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
     *        (clang 12.x does that, only to use the 'x' version for the
     *        addressing in the following ldr.) */

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 6 instruction bytes.
 *      - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}
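
/*
 * Selection sketch for the ARM64 path above (constants assumed for
 * illustration): 0x0000beef takes the plain movz path, 0xbeef0000 the
 * 'movz ..., lsl #16' path, 0x1234ffff becomes 'movn gpr, #0xedcb, lsl #16'
 * (since ~(0xedcb << 16) == 0x1234ffff), and anything else, e.g. 0x12345678,
 * needs the two-instruction movz+movk fallback.
 */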


/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range;
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets; stores require a temporary
 *       register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so we
     * try to use those if we can; otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}
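
/*
 * Addressing sketch for the helper above (register names are illustrative
 * and depend on the IEMNATIVE_REG_FIXED_* definitions): for an 8-byte load,
 * an aligned offVCpu below 4K*8 encodes directly as
 *      ldr x0, [<pVCpu reg>, #offVCpu]
 * an offset within the same range of cpum.GstCtx is rebased on the fixed
 * CPUMCTX pointer register, and anything larger falls back to
 *      mov x1, #offVCpu            ; via iemNativeEmitLoadGprImmEx
 *      ldr x0, [<pVCpu reg>, x1]
 */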

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so we
     * try to use those if we can; otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}



/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: @a idxTmp1 is always required! Whether @a idxTmp2 is needed
 *       depends on whether the offset can be encoded as an immediate. The
 *       @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same
 *       from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load effective address to a GPR of a VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                         offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}
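
/*
 * Usage sketch; the statistics member named here is hypothetical and only
 * serves to show how the typesafe offset helper pairs with the counter
 * emitter below:
 * @code
 *     off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
 *                                             iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomeEvent));
 * @endcode
 */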


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
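
/*
 * Mask encoding sketch for the ARM64 path above (mask values assumed for
 * illustration): a contiguous-run mask such as 0x0000ff00 encodes as an
 * ARMv8 logical immediate, so no extra register is needed; a scattered mask
 * such as 0x12345678 does not, so iemNativeRegAllocTmpImm is used to
 * materialize it in a temporary register first.
 */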


/**
 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* and dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_W | X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_W | X86_OP_REX_R;
    else
        pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst, src;   alias for: orr dst, xzr, src */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst32, src32;   alias for: orr dst32, wzr, src32 */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movzx Gv,Ew */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb7;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* and gprdst, gprsrc, #0xffff */
# if 1
    Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
# else
    Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
1490 */
1491DECL_INLINE_THROW(uint32_t)
1492iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1493{
1494#ifdef RT_ARCH_AMD64
1495 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1496#elif defined(RT_ARCH_ARM64)
1497 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1498#else
1499# error "port me"
1500#endif
1501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1502 return off;
1503}
1504
1505
1506/**
1507 * Emits a gprdst = gprsrc[7:0] load.
1508 * @note Bits 63 thru 8 are cleared.
1509 */
1510DECL_FORCE_INLINE(uint32_t)
1511iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1512{
1513#ifdef RT_ARCH_AMD64
1514 /* movzx Gv,Eb */
1515 if (iGprDst >= 8 || iGprSrc >= 8)
1516 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1517 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1518 : X86_OP_REX_R;
1519 else if (iGprSrc >= 4)
1520 pCodeBuf[off++] = X86_OP_REX;
1521 pCodeBuf[off++] = 0x0f;
1522 pCodeBuf[off++] = 0xb6;
1523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1524
1525#elif defined(RT_ARCH_ARM64)
1526 /* and gprdst, gprsrc, #0xff */
1527 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1528 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1529
1530#else
1531# error "port me"
1532#endif
1533 return off;
1534}
1535
1536
1537/**
1538 * Emits a gprdst = gprsrc[7:0] load.
1539 * @note Bits 63 thru 8 are cleared.
1540 */
1541DECL_INLINE_THROW(uint32_t)
1542iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1543{
1544#ifdef RT_ARCH_AMD64
1545 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1546#elif defined(RT_ARCH_ARM64)
1547 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1548#else
1549# error "port me"
1550#endif
1551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1552 return off;
1553}
1554
1555
1556/**
1557 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1558 * @note Bits 63 thru 8 are cleared.
1559 */
1560DECL_INLINE_THROW(uint32_t)
1561iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1562{
1563#ifdef RT_ARCH_AMD64
1564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1565
1566 /* movzx Gv,Ew */
1567 if ((iGprDst | iGprSrc) >= 8)
1568 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1569 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1570 : X86_OP_REX_R;
1571 pbCodeBuf[off++] = 0x0f;
1572 pbCodeBuf[off++] = 0xb7;
1573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1574
1575 /* shr Ev,8 */
1576 if (iGprDst >= 8)
1577 pbCodeBuf[off++] = X86_OP_REX_B;
1578 pbCodeBuf[off++] = 0xc1;
1579 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1580 pbCodeBuf[off++] = 8;
1581
1582#elif defined(RT_ARCH_ARM64)
1583 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1585 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1586
1587#else
1588# error "port me"
1589#endif
1590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1591 return off;
1592}
1593
1594
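/* Illustrative example for the high-byte fetch above: iGprDst=rbx,
   iGprSrc=rax yields 0f b7 d8 + c1 eb 08 (movzx ebx, ax; shr ebx, 8),
   leaving the ah value zero-extended in rbx; ARM64 needs just the one ubfx. */
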
1595/**
1596 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1597 */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1600{
1601#ifdef RT_ARCH_AMD64
1602 /* movsxd r64, r/m32 */
1603 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1604 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1605 pbCodeBuf[off++] = 0x63;
1606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1607
1608#elif defined(RT_ARCH_ARM64)
1609 /* sxtw dst, src */
1610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1611 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1612
1613#else
1614# error "port me"
1615#endif
1616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1617 return off;
1618}
1619
1620
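/* Illustrative: iGprDst=rdx, iGprSrc=rax gives 48 63 d0 (movsxd rdx, eax);
   the ARM64 sxtw is likewise a single instruction for any register pair. */
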
1621/**
1622 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1623 */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1626{
1627#ifdef RT_ARCH_AMD64
1628 /* movsx r64, r/m16 */
1629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xbf;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635#elif defined(RT_ARCH_ARM64)
1636 /* sxth dst, src */
1637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1638 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1639
1640#else
1641# error "port me"
1642#endif
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644 return off;
1645}
1646
1647
1648/**
1649 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1650 */
1651DECL_INLINE_THROW(uint32_t)
1652iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1653{
1654#ifdef RT_ARCH_AMD64
1655 /* movsx r32, r/m16 */
1656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1657 if (iGprDst >= 8 || iGprSrc >= 8)
1658 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1659 pbCodeBuf[off++] = 0x0f;
1660 pbCodeBuf[off++] = 0xbf;
1661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 /* sxth dst32, src */
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1666 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1667
1668#else
1669# error "port me"
1670#endif
1671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1672 return off;
1673}
1674
1675
1676/**
1677 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1678 */
1679DECL_INLINE_THROW(uint32_t)
1680iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /* movsx r64, r/m8 */
1684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1686 pbCodeBuf[off++] = 0x0f;
1687 pbCodeBuf[off++] = 0xbe;
1688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1689
1690#elif defined(RT_ARCH_ARM64)
1691 /* sxtb dst, src */
1692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1693 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1694
1695#else
1696# error "port me"
1697#endif
1698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1699 return off;
1700}
1701
1702
1703/**
1704 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1705 * @note Bits 63 thru 32 are cleared.
1706 */
1707DECL_INLINE_THROW(uint32_t)
1708iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1709{
1710#ifdef RT_ARCH_AMD64
1711 /* movsx r32, r/m8 */
1712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1713 if (iGprDst >= 8 || iGprSrc >= 8)
1714 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1715 else if (iGprSrc >= 4)
1716 pbCodeBuf[off++] = X86_OP_REX;
1717 pbCodeBuf[off++] = 0x0f;
1718 pbCodeBuf[off++] = 0xbe;
1719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 /* sxtb dst32, src32 */
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1724 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1725
1726#else
1727# error "port me"
1728#endif
1729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1730 return off;
1731}
1732
1733
1734/**
1735 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1736 * @note Bits 63 thru 16 are cleared.
1737 */
1738DECL_INLINE_THROW(uint32_t)
1739iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1740{
1741#ifdef RT_ARCH_AMD64
1742 /* movsx r16, r/m8 */
1743 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1744 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1745 if (iGprDst >= 8 || iGprSrc >= 8)
1746 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1747 else if (iGprSrc >= 4)
1748 pbCodeBuf[off++] = X86_OP_REX;
1749 pbCodeBuf[off++] = 0x0f;
1750 pbCodeBuf[off++] = 0xbe;
1751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1752
1753 /* movzx r32, r/m16 */
1754 if (iGprDst >= 8)
1755 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1756 pbCodeBuf[off++] = 0x0f;
1757 pbCodeBuf[off++] = 0xb7;
1758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1759
1760#elif defined(RT_ARCH_ARM64)
1761 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1763 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1764 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1766
1767#else
1768# error "port me"
1769#endif
1770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1771 return off;
1772}
1773
1774
1775/**
1776 * Emits a gprdst = gprsrc + addend load.
1777 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1778 */
1779#ifdef RT_ARCH_AMD64
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1782 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1783{
1784 Assert(iAddend != 0);
1785
1786 /* lea gprdst, [gprsrc + iAddend] */
1787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1789 pbCodeBuf[off++] = 0x8d;
1790 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1792 return off;
1793}
1794
1795#elif defined(RT_ARCH_ARM64)
1796DECL_INLINE_THROW(uint32_t)
1797iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1798 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1799{
1800 if ((uint32_t)iAddend < 4096)
1801 {
1802 /* add dst, src, uimm12 */
1803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1805 }
1806 else if ((uint32_t)-iAddend < 4096)
1807 {
1808 /* sub dst, src, uimm12 */
1809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1811 }
1812 else
1813 {
1814 Assert(iGprSrc != iGprDst);
1815 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1818 }
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820 return off;
1821}
1822#else
1823# error "port me"
1824#endif
1825
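/* Illustrative AMD64 output for the addend load above: iGprDst=rcx,
   iGprSrc=rbx, iAddend=16 emits 48 8d 4b 10 (lea rcx, [rbx+10h]).  The ARM64
   path prefers add/sub with a uimm12 and only materializes larger addends in
   iGprDst first, which is why it asserts iGprSrc != iGprDst there. */
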
1826/**
1827 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1828 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1829 */
1830#ifdef RT_ARCH_AMD64
1831DECL_INLINE_THROW(uint32_t)
1832iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1833 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1834#else
1835DECL_INLINE_THROW(uint32_t)
1836iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1837 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1838#endif
1839{
1840 if (iAddend != 0)
1841 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1842 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1843}
1844
1845
1846/**
1847 * Emits a gprdst = gprsrc32 + addend load.
1848 * @note Bits 63 thru 32 are cleared.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1852 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1853{
1854 Assert(iAddend != 0);
1855
1856#ifdef RT_ARCH_AMD64
1857 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1860 if ((iGprDst | iGprSrc) >= 8)
1861 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1862 pbCodeBuf[off++] = 0x8d;
1863 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1864
1865#elif defined(RT_ARCH_ARM64)
1866 if ((uint32_t)iAddend < 4096)
1867 {
1868 /* add dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1871 }
1872 else if ((uint32_t)-iAddend < 4096)
1873 {
1874 /* sub dst, src, uimm12 */
1875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1877 }
1878 else
1879 {
1880 Assert(iGprSrc != iGprDst);
1881 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1884 }
1885
1886#else
1887# error "port me"
1888#endif
1889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1890 return off;
1891}
1892
1893
1894/**
1895 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1896 */
1897DECL_INLINE_THROW(uint32_t)
1898iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1899 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1900{
1901 if (iAddend != 0)
1902 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1903 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1904}
1905
1906
1907/**
1908 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1909 * destination.
1910 */
1911DECL_FORCE_INLINE(uint32_t)
1912iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1913{
1914#ifdef RT_ARCH_AMD64
1915 /* mov reg16, r/m16 */
1916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1917 if (idxDst >= 8 || idxSrc >= 8)
1918 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1919 pCodeBuf[off++] = 0x8b;
1920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1921
1922#elif defined(RT_ARCH_ARM64)
1923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1924 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1925
1926#else
1927# error "Port me!"
1928#endif
1929 return off;
1930}
1931
1932
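/* Illustrative: idxDst=rcx, idxSrc=rdx emits 66 8b ca (mov cx, dx) on AMD64;
   the ARM64 bfi copies bits 15:0 while leaving idxDst bits 63:16 untouched. */
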
1933/**
1934 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1935 * destination.
1936 */
1937DECL_INLINE_THROW(uint32_t)
1938iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1939{
1940#ifdef RT_ARCH_AMD64
1941 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1942#elif defined(RT_ARCH_ARM64)
1943 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1944#else
1945# error "Port me!"
1946#endif
1947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1948 return off;
1949}
1950
1951
1952#ifdef RT_ARCH_AMD64
1953/**
1954 * Common bit of iemNativeEmitLoadGprByBp and friends.
1955 */
1956DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1957 PIEMRECOMPILERSTATE pReNativeAssert)
1958{
1959 if (offDisp < 128 && offDisp >= -128)
1960 {
1961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1962 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1963 }
1964 else
1965 {
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1967 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1968 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1969 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1970 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1971 }
1972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1973 return off;
1974}
1975#elif defined(RT_ARCH_ARM64)
1976/**
1977 * Common bit of iemNativeEmitLoadGprByBp and friends.
1978 */
1979DECL_FORCE_INLINE_THROW(uint32_t)
1980iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1981 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1982{
1983 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1984 {
1985 /* str w/ unsigned imm12 (scaled) */
1986 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1988 }
1989 else if (offDisp >= -256 && offDisp <= 255)
1990 {
1991 /* stur w/ signed imm9 (unscaled) */
1992 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1993 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1994 }
1995 else
1996 {
1997 /* Use temporary indexing register. */
1998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2001 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2002 }
2003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2004 return off;
2005}
2006#endif
2007
2008
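/* How the ARM64 BP addressing helper above picks an encoding (illustrative
   values): offDisp=0x28 with cbData=8 fits the scaled imm12 form (imm12=5);
   offDisp=-0x20 fits the unscaled ldur/stur imm9 form; anything else is
   materialized in IEMNATIVE_REG_FIXED_TMP0 and uses a register-indexed
   access with sxtw extension. */
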
2009/**
2010 * Emits a 64-bit GPR load instruction with a BP relative source address.
2011 */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2014{
2015#ifdef RT_ARCH_AMD64
2016 /* mov gprdst, qword [rbp + offDisp] */
2017 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2018 if (iGprDst < 8)
2019 pbCodeBuf[off++] = X86_OP_REX_W;
2020 else
2021 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2022 pbCodeBuf[off++] = 0x8b;
2023 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2024
2025#elif defined(RT_ARCH_ARM64)
2026 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2027
2028#else
2029# error "port me"
2030#endif
2031}
2032
2033
2034/**
2035 * Emits a 32-bit GPR load instruction with a BP relative source address.
2036 * @note Bits 63 thru 32 of the GPR will be cleared.
2037 */
2038DECL_INLINE_THROW(uint32_t)
2039iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2040{
2041#ifdef RT_ARCH_AMD64
2042 /* mov gprdst, dword [rbp + offDisp] */
2043 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2044 if (iGprDst >= 8)
2045 pbCodeBuf[off++] = X86_OP_REX_R;
2046 pbCodeBuf[off++] = 0x8b;
2047 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2048
2049#elif defined(RT_ARCH_ARM64)
2050 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2051
2052#else
2053# error "port me"
2054#endif
2055}
2056
2057
2058/**
2059 * Emits a 16-bit GPR load instruction with a BP relative source address.
2060 * @note Bits 63 thru 16 of the GPR will be cleared.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2064{
2065#ifdef RT_ARCH_AMD64
2066 /* movzx gprdst, word [rbp + offDisp] */
2067 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2068 if (iGprDst >= 8)
2069 pbCodeBuf[off++] = X86_OP_REX_R;
2070 pbCodeBuf[off++] = 0x0f;
2071 pbCodeBuf[off++] = 0xb7;
2072 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2073
2074#elif defined(RT_ARCH_ARM64)
2075 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2076
2077#else
2078# error "port me"
2079#endif
2080}
2081
2082
2083/**
2084 * Emits an 8-bit GPR load instruction with a BP relative source address.
2085 * @note Bits 63 thru 8 of the GPR will be cleared.
2086 */
2087DECL_INLINE_THROW(uint32_t)
2088iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2089{
2090#ifdef RT_ARCH_AMD64
2091 /* movzx gprdst, byte [rbp + offDisp] */
2092 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2093 if (iGprDst >= 8)
2094 pbCodeBuf[off++] = X86_OP_REX_R;
2095 pbCodeBuf[off++] = 0x0f;
2096 pbCodeBuf[off++] = 0xb6;
2097 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2098
2099#elif defined(RT_ARCH_ARM64)
2100 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2101
2102#else
2103# error "port me"
2104#endif
2105}
2106
2107
2108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2109/**
2110 * Emits a 128-bit vector register load instruction with a BP relative source address.
2111 */
2112DECL_FORCE_INLINE_THROW(uint32_t)
2113iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2114{
2115#ifdef RT_ARCH_AMD64
2116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2117
2118 /* movdqu reg128, mem128 */
2119 pbCodeBuf[off++] = 0xf3;
2120 if (iVecRegDst >= 8)
2121 pbCodeBuf[off++] = X86_OP_REX_R;
2122 pbCodeBuf[off++] = 0x0f;
2123 pbCodeBuf[off++] = 0x6f;
2124 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2125#elif defined(RT_ARCH_ARM64)
2126 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2127#else
2128# error "port me"
2129#endif
2130}
2131
2132
2133/**
2134 * Emits a 256-bit vector register load instruction with a BP relative source address.
2135 */
2136DECL_FORCE_INLINE_THROW(uint32_t)
2137iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2138{
2139#ifdef RT_ARCH_AMD64
2140 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2141
2142 /* vmovdqu reg256, mem256 */
2143 pbCodeBuf[off++] = X86_OP_VEX2;
2144 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2145 pbCodeBuf[off++] = 0x6f;
2146 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2147#elif defined(RT_ARCH_ARM64)
2148 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2149 Assert(!(iVecRegDst & 0x1));
2150 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2151 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2152#else
2153# error "port me"
2154#endif
2155}
2156
2157#endif
2158
2159
2160/**
2161 * Emits a load effective address to a GPR with a BP relative source address.
2162 */
2163DECL_INLINE_THROW(uint32_t)
2164iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2165{
2166#ifdef RT_ARCH_AMD64
2167 /* lea gprdst, [rbp + offDisp] */
2168 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2169 if (iGprDst < 8)
2170 pbCodeBuf[off++] = X86_OP_REX_W;
2171 else
2172 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2173 pbCodeBuf[off++] = 0x8d;
2174 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2175
2176#elif defined(RT_ARCH_ARM64)
2177 if ((uint32_t)offDisp < (unsigned)_4K)
2178 {
2179 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2181 }
2182 else if ((uint32_t)-offDisp < (unsigned)_4K)
2183 {
2184 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2185 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2186 }
2187 else
2188 {
2189 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2190 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2191 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2192 if (offDisp >= 0)
2193 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2194 else
2195 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2196 }
2197
2198#else
2199# error "port me"
2200#endif
2201
2202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2203 return off;
2204}
2205
2206
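/* Illustrative: iGprDst=rax, offDisp=-8 makes the lea helper above emit
   48 8d 45 f8 (lea rax, [rbp-8]) on AMD64 and a single sub-from-BP with a
   uimm12 on ARM64. */
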
2207/**
2208 * Emits a 64-bit GPR store with a BP relative destination address.
2209 *
2210 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2211 */
2212DECL_INLINE_THROW(uint32_t)
2213iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2214{
2215#ifdef RT_ARCH_AMD64
2216 /* mov qword [rbp + offDisp], gprdst */
2217 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2218 if (iGprSrc < 8)
2219 pbCodeBuf[off++] = X86_OP_REX_W;
2220 else
2221 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2222 pbCodeBuf[off++] = 0x89;
2223 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2224
2225#elif defined(RT_ARCH_ARM64)
2226 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2227 {
2228 /* str w/ unsigned imm12 (scaled) */
2229 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2230 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2231 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2232 }
2233 else if (offDisp >= -256 && offDisp <= 255)
2234 {
2235 /* stur w/ signed imm9 (unscaled) */
2236 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2237 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2238 }
2239 else if ((uint32_t)-offDisp < (unsigned)_4K)
2240 {
2241 /* Use temporary indexing register w/ sub uimm12. */
2242 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2243 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2244 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2245 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2246 }
2247 else
2248 {
2249 /* Use temporary indexing register. */
2250 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2251 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2252 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2253 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2254 }
2255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2256 return off;
2257
2258#else
2259# error "Port me!"
2260#endif
2261}
2262
2263
2264/**
2265 * Emits a 64-bit immediate store with a BP relative destination address.
2266 *
2267 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2268 */
2269DECL_INLINE_THROW(uint32_t)
2270iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2271{
2272#ifdef RT_ARCH_AMD64
2273 if ((int64_t)uImm64 == (int32_t)uImm64)
2274 {
2275 /* mov qword [rbp + offDisp], imm32 - sign extended */
2276 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2277 pbCodeBuf[off++] = X86_OP_REX_W;
2278 pbCodeBuf[off++] = 0xc7;
2279 if (offDisp < 128 && offDisp >= -128)
2280 {
2281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2282 pbCodeBuf[off++] = (uint8_t)offDisp;
2283 }
2284 else
2285 {
2286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2287 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2288 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2289 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2290 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2291 }
2292 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2293 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2294 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2295 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2297 return off;
2298 }
2299#endif
2300
2301 /* Load tmp0, imm64; Store tmp to bp+disp. */
2302 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2303 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2304}
2305
2306
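/* The imm32 fast path in the store-immediate helper above covers any value
   whose bits 63:31 are all identical, e.g. UINT64_MAX is stored directly as
   a sign-extended imm32 of -1; all other values are staged through
   IEMNATIVE_REG_FIXED_TMP0. */
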
2307#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2308/**
2309 * Emits a 128-bit vector register store with a BP relative destination address.
2310 *
2311 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2312 */
2313DECL_INLINE_THROW(uint32_t)
2314iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2315{
2316#ifdef RT_ARCH_AMD64
2317 /* movdqu [rbp + offDisp], vecsrc */
2318 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2319 pbCodeBuf[off++] = 0xf3;
2320 if (iVecRegSrc >= 8)
2321 pbCodeBuf[off++] = X86_OP_REX_R;
2322 pbCodeBuf[off++] = 0x0f;
2323 pbCodeBuf[off++] = 0x7f;
2324 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2325
2326#elif defined(RT_ARCH_ARM64)
2327 if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2328 {
2329 /* str w/ unsigned imm12 (scaled by the 16-byte access size) */
2330 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2331 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2332 ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2333 }
2334 else if (offDisp >= -256 && offDisp <= 255)
2335 {
2336 /* stur w/ signed imm9 (unscaled) */
2337 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2338 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2339 }
2340 else if ((uint32_t)-offDisp < (unsigned)_4K)
2341 {
2342 /* Use temporary indexing register w/ sub uimm12. */
2343 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2344 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2345 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2346 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2347 }
2348 else
2349 {
2350 /* Use temporary indexing register. */
2351 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2352 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2353 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2354 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2355 }
2356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2357 return off;
2358
2359#else
2360# error "Port me!"
2361#endif
2362}
2363
2364
2365/**
2366 * Emits a 256-bit vector register store with a BP relative destination address.
2367 *
2368 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2369 */
2370DECL_INLINE_THROW(uint32_t)
2371iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2372{
2373#ifdef RT_ARCH_AMD64
2374 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2375
2376 /* vmovdqu mem256, reg256 */
2377 pbCodeBuf[off++] = X86_OP_VEX2;
2378 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2379 pbCodeBuf[off++] = 0x7f;
2380 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2381#elif defined(RT_ARCH_ARM64)
2382 Assert(!(iVecRegSrc & 0x1));
2383 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2384 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2385#else
2386# error "Port me!"
2387#endif
2388}
2389#endif
2390
2391#if defined(RT_ARCH_ARM64)
2392
2393/**
2394 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2395 *
2396 * @note Odd and large @a offDisp values require a temporary, unless it's a
2397 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2398 * caller does not heed this.
2399 *
2400 * @note DON'T try this with prefetch.
2401 */
2402DECL_FORCE_INLINE_THROW(uint32_t)
2403iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2404 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2405{
2406 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2407 {
2408 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2409 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2410 }
2411 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2412 && iGprReg != iGprBase)
2413 || iGprTmp != UINT8_MAX)
2414 {
2415 /* The offset is too large, so we must load it into a register and use
2416 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2417 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2418 if (iGprTmp == UINT8_MAX)
2419 iGprTmp = iGprReg;
2420 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2421 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2422 }
2423 else
2424# ifdef IEM_WITH_THROW_CATCH
2425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2426# else
2427 AssertReleaseFailedStmt(off = UINT32_MAX);
2428# endif
2429 return off;
2430}
2431
2432/**
2433 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2434 */
2435DECL_FORCE_INLINE_THROW(uint32_t)
2436iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2437 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2438{
2439 /*
2440 * There are a couple of ldr variants that take an immediate offset, so
2441 * try to use those if we can; otherwise we have to use a temporary register
2442 * to help with the addressing.
2443 */
2444 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2445 {
2446 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2447 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2448 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2449 }
2450 else
2451 {
2452 /* The offset is too large, so we must load it into a register and use
2453 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2454 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2455 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2456
2457 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2458 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2459
2460 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2461 }
2462 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2463 return off;
2464}
2465
2466#endif /* RT_ARCH_ARM64 */
2467
2468/**
2469 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2470 *
2471 * @note ARM64: Misaligned @a offDisp values and values not in the
2472 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp) if
2473 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2474 * does not heed this.
2475 */
2476DECL_FORCE_INLINE_THROW(uint32_t)
2477iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2478 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2479{
2480#ifdef RT_ARCH_AMD64
2481 /* mov reg64, mem64 */
2482 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2483 pCodeBuf[off++] = 0x8b;
2484 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2485 RT_NOREF(iGprTmp);
2486
2487#elif defined(RT_ARCH_ARM64)
2488 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2489 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2490
2491#else
2492# error "port me"
2493#endif
2494 return off;
2495}
2496
2497
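/* Illustrative: iGprDst=rax, iGprBase=rbx, offDisp=8 emits 48 8b 43 08
   (mov rax, [rbx+8]) on AMD64; the ARM64 side uses the scaled ldr imm12
   form whenever the displacement is positive, 8-byte aligned and in range. */
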
2498/**
2499 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2500 */
2501DECL_INLINE_THROW(uint32_t)
2502iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2503{
2504#ifdef RT_ARCH_AMD64
2505 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2507
2508#elif defined(RT_ARCH_ARM64)
2509 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2510
2511#else
2512# error "port me"
2513#endif
2514 return off;
2515}
2516
2517
2518/**
2519 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2520 *
2521 * @note ARM64: Misaligned @a offDisp values and values not in the
2522 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp)
2523 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2524 * caller does not heed this.
2525 *
2526 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2527 */
2528DECL_FORCE_INLINE_THROW(uint32_t)
2529iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2530 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2531{
2532#ifdef RT_ARCH_AMD64
2533 /* mov reg32, mem32 */
2534 if (iGprDst >= 8 || iGprBase >= 8)
2535 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2536 pCodeBuf[off++] = 0x8b;
2537 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2538 RT_NOREF(iGprTmp);
2539
2540#elif defined(RT_ARCH_ARM64)
2541 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2542 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2543
2544#else
2545# error "port me"
2546#endif
2547 return off;
2548}
2549
2550
2551/**
2552 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2553 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2554 */
2555DECL_INLINE_THROW(uint32_t)
2556iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2557{
2558#ifdef RT_ARCH_AMD64
2559 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2560 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2561
2562#elif defined(RT_ARCH_ARM64)
2563 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2564
2565#else
2566# error "port me"
2567#endif
2568 return off;
2569}
2570
2571
2572/**
2573 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2574 * sign-extending the value to 64 bits.
2575 *
2576 * @note ARM64: Misaligned @a offDisp values and values not in the
2577 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp)
2578 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2579 * caller does not heed this.
2580 */
2581DECL_FORCE_INLINE_THROW(uint32_t)
2582iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2583 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2584{
2585#ifdef RT_ARCH_AMD64
2586 /* movsxd reg64, mem32 */
2587 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2588 pCodeBuf[off++] = 0x63;
2589 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2590 RT_NOREF(iGprTmp);
2591
2592#elif defined(RT_ARCH_ARM64)
2593 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2594 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2595
2596#else
2597# error "port me"
2598#endif
2599 return off;
2600}
2601
2602
2603/**
2604 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2605 *
2606 * @note ARM64: Misaligned @a offDisp values and values not in the
2607 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2608 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2609 * caller does not heed this.
2610 *
2611 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2612 */
2613DECL_FORCE_INLINE_THROW(uint32_t)
2614iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2615 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2616{
2617#ifdef RT_ARCH_AMD64
2618 /* movzx reg32, mem16 */
2619 if (iGprDst >= 8 || iGprBase >= 8)
2620 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2621 pCodeBuf[off++] = 0x0f;
2622 pCodeBuf[off++] = 0xb7;
2623 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2624 RT_NOREF(iGprTmp);
2625
2626#elif defined(RT_ARCH_ARM64)
2627 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2628 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2629
2630#else
2631# error "port me"
2632#endif
2633 return off;
2634}
2635
2636
2637/**
2638 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2639 * sign-extending the value to 64 bits.
2640 *
2641 * @note ARM64: Misaligned @a offDisp values and values not in the
2642 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2643 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2644 * caller does not heed this.
2645 */
2646DECL_FORCE_INLINE_THROW(uint32_t)
2647iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2648 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2649{
2650#ifdef RT_ARCH_AMD64
2651 /* movsx reg64, mem16 */
2652 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2653 pCodeBuf[off++] = 0x0f;
2654 pCodeBuf[off++] = 0xbf;
2655 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2656 RT_NOREF(iGprTmp);
2657
2658#elif defined(RT_ARCH_ARM64)
2659 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2660 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2661
2662#else
2663# error "port me"
2664#endif
2665 return off;
2666}
2667
2668
2669/**
2670 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2671 * sign-extending the value to 32 bits.
2672 *
2673 * @note ARM64: Misaligned @a offDisp values and values not in the
2674 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2675 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2676 * caller does not heed this.
2677 *
2678 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2679 */
2680DECL_FORCE_INLINE_THROW(uint32_t)
2681iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2682 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2683{
2684#ifdef RT_ARCH_AMD64
2685 /* movsx reg32, mem16 */
2686 if (iGprDst >= 8 || iGprBase >= 8)
2687 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2688 pCodeBuf[off++] = 0x0f;
2689 pCodeBuf[off++] = 0xbf;
2690 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2691 RT_NOREF(iGprTmp);
2692
2693#elif defined(RT_ARCH_ARM64)
2694 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2695 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2696
2697#else
2698# error "port me"
2699#endif
2700 return off;
2701}
2702
2703
2704/**
2705 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2706 *
2707 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2708 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2709 * same. Will assert / throw if caller does not heed this.
2710 *
2711 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2712 */
2713DECL_FORCE_INLINE_THROW(uint32_t)
2714iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2715 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2716{
2717#ifdef RT_ARCH_AMD64
2718 /* movzx reg32, mem8 */
2719 if (iGprDst >= 8 || iGprBase >= 8)
2720 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2721 pCodeBuf[off++] = 0x0f;
2722 pCodeBuf[off++] = 0xb6;
2723 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2724 RT_NOREF(iGprTmp);
2725
2726#elif defined(RT_ARCH_ARM64)
2727 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2728 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2729
2730#else
2731# error "port me"
2732#endif
2733 return off;
2734}
2735
2736
2737/**
2738 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2739 * sign-extending the value to 64 bits.
2740 *
2741 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2742 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2743 * same. Will assert / throw if caller does not heed this.
2744 */
2745DECL_FORCE_INLINE_THROW(uint32_t)
2746iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2747 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2748{
2749#ifdef RT_ARCH_AMD64
2750 /* movsx reg64, mem8 */
2751 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2752 pCodeBuf[off++] = 0x0f;
2753 pCodeBuf[off++] = 0xbe;
2754 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2755 RT_NOREF(iGprTmp);
2756
2757#elif defined(RT_ARCH_ARM64)
2758 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2759 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2760
2761#else
2762# error "port me"
2763#endif
2764 return off;
2765}
2766
2767
2768/**
2769 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2770 * sign-extending the value to 32 bits.
2771 *
2772 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2773 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2774 * same. Will assert / throw if caller does not heed this.
2775 *
2776 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2777 */
2778DECL_FORCE_INLINE_THROW(uint32_t)
2779iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2780 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2781{
2782#ifdef RT_ARCH_AMD64
2783 /* movsx reg32, mem8 */
2784 if (iGprDst >= 8 || iGprBase >= 8)
2785 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2786 pCodeBuf[off++] = 0x0f;
2787 pCodeBuf[off++] = 0xbe;
2788 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2789 RT_NOREF(iGprTmp);
2790
2791#elif defined(RT_ARCH_ARM64)
2792 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2793 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2794
2795#else
2796# error "port me"
2797#endif
2798 return off;
2799}
2800
2801
2802/**
2803 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2804 * sign-extending the value to 16 bits.
2805 *
2806 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2807 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2808 * same. Will assert / throw if caller does not heed this.
2809 *
2810 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2811 */
2812DECL_FORCE_INLINE_THROW(uint32_t)
2813iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2814 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2815{
2816#ifdef RT_ARCH_AMD64
2817 /* movsx reg32, mem8 */
2818 if (iGprDst >= 8 || iGprBase >= 8)
2819 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2820 pCodeBuf[off++] = 0x0f;
2821 pCodeBuf[off++] = 0xbe;
2822 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2823# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2824 /* and reg32, 0xffff */
2825 if (iGprDst >= 8)
2826 pCodeBuf[off++] = X86_OP_REX_B;
2827 pCodeBuf[off++] = 0x81;
2828 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2829 pCodeBuf[off++] = 0xff;
2830 pCodeBuf[off++] = 0xff;
2831 pCodeBuf[off++] = 0;
2832 pCodeBuf[off++] = 0;
2833# else
2834 /* movzx reg32, reg16 */
2835 if (iGprDst >= 8)
2836 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2837 pCodeBuf[off++] = 0x0f;
2838 pCodeBuf[off++] = 0xb7;
2839 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2840# endif
2841 RT_NOREF(iGprTmp);
2842
2843#elif defined(RT_ARCH_ARM64)
2844 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2845 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2846 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2847 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2848
2849#else
2850# error "port me"
2851#endif
2852 return off;
2853}
2854
2855
2856/**
2857 * Emits a 64-bit GPR store via a GPR base address with a displacement.
2858 *
2859 * @note ARM64: Misaligned @a offDisp values and values not in the
2860 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp) if
2861 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2862 * does not heed this.
2863 */
2864DECL_FORCE_INLINE_THROW(uint32_t)
2865iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2866 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2867{
2868#ifdef RT_ARCH_AMD64
2869 /* mov mem64, reg64 */
2870 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2871 pCodeBuf[off++] = 0x89;
2872 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2873 RT_NOREF(iGprTmp);
2874
2875#elif defined(RT_ARCH_ARM64)
2876 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2877 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
2878
2879#else
2880# error "port me"
2881#endif
2882 return off;
2883}
2884
2885
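/* Illustrative: the 64-bit store above with iGprSrc=rax, iGprBase=rbx,
   offDisp=8 emits 48 89 43 08 (mov [rbx+8], rax); on ARM64 an out-of-range
   or misaligned displacement needs the caller-provided iGprTmp. */
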
2886/**
2887 * Emits a 32-bit GPR store via a GPR base address with a displacement.
2888 *
2889 * @note ARM64: Misaligned @a offDisp values and values not in the
2890 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp) if
2891 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2892 * does not heed this.
2893 */
2894DECL_FORCE_INLINE_THROW(uint32_t)
2895iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2896 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2897{
2898#ifdef RT_ARCH_AMD64
2899 /* mov mem32, reg32 */
2900 if (iGprSrc >= 8 || iGprBase >= 8)
2901 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2902 pCodeBuf[off++] = 0x89;
2903 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2904 RT_NOREF(iGprTmp);
2905
2906#elif defined(RT_ARCH_ARM64)
2907 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2908 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
2909
2910#else
2911# error "port me"
2912#endif
2913 return off;
2914}
2915
2916
2917/**
2918 * Emits a 16-bit GPR store via a GPR base address with a displacement.
2919 *
2920 * @note ARM64: Misaligned @a offDisp values and values not in the
2921 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp) if
2922 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2923 * does not heed this.
2924 */
2925DECL_FORCE_INLINE_THROW(uint32_t)
2926iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2927 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2928{
2929#ifdef RT_ARCH_AMD64
2930 /* mov mem16, reg16 */
2931 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2932 if (iGprSrc >= 8 || iGprBase >= 8)
2933 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2934 pCodeBuf[off++] = 0x89;
2935 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2936 RT_NOREF(iGprTmp);
2937
2938#elif defined(RT_ARCH_ARM64)
2939 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2940 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
2941
2942#else
2943# error "port me"
2944#endif
2945 return off;
2946}
2947
2948
2949/**
2950 * Emits an 8-bit GPR store via a GPR base address with a displacement.
2951 *
2952 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2953 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2954 * same. Will assert / throw if caller does not heed this.
2955 */
2956DECL_FORCE_INLINE_THROW(uint32_t)
2957iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2958 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2959{
2960#ifdef RT_ARCH_AMD64
2961 /* mov mem8, reg8 */
2962 if (iGprSrc >= 8 || iGprBase >= 8)
2963 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2964 else if (iGprSrc >= 4)
2965 pCodeBuf[off++] = X86_OP_REX;
2966 pCodeBuf[off++] = 0x88;
2967 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2968 RT_NOREF(iGprTmp);
2969
2970#elif defined(RT_ARCH_ARM64)
2971 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2972 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
2973
2974#else
2975# error "port me"
2976#endif
2977 return off;
2978}
2979
2980
2981/**
2982 * Emits a 64-bit immediate store via a GPR base address with a displacement.
2983 *
2984 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0); on
2985 * AMD64 it depends on the immediate value.
2986 *
2987 * @note ARM64: Misaligned @a offDisp values and values not in the
2988 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp) if
2989 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2990 * does not heed this.
2991 */
2992DECL_FORCE_INLINE_THROW(uint32_t)
2993iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
2994 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2995{
2996#ifdef RT_ARCH_AMD64
2997 if ((int32_t)uImm == (int64_t)uImm)
2998 {
2999 /* mov mem64, imm32 (sign-extended) */
3000 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3001 pCodeBuf[off++] = 0xc7;
3002 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3003 pCodeBuf[off++] = RT_BYTE1(uImm);
3004 pCodeBuf[off++] = RT_BYTE2(uImm);
3005 pCodeBuf[off++] = RT_BYTE3(uImm);
3006 pCodeBuf[off++] = RT_BYTE4(uImm);
3007 }
3008 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3009 {
3010 /* require temporary register. */
3011 if (iGprImmTmp == UINT8_MAX)
3012 iGprImmTmp = iGprTmp;
3013 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3014 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3015 }
3016 else
3017# ifdef IEM_WITH_THROW_CATCH
3018 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3019# else
3020 AssertReleaseFailedStmt(off = UINT32_MAX);
3021# endif
3022
3023#elif defined(RT_ARCH_ARM64)
3024 if (uImm == 0)
3025 iGprImmTmp = ARMV8_A64_REG_XZR;
3026 else
3027 {
3028 Assert(iGprImmTmp < 31);
3029 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3030 }
3031 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3032
3033#else
3034# error "port me"
3035#endif
3036 return off;
3037}
3038
3039
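/* Usage sketch for the immediate store above (hypothetical register
   variables, not from the original source):
       off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x123456789a),
                                            iGprBase, iGprScratch);
   Values like -1 or 0x7fffffff instead take the AMD64 imm32 path and need no
   scratch register there, while ARM64 always needs one unless the value is 0. */
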
3040/**
3041 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3042 *
3043 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3044 *
3045 * @note ARM64: Misaligned @a offDisp values and values not in the
3046 * 0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3047 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3048 * does not heed this.
3049 */
3050DECL_FORCE_INLINE_THROW(uint32_t)
3051iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3052 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3053{
3054#ifdef RT_ARCH_AMD64
3055 /* mov mem32, imm32 */
3056 if (iGprBase >= 8)
3057 pCodeBuf[off++] = X86_OP_REX_B;
3058 pCodeBuf[off++] = 0xc7;
3059 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3060 pCodeBuf[off++] = RT_BYTE1(uImm);
3061 pCodeBuf[off++] = RT_BYTE2(uImm);
3062 pCodeBuf[off++] = RT_BYTE3(uImm);
3063 pCodeBuf[off++] = RT_BYTE4(uImm);
3064 RT_NOREF(iGprImmTmp, iGprTmp);
3065
3066#elif defined(RT_ARCH_ARM64)
3068 if (uImm == 0)
3069 iGprImmTmp = ARMV8_A64_REG_XZR;
3070 else
3071 {
3072 Assert(iGprImmTmp < 31);
3073 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3074 }
3075 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3076 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3077
3078#else
3079# error "port me"
3080#endif
3081 return off;
3082}
3083
3084
3085/**
3086 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3087 *
3088 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3089 *
3090 * @note ARM64: Misaligned @a offDisp values and values not in the
3091 *       -0x100...0x1ffe range will require a temporary register (@a iGprTmp) if
3092 *       @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3093 * does not heed this.
3094 */
3095DECL_FORCE_INLINE_THROW(uint32_t)
3096iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3097 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3098{
3099#ifdef RT_ARCH_AMD64
3100 /* mov mem16, imm16 */
3101 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3102 if (iGprBase >= 8)
3103 pCodeBuf[off++] = X86_OP_REX_B;
3104 pCodeBuf[off++] = 0xc7;
3105 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3106 pCodeBuf[off++] = RT_BYTE1(uImm);
3107 pCodeBuf[off++] = RT_BYTE2(uImm);
3108 RT_NOREF(iGprImmTmp, iGprTmp);
3109
3110#elif defined(RT_ARCH_ARM64)
3111 if (uImm == 0)
3112 iGprImmTmp = ARMV8_A64_REG_XZR;
3113 else
3114 {
3115 Assert(iGprImmTmp < 31);
3116 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3117 }
3118 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3119 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3120
3121#else
3122# error "port me"
3123#endif
3124 return off;
3125}
3126
3127
3128/**
3129 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3130 *
3131 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3132 *
3133 * @note ARM64: @a offDisp values not in the -0x100...0xfff range will require a
3134 *       temporary register (@a iGprTmp) if @a iGprImmTmp and @a iGprBase are the
3135 * same. Will assert / throw if caller does not heed this.
3136 */
3137DECL_FORCE_INLINE_THROW(uint32_t)
3138iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3139 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3140{
3141#ifdef RT_ARCH_AMD64
3142 /* mov mem8, imm8 */
3144 if (iGprBase >= 8)
3145 pCodeBuf[off++] = X86_OP_REX_B;
3146 pCodeBuf[off++] = 0xc6;
3147 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3148 pCodeBuf[off++] = uImm;
3149 RT_NOREF(iGprImmTmp, iGprTmp);
3150
3151#elif defined(RT_ARCH_ARM64)
3152 if (uImm == 0)
3153 iGprImmTmp = ARMV8_A64_REG_XZR;
3154 else
3155 {
3156 Assert(iGprImmTmp < 31);
3157 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3158 }
3159 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3160 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3161
3162#else
3163# error "port me"
3164#endif
3165 return off;
3166}
3167
3168
3169
3170/*********************************************************************************************************************************
3171* Subtractions and Additions *
3172*********************************************************************************************************************************/
3173
3174/**
3175 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3176 * @note The AMD64 version sets flags.
3177 */
3178DECL_INLINE_THROW(uint32_t)
3179iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3180{
3181#if defined(RT_ARCH_AMD64)
3182 /* sub Gv,Ev */
3183 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3184 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3185 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3186 pbCodeBuf[off++] = 0x2b;
3187 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3188
3189#elif defined(RT_ARCH_ARM64)
3190 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3191 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3192
3193#else
3194# error "Port me"
3195#endif
3196 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3197 return off;
3198}
3199
3200
3201/**
3202 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3203 * @note The AMD64 version sets flags.
3204 */
3205DECL_FORCE_INLINE(uint32_t)
3206iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3207{
3208#if defined(RT_ARCH_AMD64)
3209 /* sub Gv,Ev */
3210 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3211 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3212 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3213 pCodeBuf[off++] = 0x2b;
3214 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3215
3216#elif defined(RT_ARCH_ARM64)
3217 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3218
3219#else
3220# error "Port me"
3221#endif
3222 return off;
3223}
3224
3225
3226/**
3227 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3228 * @note The AMD64 version sets flags.
3229 */
3230DECL_INLINE_THROW(uint32_t)
3231iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3232{
3233#if defined(RT_ARCH_AMD64)
3234 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3235#elif defined(RT_ARCH_ARM64)
3236 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3237#else
3238# error "Port me"
3239#endif
3240 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3241 return off;
3242}
3243
3244
3245/**
3246 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3247 *
3248 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3249 *
3250 * @note Larger constants will require a temporary register. Failing to specify
3251 * one when needed will trigger fatal assertion / throw.
3252 */
3253DECL_FORCE_INLINE_THROW(uint32_t)
3254iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3255 uint8_t iGprTmp = UINT8_MAX)
3256{
3257#ifdef RT_ARCH_AMD64
3258 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3259 if (iSubtrahend == 1)
3260 {
3261 /* dec r/m64 */
3262 pCodeBuf[off++] = 0xff;
3263 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3264 }
3265 else if (iSubtrahend == -1)
3266 {
3267 /* inc r/m64 */
3268 pCodeBuf[off++] = 0xff;
3269 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3270 }
3271 else if ((int8_t)iSubtrahend == iSubtrahend)
3272 {
3273 /* sub r/m64, imm8 */
3274 pCodeBuf[off++] = 0x83;
3275 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3276 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3277 }
3278 else if ((int32_t)iSubtrahend == iSubtrahend)
3279 {
3280 /* sub r/m64, imm32 */
3281 pCodeBuf[off++] = 0x81;
3282 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3283 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3284 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3285 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3286 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3287 }
3288 else if (iGprTmp != UINT8_MAX)
3289 {
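 /* Note: the 'off - 1' below backs up over the REX.W prefix already emitted at
    the top of the function; iemNativeEmitLoadGprImmEx emits its own prefixes. */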
3290 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3291 /* sub r/m64, r64 */
3292 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3293 pCodeBuf[off++] = 0x29;
3294 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3295 }
3296 else
3297# ifdef IEM_WITH_THROW_CATCH
3298 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3299# else
3300 AssertReleaseFailedStmt(off = UINT32_MAX);
3301# endif
3302
3303#elif defined(RT_ARCH_ARM64)
3304 uint64_t const uAbsSubtrahend = (uint64_t)RT_ABS(iSubtrahend);
3305 if (uAbsSubtrahend < 4096)
3306 {
3307 if (iSubtrahend >= 0)
3308 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsSubtrahend);
3309 else
3310 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsSubtrahend);
3311 }
3312 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3313 {
3314 if (iSubtrahend >= 0)
3315 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)(uAbsSubtrahend >> 12),
3316 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3317 else
3318 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)(uAbsSubtrahend >> 12),
3319 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3320 }
3321 else if (iGprTmp != UINT8_MAX)
3322 {
3323 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3324 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3325 }
3326 else
3327# ifdef IEM_WITH_THROW_CATCH
3328 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3329# else
3330 AssertReleaseFailedStmt(off = UINT32_MAX);
3331# endif
3332
3333#else
3334# error "Port me"
3335#endif
3336 return off;
3337}
3338
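/* Instruction selection at a glance (a sketch with hypothetical register
   numbers; AMD64 encodings shown in the comments):
       off = iemNativeEmitSubGprImmEx(pCodeBuf, off, 0, 1);      // dec rax
       off = iemNativeEmitSubGprImmEx(pCodeBuf, off, 0, 100);    // sub rax, imm8
       off = iemNativeEmitSubGprImmEx(pCodeBuf, off, 0, 100000); // sub rax, imm32
   On ARM64 anything outside the uimm12 and uimm12<<12 forms requires the
   iGprTmp argument, otherwise the call asserts / throws. */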
3339
3340/**
3341 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3342 *
3343 * @note Larger constants will require a temporary register. Failing to specify
3344 * one when needed will trigger fatal assertion / throw.
3345 */
3346DECL_INLINE_THROW(uint32_t)
3347iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3348 uint8_t iGprTmp = UINT8_MAX)
3350{
3351#ifdef RT_ARCH_AMD64
3352 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3353#elif defined(RT_ARCH_ARM64)
3354 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3355#else
3356# error "Port me"
3357#endif
3358 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3359 return off;
3360}
3361
3362
3363/**
3364 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3365 *
3366 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3367 *
3368 * @note ARM64: Larger constants will require a temporary register. Failing to
3369 * specify one when needed will trigger fatal assertion / throw.
3370 */
3371DECL_FORCE_INLINE_THROW(uint32_t)
3372iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3373 uint8_t iGprTmp = UINT8_MAX)
3374{
3375#ifdef RT_ARCH_AMD64
3376 if (iGprDst >= 8)
3377 pCodeBuf[off++] = X86_OP_REX_B;
3378 if (iSubtrahend == 1)
3379 {
3380 /* dec r/m32 */
3381 pCodeBuf[off++] = 0xff;
3382 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3383 }
3384 else if (iSubtrahend == -1)
3385 {
3386 /* inc r/m32 */
3387 pCodeBuf[off++] = 0xff;
3388 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3389 }
3390 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3391 {
3392 /* sub r/m32, imm8 */
3393 pCodeBuf[off++] = 0x83;
3394 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3395 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3396 }
3397 else
3398 {
3399 /* sub r/m32, imm32 */
3400 pCodeBuf[off++] = 0x81;
3401 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3402 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3403 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3404 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3405 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3406 }
3407 RT_NOREF(iGprTmp);
3408
3409#elif defined(RT_ARCH_ARM64)
3410 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3411 if (uAbsSubtrahend < 4096)
3412 {
3413 if (iSubtrahend >= 0)
3414 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3415 else
3416 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3417 }
3418 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3419 {
3420 if (iSubtrahend >= 0)
3421 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3422 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3423 else
3424 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3425 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3426 }
3427 else if (iGprTmp != UINT8_MAX)
3428 {
3429 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3430 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3431 }
3432 else
3433# ifdef IEM_WITH_THROW_CATCH
3434 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3435# else
3436 AssertReleaseFailedStmt(off = UINT32_MAX);
3437# endif
3438
3439#else
3440# error "Port me"
3441#endif
3442 return off;
3443}
3444
3445
3446/**
3447 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3448 *
3449 * @note ARM64: Larger constants will require a temporary register. Failing to
3450 * specify one when needed will trigger fatal assertion / throw.
3451 */
3452DECL_INLINE_THROW(uint32_t)
3453iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3454 uint8_t iGprTmp = UINT8_MAX)
3456{
3457#ifdef RT_ARCH_AMD64
3458 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3459#elif defined(RT_ARCH_ARM64)
3460 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3461#else
3462# error "Port me"
3463#endif
3464 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3465 return off;
3466}
3467
3468
3469/**
3470 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3471 *
3472 * This will optimize using DEC/INC/whatever and ARM64 will not set flags,
3473 * so it is not suitable as a base for conditional jumps.
3474 *
3475 * @note AMD64: Will only update the lower 16 bits of the register.
3476 * @note ARM64: Will update the entire register.
3477 * @note ARM64: Larger constants will require a temporary register. Failing to
3478 * specify one when needed will trigger fatal assertion / throw.
3479 */
3480DECL_FORCE_INLINE_THROW(uint32_t)
3481iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3482 uint8_t iGprTmp = UINT8_MAX)
3483{
3484#ifdef RT_ARCH_AMD64
3485 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3486 if (iGprDst >= 8)
3487 pCodeBuf[off++] = X86_OP_REX_B;
3488 if (iSubtrahend == 1)
3489 {
3490 /* dec r/m16 */
3491 pCodeBuf[off++] = 0xff;
3492 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3493 }
3494 else if (iSubtrahend == -1)
3495 {
3496 /* inc r/m16 */
3497 pCodeBuf[off++] = 0xff;
3498 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3499 }
3500 else if ((int8_t)iSubtrahend == iSubtrahend)
3501 {
3502 /* sub r/m16, imm8 */
3503 pCodeBuf[off++] = 0x83;
3504 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3505 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3506 }
3507 else
3508 {
3509 /* sub r/m16, imm16 */
3510 pCodeBuf[off++] = 0x81;
3511 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3512 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3513 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3514 }
3515 RT_NOREF(iGprTmp);
3516
3517#elif defined(RT_ARCH_ARM64)
3518 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3519 if (uAbsSubtrahend < 4096)
3520 {
3521 if (iSubtrahend >= 0)
3522 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3523 else
3524 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3525 }
3526 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3527 {
3528 if (iSubtrahend >= 0)
3529 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3530 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3531 else
3532 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3533 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3534 }
3535 else if (iGprTmp != UINT8_MAX)
3536 {
3537 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3538 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3539 }
3540 else
3541# ifdef IEM_WITH_THROW_CATCH
3542 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3543# else
3544 AssertReleaseFailedStmt(off = UINT32_MAX);
3545# endif
3546 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3547
3548#else
3549# error "Port me"
3550#endif
3551 return off;
3552}
3553
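/* Note on the ARM64 tail above: the trailing AND with imms=15 builds a 0xffff
   mask, which is what makes "will update the entire register" hold - bits 16
   thru 63 end up zero after the full-width subtraction.  A sketch
   (hypothetical register number):
       off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, 4, 1); // dec word on AMD64
 */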
3554
3555/**
3556 * Emits adding a 64-bit GPR to another, storing the result in the first.
3557 * @note The AMD64 version sets flags.
3558 */
3559DECL_FORCE_INLINE(uint32_t)
3560iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3561{
3562#if defined(RT_ARCH_AMD64)
3563 /* add Gv,Ev */
3564 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3565 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3566 pCodeBuf[off++] = 0x03;
3567 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3568
3569#elif defined(RT_ARCH_ARM64)
3570 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3571
3572#else
3573# error "Port me"
3574#endif
3575 return off;
3576}
3577
3578
3579/**
3580 * Emits adding a 64-bit GPR to another, storing the result in the first.
3581 * @note The AMD64 version sets flags.
3582 */
3583DECL_INLINE_THROW(uint32_t)
3584iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3585{
3586#if defined(RT_ARCH_AMD64)
3587 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3588#elif defined(RT_ARCH_ARM64)
3589 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3590#else
3591# error "Port me"
3592#endif
3593 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3594 return off;
3595}
3596
3597
3598/**
3599 * Emits adding a 32-bit GPR to another, storing the result in the first.
3600 * @note The AMD64 version sets flags.
3601 */
3602DECL_FORCE_INLINE(uint32_t)
3603iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3604{
3605#if defined(RT_ARCH_AMD64)
3606 /* add Gv,Ev */
3607 if (iGprDst >= 8 || iGprAddend >= 8)
3608 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3609 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3610 pCodeBuf[off++] = 0x03;
3611 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3612
3613#elif defined(RT_ARCH_ARM64)
3614 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3615
3616#else
3617# error "Port me"
3618#endif
3619 return off;
3620}
3621
3622
3623/**
3624 * Emits adding a 32-bit GPR to another, storing the result in the first.
3625 * @note The AMD64 version sets flags.
3626 */
3627DECL_INLINE_THROW(uint32_t)
3628iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3629{
3630#if defined(RT_ARCH_AMD64)
3631 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3632#elif defined(RT_ARCH_ARM64)
3633 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3634#else
3635# error "Port me"
3636#endif
3637 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3638 return off;
3639}
3640
3641
3642/**
3643 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3644 */
3645DECL_INLINE_THROW(uint32_t)
3646iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3647{
3648#if defined(RT_ARCH_AMD64)
3649 /* add or inc */
3650 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3651 if (iImm8 != 1)
3652 {
3653 pCodeBuf[off++] = 0x83;
3654 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3655 pCodeBuf[off++] = (uint8_t)iImm8;
3656 }
3657 else
3658 {
3659 pCodeBuf[off++] = 0xff;
3660 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3661 }
3662
3663#elif defined(RT_ARCH_ARM64)
3664 if (iImm8 >= 0)
3665 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3666 else
3667 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3668
3669#else
3670# error "Port me"
3671#endif
3672 return off;
3673}
3674
3675
3676/**
3677 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3678 */
3679DECL_INLINE_THROW(uint32_t)
3680iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3681{
3682#if defined(RT_ARCH_AMD64)
3683 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3684#elif defined(RT_ARCH_ARM64)
3685 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3686#else
3687# error "Port me"
3688#endif
3689 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3690 return off;
3691}
3692
3693
3694/**
3695 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3696 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3697 */
3698DECL_FORCE_INLINE(uint32_t)
3699iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3700{
3701#if defined(RT_ARCH_AMD64)
3702 /* add or inc */
3703 if (iGprDst >= 8)
3704 pCodeBuf[off++] = X86_OP_REX_B;
3705 if (iImm8 != 1)
3706 {
3707 pCodeBuf[off++] = 0x83;
3708 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3709 pCodeBuf[off++] = (uint8_t)iImm8;
3710 }
3711 else
3712 {
3713 pCodeBuf[off++] = 0xff;
3714 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3715 }
3716
3717#elif defined(RT_ARCH_ARM64)
3718 if (iImm8 >= 0)
3719 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
3720 else
3721 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
3722
3723#else
3724# error "Port me"
3725#endif
3726 return off;
3727}
3728
3729
3730/**
3731 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3732 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3733 */
3734DECL_INLINE_THROW(uint32_t)
3735iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3736{
3737#if defined(RT_ARCH_AMD64)
3738 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3739#elif defined(RT_ARCH_ARM64)
3740 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3741#else
3742# error "Port me"
3743#endif
3744 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3745 return off;
3746}
3747
3748
3749/**
3750 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3751 *
3752 * @note Will assert / throw if @a iGprTmp is not specified when needed.
3753 */
3754DECL_FORCE_INLINE_THROW(uint32_t)
3755iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
3756{
3757#if defined(RT_ARCH_AMD64)
3758 if ((int8_t)iAddend == iAddend)
3759 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3760
3761 if ((int32_t)iAddend == iAddend)
3762 {
3763 /* add grp, imm32 */
3764 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3765 pCodeBuf[off++] = 0x81;
3766 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3767 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3768 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3769 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3770 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3771 }
3772 else if (iGprTmp != UINT8_MAX)
3773 {
3774 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3775
3776 /* add dst, tmpreg */
3777 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3778 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
3779 pCodeBuf[off++] = 0x03;
3780 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
3781 }
3782 else
3783# ifdef IEM_WITH_THROW_CATCH
3784 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3785# else
3786 AssertReleaseFailedStmt(off = UINT32_MAX);
3787# endif
3788
3789#elif defined(RT_ARCH_ARM64)
3790 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
3791 if (uAbsAddend < 4096)
3792 {
3793 if (iAddend >= 0)
3794 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3795 else
3796 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3797 }
3798 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3799 {
3800 if (iAddend >= 0)
3801 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)(uAbsAddend >> 12),
3802 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
3803 else
3804 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)(uAbsAddend >> 12),
3805 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
3806 }
3807 else if (iGprTmp != UINT8_MAX)
3808 {
3809 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3810 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
3811 }
3812 else
3813# ifdef IEM_WITH_THROW_CATCH
3814 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3815# else
3816 AssertReleaseFailedStmt(off = UINT32_MAX);
3817# endif
3818
3819#else
3820# error "Port me"
3821#endif
3822 return off;
3823}
3824
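/* A minimal sketch (hypothetical register numbers): small addends collapse
   into the imm8 form via iemNativeEmitAddGprImm8Ex, 32-bit ones take the
   imm32 form, and full 64-bit addends need the temporary register:
       off = iemNativeEmitAddGprImmEx(pCodeBuf, off, 0, 8);   // add rax, 8
       off = iemNativeEmitAddGprImmEx(pCodeBuf, off, 0,
                                      INT64_C(1) << 40,
                                      9);                     // iGprTmp needed here
 */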
3825
3826/**
3827 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3828 */
3829DECL_INLINE_THROW(uint32_t)
3830iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
3831{
3832#if defined(RT_ARCH_AMD64)
3833 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3834 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
3835
3836 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
3837 {
3838 /* add grp, imm32 */
3839 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3840 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3841 pbCodeBuf[off++] = 0x81;
3842 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3843 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3844 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3845 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3846 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3847 }
3848 else
3849 {
3850 /* Best to use a temporary register to deal with this in the simplest way: */
3851 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3852
3853 /* add dst, tmpreg */
3854 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3855 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3856 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
3857 pbCodeBuf[off++] = 0x03;
3858 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
3859
3860 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3861 }
3862
3863#elif defined(RT_ARCH_ARM64)
3864 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
3865 {
3866 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3867 if (iAddend >= 0)
3868 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
3869 else
3870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
3871 }
3872 else
3873 {
3874 /* Use temporary register for the immediate. */
3875 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3876
3877 /* add gprdst, gprdst, tmpreg */
3878 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3879 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
3880
3881 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3882 }
3883
3884#else
3885# error "Port me"
3886#endif
3887 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3888 return off;
3889}
3890
3891
3892/**
3893 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
3894 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3895 * @note For ARM64 the iAddend value must be in the range 0x000..0xfff,
3896 * or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
3897 * the lower 12 bits always zero). The negative ranges are also allowed,
3898 *       making it behave like a subtraction. If the constant does not conform,
3899 *       the emitter will assert / throw.
3900 */
3901DECL_FORCE_INLINE_THROW(uint32_t)
3902iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
3903{
3904#if defined(RT_ARCH_AMD64)
3905 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3906 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3907
3908 /* add grp, imm32 */
3909 if (iGprDst >= 8)
3910 pCodeBuf[off++] = X86_OP_REX_B;
3911 pCodeBuf[off++] = 0x81;
3912 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3913 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3914 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3915 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3916 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3917
3918#elif defined(RT_ARCH_ARM64)
3919 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
3920 if (uAbsAddend <= 0xfff)
3921 {
3922 if (iAddend >= 0)
3923 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3924 else
3925 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3926 }
3927 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3928 {
3929 if (iAddend >= 0)
3930 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3931 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
3932 else
3933 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3934 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
3935 }
3936 else
3937# ifdef IEM_WITH_THROW_CATCH
3938 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3939# else
3940 AssertReleaseFailedStmt(off = UINT32_MAX);
3941# endif
3942
3943#else
3944# error "Port me"
3945#endif
3946 return off;
3947}
3948
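/* Examples for the ARM64 range note above (a sketch): 0x000...0xfff encodes
   directly, 0x1000...0xfff000 with the low 12 bits clear uses the shifted
   form, and negated values of either become subtractions.  Thus:
       off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, 2, 0x5000); // fine, shifted form
       off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, 2, -0x30);  // fine, becomes sub
   while 0x5001 would assert / throw on ARM64 (AMD64 takes any imm32). */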
3949
3950/**
3951 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
3952 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3953 */
3954DECL_INLINE_THROW(uint32_t)
3955iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
3956{
3957#if defined(RT_ARCH_AMD64)
3958 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
3959
3960#elif defined(RT_ARCH_ARM64)
3961 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
3962 {
3963 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3964 if (iAddend >= 0)
3965 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
3966 else
3967 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
3968 }
3969 else
3970 {
3971 /* Use temporary register for the immediate. */
3972 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
3973
3974 /* add gprdst, gprdst, tmpreg */
3975 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3976 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
3977
3978 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3979 }
3980
3981#else
3982# error "Port me"
3983#endif
3984 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3985 return off;
3986}
3987
3988
3989/**
3990 * Emits a 16-bit GPR add with a signed immediate addend.
3991 *
3992 * This will optimize using INC/DEC/whatever and ARM64 will not set flags,
3993 * so it is not suitable as a base for conditional jumps.
3994 *
3995 * @note AMD64: Will only update the lower 16 bits of the register.
3996 * @note ARM64: Will update the entire register.
3997 * @note ARM64: Larger constants will require a temporary register. Failing to
3998 * specify one when needed will trigger fatal assertion / throw.
3999 * @sa iemNativeEmitSubGpr16ImmEx
4000 */
4001DECL_FORCE_INLINE_THROW(uint32_t)
4002iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
4003 uint8_t iGprTmp = UINT8_MAX)
4004{
4005#ifdef RT_ARCH_AMD64
4006 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4007 if (iGprDst >= 8)
4008 pCodeBuf[off++] = X86_OP_REX_B;
4009 if (iAddend == 1)
4010 {
4011 /* inc r/m16 */
4012 pCodeBuf[off++] = 0xff;
4013 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4014 }
4015 else if (iAddend == -1)
4016 {
4017 /* dec r/m16 */
4018 pCodeBuf[off++] = 0xff;
4019 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4020 }
4021 else if ((int8_t)iAddend == iAddend)
4022 {
4023 /* add r/m16, imm8 */
4024 pCodeBuf[off++] = 0x83;
4025 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4026 pCodeBuf[off++] = (uint8_t)iAddend;
4027 }
4028 else
4029 {
4030 /* add r/m16, imm16 */
4031 pCodeBuf[off++] = 0x81;
4032 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4033 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4034 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4035 }
4036 RT_NOREF(iGprTmp);
4037
4038#elif defined(RT_ARCH_ARM64)
4039 uint32_t uAbsAddend = RT_ABS(iAddend);
4040 if (uAbsAddend < 4096)
4041 {
4042 if (iAddend >= 0)
4043 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4044 else
4045 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4046 }
4047 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4048 {
4049 if (iAddend >= 0)
4050 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4051 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4052 else
4053 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4054 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4055 }
4056 else if (iGprTmp != UINT8_MAX)
4057 {
4058 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
4059 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4060 }
4061 else
4062# ifdef IEM_WITH_THROW_CATCH
4063 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4064# else
4065 AssertReleaseFailedStmt(off = UINT32_MAX);
4066# endif
4067 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4068
4069#else
4070# error "Port me"
4071#endif
4072 return off;
4073}
4074
4075
4076
4077/**
4078 * Adds two 64-bit GPRs together, storing the result in a third register.
4079 */
4080DECL_FORCE_INLINE(uint32_t)
4081iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4082{
4083#ifdef RT_ARCH_AMD64
4084 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4085 {
4086 /** @todo consider LEA */
4087 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4088 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4089 }
4090 else
4091 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4092
4093#elif defined(RT_ARCH_ARM64)
4094 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4095
4096#else
4097# error "Port me!"
4098#endif
4099 return off;
4100}
4101
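/* A usage sketch (hypothetical register numbers): the three-operand form maps
   to a single ADD on ARM64, while AMD64 lowers it to mov+add unless the
   destination already aliases one of the addends:
       off = iemNativeEmitGprEqGprPlusGprEx(pCodeBuf, off, 2, 0, 1); // r2 = r0 + r1
 */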
4102
4103
4104/**
4105 * Adds two 32-bit GPRs together, storing the result in a third register.
4106 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4107 */
4108DECL_FORCE_INLINE(uint32_t)
4109iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4110{
4111#ifdef RT_ARCH_AMD64
4112 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4113 {
4114 /** @todo consider LEA */
4115 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4116 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4117 }
4118 else
4119 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4120
4121#elif defined(RT_ARCH_ARM64)
4122 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4123
4124#else
4125# error "Port me!"
4126#endif
4127 return off;
4128}
4129
4130
4131/**
4132 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4133 * third register.
4134 *
4135 * @note The ARM64 version does not work for non-trivial constants if the
4136 * two registers are the same. Will assert / throw exception.
4137 */
4138DECL_FORCE_INLINE_THROW(uint32_t)
4139iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4140{
4141#ifdef RT_ARCH_AMD64
4142 /** @todo consider LEA */
4143 if ((int8_t)iImmAddend == iImmAddend)
4144 {
4145 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4146 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4147 }
4148 else
4149 {
4150 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4151 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4152 }
4153
4154#elif defined(RT_ARCH_ARM64)
4155 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4156 if (uAbsImmAddend < 4096)
4157 {
4158 if (iImmAddend >= 0)
4159 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4160 else
4161 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4162 }
4163 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4164 {
4165 if (iImmAddend >= 0)
4166 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, (uint32_t)(uAbsImmAddend >> 12), true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4167 else
4168 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, (uint32_t)(uAbsImmAddend >> 12), true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4169 }
4170 else if (iGprDst != iGprAddend)
4171 {
4172 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4173 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4174 }
4175 else
4176# ifdef IEM_WITH_THROW_CATCH
4177 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4178# else
4179 AssertReleaseFailedStmt(off = UINT32_MAX);
4180# endif
4181
4182#else
4183# error "Port me!"
4184#endif
4185 return off;
4186}
4187
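/* A minimal sketch (hypothetical register numbers) of the aliasing caveat
   noted above: with distinct registers any constant works (load + add),
   whereas with iGprDst == iGprAddend only the uimm12 / uimm12<<12 forms avoid
   the assert / throw path on ARM64:
       off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, 2, 0, 0x123456); // ok, r2 != r0
 */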
4188
4189/**
4190 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4191 * third register.
4192 *
4193 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4194 *
4195 * @note The ARM64 version does not work for non-trivial constants if the
4196 * two registers are the same. Will assert / throw exception.
4197 */
4198DECL_FORCE_INLINE_THROW(uint32_t)
4199iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4200{
4201#ifdef RT_ARCH_AMD64
4202 /** @todo consider LEA */
4203 if ((int8_t)iImmAddend == iImmAddend)
4204 {
4205 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4206 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4207 }
4208 else
4209 {
4210 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4211 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4212 }
4213
4214#elif defined(RT_ARCH_ARM64)
4215 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4216 if (uAbsImmAddend < 4096)
4217 {
4218 if (iImmAddend >= 0)
4219 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4220 else
4221 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4222 }
4223 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4224 {
4225 if (iImmAddend >= 0)
4226 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4227 else
4228 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4229 }
4230 else if (iGprDst != iGprAddend)
4231 {
4232 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4233 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4234 }
4235 else
4236# ifdef IEM_WITH_THROW_CATCH
4237 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4238# else
4239 AssertReleaseFailedStmt(off = UINT32_MAX);
4240# endif
4241
4242#else
4243# error "Port me!"
4244#endif
4245 return off;
4246}
4247
4248
4249/*********************************************************************************************************************************
4250* Unary Operations *
4251*********************************************************************************************************************************/
4252
4253/**
4254 * Emits code for two's complement negation of a 64-bit GPR.
4255 */
4256DECL_FORCE_INLINE_THROW(uint32_t)
4257iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4258{
4259#if defined(RT_ARCH_AMD64)
4260 /* neg Ev */
4261 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4262 pCodeBuf[off++] = 0xf7;
4263 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4264
4265#elif defined(RT_ARCH_ARM64)
4266 /* sub dst, xzr, dst */
4267 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4268
4269#else
4270# error "Port me"
4271#endif
4272 return off;
4273}
4274
4275
4276/**
4277 * Emits code for two's complement negation of a 64-bit GPR.
4278 */
4279DECL_INLINE_THROW(uint32_t)
4280iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4281{
4282#if defined(RT_ARCH_AMD64)
4283 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4284#elif defined(RT_ARCH_ARM64)
4285 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4286#else
4287# error "Port me"
4288#endif
4289 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4290 return off;
4291}
4292
4293
4294/**
4295 * Emits code for two's complement negation of a 32-bit GPR.
4296 * @note Bits 32 thru 63 are set to zero.
4297 */
4298DECL_FORCE_INLINE_THROW(uint32_t)
4299iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4300{
4301#if defined(RT_ARCH_AMD64)
4302 /* neg Ev */
4303 if (iGprDst >= 8)
4304 pCodeBuf[off++] = X86_OP_REX_B;
4305 pCodeBuf[off++] = 0xf7;
4306 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4307
4308#elif defined(RT_ARCH_ARM64)
4309 /* sub dst, xzr, dst */
4310 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4311
4312#else
4313# error "Port me"
4314#endif
4315 return off;
4316}
4317
4318
4319/**
4320 * Emits code for two's complement negation of a 32-bit GPR.
4321 * @note Bits 32 thru 63 are set to zero.
4322 */
4323DECL_INLINE_THROW(uint32_t)
4324iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4325{
4326#if defined(RT_ARCH_AMD64)
4327 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4328#elif defined(RT_ARCH_ARM64)
4329 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4330#else
4331# error "Port me"
4332#endif
4333 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4334 return off;
4335}
4336
4337
4338
4339/*********************************************************************************************************************************
4340* Bit Operations *
4341*********************************************************************************************************************************/
4342
4343/**
4344 * Emits code for clearing bits 16 thru 63 in the GPR.
4345 */
4346DECL_INLINE_THROW(uint32_t)
4347iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4348{
4349#if defined(RT_ARCH_AMD64)
4350 /* movzx Gv,Ew */
4351 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4352 if (iGprDst >= 8)
4353 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4354 pbCodeBuf[off++] = 0x0f;
4355 pbCodeBuf[off++] = 0xb7;
4356 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4357
4358#elif defined(RT_ARCH_ARM64)
4359 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4360# if 1
4361 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4362# else
4363 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4364 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4365# endif
4366#else
4367# error "Port me"
4368#endif
4369 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4370 return off;
4371}
4372
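/* Equivalence note (a sketch, hypothetical register number): both host paths
   implement an in-place 16-bit zero extension, so after
       off = iemNativeEmitClear16UpGpr(pReNative, off, 3);
   register 3 holds its previous low word zero extended to 64 bits, whether
   done via movzx (AMD64) or uxth (ARM64). */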
4373
4374/**
4375 * Emits code for AND'ing two 64-bit GPRs.
4376 *
4377 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4378 * and ARM64 hosts.
4379 */
4380DECL_FORCE_INLINE(uint32_t)
4381iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4382{
4383#if defined(RT_ARCH_AMD64)
4384 /* and Gv, Ev */
4385 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4386 pCodeBuf[off++] = 0x23;
4387 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4388 RT_NOREF(fSetFlags);
4389
4390#elif defined(RT_ARCH_ARM64)
4391 if (!fSetFlags)
4392 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4393 else
4394 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4395
4396#else
4397# error "Port me"
4398#endif
4399 return off;
4400}
4401
4402
4403/**
4404 * Emits code for AND'ing two 64-bit GPRs.
4405 *
4406 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4407 * and ARM64 hosts.
4408 */
4409DECL_INLINE_THROW(uint32_t)
4410iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4411{
4412#if defined(RT_ARCH_AMD64)
4413 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4414#elif defined(RT_ARCH_ARM64)
4415 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4416#else
4417# error "Port me"
4418#endif
4419 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4420 return off;
4421}
4422
4423
4424/**
4425 * Emits code for AND'ing two 32-bit GPRs.
4426 */
4427DECL_FORCE_INLINE(uint32_t)
4428iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4429{
4430#if defined(RT_ARCH_AMD64)
4431 /* and Gv, Ev */
4432 if (iGprDst >= 8 || iGprSrc >= 8)
4433 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4434 pCodeBuf[off++] = 0x23;
4435 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4436 RT_NOREF(fSetFlags);
4437
4438#elif defined(RT_ARCH_ARM64)
4439 if (!fSetFlags)
4440 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4441 else
4442 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4443
4444#else
4445# error "Port me"
4446#endif
4447 return off;
4448}
4449
4450
4451/**
4452 * Emits code for AND'ing two 32-bit GPRs.
4453 */
4454DECL_INLINE_THROW(uint32_t)
4455iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4456{
4457#if defined(RT_ARCH_AMD64)
4458 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4459#elif defined(RT_ARCH_ARM64)
4460 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4461#else
4462# error "Port me"
4463#endif
4464 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4465 return off;
4466}
4467
4468
4469/**
4470 * Emits code for AND'ing a 64-bit GPR with a constant.
4471 *
4472 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4473 * and ARM64 hosts.
4474 */
4475DECL_INLINE_THROW(uint32_t)
4476iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4477{
4478#if defined(RT_ARCH_AMD64)
4479 if ((int64_t)uImm == (int8_t)uImm)
4480 {
4481 /* and Ev, imm8 */
4482 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4483 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4484 pbCodeBuf[off++] = 0x83;
4485 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4486 pbCodeBuf[off++] = (uint8_t)uImm;
4487 }
4488 else if ((int64_t)uImm == (int32_t)uImm)
4489 {
4490 /* and Ev, imm32 */
4491 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4492 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4493 pbCodeBuf[off++] = 0x81;
4494 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4495 pbCodeBuf[off++] = RT_BYTE1(uImm);
4496 pbCodeBuf[off++] = RT_BYTE2(uImm);
4497 pbCodeBuf[off++] = RT_BYTE3(uImm);
4498 pbCodeBuf[off++] = RT_BYTE4(uImm);
4499 }
4500 else
4501 {
4502 /* Use temporary register for the 64-bit immediate. */
4503 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4504 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4505 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4506 }
4507 RT_NOREF(fSetFlags);
4508
4509#elif defined(RT_ARCH_ARM64)
4510 uint32_t uImmR = 0;
4511 uint32_t uImmNandS = 0;
4512 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4513 {
4514 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4515 if (!fSetFlags)
4516 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4517 else
4518 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4519 }
4520 else
4521 {
4522 /* Use temporary register for the 64-bit immediate. */
4523 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4524 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4525 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4526 }
4527
4528#else
4529# error "Port me"
4530#endif
4531 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4532 return off;
4533}
4534
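/* ARM64 side note (a sketch): the immediate form is only usable when the mask
   is a repeating bit pattern encodable as N:immr:imms, which is what
   Armv8A64ConvertMask64ToImmRImmS tests for.  E.g. 0x00ffffff00ffffff encodes
   as a single AND, while 0x123456789abcdef0 does not and falls back to the
   temporary register path:
       off = iemNativeEmitAndGprByImm(pReNative, off, 5, UINT64_C(0x00ffffff00ffffff));
 */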
4535
4536/**
4537 * Emits code for AND'ing a 32-bit GPR with a constant.
4538 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4539 * @note For ARM64 this only supports @a uImm values that can be expressed using
4540 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4541 * make sure this is possible!
4542 */
4543DECL_FORCE_INLINE_THROW(uint32_t)
4544iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4545{
4546#if defined(RT_ARCH_AMD64)
4547 /* and Ev, imm */
4548 if (iGprDst >= 8)
4549 pCodeBuf[off++] = X86_OP_REX_B;
4550 if ((int32_t)uImm == (int8_t)uImm)
4551 {
4552 pCodeBuf[off++] = 0x83;
4553 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4554 pCodeBuf[off++] = (uint8_t)uImm;
4555 }
4556 else
4557 {
4558 pCodeBuf[off++] = 0x81;
4559 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4560 pCodeBuf[off++] = RT_BYTE1(uImm);
4561 pCodeBuf[off++] = RT_BYTE2(uImm);
4562 pCodeBuf[off++] = RT_BYTE3(uImm);
4563 pCodeBuf[off++] = RT_BYTE4(uImm);
4564 }
4565 RT_NOREF(fSetFlags);
4566
4567#elif defined(RT_ARCH_ARM64)
4568 uint32_t uImmR = 0;
4569 uint32_t uImmNandS = 0;
4570 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4571 {
4572 if (!fSetFlags)
4573 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4574 else
4575 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4576 }
4577 else
4578# ifdef IEM_WITH_THROW_CATCH
4579 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4580# else
4581 AssertReleaseFailedStmt(off = UINT32_MAX);
4582# endif
4583
4584#else
4585# error "Port me"
4586#endif
4587 return off;
4588}
4589
4590
4591/**
4592 * Emits code for AND'ing a 32-bit GPR with a constant.
4593 *
4594 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4595 */
4596DECL_INLINE_THROW(uint32_t)
4597iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4598{
4599#if defined(RT_ARCH_AMD64)
4600 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4601
4602#elif defined(RT_ARCH_ARM64)
4603 uint32_t uImmR = 0;
4604 uint32_t uImmNandS = 0;
4605 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4606 {
4607 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4608 if (!fSetFlags)
4609 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4610 else
4611 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4612 }
4613 else
4614 {
4615 /* Use temporary register for the 32-bit immediate. */
4616 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4617 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4618 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4619 }
4620
4621#else
4622# error "Port me"
4623#endif
4624 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4625 return off;
4626}
4627
4628
4629/**
4630 * Emits code for AND'ing a 64-bit GPR with a constant.
4631 *
4632 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
4633 *       encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
4634 *       the same. AMD64 always requires the two registers to differ, as the
4634 *       constant is loaded into @a iGprDst first.
4635 */
4636DECL_FORCE_INLINE_THROW(uint32_t)
4637iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4638 bool fSetFlags = false)
4639{
4640#if defined(RT_ARCH_AMD64)
4641 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4642 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4643 RT_NOREF(fSetFlags);
4644
4645#elif defined(RT_ARCH_ARM64)
4646 uint32_t uImmR = 0;
4647 uint32_t uImmNandS = 0;
4648 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4649 {
4650 if (!fSetFlags)
4651 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4652 else
4653 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4654 }
4655 else if (iGprDst != iGprSrc)
4656 {
4657 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4658 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4659 }
4660 else
4661# ifdef IEM_WITH_THROW_CATCH
4662 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4663# else
4664 AssertReleaseFailedStmt(off = UINT32_MAX);
4665# endif
4666
4667#else
4668# error "Port me"
4669#endif
4670 return off;
4671}
4672
4673/**
4674 * Emits code for AND'ing a 32-bit GPR with a constant.
4675 *
4676 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
4677 *       encoding will assert / throw an exception if @a iGprDst and @a iGprSrc are
4678 * the same.
4679 *
4680 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4681 */
4682DECL_FORCE_INLINE_THROW(uint32_t)
4683iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4684 bool fSetFlags = false)
4685{
4686#if defined(RT_ARCH_AMD64)
4687 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4688 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4689 RT_NOREF(fSetFlags);
4690
4691#elif defined(RT_ARCH_ARM64)
4692 uint32_t uImmR = 0;
4693 uint32_t uImmNandS = 0;
4694 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4695 {
4696 if (!fSetFlags)
4697 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4698 else
4699 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4700 }
4701 else if (iGprDst != iGprSrc)
4702 {
4703 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4704 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4705 }
4706 else
4707# ifdef IEM_WITH_THROW_CATCH
4708 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4709# else
4710 AssertReleaseFailedStmt(off = UINT32_MAX);
4711# endif
4712
4713#else
4714# error "Port me"
4715#endif
4716 return off;
4717}
4718
4719
4720/**
4721 * Emits code for OR'ing two 64-bit GPRs.
4722 */
4723DECL_FORCE_INLINE(uint32_t)
4724iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4725{
4726#if defined(RT_ARCH_AMD64)
4727 /* or Gv, Ev */
4728 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4729 pCodeBuf[off++] = 0x0b;
4730 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4731
4732#elif defined(RT_ARCH_ARM64)
4733 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
4734
4735#else
4736# error "Port me"
4737#endif
4738 return off;
4739}
4740
4741
4742/**
4743 * Emits code for OR'ing two 64-bit GPRs.
4744 */
4745DECL_INLINE_THROW(uint32_t)
4746iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4747{
4748#if defined(RT_ARCH_AMD64)
4749 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4750#elif defined(RT_ARCH_ARM64)
4751 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4752#else
4753# error "Port me"
4754#endif
4755 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4756 return off;
4757}
4758
4759
4760/**
4761 * Emits code for OR'ing two 32-bit GPRs.
4762 * @note Bits 63:32 of the destination GPR will be cleared.
4763 */
4764DECL_FORCE_INLINE(uint32_t)
4765iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4766{
4767#if defined(RT_ARCH_AMD64)
4768 /* or Gv, Ev */
4769 if (iGprDst >= 8 || iGprSrc >= 8)
4770 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4771 pCodeBuf[off++] = 0x0b;
4772 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4773
4774#elif defined(RT_ARCH_ARM64)
4775 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4776
4777#else
4778# error "Port me"
4779#endif
4780 return off;
4781}
4782
4783
4784/**
4785 * Emits code for OR'ing two 32-bit GPRs.
4786 * @note Bits 63:32 of the destination GPR will be cleared.
4787 */
4788DECL_INLINE_THROW(uint32_t)
4789iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4790{
4791#if defined(RT_ARCH_AMD64)
4792 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4793#elif defined(RT_ARCH_ARM64)
4794 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4795#else
4796# error "Port me"
4797#endif
4798 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4799 return off;
4800}
4801
4802
4803/**
4804 * Emits code for OR'ing a 64-bit GPR with a constant.
4805 */
4806DECL_INLINE_THROW(uint32_t)
4807iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
4808{
4809#if defined(RT_ARCH_AMD64)
4810 if ((int64_t)uImm == (int8_t)uImm)
4811 {
4812 /* or Ev, imm8 */
4813 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4814 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4815 pbCodeBuf[off++] = 0x83;
4816 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4817 pbCodeBuf[off++] = (uint8_t)uImm;
4818 }
4819 else if ((int64_t)uImm == (int32_t)uImm)
4820 {
4821 /* or Ev, imm32 */
4822 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4823 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4824 pbCodeBuf[off++] = 0x81;
4825 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4826 pbCodeBuf[off++] = RT_BYTE1(uImm);
4827 pbCodeBuf[off++] = RT_BYTE2(uImm);
4828 pbCodeBuf[off++] = RT_BYTE3(uImm);
4829 pbCodeBuf[off++] = RT_BYTE4(uImm);
4830 }
4831 else
4832 {
4833 /* Use temporary register for the 64-bit immediate. */
4834 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4835 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
4836 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4837 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4838 }
4839
4840#elif defined(RT_ARCH_ARM64)
4841 uint32_t uImmR = 0;
4842 uint32_t uImmNandS = 0;
4843 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4844 {
4845 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4846 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
4847 }
4848 else
4849 {
4850 /* Use temporary register for the 64-bit immediate. */
4851 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4852 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
4853 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4854 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4855 }
4856
4857#else
4858# error "Port me"
4859#endif
4860 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4861 return off;
4862}
4863
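/* Immediate-width selection sketch (values and register index illustrative):
 * @code
 *      off = iemNativeEmitOrGprByImm(pReNative, off, 3, 0x40);          // or Ev, imm8 on AMD64
 *      off = iemNativeEmitOrGprByImm(pReNative, off, 3, 0x12345);       // or Ev, imm32 on AMD64
 *      off = iemNativeEmitOrGprByImm(pReNative, off, 3, RT_BIT_64(40)); // temp register on AMD64;
 *                                                                       // single ORR (imm) on ARM64
 * @endcode
 */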
4864
4865/**
4866 * Emits code for OR'ing a 32-bit GPR with a constant.
4867 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4868 * @note For ARM64 this only supports @a uImm values that can be expressed using
4869 *       the two 6-bit immediates of the ORR instruction. The caller must make
4870 * sure this is possible!
4871 */
4872DECL_FORCE_INLINE_THROW(uint32_t)
4873iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4874{
4875#if defined(RT_ARCH_AMD64)
4876 /* or Ev, imm */
4877 if (iGprDst >= 8)
4878 pCodeBuf[off++] = X86_OP_REX_B;
4879 if ((int32_t)uImm == (int8_t)uImm)
4880 {
4881 pCodeBuf[off++] = 0x83;
4882 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4883 pCodeBuf[off++] = (uint8_t)uImm;
4884 }
4885 else
4886 {
4887 pCodeBuf[off++] = 0x81;
4888 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4889 pCodeBuf[off++] = RT_BYTE1(uImm);
4890 pCodeBuf[off++] = RT_BYTE2(uImm);
4891 pCodeBuf[off++] = RT_BYTE3(uImm);
4892 pCodeBuf[off++] = RT_BYTE4(uImm);
4893 }
4894
4895#elif defined(RT_ARCH_ARM64)
4896 uint32_t uImmR = 0;
4897 uint32_t uImmNandS = 0;
4898 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4899 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4900 else
4901# ifdef IEM_WITH_THROW_CATCH
4902 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4903# else
4904 AssertReleaseFailedStmt(off = UINT32_MAX);
4905# endif
4906
4907#else
4908# error "Port me"
4909#endif
4910 return off;
4911}
4912
4913
4914/**
4915 * Emits code for OR'ing a 32-bit GPR with a constant.
4916 *
4917 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4918 */
4919DECL_INLINE_THROW(uint32_t)
4920iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4921{
4922#if defined(RT_ARCH_AMD64)
4923 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
4924
4925#elif defined(RT_ARCH_ARM64)
4926 uint32_t uImmR = 0;
4927 uint32_t uImmNandS = 0;
4928 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4929 {
4930 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4931 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4932 }
4933 else
4934 {
4935 /* Use temporary register for the 64-bit immediate. */
4936 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4937 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
4938 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4939 }
4940
4941#else
4942# error "Port me"
4943#endif
4944 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4945 return off;
4946}
4947
4948
4949
4950/**
4951 * ORs two 64-bit GPRs together, storing the result in a third register.
4952 */
4953DECL_FORCE_INLINE(uint32_t)
4954iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
4955{
4956#ifdef RT_ARCH_AMD64
4957 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
4958 {
4959 /** @todo consider LEA */
4960 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
4961 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
4962 }
4963 else
4964 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
4965
4966#elif defined(RT_ARCH_ARM64)
4967 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
4968
4969#else
4970# error "Port me!"
4971#endif
4972 return off;
4973}
4974
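/* Three-operand usage sketch (register indices illustrative); on AMD64 the
 * distinct-destination case expands to mov + or, hence the 6 byte reservation:
 * @code
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
 *      off = iemNativeEmitGprEqGprOrGprEx(pCodeBuf, off, 0 /*iGprDst*/, 1 /*iGprSrc1*/, 2 /*iGprSrc2*/);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 * @endcode
 */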
4975
4976
4977/**
4978 * ORs two 32-bit GPRs together, storing the result in a third register.
4979 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4980 */
4981DECL_FORCE_INLINE(uint32_t)
4982iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
4983{
4984#ifdef RT_ARCH_AMD64
4985 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
4986 {
4987 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
4988 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
4989 }
4990 else
4991 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
4992
4993#elif defined(RT_ARCH_ARM64)
4994 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
4995
4996#else
4997# error "Port me!"
4998#endif
4999 return off;
5000}
5001
5002
5003/**
5004 * Emits code for XOR'ing two 64-bit GPRs.
5005 */
5006DECL_INLINE_THROW(uint32_t)
5007iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5008{
5009#if defined(RT_ARCH_AMD64)
5010    /* xor Gv, Ev */
5011 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5012 pCodeBuf[off++] = 0x33;
5013 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5014
5015#elif defined(RT_ARCH_ARM64)
5016 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5017
5018#else
5019# error "Port me"
5020#endif
5021 return off;
5022}
5023
5024
5025/**
5026 * Emits code for XOR'ing two 64-bit GPRs.
5027 */
5028DECL_INLINE_THROW(uint32_t)
5029iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5030{
5031#if defined(RT_ARCH_AMD64)
5032 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5033#elif defined(RT_ARCH_ARM64)
5034 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5035#else
5036# error "Port me"
5037#endif
5038 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5039 return off;
5040}
5041
5042
5043/**
5044 * Emits code for XOR'ing two 32-bit GPRs.
5045 */
5046DECL_INLINE_THROW(uint32_t)
5047iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5048{
5049#if defined(RT_ARCH_AMD64)
5050    /* xor Gv, Ev */
5051 if (iGprDst >= 8 || iGprSrc >= 8)
5052 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5053 pCodeBuf[off++] = 0x33;
5054 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5055
5056#elif defined(RT_ARCH_ARM64)
5057 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5058
5059#else
5060# error "Port me"
5061#endif
5062 return off;
5063}
5064
5065
5066/**
5067 * Emits code for XOR'ing two 32-bit GPRs.
5068 */
5069DECL_INLINE_THROW(uint32_t)
5070iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5071{
5072#if defined(RT_ARCH_AMD64)
5073 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5074#elif defined(RT_ARCH_ARM64)
5075 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5076#else
5077# error "Port me"
5078#endif
5079 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5080 return off;
5081}
5082
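/* A common use is the classic zeroing idiom: XOR'ing a register with itself
 * clears all 64 bits on both hosts (sketch, register index illustrative):
 * @code
 *      off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, 4 /*iGprDst*/, 4 /*iGprSrc*/);
 * @endcode
 */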
5083
5084/**
5085 * Emits code for XOR'ing a 32-bit GPR with a constant.
5086 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5087 * @note For ARM64 this only supports @a uImm values that can be expressed using
5088 *       the two 6-bit immediates of the EOR instruction. The caller must make
5089 * sure this is possible!
5090 */
5091DECL_FORCE_INLINE_THROW(uint32_t)
5092iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5093{
5094#if defined(RT_ARCH_AMD64)
5095    /* xor Ev, imm */
5096 if (iGprDst >= 8)
5097 pCodeBuf[off++] = X86_OP_REX_B;
5098 if ((int32_t)uImm == (int8_t)uImm)
5099 {
5100 pCodeBuf[off++] = 0x83;
5101 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5102 pCodeBuf[off++] = (uint8_t)uImm;
5103 }
5104 else
5105 {
5106 pCodeBuf[off++] = 0x81;
5107 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5108 pCodeBuf[off++] = RT_BYTE1(uImm);
5109 pCodeBuf[off++] = RT_BYTE2(uImm);
5110 pCodeBuf[off++] = RT_BYTE3(uImm);
5111 pCodeBuf[off++] = RT_BYTE4(uImm);
5112 }
5113
5114#elif defined(RT_ARCH_ARM64)
5115 uint32_t uImmR = 0;
5116 uint32_t uImmNandS = 0;
5117 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5118 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5119 else
5120# ifdef IEM_WITH_THROW_CATCH
5121 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5122# else
5123 AssertReleaseFailedStmt(off = UINT32_MAX);
5124# endif
5125
5126#else
5127# error "Port me"
5128#endif
5129 return off;
5130}
5131
5132
5133/*********************************************************************************************************************************
5134* Shifting *
5135*********************************************************************************************************************************/
5136
5137/**
5138 * Emits code for shifting a GPR a fixed number of bits to the left.
5139 */
5140DECL_FORCE_INLINE(uint32_t)
5141iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5142{
5143 Assert(cShift > 0 && cShift < 64);
5144
5145#if defined(RT_ARCH_AMD64)
5146 /* shl dst, cShift */
5147 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5148 if (cShift != 1)
5149 {
5150 pCodeBuf[off++] = 0xc1;
5151 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5152 pCodeBuf[off++] = cShift;
5153 }
5154 else
5155 {
5156 pCodeBuf[off++] = 0xd1;
5157 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5158 }
5159
5160#elif defined(RT_ARCH_ARM64)
5161 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5162
5163#else
5164# error "Port me"
5165#endif
5166 return off;
5167}
5168
5169
5170/**
5171 * Emits code for shifting a GPR a fixed number of bits to the left.
5172 */
5173DECL_INLINE_THROW(uint32_t)
5174iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5175{
5176#if defined(RT_ARCH_AMD64)
5177 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5178#elif defined(RT_ARCH_ARM64)
5179 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5180#else
5181# error "Port me"
5182#endif
5183 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5184 return off;
5185}
5186
5187
5188/**
5189 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5190 */
5191DECL_FORCE_INLINE(uint32_t)
5192iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5193{
5194 Assert(cShift > 0 && cShift < 32);
5195
5196#if defined(RT_ARCH_AMD64)
5197 /* shl dst, cShift */
5198 if (iGprDst >= 8)
5199 pCodeBuf[off++] = X86_OP_REX_B;
5200 if (cShift != 1)
5201 {
5202 pCodeBuf[off++] = 0xc1;
5203 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5204 pCodeBuf[off++] = cShift;
5205 }
5206 else
5207 {
5208 pCodeBuf[off++] = 0xd1;
5209 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5210 }
5211
5212#elif defined(RT_ARCH_ARM64)
5213 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5214
5215#else
5216# error "Port me"
5217#endif
5218 return off;
5219}
5220
5221
5222/**
5223 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5224 */
5225DECL_INLINE_THROW(uint32_t)
5226iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5227{
5228#if defined(RT_ARCH_AMD64)
5229 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5230#elif defined(RT_ARCH_ARM64)
5231 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5232#else
5233# error "Port me"
5234#endif
5235 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5236 return off;
5237}
5238
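/* Usage sketch (register index illustrative): scale a 32-bit index by eight,
 * e.g. when turning an entry index into a byte offset:
 * @code
 *      off = iemNativeEmitShiftGpr32Left(pReNative, off, 2 /*iGprDst*/, 3 /*cShift*/);
 * @endcode
 */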
5239
5240/**
5241 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5242 */
5243DECL_FORCE_INLINE(uint32_t)
5244iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5245{
5246 Assert(cShift > 0 && cShift < 64);
5247
5248#if defined(RT_ARCH_AMD64)
5249 /* shr dst, cShift */
5250 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5251 if (cShift != 1)
5252 {
5253 pCodeBuf[off++] = 0xc1;
5254 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5255 pCodeBuf[off++] = cShift;
5256 }
5257 else
5258 {
5259 pCodeBuf[off++] = 0xd1;
5260 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5261 }
5262
5263#elif defined(RT_ARCH_ARM64)
5264 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5265
5266#else
5267# error "Port me"
5268#endif
5269 return off;
5270}
5271
5272
5273/**
5274 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5275 */
5276DECL_INLINE_THROW(uint32_t)
5277iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5278{
5279#if defined(RT_ARCH_AMD64)
5280 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5281#elif defined(RT_ARCH_ARM64)
5282 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5283#else
5284# error "Port me"
5285#endif
5286 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5287 return off;
5288}
5289
5290
5291/**
5292 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5293 * right.
5294 */
5295DECL_FORCE_INLINE(uint32_t)
5296iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5297{
5298 Assert(cShift > 0 && cShift < 32);
5299
5300#if defined(RT_ARCH_AMD64)
5301 /* shr dst, cShift */
5302 if (iGprDst >= 8)
5303 pCodeBuf[off++] = X86_OP_REX_B;
5304 if (cShift != 1)
5305 {
5306 pCodeBuf[off++] = 0xc1;
5307 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5308 pCodeBuf[off++] = cShift;
5309 }
5310 else
5311 {
5312 pCodeBuf[off++] = 0xd1;
5313 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5314 }
5315
5316#elif defined(RT_ARCH_ARM64)
5317 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5318
5319#else
5320# error "Port me"
5321#endif
5322 return off;
5323}
5324
5325
5326/**
5327 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5328 * right.
5329 */
5330DECL_INLINE_THROW(uint32_t)
5331iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5332{
5333#if defined(RT_ARCH_AMD64)
5334 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5335#elif defined(RT_ARCH_ARM64)
5336 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5337#else
5338# error "Port me"
5339#endif
5340 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5341 return off;
5342}
5343
5344
5345/**
5346 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5347 * right and assigning it to a different GPR.
5348 */
5349DECL_INLINE_THROW(uint32_t)
5350iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5351{
5352 Assert(cShift > 0); Assert(cShift < 32);
5353#if defined(RT_ARCH_AMD64)
5354 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5355 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5356
5357#elif defined(RT_ARCH_ARM64)
5358 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5359
5360#else
5361# error "Port me"
5362#endif
5363 return off;
5364}
5365
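/* Usage sketch (register indices illustrative): extract the high 16 bits of
 * one register into another.  Being an Ex variant, the caller reserves buffer
 * space first; 8 units cover the AMD64 mov + shr worst case:
 * @code
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
 *      off = iemNativeEmitGpr32EqGprShiftRightImmEx(pCodeBuf, off, 1 /*iGprDst*/, 2 /*iGprSrc*/, 16 /*cShift*/);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 * @endcode
 */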
5366
5367/**
5368 * Emits code for rotating a GPR a fixed number of bits to the left.
5369 */
5370DECL_FORCE_INLINE(uint32_t)
5371iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5372{
5373 Assert(cShift > 0 && cShift < 64);
5374
5375#if defined(RT_ARCH_AMD64)
5376 /* rol dst, cShift */
5377 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5378 if (cShift != 1)
5379 {
5380 pCodeBuf[off++] = 0xc1;
5381 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5382 pCodeBuf[off++] = cShift;
5383 }
5384 else
5385 {
5386 pCodeBuf[off++] = 0xd1;
5387 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5388 }
5389
5390#elif defined(RT_ARCH_ARM64)
5391 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5392
5393#else
5394# error "Port me"
5395#endif
5396 return off;
5397}
5398
5399
5400#if defined(RT_ARCH_AMD64)
5401/**
5402 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5403 */
5404DECL_FORCE_INLINE(uint32_t)
5405iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5406{
5407 Assert(cShift > 0 && cShift < 32);
5408
5409 /* rcl dst, cShift */
5410 if (iGprDst >= 8)
5411 pCodeBuf[off++] = X86_OP_REX_B;
5412 if (cShift != 1)
5413 {
5414 pCodeBuf[off++] = 0xc1;
5415 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5416 pCodeBuf[off++] = cShift;
5417 }
5418 else
5419 {
5420 pCodeBuf[off++] = 0xd1;
5421 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5422 }
5423
5424 return off;
5425}
5426#endif /* RT_ARCH_AMD64 */
5427
5428
5429
5430/**
5431 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5432 * @note On ARM64, bits 63:32 of the destination GPR will be cleared; on AMD64
5433 *       the 16-bit rotate leaves bits 63:16 unchanged.
5433 */
5434DECL_FORCE_INLINE(uint32_t)
5435iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5436{
5437#if defined(RT_ARCH_AMD64)
5438 /*
5439 * There is no bswap r16 on x86 (the encoding exists but does not work).
5440 * So just use a rol instead (gcc -O2 does the same).
5441 *
5442 * rol r16, 0x8
5443 */
5444 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5445 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5446 if (iGpr >= 8)
5447 pbCodeBuf[off++] = X86_OP_REX_B;
5448 pbCodeBuf[off++] = 0xc1;
5449 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5450 pbCodeBuf[off++] = 0x08;
5451#elif defined(RT_ARCH_ARM64)
5452 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5453
5454 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5455#else
5456# error "Port me"
5457#endif
5458
5459 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5460 return off;
5461}
5462
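/* Worked example (register index illustrative): with 0x1234 in the low 16 bits,
 * the emitted rotate / REV16 leaves 0x3412 there:
 * @code
 *      off = iemNativeEmitBswapGpr16(pReNative, off, 5 /*iGpr*/);
 * @endcode
 */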
5463
5464/**
5465 * Emits code for reversing the byte order in a 32-bit GPR.
5466 * @note Bits 63:32 of the destination GPR will be cleared.
5467 */
5468DECL_FORCE_INLINE(uint32_t)
5469iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5470{
5471#if defined(RT_ARCH_AMD64)
5472 /* bswap r32 */
5473 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5474
5475 if (iGpr >= 8)
5476 pbCodeBuf[off++] = X86_OP_REX_B;
5477 pbCodeBuf[off++] = 0x0f;
5478 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5479#elif defined(RT_ARCH_ARM64)
5480 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5481
5482 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5483#else
5484# error "Port me"
5485#endif
5486
5487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5488 return off;
5489}
5490
5491
5492/**
5493 * Emits code for reversing the byte order in a 64-bit GPR.
5494 */
5495DECL_FORCE_INLINE(uint32_t)
5496iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5497{
5498#if defined(RT_ARCH_AMD64)
5499 /* bswap r64 */
5500 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5501
5502 if (iGpr >= 8)
5503 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5504 else
5505 pbCodeBuf[off++] = X86_OP_REX_W;
5506 pbCodeBuf[off++] = 0x0f;
5507 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5508#elif defined(RT_ARCH_ARM64)
5509 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5510
5511 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5512#else
5513# error "Port me"
5514#endif
5515
5516 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5517 return off;
5518}
5519
5520
5521/*********************************************************************************************************************************
5522* Compare and Testing *
5523*********************************************************************************************************************************/
5524
5525
5526#ifdef RT_ARCH_ARM64
5527/**
5528 * Emits an ARM64 compare instruction.
5529 */
5530DECL_INLINE_THROW(uint32_t)
5531iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5532 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5533{
5534 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5535 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5536 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5537 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5538 return off;
5539}
5540#endif
5541
5542
5543/**
5544 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5545 * with conditional instructions.
5546 */
5547DECL_FORCE_INLINE(uint32_t)
5548iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5549{
5550#ifdef RT_ARCH_AMD64
5551 /* cmp Gv, Ev */
5552 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5553 pCodeBuf[off++] = 0x3b;
5554 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5555
5556#elif defined(RT_ARCH_ARM64)
5557 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
5558
5559#else
5560# error "Port me!"
5561#endif
5562 return off;
5563}
5564
5565
5566/**
5567 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5568 * with conditional instructions.
5569 */
5570DECL_INLINE_THROW(uint32_t)
5571iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5572{
5573#ifdef RT_ARCH_AMD64
5574 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5575#elif defined(RT_ARCH_ARM64)
5576 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5577#else
5578# error "Port me!"
5579#endif
5580 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5581 return off;
5582}
5583
5584
5585/**
5586 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5587 * with conditional instructions.
5588 */
5589DECL_FORCE_INLINE(uint32_t)
5590iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5591{
5592#ifdef RT_ARCH_AMD64
5593 /* cmp Gv, Ev */
5594 if (iGprLeft >= 8 || iGprRight >= 8)
5595 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5596 pCodeBuf[off++] = 0x3b;
5597 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5598
5599#elif defined(RT_ARCH_ARM64)
5600 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
5601
5602#else
5603# error "Port me!"
5604#endif
5605 return off;
5606}
5607
5608
5609/**
5610 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5611 * with conditional instructions.
5612 */
5613DECL_INLINE_THROW(uint32_t)
5614iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5615{
5616#ifdef RT_ARCH_AMD64
5617 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5618#elif defined(RT_ARCH_ARM64)
5619 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5620#else
5621# error "Port me!"
5622#endif
5623 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5624 return off;
5625}
5626
5627
5628/**
5629 * Emits a compare of a 64-bit GPR with a constant value, setting status
5630 * flags/whatever for use with conditional instructions.
5631 */
5632DECL_INLINE_THROW(uint32_t)
5633iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
5634{
5635#ifdef RT_ARCH_AMD64
5636    if ((int64_t)uImm == (int8_t)uImm)
5637 {
5638 /* cmp Ev, Ib */
5639 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5640 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5641 pbCodeBuf[off++] = 0x83;
5642 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5643 pbCodeBuf[off++] = (uint8_t)uImm;
5644 }
5645 else if ((int64_t)uImm == (int32_t)uImm)
5646 {
5647 /* cmp Ev, imm */
5648 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5649 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5650 pbCodeBuf[off++] = 0x81;
5651 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5653 pbCodeBuf[off++] = RT_BYTE1(uImm);
5654 pbCodeBuf[off++] = RT_BYTE2(uImm);
5655 pbCodeBuf[off++] = RT_BYTE3(uImm);
5656 pbCodeBuf[off++] = RT_BYTE4(uImm);
5657 }
5658 else
5659 {
5660 /* Use temporary register for the immediate. */
5661 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5662 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5663 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5664 }
5665
5666#elif defined(RT_ARCH_ARM64)
5667    /** @todo guess there are cleverer things we can do here... */
5668 if (uImm < _4K)
5669 {
5670 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5671 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5672 true /*64Bit*/, true /*fSetFlags*/);
5673 }
5674 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5675 {
5676 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5677 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5678 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5679 }
5680 else
5681 {
5682 /* Use temporary register for the immediate. */
5683 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5684 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5685 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5686 }
5687
5688#else
5689# error "Port me!"
5690#endif
5691
5692 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5693 return off;
5694}
5695
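/* ARM64 immediate-selection sketch (values and register index illustrative):
 * @code
 *      off = iemNativeEmitCmpGprWithImm(pReNative, off, 3, UINT64_C(0x123));    // single SUBS w/ imm12
 *      off = iemNativeEmitCmpGprWithImm(pReNative, off, 3, UINT64_C(0x123000)); // SUBS w/ imm12, LSL #12
 *      off = iemNativeEmitCmpGprWithImm(pReNative, off, 3, UINT64_C(0x123456)); // temp register + CMP (reg)
 * @endcode
 */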
5696
5697/**
5698 * Emits a compare of a 32-bit GPR with a constant value, setting status
5699 * flags/whatever for use with conditional instructions.
5700 *
5701 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5702 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
5703 *       bits all zero). Will release assert or throw an exception if the caller
5704 * violates this restriction.
5705 */
5706DECL_FORCE_INLINE_THROW(uint32_t)
5707iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5708{
5709#ifdef RT_ARCH_AMD64
5710 if (iGprLeft >= 8)
5711 pCodeBuf[off++] = X86_OP_REX_B;
5712 if (uImm <= UINT32_C(0x7f))
5713 {
5714 /* cmp Ev, Ib */
5715 pCodeBuf[off++] = 0x83;
5716 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5717 pCodeBuf[off++] = (uint8_t)uImm;
5718 }
5719 else
5720 {
5721 /* cmp Ev, imm */
5722 pCodeBuf[off++] = 0x81;
5723 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5724 pCodeBuf[off++] = RT_BYTE1(uImm);
5725 pCodeBuf[off++] = RT_BYTE2(uImm);
5726 pCodeBuf[off++] = RT_BYTE3(uImm);
5727 pCodeBuf[off++] = RT_BYTE4(uImm);
5728 }
5729
5730#elif defined(RT_ARCH_ARM64)
5731    /** @todo guess there are cleverer things we can do here... */
5732 if (uImm < _4K)
5733 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5734 false /*64Bit*/, true /*fSetFlags*/);
5735 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5736        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5737 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5738 else
5739# ifdef IEM_WITH_THROW_CATCH
5740 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5741# else
5742 AssertReleaseFailedStmt(off = UINT32_MAX);
5743# endif
5744
5745#else
5746# error "Port me!"
5747#endif
5748 return off;
5749}
5750
5751
5752/**
5753 * Emits a compare of a 32-bit GPR with a constant value, setting status
5754 * flags/whatever for use with conditional instructions.
5755 */
5756DECL_INLINE_THROW(uint32_t)
5757iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5758{
5759#ifdef RT_ARCH_AMD64
5760 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
5761
5762#elif defined(RT_ARCH_ARM64)
5763    /** @todo guess there are cleverer things we can do here... */
5764 if (uImm < _4K)
5765 {
5766 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5767 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5768 false /*64Bit*/, true /*fSetFlags*/);
5769 }
5770 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5771 {
5772 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5773        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5774 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5775 }
5776 else
5777 {
5778 /* Use temporary register for the immediate. */
5779 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5780 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
5781 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5782 }
5783
5784#else
5785# error "Port me!"
5786#endif
5787
5788 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5789 return off;
5790}
5791
5792
5793/**
5794 * Emits a compare of a 16-bit GPR with a constant value, setting status
5795 * flags/whatever for use with conditional instructions.
5796 *
5797 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
5798 *       16-bit value from @a iGprLeft.
5799 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5800 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
5801 *       bits all zero). Will release assert or throw an exception if the caller
5802 * violates this restriction.
5803 */
5804DECL_FORCE_INLINE_THROW(uint32_t)
5805iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
5806 uint8_t idxTmpReg = UINT8_MAX)
5807{
5808#ifdef RT_ARCH_AMD64
5809 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5810 if (iGprLeft >= 8)
5811 pCodeBuf[off++] = X86_OP_REX_B;
5812 if (uImm <= UINT32_C(0x7f))
5813 {
5814 /* cmp Ev, Ib */
5815 pCodeBuf[off++] = 0x83;
5816 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5817 pCodeBuf[off++] = (uint8_t)uImm;
5818 }
5819 else
5820 {
5821 /* cmp Ev, imm */
5822 pCodeBuf[off++] = 0x81;
5823 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5824 pCodeBuf[off++] = RT_BYTE1(uImm);
5825 pCodeBuf[off++] = RT_BYTE2(uImm);
5826 }
5827 RT_NOREF(idxTmpReg);
5828
5829#elif defined(RT_ARCH_ARM64)
5830# ifdef IEM_WITH_THROW_CATCH
5831 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5832# else
5833 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
5834# endif
5835 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
5836 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
5837 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
5838
5839#else
5840# error "Port me!"
5841#endif
5842 return off;
5843}
5844
5845
5846/**
5847 * Emits a compare of a 16-bit GPR with a constant value, setting status
5848 * flags/whatever for use with conditional instructions.
5849 *
5850 * @note ARM64: Helper register is required (idxTmpReg).
5851 */
5852DECL_INLINE_THROW(uint32_t)
5853iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
5854 uint8_t idxTmpReg = UINT8_MAX)
5855{
5856#ifdef RT_ARCH_AMD64
5857 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
5858#elif defined(RT_ARCH_ARM64)
5859 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
5860#else
5861# error "Port me!"
5862#endif
5863 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5864 return off;
5865}
5866
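/* ARM64 callers must supply a scratch register; sketch assuming the usual
 * iemNativeRegAllocTmp/iemNativeRegFreeTmp helpers from IEMN8veRecompiler.h:
 * @code
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitCmpGpr16WithImm(pReNative, off, 2 /*iGprLeft*/, UINT16_C(0x1234), idxTmpReg);
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 * @endcode
 */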
5867
5868
5869/*********************************************************************************************************************************
5870* Branching *
5871*********************************************************************************************************************************/
5872
5873/**
5874 * Emits a JMP rel32 / B imm26 to the given label.
5875 */
5876DECL_FORCE_INLINE_THROW(uint32_t)
5877iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
5878{
5879 Assert(idxLabel < pReNative->cLabels);
5880
5881#ifdef RT_ARCH_AMD64
5882 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
5883 {
5884 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
5885 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
5886 {
5887 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
5888 pCodeBuf[off++] = (uint8_t)offRel;
5889 }
5890 else
5891 {
5892 offRel -= 3;
5893 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
5894 pCodeBuf[off++] = RT_BYTE1(offRel);
5895 pCodeBuf[off++] = RT_BYTE2(offRel);
5896 pCodeBuf[off++] = RT_BYTE3(offRel);
5897 pCodeBuf[off++] = RT_BYTE4(offRel);
5898 }
5899 }
5900 else
5901 {
5902 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
5903 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
5904 pCodeBuf[off++] = 0xfe;
5905 pCodeBuf[off++] = 0xff;
5906 pCodeBuf[off++] = 0xff;
5907 pCodeBuf[off++] = 0xff;
5908 }
5909 pCodeBuf[off++] = 0xcc; /* int3 poison */
5910
5911#elif defined(RT_ARCH_ARM64)
5912 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
5913 pCodeBuf[off++] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
5914 else
5915 {
5916 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
5917 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
5918 }
5919
5920#else
5921# error "Port me!"
5922#endif
5923 return off;
5924}
5925
5926
5927/**
5928 * Emits a JMP rel32 / B imm26 to the given label.
5929 */
5930DECL_INLINE_THROW(uint32_t)
5931iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5932{
5933#ifdef RT_ARCH_AMD64
5934 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
5935#elif defined(RT_ARCH_ARM64)
5936 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
5937#else
5938# error "Port me!"
5939#endif
5940 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5941 return off;
5942}
5943
5944
5945/**
5946 * Emits a JMP rel32 / B imm26 to a new undefined label.
5947 */
5948DECL_INLINE_THROW(uint32_t)
5949iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
5950{
5951 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
5952 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
5953}
5954
5955/** Condition type. */
5956#ifdef RT_ARCH_AMD64
5957typedef enum IEMNATIVEINSTRCOND : uint8_t
5958{
5959 kIemNativeInstrCond_o = 0,
5960 kIemNativeInstrCond_no,
5961 kIemNativeInstrCond_c,
5962 kIemNativeInstrCond_nc,
5963 kIemNativeInstrCond_e,
5964 kIemNativeInstrCond_ne,
5965 kIemNativeInstrCond_be,
5966 kIemNativeInstrCond_nbe,
5967 kIemNativeInstrCond_s,
5968 kIemNativeInstrCond_ns,
5969 kIemNativeInstrCond_p,
5970 kIemNativeInstrCond_np,
5971 kIemNativeInstrCond_l,
5972 kIemNativeInstrCond_nl,
5973 kIemNativeInstrCond_le,
5974 kIemNativeInstrCond_nle
5975} IEMNATIVEINSTRCOND;
5976#elif defined(RT_ARCH_ARM64)
5977typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
5978# define kIemNativeInstrCond_o todo_conditional_codes
5979# define kIemNativeInstrCond_no todo_conditional_codes
5980# define kIemNativeInstrCond_c todo_conditional_codes
5981# define kIemNativeInstrCond_nc todo_conditional_codes
5982# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
5983# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
5984# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
5985# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
5986# define kIemNativeInstrCond_s todo_conditional_codes
5987# define kIemNativeInstrCond_ns todo_conditional_codes
5988# define kIemNativeInstrCond_p todo_conditional_codes
5989# define kIemNativeInstrCond_np todo_conditional_codes
5990# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
5991# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
5992# define kIemNativeInstrCond_le kArmv8InstrCond_Le
5993# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
5994#else
5995# error "Port me!"
5996#endif
5997
5998
5999/**
6000 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6001 */
6002DECL_FORCE_INLINE_THROW(uint32_t)
6003iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6004 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6005{
6006 Assert(idxLabel < pReNative->cLabels);
6007
6008 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6009#ifdef RT_ARCH_AMD64
6010 if (offLabel >= off)
6011 {
6012 /* jcc rel32 */
6013 pCodeBuf[off++] = 0x0f;
6014 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6015 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6016 pCodeBuf[off++] = 0x00;
6017 pCodeBuf[off++] = 0x00;
6018 pCodeBuf[off++] = 0x00;
6019 pCodeBuf[off++] = 0x00;
6020 }
6021 else
6022 {
6023 int32_t offDisp = offLabel - (off + 2);
6024 if ((int8_t)offDisp == offDisp)
6025 {
6026 /* jcc rel8 */
6027 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6028 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6029 }
6030 else
6031 {
6032 /* jcc rel32 */
6033 offDisp -= 4;
6034 pCodeBuf[off++] = 0x0f;
6035 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6036 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6037 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6038 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6039 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6040 }
6041 }
6042
6043#elif defined(RT_ARCH_ARM64)
6044 if (offLabel >= off)
6045 {
6046 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6047 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6048 }
6049 else
6050 {
6051 Assert(off - offLabel <= 0x3ffffU);
6052 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6053 }
6054
6055#else
6056# error "Port me!"
6057#endif
6058 return off;
6059}
6060
6061
6062/**
6063 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6064 */
6065DECL_INLINE_THROW(uint32_t)
6066iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6067{
6068#ifdef RT_ARCH_AMD64
6069 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6070#elif defined(RT_ARCH_ARM64)
6071 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6072#else
6073# error "Port me!"
6074#endif
6075 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6076 return off;
6077}
6078
6079
6080/**
6081 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6082 */
6083DECL_INLINE_THROW(uint32_t)
6084iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6085 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6086{
6087 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6088 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6089}
6090
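/* Typical compare-and-branch pairing (sketch; register index is illustrative
 * and enmLabelType stands for whichever label type the caller targets):
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, 0 /*uData*/);
 *      off = iemNativeEmitCmpGpr32WithImm(pReNative, off, 2 /*iGprLeft*/, 0);
 *      off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
 * @endcode
 */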
6091
6092/**
6093 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6094 */
6095DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6096{
6097#ifdef RT_ARCH_AMD64
6098 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6099#elif defined(RT_ARCH_ARM64)
6100 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6101#else
6102# error "Port me!"
6103#endif
6104}
6105
6106/**
6107 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6108 */
6109DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6110 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6111{
6112#ifdef RT_ARCH_AMD64
6113 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6114#elif defined(RT_ARCH_ARM64)
6115 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6116#else
6117# error "Port me!"
6118#endif
6119}
6120
6121
6122/**
6123 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6124 */
6125DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6126{
6127#ifdef RT_ARCH_AMD64
6128 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6129#elif defined(RT_ARCH_ARM64)
6130 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6131#else
6132# error "Port me!"
6133#endif
6134}
6135
6136/**
6137 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6138 */
6139DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6140 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6141{
6142#ifdef RT_ARCH_AMD64
6143 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6144#elif defined(RT_ARCH_ARM64)
6145 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6146#else
6147# error "Port me!"
6148#endif
6149}
6150
6151
6152/**
6153 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6154 */
6155DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6156{
6157#ifdef RT_ARCH_AMD64
6158 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6159#elif defined(RT_ARCH_ARM64)
6160 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6161#else
6162# error "Port me!"
6163#endif
6164}
6165
6166/**
6167 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6168 */
6169DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6170 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6171{
6172#ifdef RT_ARCH_AMD64
6173 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6174#elif defined(RT_ARCH_ARM64)
6175 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6176#else
6177# error "Port me!"
6178#endif
6179}
6180
6181
6182/**
6183 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6184 */
6185DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6186{
6187#ifdef RT_ARCH_AMD64
6188 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6189#elif defined(RT_ARCH_ARM64)
6190 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6191#else
6192# error "Port me!"
6193#endif
6194}
6195
6196/**
6197 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6198 */
6199DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6200 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6201{
6202#ifdef RT_ARCH_AMD64
6203 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6204#elif defined(RT_ARCH_ARM64)
6205 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6206#else
6207# error "Port me!"
6208#endif
6209}
6210
6211
6212/**
6213 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6214 */
6215DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6216{
6217#ifdef RT_ARCH_AMD64
6218 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6219#elif defined(RT_ARCH_ARM64)
6220 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6221#else
6222# error "Port me!"
6223#endif
6224}
6225
6226/**
6227 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6228 */
6229DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6230 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6231{
6232#ifdef RT_ARCH_AMD64
6233 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6234#elif defined(RT_ARCH_ARM64)
6235 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6236#else
6237# error "Port me!"
6238#endif
6239}
6240
6241
6242/**
6243 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6244 *
6245 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6246 *
6247 * Only use hardcoded jumps forward when emitting for exactly one
6248 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6249 * the right target address on all platforms!
6250 *
6251 *          Please also note that on x86 it is necessary to pass off + 256 or higher
6252 *          for @a offTarget if one believes the intervening code is more than 127
6253 * bytes long.
6254 */
6255DECL_FORCE_INLINE(uint32_t)
6256iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6257{
6258#ifdef RT_ARCH_AMD64
6259 /* jcc rel8 / rel32 */
6260 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6261 if (offDisp < 128 && offDisp >= -128)
6262 {
6263 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6264 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6265 }
6266 else
6267 {
6268 offDisp -= 4;
6269 pCodeBuf[off++] = 0x0f;
6270 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6271 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6272 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6273 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6274 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6275 }
6276
6277#elif defined(RT_ARCH_ARM64)
6278 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6279
6280#else
6281# error "Port me!"
6282#endif
6283 return off;
6284}
6285
6286
6287/**
6288 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6289 *
6290 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6291 *
6292 * Only use hardcoded jumps forward when emitting for exactly one
6293 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6294 * the right target address on all platforms!
6295 *
6296 *          Please also note that on x86 it is necessary to pass off + 256 or higher
6297 *          for @a offTarget if one believes the intervening code is more than 127
6298 * bytes long.
6299 */
6300DECL_INLINE_THROW(uint32_t)
6301iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6302{
6303#ifdef RT_ARCH_AMD64
6304 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6305#elif defined(RT_ARCH_ARM64)
6306 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6307#else
6308# error "Port me!"
6309#endif
6310 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6311 return off;
6312}
6313
6314
6315/**
6316 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6317 *
6318 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6319 */
6320DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6321{
6322#ifdef RT_ARCH_AMD64
6323 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6324#elif defined(RT_ARCH_ARM64)
6325 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6326#else
6327# error "Port me!"
6328#endif
6329}
6330
6331
6332/**
6333 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6334 *
6335 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6336 */
6337DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6338{
6339#ifdef RT_ARCH_AMD64
6340 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6341#elif defined(RT_ARCH_ARM64)
6342 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6343#else
6344# error "Port me!"
6345#endif
6346}
6347
6348
6349/**
6350 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6351 *
6352 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6353 */
6354DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6355{
6356#ifdef RT_ARCH_AMD64
6357 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6358#elif defined(RT_ARCH_ARM64)
6359 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6360#else
6361# error "Port me!"
6362#endif
6363}
6364
6365
6366/**
6367 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6368 *
6369 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6370 */
6371DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6372{
6373#ifdef RT_ARCH_AMD64
6374 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6375#elif defined(RT_ARCH_ARM64)
6376 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6377#else
6378# error "Port me!"
6379#endif
6380}
6381
6382
6383/**
6384 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6385 *
6386 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6387 */
6388DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6389{
6390#ifdef RT_ARCH_AMD64
6391 /* jmp rel8 or rel32 */
6392 int32_t offDisp = offTarget - (off + 2);
6393 if (offDisp < 128 && offDisp >= -128)
6394 {
6395 pCodeBuf[off++] = 0xeb;
6396 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6397 }
6398 else
6399 {
6400 offDisp -= 3;
6401 pCodeBuf[off++] = 0xe9;
6402 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6403 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6404 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6405 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6406 }
6407
6408#elif defined(RT_ARCH_ARM64)
6409 pCodeBuf[off++] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6410
6411#else
6412# error "Port me!"
6413#endif
6414 return off;
6415}
6416
6417
6418/**
6419 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6420 *
6421 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6422 */
6423DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6424{
6425#ifdef RT_ARCH_AMD64
6426 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6427#elif defined(RT_ARCH_ARM64)
6428 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6429#else
6430# error "Port me!"
6431#endif
6432 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6433 return off;
6434}
6435
6436
6437/**
6438 * Fixes up a conditional jump to a fixed label.
6439 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6440 * iemNativeEmitJzToFixed, ...
6441 */
6442DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6443{
6444#ifdef RT_ARCH_AMD64
6445 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6446 uint8_t const bOpcode = pbCodeBuf[offFixup];
6447 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6448 {
6449 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6450 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6451 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6452 }
6453 else
6454 {
6455 if (bOpcode != 0x0f)
6456 Assert(bOpcode == 0xe9);
6457 else
6458 {
6459 offFixup += 1;
6460 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) < 0x10); /* Jcc rel32: 0x0f 0x80..0x8f */
6461 }
6462 uint32_t const offRel32 = offTarget - (offFixup + 5);
6463 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6464 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6465 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6466 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6467 }
6468
6469#elif defined(RT_ARCH_ARM64)
6470 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6471 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6472 {
6473 /* B.COND + BC.COND */
6474 int32_t const offDisp = offTarget - offFixup;
6475 Assert(offDisp >= -262144 && offDisp < 262144);
6476 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6477 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6478 }
6479 else
6480 {
6481 /* B imm26 */
6482 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6483 int32_t const offDisp = offTarget - offFixup;
6484 Assert(offDisp >= -33554432 && offDisp < 33554432);
6485 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6486 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6487 }
6488
6489#else
6490# error "Port me!"
6491#endif
6492}
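

/*
 * Usage sketch for the fixed jump emitters above (illustrative only, not part
 * of the API): emit a forward conditional jump with a dummy target, emit the
 * code to be skipped, then patch the displacement once the real target is
 * known.  Passing off + 256 as the dummy forces the rel32 form on AMD64 so
 * the fixup cannot run out of range (see the notes on @a offTarget above).
 * The offFixupSkip name is made up for the example.
 *
 * @code
 *      uint32_t const offFixupSkip = off;
 *      off = iemNativeEmitJzToFixed(pReNative, off, off + 256); // dummy target
 *      // ... emit the code that is skipped when ZF/EQ is set ...
 *      iemNativeFixupFixedJump(pReNative, offFixupSkip, off);
 * @endcode
 */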
6493
6494
6495#ifdef RT_ARCH_AMD64
6496/**
6497 * For doing bt on a register.
6498 */
6499DECL_INLINE_THROW(uint32_t)
6500iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6501{
6502 Assert(iBitNo < 64);
6503 /* bt Ev, imm8 */
6504 if (iBitNo >= 32)
6505 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6506 else if (iGprSrc >= 8)
6507 pCodeBuf[off++] = X86_OP_REX_B;
6508 pCodeBuf[off++] = 0x0f;
6509 pCodeBuf[off++] = 0xba;
6510 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6511 pCodeBuf[off++] = iBitNo;
6512 return off;
6513}
6514#endif /* RT_ARCH_AMD64 */
6515
6516
6517/**
6518 * Internal helper, don't call directly.
6519 */
6520DECL_INLINE_THROW(uint32_t)
6521iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6522 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6523{
6524 Assert(iBitNo < 64);
6525#ifdef RT_ARCH_AMD64
6526 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6527 if (iBitNo < 8)
6528 {
6529 /* test Eb, imm8 */
6530 if (iGprSrc >= 4)
6531 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6532 pbCodeBuf[off++] = 0xf6;
6533 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6534 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6535 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6536 }
6537 else
6538 {
6539 /* bt Ev, imm8 */
6540 if (iBitNo >= 32)
6541 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6542 else if (iGprSrc >= 8)
6543 pbCodeBuf[off++] = X86_OP_REX_B;
6544 pbCodeBuf[off++] = 0x0f;
6545 pbCodeBuf[off++] = 0xba;
6546 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6547 pbCodeBuf[off++] = iBitNo;
6548 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6549 }
6550
6551#elif defined(RT_ARCH_ARM64)
6552 /* Use the TBZ/TBNZ instructions here. */
6553 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6554 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
6555 {
6556 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
6557 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
6558 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
6559 //if (offLabel == UINT32_MAX)
6560 {
6561 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6562 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6563 }
6564 //else
6565 //{
6566 // RT_BREAKPOINT();
6567 // Assert(off - offLabel <= 0x1fffU);
6568 // pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
6569 //
6570 //}
6571 }
6572 else
6573 {
6574 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
6575 pu32CodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
6576 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6577 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
6578 }
6579
6580#else
6581# error "Port me!"
6582#endif
6583 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6584 return off;
6585}
6586
6587
6588/**
6589 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
6590 * @a iGprSrc.
6591 *
6592 * @note On ARM64 the range is only +/-8191 instructions.
6593 */
6594DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6595 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6596{
6597 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
6598}
6599
6600
6601/**
6602 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
6603 * _set_ in @a iGprSrc.
6604 *
6605 * @note On ARM64 the range is only +/-8191 instructions.
6606 */
6607DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6608 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6609{
6610 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
6611}
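

/*
 * Usage sketch (illustrative): dispatching on a single flag bit.  The label
 * index would come from iemNativeLabelCreate(); idxRegEfl and idxLabel are
 * made-up names for the example.
 *
 * @code
 *      // if (idxRegEfl & RT_BIT_64(11)) goto label;  -- e.g. X86_EFL_OF_BIT
 *      off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxRegEfl, 11, idxLabel);
 * @endcode
 */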
6612
6613
6614/**
6615 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
6616 * flags accordingly.
6617 */
6618DECL_INLINE_THROW(uint32_t)
6619iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
6620{
6621 Assert(fBits != 0);
6622#ifdef RT_ARCH_AMD64
6623
6624 if (fBits >= UINT32_MAX)
6625 {
6626 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6627
6628 /* test Ev,Gv */
6629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
6631 pbCodeBuf[off++] = 0x85;
6632 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
6633
6634 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6635 }
6636 else if (fBits <= UINT32_MAX)
6637 {
6638 /* test Eb, imm8 or test Ev, imm32 */
6639 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6640 if (fBits <= UINT8_MAX)
6641 {
6642 if (iGprSrc >= 4)
6643 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6644 pbCodeBuf[off++] = 0xf6;
6645 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6646 pbCodeBuf[off++] = (uint8_t)fBits;
6647 }
6648 else
6649 {
6650 if (iGprSrc >= 8)
6651 pbCodeBuf[off++] = X86_OP_REX_B;
6652 pbCodeBuf[off++] = 0xf7;
6653 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6654 pbCodeBuf[off++] = RT_BYTE1(fBits);
6655 pbCodeBuf[off++] = RT_BYTE2(fBits);
6656 pbCodeBuf[off++] = RT_BYTE3(fBits);
6657 pbCodeBuf[off++] = RT_BYTE4(fBits);
6658 }
6659 }
6660 /** @todo implement me. */
6661 else
6662 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
6663
6664#elif defined(RT_ARCH_ARM64)
6665 uint32_t uImmR = 0;
6666 uint32_t uImmNandS = 0;
6667 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
6668 {
6669 /* ands xzr, iGprSrc, #fBits */
6670 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6671 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
6672 }
6673 else
6674 {
6675 /* ands xzr, iGprSrc, iTmpReg */
6676 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6677 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6678 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
6679 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6680 }
6681
6682#else
6683# error "Port me!"
6684#endif
6685 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6686 return off;
6687}
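

/*
 * Note on the ARM64 path above (informal): Armv8A64ConvertMask64ToImmRImmS()
 * only succeeds for masks the A64 logical immediate encoding can express,
 * i.e. a single run of contiguous set bits, replicated at a power-of-two
 * element size and possibly rotated.  Anything else costs a temporary
 * register, as sketched below with made-up mask values.
 *
 * @code
 *      // Encodable logical immediate - single ANDS instruction:
 *      off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, UINT64_C(0x00000000000000f0));
 *      // Not encodable (bits 0 and 8 only, no replicated run) - temp register fallback:
 *      off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, UINT64_C(0x0000000000000101));
 * @endcode
 */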
6688
6689
6690/**
6691 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
6692 * @a iGprSrc, setting CPU flags accordingly.
6693 *
6694 * @note For ARM64 this only supports @a fBits values that can be expressed
6695 * using the two 6-bit immediates of the ANDS instruction. The caller
6696 * must make sure this is possible!
6697 */
6698DECL_FORCE_INLINE_THROW(uint32_t)
6699iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
6700{
6701 Assert(fBits != 0);
6702
6703#ifdef RT_ARCH_AMD64
6704 if (fBits <= UINT8_MAX)
6705 {
6706 /* test Eb, imm8 */
6707 if (iGprSrc >= 4)
6708 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6709 pCodeBuf[off++] = 0xf6;
6710 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6711 pCodeBuf[off++] = (uint8_t)fBits;
6712 }
6713 else
6714 {
6715 /* test Ev, imm32 */
6716 if (iGprSrc >= 8)
6717 pCodeBuf[off++] = X86_OP_REX_B;
6718 pCodeBuf[off++] = 0xf7;
6719 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6720 pCodeBuf[off++] = RT_BYTE1(fBits);
6721 pCodeBuf[off++] = RT_BYTE2(fBits);
6722 pCodeBuf[off++] = RT_BYTE3(fBits);
6723 pCodeBuf[off++] = RT_BYTE4(fBits);
6724 }
6725
6726#elif defined(RT_ARCH_ARM64)
6727 /* ands xzr, src, #fBits */
6728 uint32_t uImmR = 0;
6729 uint32_t uImmNandS = 0;
6730 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6731 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6732 else
6733# ifdef IEM_WITH_THROW_CATCH
6734 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6735# else
6736 AssertReleaseFailedStmt(off = UINT32_MAX);
6737# endif
6738
6739#else
6740# error "Port me!"
6741#endif
6742 return off;
6743}
6744
6745
6746
6747/**
6748 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6749 * @a iGprSrc, setting CPU flags accordingly.
6750 *
6751 * @note For ARM64 this only supports @a fBits values that can be expressed
6752 * using the two 6-bit immediates of the ANDS instruction. The caller
6753 * must make sure this is possible!
6754 */
6755DECL_FORCE_INLINE_THROW(uint32_t)
6756iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6757{
6758 Assert(fBits != 0);
6759
6760#ifdef RT_ARCH_AMD64
6761 /* test Eb, imm8 */
6762 if (iGprSrc >= 4)
6763 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6764 pCodeBuf[off++] = 0xf6;
6765 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6766 pCodeBuf[off++] = fBits;
6767
6768#elif defined(RT_ARCH_ARM64)
6769 /* ands xzr, src, #fBits */
6770 uint32_t uImmR = 0;
6771 uint32_t uImmNandS = 0;
6772 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6773 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6774 else
6775# ifdef IEM_WITH_THROW_CATCH
6776 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6777# else
6778 AssertReleaseFailedStmt(off = UINT32_MAX);
6779# endif
6780
6781#else
6782# error "Port me!"
6783#endif
6784 return off;
6785}
6786
6787
6788/**
6789 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6790 * @a iGprSrc, setting CPU flags accordingly.
6791 */
6792DECL_INLINE_THROW(uint32_t)
6793iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6794{
6795 Assert(fBits != 0);
6796
6797#ifdef RT_ARCH_AMD64
6798 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
6799
6800#elif defined(RT_ARCH_ARM64)
6801 /* ands xzr, src, [tmp|#imm] */
6802 uint32_t uImmR = 0;
6803 uint32_t uImmNandS = 0;
6804 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6805 {
6806 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6807 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6808 }
6809 else
6810 {
6811 /* Use temporary register for the 64-bit immediate. */
6812 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6813 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6814 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
6815 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6816 }
6817
6818#else
6819# error "Port me!"
6820#endif
6821 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6822 return off;
6823}
6824
6825
6826/**
6827 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
6828 * are set in @a iGprSrc.
6829 */
6830DECL_INLINE_THROW(uint32_t)
6831iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6832 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
6833{
6834 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
6835
6836 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
6837 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6838
6839 return off;
6840}
6841
6842
6843/**
6844 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
6845 * are set in @a iGprSrc.
6846 */
6847DECL_INLINE_THROW(uint32_t)
6848iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6849 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
6850{
6851 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
6852
6853 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
6854 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6855
6856 return off;
6857}
6858
6859
6860/**
6861 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6862 *
6863 * The operand size is given by @a f64Bit.
6864 */
6865DECL_FORCE_INLINE_THROW(uint32_t)
6866iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6867 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
6868{
6869 Assert(idxLabel < pReNative->cLabels);
6870
6871#ifdef RT_ARCH_AMD64
6872 /* test reg32,reg32 / test reg64,reg64 */
6873 if (f64Bit)
6874 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
6875 else if (iGprSrc >= 8)
6876 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
6877 pCodeBuf[off++] = 0x85;
6878 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
6879
6880 /* jnz idxLabel */
6881 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
6882 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6883
6884#elif defined(RT_ARCH_ARM64)
6885 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6886 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
6887 iGprSrc, f64Bit);
6888 else
6889 {
6890 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6891 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
6892 }
6893
6894#else
6895# error "Port me!"
6896#endif
6897 return off;
6898}
6899
6900
6901/**
6902 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6903 *
6904 * The operand size is given by @a f64Bit.
6905 */
6906DECL_FORCE_INLINE_THROW(uint32_t)
6907iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6908 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
6909{
6910#ifdef RT_ARCH_AMD64
6911 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
6912 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
6913#elif defined(RT_ARCH_ARM64)
6914 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
6915 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
6916#else
6917# error "Port me!"
6918#endif
6919 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6920 return off;
6921}
6922
6923
6924/* if (Gpr1 == 0) Jmp idxLabel; */
6925
6926/**
6927 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6928 *
6929 * The operand size is given by @a f64Bit.
6930 */
6931DECL_FORCE_INLINE_THROW(uint32_t)
6932iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6933 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6934{
6935 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
6936 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
6937}
6938
6939
6940/**
6941 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6942 *
6943 * The operand size is given by @a f64Bit.
6944 */
6945DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6946 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6947{
6948 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
6949}
6950
6951
6952/**
6953 * Emits code that jumps to a new label if @a iGprSrc is zero.
6954 *
6955 * The operand size is given by @a f64Bit.
6956 */
6957DECL_INLINE_THROW(uint32_t)
6958iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
6959 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6960{
6961 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6962 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
6963}
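

/*
 * Usage sketch (illustrative): the create-and-jump convenience above in one
 * go; enmLabelType stands in for whatever IEMNATIVELABELTYPE the caller needs
 * a branch to, and idxRegRet is a made-up register name.
 *
 * @code
 *      // if (idxRegRet == 0) goto <new label of type enmLabelType>;
 *      off = iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(pReNative, off, idxRegRet,
 *                                                         true, enmLabelType); // f64Bit=true
 * @endcode
 */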
6964
6965
6966/* if (Gpr1 != 0) Jmp idxLabel; */
6967
6968/**
6969 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6970 *
6971 * The operand size is given by @a f64Bit.
6972 */
6973DECL_FORCE_INLINE_THROW(uint32_t)
6974iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6975 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6976{
6977 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
6978 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
6979}
6980
6981
6982/**
6983 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6984 *
6985 * The operand size is given by @a f64Bit.
6986 */
6987DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6988 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6989{
6990 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
6991}
6992
6993
6994/**
6995 * Emits code that jumps to a new label if @a iGprSrc is not zero.
6996 *
6997 * The operand size is given by @a f64Bit.
6998 */
6999DECL_INLINE_THROW(uint32_t)
7000iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7001 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7002{
7003 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7004 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7005}
7006
7007
7008/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7009
7010/**
7011 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7012 * differs.
7013 */
7014DECL_INLINE_THROW(uint32_t)
7015iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7016 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7017{
7018 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7019 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7020 return off;
7021}
7022
7023
7024/**
7025 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differs.
7026 */
7027DECL_INLINE_THROW(uint32_t)
7028iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7029 uint8_t iGprLeft, uint8_t iGprRight,
7030 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7031{
7032 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7033 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7034}
7035
7036
7037/* if (Gpr != Imm) Jmp idxLabel; */
7038
7039/**
7040 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7041 */
7042DECL_INLINE_THROW(uint32_t)
7043iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7044 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7045{
7046 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7047 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7048 return off;
7049}
7050
7051
7052/**
7053 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7054 */
7055DECL_INLINE_THROW(uint32_t)
7056iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7057 uint8_t iGprSrc, uint64_t uImm,
7058 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7059{
7060 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7061 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7062}
7063
7064
7065/**
7066 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7067 * @a uImm.
7068 */
7069DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7070 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7071{
7072 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7073 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7074 return off;
7075}
7076
7077
7078/**
7079 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7080 * @a uImm.
7081 */
7082DECL_INLINE_THROW(uint32_t)
7083iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7084 uint8_t iGprSrc, uint32_t uImm,
7085 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7086{
7087 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7088 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7089}
7090
7091
7092/**
7093 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7094 * @a uImm.
7095 */
7096DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7097 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7098{
7099 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7100 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7101 return off;
7102}
7103
7104
7105/**
7106 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7107 * @a uImm.
7108 */
7109DECL_INLINE_THROW(uint32_t)
7110iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7111 uint8_t iGprSrc, uint16_t uImm,
7112 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7113{
7114 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7115 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7116}
7117
7118
7119/* if (Gpr == Imm) Jmp idxLabel; */
7120
7121/**
7122 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7123 */
7124DECL_INLINE_THROW(uint32_t)
7125iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7126 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7127{
7128 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7129 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7130 return off;
7131}
7132
7133
7134/**
7135 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7136 */
7137DECL_INLINE_THROW(uint32_t)
7138iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7139 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7140{
7141 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7142 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7143}
7144
7145
7146/**
7147 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7148 */
7149DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7150 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7151{
7152 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7153 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7154 return off;
7155}
7156
7157
7158/**
7159 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7160 */
7161DECL_INLINE_THROW(uint32_t)
7162iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7163 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7164{
7165 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7166 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7167}
7168
7169
7170/**
7171 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7172 *
7173 * @note ARM64: Helper register is required (idxTmpReg).
7174 */
7175DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7176 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7177 uint8_t idxTmpReg = UINT8_MAX)
7178{
7179 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7180 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7181 return off;
7182}
7183
7184
7185/**
7186 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7187 *
7188 * @note ARM64: Helper register is required (idxTmpReg).
7189 */
7190DECL_INLINE_THROW(uint32_t)
7191iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7192 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7193 uint8_t idxTmpReg = UINT8_MAX)
7194{
7195 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7196 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7197}
7198
7199
7200/*********************************************************************************************************************************
7201* Calls. *
7202*********************************************************************************************************************************/
7203
7204/**
7205 * Emits a call to a 64-bit address.
7206 */
7207DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7208{
7209#ifdef RT_ARCH_AMD64
7210 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7211
7212 /* call rax */
7213 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7214 pbCodeBuf[off++] = 0xff;
7215 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7216
7217#elif defined(RT_ARCH_ARM64)
7218 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7219
7220 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7221 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7222
7223#else
7224# error "port me"
7225#endif
7226 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7227 return off;
7228}
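

/*
 * Usage sketch (illustrative): a typical helper call - marshal the arguments
 * into the ABI argument registers, then call.  The helper name and argument
 * value are made up; IEMNATIVE_CALL_ARGx_GREG and IEMNATIVE_REG_FIXED_PVMCPU
 * are defined in IEMN8veRecompiler.h.  Note that on AMD64 the emitter above
 * loads the target address into xAX, so xAX cannot carry a live argument.
 *
 * @code
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, uMyArgValue);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpMyHelper);
 * @endcode
 */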
7229
7230
7231/**
7232 * Emits code to load a stack variable into an argument GPR.
7233 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7234 */
7235DECL_FORCE_INLINE_THROW(uint32_t)
7236iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7237 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7238 bool fSpilledVarsInVolatileRegs = false)
7239{
7240 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7241 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7242 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7243
7244 uint8_t const idxRegVar = pVar->idxReg;
7245 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7246 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7247 || !fSpilledVarsInVolatileRegs ))
7248 {
7249 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7250 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7251 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7252 if (!offAddend)
7253 {
7254 if (idxRegArg != idxRegVar)
7255 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7256 }
7257 else
7258 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7259 }
7260 else
7261 {
7262 uint8_t const idxStackSlot = pVar->idxStackSlot;
7263 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7264 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7265 if (offAddend)
7266 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7267 }
7268 return off;
7269}
7270
7271
7272/**
7273 * Emits code to load a stack or immediate variable value into an argument GPR,
7274 * optionally with an addend.
7275 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7276 */
7277DECL_FORCE_INLINE_THROW(uint32_t)
7278iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7279 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7280 bool fSpilledVarsInVolatileRegs = false)
7281{
7282 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7283 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7284 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7285 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7286 else
7287 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7288 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7289 return off;
7290}
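

/*
 * Usage sketch (illustrative): feeding a MC variable to a helper argument
 * register with an addend applied on the fly; idxVarGCPtrMem is a made-up
 * variable index.  Immediate variables are folded into a single immediate
 * load, register/stack variables take the path above.
 *
 * @code
 *      off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
 *                                                      idxVarGCPtrMem, 1); // offAddend=1
 * @endcode
 */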
7291
7292
7293/**
7294 * Emits code to load the variable address into an argument GPR.
7295 *
7296 * This only works for uninitialized and stack variables.
7297 */
7298DECL_FORCE_INLINE_THROW(uint32_t)
7299iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7300 bool fFlushShadows)
7301{
7302 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7303 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7304 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7305 || pVar->enmKind == kIemNativeVarKind_Stack,
7306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7307
7308 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7309 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7310
7311 uint8_t const idxRegVar = pVar->idxReg;
7312#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7313 if ( idxRegVar != UINT8_MAX
7314 && pVar->fSimdReg)
7315 {
7316 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7317 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7318
7319 if (pVar->cbVar == sizeof(RTUINT128U))
7320 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
7321 else
7322 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
7323
7324 iemNativeSimdRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7325 Assert(pVar->idxReg == UINT8_MAX);
7326 }
7327 else
7328#endif
7329 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7330 {
7331 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7332 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7333 Assert(pVar->idxReg == UINT8_MAX);
7334 }
7335 Assert( pVar->idxStackSlot != UINT8_MAX
7336 && pVar->idxReg == UINT8_MAX);
7337
7338 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7339}
7340
7341
7342#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7343
7344/**
7345 * Emits a gprdst = ~gprsrc store.
7346 */
7347DECL_FORCE_INLINE_THROW(uint32_t)
7348iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7349{
7350#ifdef RT_ARCH_AMD64
7351 if (iGprDst != iGprSrc)
7352 {
7353 /* mov gprdst, gprsrc. */
7354 if (f64Bit)
7355 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7356 else
7357 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7358 }
7359
7360 /* not gprdst */
7361 if (f64Bit || iGprDst >= 8)
7362 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7363 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7364 pCodeBuf[off++] = 0xf7;
7365 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7366#elif defined(RT_ARCH_ARM64)
7367 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7368#else
7369# error "port me"
7370#endif
7371 return off;
7372}
7373
7374
7375/**
7376 * Emits a gprdst = ~gprsrc store.
7377 */
7378DECL_INLINE_THROW(uint32_t)
7379iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7380{
7381#ifdef RT_ARCH_AMD64
7382 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7383#elif defined(RT_ARCH_ARM64)
7384 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7385#else
7386# error "port me"
7387#endif
7388 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7389 return off;
7390}
7391
7392
7393/**
7394 * Emits a 128-bit vector register store to a VCpu value.
7395 */
7396DECL_FORCE_INLINE_THROW(uint32_t)
7397iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7398{
7399#ifdef RT_ARCH_AMD64
7400 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7401 pCodeBuf[off++] = 0x66;
7402 if (iVecReg >= 8)
7403 pCodeBuf[off++] = X86_OP_REX_R;
7404 pCodeBuf[off++] = 0x0f;
7405 pCodeBuf[off++] = 0x7f;
7406 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7407#elif defined(RT_ARCH_ARM64)
7408 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7409
7410#else
7411# error "port me"
7412#endif
7413 return off;
7414}
7415
7416
7417/**
7418 * Emits a 128-bit vector register store to a VCpu value.
7419 */
7420DECL_INLINE_THROW(uint32_t)
7421iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7422{
7423#ifdef RT_ARCH_AMD64
7424 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7425#elif defined(RT_ARCH_ARM64)
7426 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7427#else
7428# error "port me"
7429#endif
7430 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7431 return off;
7432}
7433
7434
7435/**
7436 * Emits a high 128-bit vector register store to a VCpu value.
7437 */
7438DECL_FORCE_INLINE_THROW(uint32_t)
7439iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7440{
7441#ifdef RT_ARCH_AMD64
7442 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7443 pCodeBuf[off++] = X86_OP_VEX3;
7444 if (iVecReg >= 8)
7445 pCodeBuf[off++] = 0x63;
7446 else
7447 pCodeBuf[off++] = 0xe3;
7448 pCodeBuf[off++] = 0x7d;
7449 pCodeBuf[off++] = 0x39;
7450 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7451 pCodeBuf[off++] = 0x01; /* Immediate */
7452#elif defined(RT_ARCH_ARM64)
7453 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7454#else
7455# error "port me"
7456#endif
7457 return off;
7458}
7459
7460
7461/**
7462 * Emits a high 128-bit vector register store to a VCpu value.
7463 */
7464DECL_INLINE_THROW(uint32_t)
7465iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7466{
7467#ifdef RT_ARCH_AMD64
7468 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7469#elif defined(RT_ARCH_ARM64)
7470 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7471 Assert(!(iVecReg & 0x1));
7472 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7473#else
7474# error "port me"
7475#endif
7476 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7477 return off;
7478}
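

/*
 * Usage sketch (illustrative): the low/high emitters above pair up to store a
 * full 256-bit value.  On AMD64 the high half uses vextracti128; on ARM64 the
 * value is assumed to live in an adjacent register pair (q<n>, q<n+1>), hence
 * the even-register assertion.  The offVCpu values are made up for the
 * example.
 *
 * @code
 *      off = iemNativeEmitSimdStoreVecRegToVCpuLowU128( pReNative, off, idxSimdReg, offVCpuYmmLo);
 *      off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxSimdReg, offVCpuYmmHi);
 * @endcode
 */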
7479
7480
7481/**
7482 * Emits a 128-bit vector register load of a VCpu value.
7483 */
7484DECL_FORCE_INLINE_THROW(uint32_t)
7485iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7486{
7487#ifdef RT_ARCH_AMD64
7488 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
7489 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7490 if (iVecReg >= 8)
7491 pCodeBuf[off++] = X86_OP_REX_R;
7492 pCodeBuf[off++] = 0x0f;
7493 pCodeBuf[off++] = 0x6f;
7494 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7495#elif defined(RT_ARCH_ARM64)
7496 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7497
7498#else
7499# error "port me"
7500#endif
7501 return off;
7502}
7503
7504
7505/**
7506 * Emits a 128-bit vector register load of a VCpu value.
7507 */
7508DECL_INLINE_THROW(uint32_t)
7509iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7510{
7511#ifdef RT_ARCH_AMD64
7512 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7513#elif defined(RT_ARCH_ARM64)
7514 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7515#else
7516# error "port me"
7517#endif
7518 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7519 return off;
7520}
7521
7522
7523/**
7524 * Emits a high 128-bit vector register load of a VCpu value.
7525 */
7526DECL_FORCE_INLINE_THROW(uint32_t)
7527iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7528{
7529#ifdef RT_ARCH_AMD64
7530 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
7531 pCodeBuf[off++] = X86_OP_VEX3;
7532 if (iVecReg >= 8)
7533 pCodeBuf[off++] = 0x63;
7534 else
7535 pCodeBuf[off++] = 0xe3;
7536 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
7537 pCodeBuf[off++] = 0x38;
7538 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7539 pCodeBuf[off++] = 0x01; /* Immediate */
7540#elif defined(RT_ARCH_ARM64)
7541 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7542#else
7543# error "port me"
7544#endif
7545 return off;
7546}
7547
7548
7549/**
7550 * Emits a high 128-bit vector register load of a VCpu value.
7551 */
7552DECL_INLINE_THROW(uint32_t)
7553iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7554{
7555#ifdef RT_ARCH_AMD64
7556 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7557#elif defined(RT_ARCH_ARM64)
7558 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7559 Assert(!(iVecReg & 0x1));
7560 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7561#else
7562# error "port me"
7563#endif
7564 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7565 return off;
7566}
7567
7568
7569/**
7570 * Emits a vecdst = vecsrc load.
7571 */
7572DECL_FORCE_INLINE(uint32_t)
7573iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7574{
7575#ifdef RT_ARCH_AMD64
7576 /* movdqu vecdst, vecsrc */
7577 pCodeBuf[off++] = 0xf3;
7578
7579 if ((iVecRegDst | iVecRegSrc) >= 8)
7580 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
7581 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
7582 : X86_OP_REX_R;
7583 pCodeBuf[off++] = 0x0f;
7584 pCodeBuf[off++] = 0x6f;
7585 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7586
7587#elif defined(RT_ARCH_ARM64)
7588 /* mov dst, src; alias for: orr dst, src, src */
7589 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
7590
7591#else
7592# error "port me"
7593#endif
7594 return off;
7595}
7596
7597
7598/**
7599 * Emits a vecdst = vecsrc load, 128-bit.
7600 */
7601DECL_INLINE_THROW(uint32_t)
7602iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7603{
7604#ifdef RT_ARCH_AMD64
7605 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
7606#elif defined(RT_ARCH_ARM64)
7607 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
7608#else
7609# error "port me"
7610#endif
7611 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7612 return off;
7613}
7614
7615
7616/**
7617 * Emits a vecdst = vecsrc load, 256-bit.
7618 */
7619DECL_INLINE_THROW(uint32_t)
7620iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7621{
7622#ifdef RT_ARCH_AMD64
7623 /* vmovdqa ymm, ymm */
7624 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7625 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
7626 {
7627 pbCodeBuf[off++] = X86_OP_VEX3;
7628 pbCodeBuf[off++] = 0x41;
7629 pbCodeBuf[off++] = 0x7d;
7630 pbCodeBuf[off++] = 0x6f;
7631 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7632 }
7633 else
7634 {
7635 pbCodeBuf[off++] = X86_OP_VEX2;
7636 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
7637 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
7638 pbCodeBuf[off++] = iVecRegSrc >= 8
7639 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
7640 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7641 }
7642#elif defined(RT_ARCH_ARM64)
7643 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7644 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
7645 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
7646 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
7647#else
7648# error "port me"
7649#endif
7650 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7651 return off;
7652}
7653
7654
7655/**
7656 * Emits a gprdst = vecsrc[x] load, 64-bit.
7657 */
7658DECL_FORCE_INLINE(uint32_t)
7659iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7660{
7661#ifdef RT_ARCH_AMD64
7662 if (iQWord >= 2)
7663 {
7664 /** @todo Currently not used. */
7665 AssertReleaseFailed();
7666 }
7667 else
7668 {
7669 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
7670 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7671 pCodeBuf[off++] = X86_OP_REX_W
7672 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7673 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7674 pCodeBuf[off++] = 0x0f;
7675 pCodeBuf[off++] = 0x3a;
7676 pCodeBuf[off++] = 0x16;
7677 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7678 pCodeBuf[off++] = iQWord;
7679 }
7680#elif defined(RT_ARCH_ARM64)
7681 /* umov gprdst, vecsrc[iQWord] */
7682 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
7683#else
7684# error "port me"
7685#endif
7686 return off;
7687}
7688
7689
7690/**
7691 * Emits a gprdst = vecsrc[x] load, 64-bit.
7692 */
7693DECL_INLINE_THROW(uint32_t)
7694iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7695{
7696 Assert(iQWord <= 3);
7697
7698#ifdef RT_ARCH_AMD64
7699 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
7700#elif defined(RT_ARCH_ARM64)
7701 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7702 Assert(!(iVecRegSrc & 0x1));
7703 /* Need to access the "high" 128-bit vector register. */
7704 if (iQWord >= 2)
7705 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
7706 else
7707 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
7708#else
7709# error "port me"
7710#endif
7711 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7712 return off;
7713}
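

/*
 * Usage sketch (illustrative): extracting element 1 (bits 127:64) of a vector
 * register into a GPR; idxRegDst and idxSimdRegSrc are made-up names.  Note
 * that iQWord 2..3 currently only works on ARM64, where it transparently
 * selects the high register of the (assumed) adjacent pair; the AMD64 path
 * asserts on it.
 *
 * @code
 *      off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegDst, idxSimdRegSrc, 1);
 * @endcode
 */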
7714
7715
7716/**
7717 * Emits a gprdst = vecsrc[x] load, 32-bit.
7718 */
7719DECL_FORCE_INLINE(uint32_t)
7720iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7721{
7722#ifdef RT_ARCH_AMD64
7723 if (iDWord >= 4)
7724 {
7725 /** @todo Currently not used. */
7726 AssertReleaseFailed();
7727 }
7728 else
7729 {
7730 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
7731 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7732 if (iGprDst >= 8 || iVecRegSrc >= 8)
7733 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7734 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7735 pCodeBuf[off++] = 0x0f;
7736 pCodeBuf[off++] = 0x3a;
7737 pCodeBuf[off++] = 0x16;
7738 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7739 pCodeBuf[off++] = iDWord;
7740 }
7741#elif defined(RT_ARCH_ARM64)
7742 /* umov gprdst, vecsrc[iDWord] */
7743 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
7744#else
7745# error "port me"
7746#endif
7747 return off;
7748}
7749
7750
7751/**
7752 * Emits a gprdst = vecsrc[x] load, 32-bit.
7753 */
7754DECL_INLINE_THROW(uint32_t)
7755iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7756{
7757 Assert(iDWord <= 7);
7758
7759#ifdef RT_ARCH_AMD64
7760 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
7761#elif defined(RT_ARCH_ARM64)
7762 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7763 Assert(!(iVecRegSrc & 0x1));
7764 /* Need to access the "high" 128-bit vector register. */
7765 if (iDWord >= 4)
7766 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
7767 else
7768 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
7769#else
7770# error "port me"
7771#endif
7772 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7773 return off;
7774}
7775
7776
7777/**
7778 * Emits a gprdst = vecsrc[x] load, 16-bit.
7779 */
7780DECL_FORCE_INLINE(uint32_t)
7781iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
7782{
7783#ifdef RT_ARCH_AMD64
7784 if (iWord >= 8)
7785 {
7786 /** @todo Currently not used. */
7787 AssertReleaseFailed();
7788 }
7789 else
7790 {
7791 /* pextrw gpr, vecsrc, #iWord */
7792 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7793 if (iGprDst >= 8 || iVecRegSrc >= 8)
7794 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
7795 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
7796 pCodeBuf[off++] = 0x0f;
7797 pCodeBuf[off++] = 0xc5;
7798 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
7799 pCodeBuf[off++] = iWord;
7800 }
7801#elif defined(RT_ARCH_ARM64)
7802 /* umov gprdst, vecsrc[iWord] */
7803 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
7804#else
7805# error "port me"
7806#endif
7807 return off;
7808}
7809
7810
7811/**
7812 * Emits a gprdst = vecsrc[x] load, 16-bit.
7813 */
7814DECL_INLINE_THROW(uint32_t)
7815iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
7816{
7817 Assert(iWord <= 15);
7818
7819#ifdef RT_ARCH_AMD64
7820 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
7821#elif defined(RT_ARCH_ARM64)
7822 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7823 Assert(!(iVecRegSrc & 0x1));
7824 /* Need to access the "high" 128-bit vector register. */
7825 if (iWord >= 8)
7826 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
7827 else
7828 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
7829#else
7830# error "port me"
7831#endif
7832 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7833 return off;
7834}
7835
7836
7837/**
7838 * Emits a gprdst = vecsrc[x] load, 8-bit.
7839 */
7840DECL_FORCE_INLINE(uint32_t)
7841iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
7842{
7843#ifdef RT_ARCH_AMD64
7844 if (iByte >= 16)
7845 {
7846 /** @todo Currently not used. */
7847 AssertReleaseFailed();
7848 }
7849 else
7850 {
7851 /* pextrb gpr, vecsrc, #iByte */
7852 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7853 if (iGprDst >= 8 || iVecRegSrc >= 8)
7854 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7855 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7856 pCodeBuf[off++] = 0x0f;
7857 pCodeBuf[off++] = 0x3a;
7858 pCodeBuf[off++] = 0x14;
7859 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7860 pCodeBuf[off++] = iByte;
7861 }
7862#elif defined(RT_ARCH_ARM64)
7863 /* umov gprdst, vecsrc[iByte] */
7864 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
7865#else
7866# error "port me"
7867#endif
7868 return off;
7869}
7870
7871
7872/**
7873 * Emits a gprdst = vecsrc[x] load, 8-bit.
7874 */
7875DECL_INLINE_THROW(uint32_t)
7876iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
7877{
7878 Assert(iByte <= 31);
7879
7880#ifdef RT_ARCH_AMD64
7881 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
7882#elif defined(RT_ARCH_ARM64)
7883 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7884 Assert(!(iVecRegSrc & 0x1));
7885 /* Need to access the "high" 128-bit vector register. */
7886 if (iByte >= 16)
7887 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
7888 else
7889 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
7890#else
7891# error "port me"
7892#endif
7893 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7894 return off;
7895}


/**
 * Emits a vecdst[x] = gprsrc store, 64-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
{
#ifdef RT_ARCH_AMD64
    /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = X86_OP_REX_W
                    | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                    | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = iQWord;
#elif defined(RT_ARCH_ARM64)
    /* ins vecdst[iQWord], gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
#else
# error "port me"
#endif
    return off;
}
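
/*
 * Worked encoding example: pinsrq xmm2, rax, 1 assembles to
 * 66 48 0F 3A 22 D0 01 (REX.W = 48 selects the 64-bit form; ModRM D0 =
 * mod=11, reg=xmm2, rm=rax).
 */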


/**
 * Emits a vecdst[x] = gprsrc store, 64-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
{
    Assert(iQWord <= 1);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
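
/*
 * Illustrative composition (hypothetical variable names): a 128-bit value
 * held in a GPR pair can be written with two 64-bit element stores.
 *
 *      off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdReg, idxGprLo, 0);
 *      off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdReg, idxGprHi, 1);
 */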


/**
 * Emits a vecdst[x] = gprsrc store, 32-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
{
#ifdef RT_ARCH_AMD64
    /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = iDWord;
#elif defined(RT_ARCH_ARM64)
    /* ins vecdst[iDWord], gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[x] = gprsrc store, 32-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
{
    Assert(iDWord <= 3);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst.au32[iDWord] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
{
    Assert(iDWord <= 7);

#ifdef RT_ARCH_AMD64
    /*
     * xor      tmp0, tmp0
     * pinsrd   xmm, tmp0, iDWord
     */
    if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
        pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pCodeBuf[off++] = 0x33;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(&pCodeBuf[off], off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* ins vecreg[iDWord], wzr */
    if (iDWord >= 4)
        pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
    else
        pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst.au32[iDWord] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
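
/*
 * Usage sketch (illustrative, hypothetical register index): clearing dword 1
 * of a host SIMD register shadowing a guest value:
 *
 *      off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdReg, 1);
 *
 * On AMD64 this emits an xor of the fixed temporary GPR followed by a pinsrd;
 * on ARM64 a single ins from WZR suffices.
 */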


/**
 * Emits a vecdst[0:127] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    /* pxor xmm, xmm */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecReg >= 8)
        pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xef;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* eor vecreg, vecreg, vecreg */
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
#else
# error "port me"
#endif
    return off;
}
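
/*
 * Worked encoding example: with iVecReg = 9 the AMD64 path produces
 * pxor xmm9, xmm9 = 66 45 0F EF C9 (REX 45 sets both R and B since reg
 * and rm both refer to xmm9; ModRM C9 = mod=11, reg=1, rm=1).
 */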


/**
 * Emits a vecdst[0:127] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst[128:255] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    /* vmovdqa xmm, xmm - the VEX.128 form clears the upper half of the ymm register. */
    if (iVecReg < 8)
    {
        pCodeBuf[off++] = X86_OP_VEX2;
        pCodeBuf[off++] = 0xf9;
    }
    else
    {
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = 0x41;
        pCodeBuf[off++] = 0x79;
    }
    pCodeBuf[off++] = 0x6f;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* eor vecreg+1, vecreg+1, vecreg+1 */
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[128:255] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
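
/*
 * Usage sketch (illustrative, hypothetical register index): zero-extending a
 * 128-bit result to the full 256-bit width, i.e. clearing bits [255:128]
 * while leaving the low lane untouched:
 *
 *      off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
 */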


/**
 * Emits a vecdst[0:255] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    /* vpxor ymm, ymm, ymm */
    if (iVecReg < 8)
    {
        pCodeBuf[off++] = X86_OP_VEX2;
        pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
    }
    else
    {
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
    }
    pCodeBuf[off++] = 0xef;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* eor vecreg, vecreg, vecreg */
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[0:255] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
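
/*
 * Worked encoding example: with iVecReg = 1 the two-byte VEX path produces
 * vpxor ymm1, ymm1, ymm1 = C5 F5 EF C9 (byte F5 encodes vvvv = ~1, L=1 for
 * the 256-bit form and pp=01 for the 66h prefix; ModRM C9 = mod=11, reg=1,
 * rm=1).
 */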


/**
 * Emits a vecdst = gprsrc broadcast, 8-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /* pinsrb vecdst, gpr, #0 (ASSUMES SSE4.1) */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x20;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
                      | 0x02 /* opcode map. */
                      | (  iVecRegDst >= 8
                         ? 0
                         : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x78;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 8-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 16-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /* pinsrw vecdst, gpr, #0 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xc4;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
                      | 0x02 /* opcode map. */
                      | (  iVecRegDst >= 8
                         ? 0
                         : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x79;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
#else
# error "port me"
#endif
    return off;
}
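
/*
 * Worked encoding example for the second step above: vpbroadcastw ymm0, xmm0
 * assembles to C4 E2 7D 79 C0 (three-byte VEX selecting opcode map 2; byte
 * 7D has L=1 for the 256-bit form and pp=01 for the 66h prefix; ModRM C0 =
 * mod=11, reg=0, rm=0).
 */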


/**
 * Emits a vecdst = gprsrc broadcast, 16-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 32-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
     *        vbroadcast needs a memory operand or another xmm register to work... */

    /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
                      | 0x02 /* opcode map. */
                      | (  iVecRegDst >= 8
                         ? 0
                         : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x58;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 32-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 64-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
     *        vbroadcast needs a memory operand or another xmm register to work... */

    /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = X86_OP_REX_W
                    | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                    | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
                      | 0x02 /* opcode map. */
                      | (  iVecRegDst >= 8
                         ? 0
                         : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x59;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 64-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
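
/*
 * Usage sketch (illustrative, hypothetical register indices): replicating a
 * 64-bit GPR value across a full 256-bit host register, e.g. when emulating
 * a register-source broadcast:
 *
 *      off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off,
 *                                                     idxSimdRegDst, idxGprSrc,
 *                                                     true);    (f256Bit = true)
 */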

#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */

/** @} */

#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
