VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@ 105318

Last change on this file since 105318 was 105318, checked in by vboxsync, 5 months ago

VMM/IEM: Tweak for really large TLBs; save an instruction on ARM64 when AND'ing with 32-bit constants that aren't more than 16 bits wide; update the iemNativeHlpCheckTlbLookup code and make it work on ARM64. bugref:10687

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 351.4 KB
1/* $Id: IEMN8veRecompilerEmit.h 105318 2024-07-13 00:53:36Z vboxsync $ */
2/** @file
3 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34#include "IEMN8veRecompiler.h"
35
36
37/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
38 * @ingroup grp_iem_n8ve_re
39 * @{
40 */
41
42/**
43 * Emit a simple marker instruction to more easily tell where something starts
44 * in the disassembly.
45 */
46DECL_INLINE_THROW(uint32_t)
47iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
48{
49#ifdef RT_ARCH_AMD64
50 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
51 if (uInfo == 0)
52 {
53 /* nop */
54 pbCodeBuf[off++] = 0x90;
55 }
56 else
57 {
58 /* nop [disp32] */
59 pbCodeBuf[off++] = 0x0f;
60 pbCodeBuf[off++] = 0x1f;
61 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
62 pbCodeBuf[off++] = RT_BYTE1(uInfo);
63 pbCodeBuf[off++] = RT_BYTE2(uInfo);
64 pbCodeBuf[off++] = RT_BYTE3(uInfo);
65 pbCodeBuf[off++] = RT_BYTE4(uInfo);
66 }
67#elif defined(RT_ARCH_ARM64)
68 /* nop */
69 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
70 if (uInfo == 0)
71 pu32CodeBuf[off++] = ARMV8_A64_INSTR_NOP;
72 else
73 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(ARMV8_A64_REG_XZR, (uint16_t)uInfo);
74
75 RT_NOREF(uInfo);
76#else
77# error "port me"
78#endif
79 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
80 return off;
81}
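/*
 * Usage sketch (illustrative, not from the original header): emitting a
 * marker with uInfo = 0x1234 produces the 7-byte 'nop dword [rip+0x1234]'
 * form 0F 1F 05 34 12 00 00 on AMD64 (uInfo sits little-endian in the
 * displacement field) and a single 'movz xzr, #0x1234' on ARM64, both
 * architectural no-ops that stand out in a disassembly:
 *
 *     off = iemNativeEmitMarker(pReNative, off, 0x1234);
 */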
82
83
84/**
85 * Emit a breakpoint instruction. The caller must ensure sufficient buffer space.
86 */
87DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
88{
89#ifdef RT_ARCH_AMD64
90 pCodeBuf[off++] = 0xcc;
91 RT_NOREF(uInfo); /** @todo use multibyte nop for info? */
92
93#elif defined(RT_ARCH_ARM64)
94 pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));
95
96#else
97# error "port me"
98#endif
99 return off;
100}
101
102
103/**
104 * Emit a breakpoint instruction.
105 */
106DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
107{
108#ifdef RT_ARCH_AMD64
109 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
110#elif defined(RT_ARCH_ARM64)
111 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
112#else
113# error "port me"
114#endif
115 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
116 return off;
117}
118
119
120/*********************************************************************************************************************************
121* Loads, Stores and Related Stuff. *
122*********************************************************************************************************************************/
123
124#ifdef RT_ARCH_AMD64
125/**
126 * Common bit of iemNativeEmitLoadGprByGpr and friends.
127 */
128DECL_FORCE_INLINE(uint32_t)
129iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
130{
131 if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
132 {
133 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
134 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
135 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
136 }
137 else if (offDisp == (int8_t)offDisp)
138 {
139 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
140 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
141 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
142 pbCodeBuf[off++] = (uint8_t)offDisp;
143 }
144 else
145 {
146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
147 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
148 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
149 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
150 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
151 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
152 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
153 }
154 return off;
155}
156#endif /* RT_ARCH_AMD64 */
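/*
 * Worked encoding example for the helper above (illustrative only): with
 * iGprReg=RAX, iGprBase=RBP and offDisp=0 the mod=00 form is unusable
 * (mod=00 r/m=101 means RIP-relative disp32 in 64-bit mode), so the helper
 * falls into the disp8 path and emits 45 00; with iGprBase=RSP it
 * additionally emits the SIB byte 24, since [RSP] is only encodable via a
 * SIB byte.
 */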
157
158/**
159 * Emits setting a GPR to zero.
160 */
161DECL_INLINE_THROW(uint32_t)
162iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
163{
164#ifdef RT_ARCH_AMD64
165 /* xor gpr32, gpr32 */
166 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
167 if (iGpr >= 8)
168 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
169 pbCodeBuf[off++] = 0x33;
170 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
171
172#elif defined(RT_ARCH_ARM64)
173 /* mov gpr, #0x0 */
174 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
175 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;
176
177#else
178# error "port me"
179#endif
180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
181 return off;
182}
183
184
185/**
186 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
187 * buffer space.
188 *
189 * Max buffer consumption:
190 * - AMD64: 6 instruction bytes.
191 * - ARM64: 2 instruction words (8 bytes).
192 *
193 * @note The top 32 bits will be cleared.
194 */
195DECL_FORCE_INLINE(uint32_t)
196iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
197{
198#ifdef RT_ARCH_AMD64
199 if (uImm32 == 0)
200 {
201 /* xor gpr, gpr */
202 if (iGpr >= 8)
203 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
204 pCodeBuf[off++] = 0x33;
205 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
206 }
207 else
208 {
209 /* mov gpr, imm32 */
210 if (iGpr >= 8)
211 pCodeBuf[off++] = X86_OP_REX_B;
212 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
213 pCodeBuf[off++] = RT_BYTE1(uImm32);
214 pCodeBuf[off++] = RT_BYTE2(uImm32);
215 pCodeBuf[off++] = RT_BYTE3(uImm32);
216 pCodeBuf[off++] = RT_BYTE4(uImm32);
217 }
218
219#elif defined(RT_ARCH_ARM64)
220 if ((uImm32 >> 16) == 0)
221 /* movz gpr, imm16 */
222 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
223 else if ((uImm32 & UINT32_C(0xffff)) == 0)
224 /* movz gpr, imm16, lsl #16 */
225 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
226 else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
227 /* movn gpr, imm16, lsl #16 */
228 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
229 else if ((uImm32 >> 16) == UINT32_C(0xffff))
230 /* movn gpr, imm16 */
231 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
232 else
233 {
234 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
235 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
236 }
237
238#else
239# error "port me"
240#endif
241 return off;
242}
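/*
 * Instruction selection examples for the 32-bit loader above (illustrative
 * only):
 *     0x0000beef -> movz w, #0xbeef
 *     0xbeef0000 -> movz w, #0xbeef, lsl #16
 *     0x1234ffff -> movn w, #0xedcb, lsl #16  (~0xedcb0000 == 0x1234ffff)
 *     0xffff1234 -> movn w, #0xedcb           (~0x0000edcb == 0xffff1234)
 *     0x12345678 -> movz w, #0x5678; movk w, #0x1234, lsl #16
 */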
243
244
245/**
246 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
247 * buffer space.
248 *
249 * Max buffer consumption:
250 * - AMD64: 10 instruction bytes.
251 * - ARM64: 4 instruction words (16 bytes).
252 */
253DECL_FORCE_INLINE(uint32_t)
254iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
255{
256#ifdef RT_ARCH_AMD64
257 if (uImm64 == 0)
258 {
259 /* xor gpr, gpr */
260 if (iGpr >= 8)
261 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
262 pCodeBuf[off++] = 0x33;
263 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
264 }
265 else if (uImm64 <= UINT32_MAX)
266 {
267 /* mov gpr, imm32 */
268 if (iGpr >= 8)
269 pCodeBuf[off++] = X86_OP_REX_B;
270 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
271 pCodeBuf[off++] = RT_BYTE1(uImm64);
272 pCodeBuf[off++] = RT_BYTE2(uImm64);
273 pCodeBuf[off++] = RT_BYTE3(uImm64);
274 pCodeBuf[off++] = RT_BYTE4(uImm64);
275 }
276 else if (uImm64 == (uint64_t)(int32_t)uImm64)
277 {
278 /* mov gpr, sx(imm32) */
279 if (iGpr < 8)
280 pCodeBuf[off++] = X86_OP_REX_W;
281 else
282 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
283 pCodeBuf[off++] = 0xc7;
284 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
285 pCodeBuf[off++] = RT_BYTE1(uImm64);
286 pCodeBuf[off++] = RT_BYTE2(uImm64);
287 pCodeBuf[off++] = RT_BYTE3(uImm64);
288 pCodeBuf[off++] = RT_BYTE4(uImm64);
289 }
290 else
291 {
292 /* mov gpr, imm64 */
293 if (iGpr < 8)
294 pCodeBuf[off++] = X86_OP_REX_W;
295 else
296 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
297 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
298 pCodeBuf[off++] = RT_BYTE1(uImm64);
299 pCodeBuf[off++] = RT_BYTE2(uImm64);
300 pCodeBuf[off++] = RT_BYTE3(uImm64);
301 pCodeBuf[off++] = RT_BYTE4(uImm64);
302 pCodeBuf[off++] = RT_BYTE5(uImm64);
303 pCodeBuf[off++] = RT_BYTE6(uImm64);
304 pCodeBuf[off++] = RT_BYTE7(uImm64);
305 pCodeBuf[off++] = RT_BYTE8(uImm64);
306 }
307
308#elif defined(RT_ARCH_ARM64)
309 /*
310 * Quick simplification: Do 32-bit load if top half is zero.
311 */
312 if (uImm64 <= UINT32_MAX)
313 return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);
314
315 /*
316 * We need to start this sequence with a 'movz gpr, imm16, lsl #x' and
317 * supply the remaining bits using 'movk gpr, imm16, lsl #x'.
318 *
319 * The movz instruction is encoded as 0xd2800000 + shift + imm16 + gpr,
320 * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
321 * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
322 * after the first non-zero immediate component, switching to movk for
323 * the remainder.
324 */
325 unsigned cZeroHalfWords = !( uImm64 & UINT16_MAX)
326 + !((uImm64 >> 16) & UINT16_MAX)
327 + !((uImm64 >> 32) & UINT16_MAX)
328 + !((uImm64 >> 48) & UINT16_MAX);
329 unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
330 : ( (uImm64 & UINT16_MAX) == UINT16_MAX)
331 + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
332 + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
333 + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
334 if (cFfffHalfWords <= cZeroHalfWords)
335 {
336 uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
337
338 /* movz gpr, imm16 */
339 uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
340 if (uImmPart || cZeroHalfWords == 4)
341 {
342 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
343 fMovBase |= RT_BIT_32(29);
344 }
345 /* mov[z/k] gpr, imm16, lsl #16 */
346 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
347 if (uImmPart)
348 {
349 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
350 fMovBase |= RT_BIT_32(29);
351 }
352 /* mov[z/k] gpr, imm16, lsl #32 */
353 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
354 if (uImmPart)
355 {
356 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
357 fMovBase |= RT_BIT_32(29);
358 }
359 /* mov[z/k] gpr, imm16, lsl #48 */
360 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
361 if (uImmPart)
362 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
363 }
364 else
365 {
366 uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
367
368 /* find the first half-word that isn't UINT16_MAX. */
369 uint32_t const iHwNotFfff = (uImm64 & UINT16_MAX) != UINT16_MAX ? 0
370 : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
371 : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
372
373 /* movn gpr, imm16, lsl #iHwNotFfff*16 */
374 uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
375 pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
376 fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
377 /* movk gpr, imm16 */
378 if (iHwNotFfff != 0)
379 {
380 uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
381 if (uImmPart != UINT32_C(0xffff))
382 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
383 }
384 /* movk gpr, imm16, lsl #16 */
385 if (iHwNotFfff != 1)
386 {
387 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
388 if (uImmPart != UINT32_C(0xffff))
389 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
390 }
391 /* movk gpr, imm16, lsl #32 */
392 if (iHwNotFfff != 2)
393 {
394 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
395 if (uImmPart != UINT32_C(0xffff))
396 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
397 }
398 /* movk gpr, imm16, lsl #48 */
399 if (iHwNotFfff != 3)
400 {
401 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
402 if (uImmPart != UINT32_C(0xffff))
403 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
404 }
405 }
406
407#else
408# error "port me"
409#endif
410 return off;
411}
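/*
 * Worked example for the 64-bit loader above (illustrative only): for
 * uImm64 = 0x0000ffff00001234 two half-words are zero, so the movz path
 * wins and only two instructions are needed:
 *     movz x, #0x1234
 *     movk x, #0xffff, lsl #32
 * whereas uImm64 = 0xffffffff12345678 has two 0xffff half-words and no
 * zero ones, so the movn path is taken instead:
 *     movn x, #0xa987             (yields 0xffffffffffff5678)
 *     movk x, #0x1234, lsl #16    (yields 0xffffffff12345678)
 */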
412
413
414/**
415 * Emits loading a constant into a 64-bit GPR.
416 */
417DECL_INLINE_THROW(uint32_t)
418iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
419{
420#ifdef RT_ARCH_AMD64
421 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
422#elif defined(RT_ARCH_ARM64)
423 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
424#else
425# error "port me"
426#endif
427 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
428 return off;
429}
430
431
432/**
433 * Emits loading a constant into a 32-bit GPR.
434 * @note The top 32 bits will be cleared.
435 */
436DECL_INLINE_THROW(uint32_t)
437iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
438{
439#ifdef RT_ARCH_AMD64
440 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
441#elif defined(RT_ARCH_ARM64)
442 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
443#else
444# error "port me"
445#endif
446 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
447 return off;
448}
449
450
451/**
452 * Emits loading a constant into an 8-bit GPR.
453 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
454 * only the ARM64 version does that.
455 */
456DECL_INLINE_THROW(uint32_t)
457iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
458{
459#ifdef RT_ARCH_AMD64
460 /* mov gpr, imm8 */
461 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
462 if (iGpr >= 8)
463 pbCodeBuf[off++] = X86_OP_REX_B;
464 else if (iGpr >= 4)
465 pbCodeBuf[off++] = X86_OP_REX;
466 pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
467 pbCodeBuf[off++] = RT_BYTE1(uImm8);
468
469#elif defined(RT_ARCH_ARM64)
470 /* movz gpr, imm16, lsl #0 */
471 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
472 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;
473
474#else
475# error "port me"
476#endif
477 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
478 return off;
479}
480
481
482#ifdef RT_ARCH_AMD64
483/**
484 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
485 */
486DECL_FORCE_INLINE(uint32_t)
487iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
488{
489 if (offVCpu < 128)
490 {
491 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
492 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
493 }
494 else
495 {
496 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
497 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
498 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
499 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
500 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
501 }
502 return off;
503}
504
505#elif defined(RT_ARCH_ARM64)
506
507/**
508 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
509 *
510 * @note Loads can use @a iGprReg for large offsets, stores require a temporary
511 * register (@a iGprTmp).
512 * @note DON'T try this with prefetch.
513 */
514DECL_FORCE_INLINE_THROW(uint32_t)
515iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
516 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
517{
518 /*
519 * There are a couple of ldr variants that take an immediate offset, so we
520 * try to use those when we can; otherwise we have to use a temporary register
521 * to help with the addressing.
522 */
523 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
524 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
525 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
526 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
527 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
528 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
529 else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
530 {
531 /* The offset is too large, so we must load it into a register and use
532 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
533 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
534 if (iGprTmp == UINT8_MAX)
535 iGprTmp = iGprReg;
536 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
537 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
538 }
539 else
540# ifdef IEM_WITH_THROW_CATCH
541 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
542# else
543 AssertReleaseFailedStmt(off = UINT32_MAX);
544# endif
545
546 return off;
547}
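/*
 * Addressing examples for the helper above (illustrative only): with
 * cbData = 8 the scaled 12-bit immediate form covers offsets up to 0x7ff8,
 * so offVCpu = 0x100 becomes 'ldr x, [pVCpu, #0x100]'; larger offsets that
 * land inside cpum.GstCtx are rebased onto the fixed pCpumCtx register;
 * anything else costs a movz/movk immediate load plus a register-indexed
 * 'ldr x, [pVCpu, xTmp]'.
 */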
548
549/**
550 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
551 */
552DECL_FORCE_INLINE_THROW(uint32_t)
553iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
554 uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
555{
556 /*
557 * There are a couple of ldr variants that take an immediate offset, so we
558 * try to use those when we can; otherwise we have to use a temporary register
559 * to help with the addressing.
560 */
561 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
562 {
563 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
564 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
565 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
566 }
567 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
568 {
569 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
570 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
571 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
572 }
573 else
574 {
575 /* The offset is too large, so we must load it into a register and use
576 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
577 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
578 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
579 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
580 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
581 IEMNATIVE_REG_FIXED_TMP0);
582 }
583 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
584 return off;
585}
586
587#endif /* RT_ARCH_ARM64 */
588
589
590/**
591 * Emits a 64-bit GPR load of a VCpu value.
592 */
593DECL_FORCE_INLINE_THROW(uint32_t)
594iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
595{
596#ifdef RT_ARCH_AMD64
597 /* mov reg64, mem64 */
598 if (iGpr < 8)
599 pCodeBuf[off++] = X86_OP_REX_W;
600 else
601 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
602 pCodeBuf[off++] = 0x8b;
603 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
604
605#elif defined(RT_ARCH_ARM64)
606 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
607
608#else
609# error "port me"
610#endif
611 return off;
612}
613
614
615/**
616 * Emits a 64-bit GPR load of a VCpu value.
617 */
618DECL_INLINE_THROW(uint32_t)
619iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
620{
621#ifdef RT_ARCH_AMD64
622 off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
623 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
624
625#elif defined(RT_ARCH_ARM64)
626 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
627
628#else
629# error "port me"
630#endif
631 return off;
632}
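/*
 * Usage sketch (illustrative only; the member name is hypothetical):
 *
 *     off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxReg,
 *                                           RT_UOFFSETOF(VMCPU, iem.s.uSomeU64Member));
 */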
633
634/**
635 * Emits a 32-bit GPR load of a VCpu value.
636 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
637 */
638DECL_INLINE_THROW(uint32_t)
639iemNativeEmitLoadGprFromVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
640{
641#ifdef RT_ARCH_AMD64
642 /* mov reg32, mem32 */
643 if (iGpr >= 8)
644 pCodeBuf[off++] = X86_OP_REX_R;
645 pCodeBuf[off++] = 0x8b;
646 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
647
648#elif defined(RT_ARCH_ARM64)
649 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
650
651#else
652# error "port me"
653#endif
654 return off;
655}
656
657
658/**
659 * Emits a 32-bit GPR load of a VCpu value.
660 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
661 */
662DECL_INLINE_THROW(uint32_t)
663iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
664{
665#ifdef RT_ARCH_AMD64
666 off = iemNativeEmitLoadGprFromVCpuU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
667 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
668
669#elif defined(RT_ARCH_ARM64)
670 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
671
672#else
673# error "port me"
674#endif
675 return off;
676}
677
678
679/**
680 * Emits a 16-bit GPR load of a VCpu value.
681 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
682 */
683DECL_INLINE_THROW(uint32_t)
684iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
685{
686#ifdef RT_ARCH_AMD64
687 /* movzx reg32, mem16 */
688 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
689 if (iGpr >= 8)
690 pbCodeBuf[off++] = X86_OP_REX_R;
691 pbCodeBuf[off++] = 0x0f;
692 pbCodeBuf[off++] = 0xb7;
693 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
694 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
695
696#elif defined(RT_ARCH_ARM64)
697 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
698
699#else
700# error "port me"
701#endif
702 return off;
703}
704
705
706/**
707 * Emits an 8-bit GPR load of a VCpu value.
708 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
709 */
710DECL_INLINE_THROW(uint32_t)
711iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
712{
713#ifdef RT_ARCH_AMD64
714 /* movzx reg32, mem8 */
715 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
716 if (iGpr >= 8)
717 pbCodeBuf[off++] = X86_OP_REX_R;
718 pbCodeBuf[off++] = 0x0f;
719 pbCodeBuf[off++] = 0xb6;
720 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
721 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
722
723#elif defined(RT_ARCH_ARM64)
724 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
725
726#else
727# error "port me"
728#endif
729 return off;
730}
731
732
733/**
734 * Emits a store of a GPR value to a 64-bit VCpu field.
735 */
736DECL_FORCE_INLINE_THROW(uint32_t)
737iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
738 uint8_t iGprTmp = UINT8_MAX)
739{
740#ifdef RT_ARCH_AMD64
741 /* mov mem64, reg64 */
742 if (iGpr < 8)
743 pCodeBuf[off++] = X86_OP_REX_W;
744 else
745 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
746 pCodeBuf[off++] = 0x89;
747 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
748 RT_NOREF(iGprTmp);
749
750#elif defined(RT_ARCH_ARM64)
751 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
752
753#else
754# error "port me"
755#endif
756 return off;
757}
758
759
760/**
761 * Emits a store of a GPR value to a 64-bit VCpu field.
762 */
763DECL_INLINE_THROW(uint32_t)
764iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
765{
766#ifdef RT_ARCH_AMD64
767 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
768#elif defined(RT_ARCH_ARM64)
769 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
770 IEMNATIVE_REG_FIXED_TMP0);
771#else
772# error "port me"
773#endif
774 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
775 return off;
776}
777
778
779/**
780 * Emits a store of a GPR value to a 32-bit VCpu field.
781 */
782DECL_INLINE_THROW(uint32_t)
783iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
784{
785#ifdef RT_ARCH_AMD64
786 /* mov mem32, reg32 */
787 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
788 if (iGpr >= 8)
789 pbCodeBuf[off++] = X86_OP_REX_R;
790 pbCodeBuf[off++] = 0x89;
791 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
792 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
793
794#elif defined(RT_ARCH_ARM64)
795 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
796
797#else
798# error "port me"
799#endif
800 return off;
801}
802
803
804/**
805 * Emits a store of a GPR value to a 16-bit VCpu field.
806 */
807DECL_INLINE_THROW(uint32_t)
808iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
809{
810#ifdef RT_ARCH_AMD64
811 /* mov mem16, reg16 */
812 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
813 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
814 if (iGpr >= 8)
815 pbCodeBuf[off++] = X86_OP_REX_R;
816 pbCodeBuf[off++] = 0x89;
817 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
818 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
819
820#elif defined(RT_ARCH_ARM64)
821 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));
822
823#else
824# error "port me"
825#endif
826 return off;
827}
828
829
830/**
831 * Emits a store of a GPR value to an 8-bit VCpu field.
832 */
833DECL_INLINE_THROW(uint32_t)
834iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
835{
836#ifdef RT_ARCH_AMD64
837 /* mov mem8, reg8 */
838 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
839 if (iGpr >= 8)
840 pbCodeBuf[off++] = X86_OP_REX_R;
841 pbCodeBuf[off++] = 0x88;
842 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
843 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
844
845#elif defined(RT_ARCH_ARM64)
846 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
847
848#else
849# error "port me"
850#endif
851 return off;
852}
853
854
855/**
856 * Emits a store of an immediate value to a 64-bit VCpu field.
857 *
858 * @note Will allocate temporary registers on both ARM64 and AMD64.
859 */
860DECL_FORCE_INLINE_THROW(uint32_t)
861iemNativeEmitStoreImmToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm, uint32_t offVCpu)
862{
863#ifdef RT_ARCH_AMD64
864 /* Load the immediate into a temp register, then 'mov mem64, reg64'. */
865 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
866 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxRegImm, offVCpu);
867 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
868 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
869
870#elif defined(RT_ARCH_ARM64)
871 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
872 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
873 if (idxRegImm != ARMV8_A64_REG_XZR)
874 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
875
876#else
877# error "port me"
878#endif
879 return off;
880}
881
882
883/**
884 * Emits a store of an immediate value to a 32-bit VCpu field.
885 *
886 * @note ARM64: Will allocate temporary registers.
887 */
888DECL_FORCE_INLINE_THROW(uint32_t)
889iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
890{
891#ifdef RT_ARCH_AMD64
892 /* mov mem32, imm32 */
893 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
894 pCodeBuf[off++] = 0xc7;
895 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
896 pCodeBuf[off++] = RT_BYTE1(uImm);
897 pCodeBuf[off++] = RT_BYTE2(uImm);
898 pCodeBuf[off++] = RT_BYTE3(uImm);
899 pCodeBuf[off++] = RT_BYTE4(uImm);
900 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
901
902#elif defined(RT_ARCH_ARM64)
903 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
904 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
905 if (idxRegImm != ARMV8_A64_REG_XZR)
906 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
907
908#else
909# error "port me"
910#endif
911 return off;
912}
913
914
915
916/**
917 * Emits a store of an immediate value to a 16-bit VCpu field.
918 *
919 * @note ARM64: @a idxTmp1 is always required! Whether @a idxTmp2 is needed
920 * depends on whether the offset can be encoded as an immediate. The
921 * @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same from CPUMCTX.
922 */
923DECL_FORCE_INLINE_THROW(uint32_t)
924iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
925 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
926{
927#ifdef RT_ARCH_AMD64
928 /* mov mem16, imm16 */
929 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
930 pCodeBuf[off++] = 0xc7;
931 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
932 pCodeBuf[off++] = RT_BYTE1(uImm);
933 pCodeBuf[off++] = RT_BYTE2(uImm);
934 RT_NOREF(idxTmp1, idxTmp2);
935
936#elif defined(RT_ARCH_ARM64)
937 if (idxTmp1 != UINT8_MAX)
938 {
939 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
940 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
941 sizeof(uint16_t), idxTmp2);
942 }
943 else
944# ifdef IEM_WITH_THROW_CATCH
945 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
946# else
947 AssertReleaseFailedStmt(off = UINT32_MAX);
948# endif
949
950#else
951# error "port me"
952#endif
953 return off;
954}
955
956
957/**
958 * Emits a store of an immediate value to a 8-bit VCpu field.
959 */
960DECL_INLINE_THROW(uint32_t)
961iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
962{
963#ifdef RT_ARCH_AMD64
964 /* mov mem8, imm8 */
965 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
966 pbCodeBuf[off++] = 0xc6;
967 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
968 pbCodeBuf[off++] = bImm;
969 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
970
971#elif defined(RT_ARCH_ARM64)
972 /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
973 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
974 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
975 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
976
977#else
978# error "port me"
979#endif
980 return off;
981}
982
983
984/**
985 * Emits a load of the effective address of a VCpu field into a GPR.
986 */
987DECL_INLINE_THROW(uint32_t)
988iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
989{
990#ifdef RT_ARCH_AMD64
991 /* lea gprdst, [rbx + offDisp] */
992 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
993 if (iGprDst < 8)
994 pbCodeBuf[off++] = X86_OP_REX_W;
995 else
996 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
997 pbCodeBuf[off++] = 0x8d;
998 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);
999
1000#elif defined(RT_ARCH_ARM64)
1001 if (offVCpu < (unsigned)_4K)
1002 {
1003 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1004 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
1005 }
1006 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
1007 {
1008 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1009 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
1010 offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
1011 }
1012 else if (offVCpu <= 0xffffffU)
1013 {
1014 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1015 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu >> 12,
1016 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1017 if (offVCpu & 0xfffU)
1018 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, offVCpu & 0xfff);
1019 }
1020 else
1021 {
1022 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
1023 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
1024 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1025 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
1026 }
1027
1028#else
1029# error "port me"
1030#endif
1031 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1032 return off;
1033}
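/*
 * Worked example for the LEA above (illustrative only): on ARM64 an offset
 * of 0x12345 exceeds the uimm12 range, so it is split into
 * 'add dst, pVCpu, #0x12, lsl #12' followed by 'add dst, dst, #0x345'.
 */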
1034
1035
1036/** This is just a typesafe alternative to RT_UOFFSETOF. */
1037DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
1038{
1039 uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
1040 Assert(off < sizeof(VMCPU));
1041 return off;
1042}
1043
1044
1045/** This is just a typesafe alternative to RT_UOFFSETOF. */
1046DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
1047{
1048 uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
1049 Assert(off < sizeof(VMCPU));
1050 return off;
1051}
1052
1053
1054/**
1055 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1056 *
1057 * @note The two temp registers are not required for AMD64. ARM64 always
1058 * requires the first, and the 2nd is needed if the offset cannot be
1059 * encoded as an immediate.
1060 */
1061DECL_FORCE_INLINE(uint32_t)
1062iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1063{
1064#ifdef RT_ARCH_AMD64
1065 /* inc qword [pVCpu + off] */
1066 pCodeBuf[off++] = X86_OP_REX_W;
1067 pCodeBuf[off++] = 0xff;
1068 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1069 RT_NOREF(idxTmp1, idxTmp2);
1070
1071#elif defined(RT_ARCH_ARM64)
1072 /* Determine how we're to access pVCpu first. */
1073 uint32_t const cbData = sizeof(STAMCOUNTER);
1074 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
1075 {
1076 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1077 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1078 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1079 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1080 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1081 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1082 }
1083 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
1084 {
1085 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1086 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1087 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1088 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1089 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1090 }
1091 else
1092 {
1093 /* The offset is too large, so we must load it into a register and use
1094 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
1095 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1096 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1097 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1098 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1099 }
1100
1101#else
1102# error "port me"
1103#endif
1104 return off;
1105}
1106
1107
1108/**
1109 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1110 *
1111 * @note The two temp registers are not required for AMD64. ARM64 always
1112 * requires the first, and the 2nd is needed if the offset cannot be
1113 * encoded as an immediate.
1114 */
1115DECL_FORCE_INLINE(uint32_t)
1116iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1117{
1118#ifdef RT_ARCH_AMD64
1119 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
1120#elif defined(RT_ARCH_ARM64)
1121 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1122#else
1123# error "port me"
1124#endif
1125 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1126 return off;
1127}
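/*
 * Usage sketch (illustrative only; the statistics member is hypothetical).
 * The offset can be given directly or derived from a counter pointer via
 * the typesafe helper above:
 *
 *     off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
 *                                             RT_UOFFSETOF(VMCPU, iem.s.StatSomeEvent));
 *     off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
 *                                             iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, pCounter));
 */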
1128
1129
1130/**
1131 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1132 *
1133 * @note The two temp registers are not required for AMD64. ARM64 always
1134 * requires the first, and the 2nd is needed if the offset cannot be
1135 * encoded as an immediate.
1136 */
1137DECL_FORCE_INLINE(uint32_t)
1138iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1139{
1140 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1141#ifdef RT_ARCH_AMD64
1142 /* inc dword [pVCpu + offVCpu] */
1143 pCodeBuf[off++] = 0xff;
1144 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1145 RT_NOREF(idxTmp1, idxTmp2);
1146
1147#elif defined(RT_ARCH_ARM64)
1148 /* Determine how we're to access pVCpu first. */
1149 uint32_t const cbData = sizeof(uint32_t);
1150 if (offVCpu < (unsigned)(_4K * cbData))
1151 {
1152 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1153 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
1154 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1155 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1156 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
1157 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1158 }
1159 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1160 {
1161 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1162 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1163 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1164 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1165 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1166 }
1167 else
1168 {
1169 /* The offset is too large, so we must load it into a register and use
1170 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL #2' feature
1171 of the instruction if that reduces the constant to 16 bits. */
1172 if (offVCpu / cbData < (unsigned)UINT16_MAX)
1173 {
1174 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
1175 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1176 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1177 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1178 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1179 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1180 }
1181 else
1182 {
1183 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1184 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1185 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1186 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1187 }
1188 }
1189
1190#else
1191# error "port me"
1192#endif
1193 return off;
1194}
1195
1196
1197/**
1198 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1199 *
1200 * @note The two temp registers are not required for AMD64. ARM64 always
1201 * requires the first, and the 2nd is needed if the offset cannot be
1202 * encoded as an immediate.
1203 */
1204DECL_FORCE_INLINE(uint32_t)
1205iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1206{
1207#ifdef RT_ARCH_AMD64
1208 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
1209#elif defined(RT_ARCH_ARM64)
1210 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1211#else
1212# error "port me"
1213#endif
1214 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1215 return off;
1216}
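/*
 * Example of the 'LSL #2' trick used above (illustrative only): a byte
 * offset of 0x20000 is out of immediate range, but 0x20000 / 4 = 0x8000
 * fits in 16 bits, so a single 'movz wTmp2, #0x8000' plus a
 * register-indexed access with 'LSL #2' extension does the job instead of
 * a full movz/movk immediate load.
 */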
1217
1218
1219/**
1220 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
1221 *
1222 * @note May allocate temporary registers (not AMD64).
1223 */
1224DECL_FORCE_INLINE(uint32_t)
1225iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1226{
1227 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1228#ifdef RT_ARCH_AMD64
1229 /* or dword [pVCpu + offVCpu], imm8/32 */
1230 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1231 if (fMask < 0x80)
1232 {
1233 pCodeBuf[off++] = 0x83;
1234 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1235 pCodeBuf[off++] = (uint8_t)fMask;
1236 }
1237 else
1238 {
1239 pCodeBuf[off++] = 0x81;
1240 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1241 pCodeBuf[off++] = RT_BYTE1(fMask);
1242 pCodeBuf[off++] = RT_BYTE2(fMask);
1243 pCodeBuf[off++] = RT_BYTE3(fMask);
1244 pCodeBuf[off++] = RT_BYTE4(fMask);
1245 }
1246
1247#elif defined(RT_ARCH_ARM64)
1248 /* If the constant is unwieldy we'll need a register to hold it as well. */
1249 uint32_t uImmSizeLen, uImmRotate;
1250 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1251 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1252
1253 /* We need a temp register for holding the member value we're modifying. */
1254 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1255
1256 /* Determine how we're to access pVCpu first. */
1257 uint32_t const cbData = sizeof(uint32_t);
1258 if (offVCpu < (unsigned)(_4K * cbData))
1259 {
1260 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1261 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1262 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1263 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1264 if (idxTmpMask == UINT8_MAX)
1265 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1266 else
1267 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1268 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1269 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1270 }
1271 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1272 {
1273 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1274 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1275 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1276 if (idxTmpMask == UINT8_MAX)
1277 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1278 else
1279 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1280 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1281 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1282 }
1283 else
1284 {
1285 /* The offset is too large, so we must load it into a register and use
1286 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL #2' feature
1287 of the instruction if that reduces the constant to 16 bits. */
1288 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1289 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1290 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1291 if (fShifted)
1292 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1293 else
1294 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1295
1296 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1297 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1298
1299 if (idxTmpMask == UINT8_MAX)
1300 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1301 else
1302 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1303
1304 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1305 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1306 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1307 }
1308 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1309 if (idxTmpMask != UINT8_MAX)
1310 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1311
1312#else
1313# error "port me"
1314#endif
1315 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1316 return off;
1317}
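/*
 * Mask encodability note for the OR/AND emitters (illustrative only):
 * ARMv8 logical immediates must be a single contiguous run of set bits,
 * possibly rotated, so masks like 0x000000f0 or 0x80000001 encode directly
 * and need no temporary register, while a pattern such as 0x00010100 does
 * not and forces the iemNativeRegAllocTmpImm() path.
 */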
1318
1319
1320/**
1321 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1322 *
1323 * @note May allocate temporary registers (not AMD64).
1324 */
1325DECL_FORCE_INLINE(uint32_t)
1326iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1327{
1328 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1329#ifdef RT_ARCH_AMD64
1330 /* and dword [pVCpu + offVCpu], imm8/32 */
1331 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1332 if (fMask < 0x80)
1333 {
1334 pCodeBuf[off++] = 0x83;
1335 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1336 pCodeBuf[off++] = (uint8_t)fMask;
1337 }
1338 else
1339 {
1340 pCodeBuf[off++] = 0x81;
1341 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1342 pCodeBuf[off++] = RT_BYTE1(fMask);
1343 pCodeBuf[off++] = RT_BYTE2(fMask);
1344 pCodeBuf[off++] = RT_BYTE3(fMask);
1345 pCodeBuf[off++] = RT_BYTE4(fMask);
1346 }
1347
1348#elif defined(RT_ARCH_ARM64)
1349 /* If the constant is unwieldy we'll need a register to hold it as well. */
1350 uint32_t uImmSizeLen, uImmRotate;
1351 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1352 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1353
1354 /* We need a temp register for holding the member value we're modifying. */
1355 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1356
1357 /* Determine how we're to access pVCpu first. */
1358 uint32_t const cbData = sizeof(uint32_t);
1359 if (offVCpu < (unsigned)(_4K * cbData))
1360 {
1361 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1362 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1363 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1364 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1365 if (idxTmpMask == UINT8_MAX)
1366 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1367 else
1368 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1369 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1370 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1371 }
1372 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1373 {
1374 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1375 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1376 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1377 if (idxTmpMask == UINT8_MAX)
1378 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1379 else
1380 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1381 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1382 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1383 }
1384 else
1385 {
1386 /* The offset is too large, so we must load it into a register and use
1387 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL #2' feature
1388 of the instruction if that reduces the constant to 16 bits. */
1389 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1390 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1391 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1392 if (fShifted)
1393 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1394 else
1395 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1396
1397 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1398 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1399
1400 if (idxTmpMask == UINT8_MAX)
1401 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1402 else
1403 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1404
1405 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1406 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1407 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1408 }
1409 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1410 if (idxTmpMask != UINT8_MAX)
1411 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1412
1413#else
1414# error "port me"
1415#endif
1416 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1417 return off;
1418}
1419
1420
1421/**
1422 * Emits a gprdst = gprsrc load.
1423 */
1424DECL_FORCE_INLINE(uint32_t)
1425iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1426{
1427#ifdef RT_ARCH_AMD64
1428 /* mov gprdst, gprsrc */
1429 if ((iGprDst | iGprSrc) >= 8)
1430 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1431 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1432 : X86_OP_REX_W | X86_OP_REX_R;
1433 else
1434 pCodeBuf[off++] = X86_OP_REX_W;
1435 pCodeBuf[off++] = 0x8b;
1436 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1437
1438#elif defined(RT_ARCH_ARM64)
1439 /* mov dst, src; alias for: orr dst, xzr, src */
1440 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1441
1442#else
1443# error "port me"
1444#endif
1445 return off;
1446}
1447
1448
1449/**
1450 * Emits a gprdst = gprsrc load.
1451 */
1452DECL_INLINE_THROW(uint32_t)
1453iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1454{
1455#ifdef RT_ARCH_AMD64
1456 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1457#elif defined(RT_ARCH_ARM64)
1458 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1459#else
1460# error "port me"
1461#endif
1462 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1463 return off;
1464}
1465
1466
1467/**
1468 * Emits a gprdst = gprsrc[31:0] load.
1469 * @note Bits 63 thru 32 are cleared.
1470 */
1471DECL_FORCE_INLINE(uint32_t)
1472iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1473{
1474#ifdef RT_ARCH_AMD64
1475 /* mov gprdst, gprsrc */
1476 if ((iGprDst | iGprSrc) >= 8)
1477 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1478 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1479 : X86_OP_REX_R;
1480 pCodeBuf[off++] = 0x8b;
1481 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1482
1483#elif defined(RT_ARCH_ARM64)
1484 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1485 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1486
1487#else
1488# error "port me"
1489#endif
1490 return off;
1491}
1492
1493
1494/**
1495 * Emits a gprdst = gprsrc[31:0] load.
1496 * @note Bits 63 thru 32 are cleared.
1497 */
1498DECL_INLINE_THROW(uint32_t)
1499iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1500{
1501#ifdef RT_ARCH_AMD64
1502 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1503#elif defined(RT_ARCH_ARM64)
1504 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1505#else
1506# error "port me"
1507#endif
1508 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1509 return off;
1510}
1511
1512
1513/**
1514 * Emits a gprdst = gprsrc[15:0] load.
1515 * @note Bits 63 thru 16 are cleared.
1516 */
1517DECL_INLINE_THROW(uint32_t)
1518iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1519{
1520#ifdef RT_ARCH_AMD64
1521 /* movzx Gv,Ew */
1522 if ((iGprDst | iGprSrc) >= 8)
1523 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1524 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1525 : X86_OP_REX_R;
1526 pCodeBuf[off++] = 0x0f;
1527 pCodeBuf[off++] = 0xb7;
1528 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1529
1530#elif defined(RT_ARCH_ARM64)
1531 /* and gprdst, gprsrc, #0xffff */
1532# if 1
1533 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1534 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1535# else
1536 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1537 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1538# endif
1539
1540#else
1541# error "port me"
1542#endif
1543 return off;
1544}
1545
1546
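/*
 * Aside on the ARM64 logical immediate above (informal reading, not from the
 * original source): an imms of 0x0f with immr=0 decodes to a 32-bit element
 * holding 15+1 = 16 consecutive one bits, unrotated, i.e. 0x0000ffff - hence
 * the assertion that Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX.
 * The disabled alternative packs N=1 into bit 6 (0x4f) to get the same 16
 * low bits in a 64-bit element.
 */

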
1547/**
1548 * Emits a gprdst = gprsrc[15:0] load.
1549 * @note Bits 63 thru 16 are cleared.
1550 */
1551DECL_INLINE_THROW(uint32_t)
1552iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1553{
1554#ifdef RT_ARCH_AMD64
1555 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1556#elif defined(RT_ARCH_ARM64)
1557 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1558#else
1559# error "port me"
1560#endif
1561 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1562 return off;
1563}
1564
1565
1566/**
1567 * Emits a gprdst = gprsrc[7:0] load.
1568 * @note Bits 63 thru 8 are cleared.
1569 */
1570DECL_FORCE_INLINE(uint32_t)
1571iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1572{
1573#ifdef RT_ARCH_AMD64
1574 /* movzx Gv,Eb */
1575 if (iGprDst >= 8 || iGprSrc >= 8)
1576 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1577 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1578 : X86_OP_REX_R;
1579 else if (iGprSrc >= 4)
1580 pCodeBuf[off++] = X86_OP_REX;
1581 pCodeBuf[off++] = 0x0f;
1582 pCodeBuf[off++] = 0xb6;
1583 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1584
1585#elif defined(RT_ARCH_ARM64)
1586 /* and gprdst, gprsrc, #0xff */
1587 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1588 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1589
1590#else
1591# error "port me"
1592#endif
1593 return off;
1594}
1595
1596
1597/**
1598 * Emits a gprdst = gprsrc[7:0] load.
1599 * @note Bits 63 thru 8 are cleared.
1600 */
1601DECL_INLINE_THROW(uint32_t)
1602iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1603{
1604#ifdef RT_ARCH_AMD64
1605 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1606#elif defined(RT_ARCH_ARM64)
1607 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1608#else
1609# error "port me"
1610#endif
1611 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1612 return off;
1613}
1614
1615
1616/**
1617 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1618 * @note Bits 63 thru 8 are cleared.
1619 */
1620DECL_INLINE_THROW(uint32_t)
1621iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1622{
1623#ifdef RT_ARCH_AMD64
1624 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1625
1626 /* movzx Gv,Ew */
1627 if ((iGprDst | iGprSrc) >= 8)
1628 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1629 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1630 : X86_OP_REX_R;
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xb7;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635 /* shr Ev,8 */
1636 if (iGprDst >= 8)
1637 pbCodeBuf[off++] = X86_OP_REX_B;
1638 pbCodeBuf[off++] = 0xc1;
1639 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1640 pbCodeBuf[off++] = 8;
1641
1642#elif defined(RT_ARCH_ARM64)
1643 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1644 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1645 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1646
1647#else
1648# error "port me"
1649#endif
1650 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1651 return off;
1652}
1653
1654
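/*
 * Example (illustrative): fetching BH (iGprSrc=xBX, bits 15:8) into r10
 * yields 'movzx r10d, bx' + 'shr r10d, 8' on AMD64, while ARM64 needs only
 * 'ubfx w10, w3, #8, #8' (unsigned bitfield extract, 8 bits from bit 8),
 * both leaving zero in bits 63:8 of the destination.
 */

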
1655/**
1656 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1657 */
1658DECL_INLINE_THROW(uint32_t)
1659iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1660{
1661#ifdef RT_ARCH_AMD64
1662 /* movsxd r64, r/m32 */
1663 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1664 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1665 pbCodeBuf[off++] = 0x63;
1666 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1667
1668#elif defined(RT_ARCH_ARM64)
1669 /* sxtw dst, src */
1670 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1671 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1672
1673#else
1674# error "port me"
1675#endif
1676 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1677 return off;
1678}
1679
1680
1681/**
1682 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1683 */
1684DECL_INLINE_THROW(uint32_t)
1685iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1686{
1687#ifdef RT_ARCH_AMD64
1688 /* movsx r64, r/m16 */
1689 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1690 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1691 pbCodeBuf[off++] = 0x0f;
1692 pbCodeBuf[off++] = 0xbf;
1693 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1694
1695#elif defined(RT_ARCH_ARM64)
1696 /* sxth dst, src */
1697 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1698 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1699
1700#else
1701# error "port me"
1702#endif
1703 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1704 return off;
1705}
1706
1707
1708/**
1709 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1710 */
1711DECL_INLINE_THROW(uint32_t)
1712iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1713{
1714#ifdef RT_ARCH_AMD64
1715 /* movsx r32, r/m16 */
1716 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1717 if (iGprDst >= 8 || iGprSrc >= 8)
1718 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1719 pbCodeBuf[off++] = 0x0f;
1720 pbCodeBuf[off++] = 0xbf;
1721 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1722
1723#elif defined(RT_ARCH_ARM64)
1724 /* sxth dst32, src */
1725 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1726 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1727
1728#else
1729# error "port me"
1730#endif
1731 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1732 return off;
1733}
1734
1735
1736/**
1737 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1738 */
1739DECL_INLINE_THROW(uint32_t)
1740iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1741{
1742#ifdef RT_ARCH_AMD64
1743 /* movsx r64, r/m8 */
1744 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1745 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1746 pbCodeBuf[off++] = 0x0f;
1747 pbCodeBuf[off++] = 0xbe;
1748 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1749
1750#elif defined(RT_ARCH_ARM64)
1751 /* sxtb dst, src */
1752 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1753 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1754
1755#else
1756# error "port me"
1757#endif
1758 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1759 return off;
1760}
1761
1762
1763/**
1764 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1765 * @note Bits 63 thru 32 are cleared.
1766 */
1767DECL_INLINE_THROW(uint32_t)
1768iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1769{
1770#ifdef RT_ARCH_AMD64
1771 /* movsx r32, r/m8 */
1772 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1773 if (iGprDst >= 8 || iGprSrc >= 8)
1774 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1775 else if (iGprSrc >= 4)
1776 pbCodeBuf[off++] = X86_OP_REX;
1777 pbCodeBuf[off++] = 0x0f;
1778 pbCodeBuf[off++] = 0xbe;
1779 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1780
1781#elif defined(RT_ARCH_ARM64)
1782 /* sxtb dst32, src32 */
1783 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1784 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1785
1786#else
1787# error "port me"
1788#endif
1789 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1790 return off;
1791}
1792
1793
1794/**
1795 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1796 * @note Bits 63 thru 16 are cleared.
1797 */
1798DECL_INLINE_THROW(uint32_t)
1799iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1800{
1801#ifdef RT_ARCH_AMD64
1802 /* movsx r16, r/m8 */
1803 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1804 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1805 if (iGprDst >= 8 || iGprSrc >= 8)
1806 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1807 else if (iGprSrc >= 4)
1808 pbCodeBuf[off++] = X86_OP_REX;
1809 pbCodeBuf[off++] = 0x0f;
1810 pbCodeBuf[off++] = 0xbe;
1811 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1812
1813 /* movzx r32, r/m16 */
1814 if (iGprDst >= 8)
1815 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1816 pbCodeBuf[off++] = 0x0f;
1817 pbCodeBuf[off++] = 0xb7;
1818 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1819
1820#elif defined(RT_ARCH_ARM64)
1821 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1822 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1823 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1824 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1825 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1826
1827#else
1828# error "port me"
1829#endif
1830 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1831 return off;
1832}
1833
1834
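/*
 * Example (illustrative): with iGprDst=iGprSrc=xDX the AMD64 path emits
 * 'movsx dx, dl' (a 16-bit write, so bits 63:16 stay whatever they were)
 * followed by 'movzx edx, dx' to zero bits 63:16; ARM64 gets the same effect
 * from 'sxtb w2, w2' (clears bits 63:32) plus 'and w2, w2, #0xffff'.
 */

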
1835/**
1836 * Emits a gprdst = gprsrc + addend load.
1837 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1838 */
1839#ifdef RT_ARCH_AMD64
1840DECL_INLINE_THROW(uint32_t)
1841iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1842 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1843{
1844 Assert(iAddend != 0);
1845
1846 /* lea gprdst, [gprsrc + iAddend] */
1847 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1848 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1849 pbCodeBuf[off++] = 0x8d;
1850 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1851 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1852 return off;
1853}
1854
1855#elif defined(RT_ARCH_ARM64)
1856DECL_INLINE_THROW(uint32_t)
1857iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1858 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1859{
1860 if ((uint32_t)iAddend < 4096)
1861 {
1862 /* add dst, src, uimm12 */
1863 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1864 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1865 }
1866 else if ((uint32_t)-iAddend < 4096)
1867 {
1868 /* sub dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1871 }
1872 else
1873 {
1874 Assert(iGprSrc != iGprDst);
1875 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1876 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1877 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1878 }
1879 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1880 return off;
1881}
1882#else
1883# error "port me"
1884#endif
1885
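/*
 * Worked example for the ARM64 variant above (informal): iAddend = -32 fails
 * the first test ((uint32_t)-32 is way above 4096) but passes the second as
 * (uint32_t)32 < 4096, so a single 'sub dst, src, #32' is emitted; only
 * addends outside the +/-4095 window take the load-immediate + add-register
 * fallback, which burns iGprDst as scratch and thus requires dst != src.
 */
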
1886/**
1887 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1888 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1889 */
1890#ifdef RT_ARCH_AMD64
1891DECL_INLINE_THROW(uint32_t)
1892iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1893 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1894#else
1895DECL_INLINE_THROW(uint32_t)
1896iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1897 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1898#endif
1899{
1900 if (iAddend != 0)
1901 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1902 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1903}
1904
1905
1906/**
1907 * Emits a gprdst = gprsrc32 + addend load.
1908 * @note Bits 63 thru 32 are cleared.
1909 */
1910DECL_INLINE_THROW(uint32_t)
1911iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1912 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1913{
1914 Assert(iAddend != 0);
1915
1916#ifdef RT_ARCH_AMD64
1917 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1918 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1919 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1920 if ((iGprDst | iGprSrc) >= 8)
1921 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1922 pbCodeBuf[off++] = 0x8d;
1923 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1924
1925#elif defined(RT_ARCH_ARM64)
1926 if ((uint32_t)iAddend < 4096)
1927 {
1928 /* add dst, src, uimm12 */
1929 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1930 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1931 }
1932 else if ((uint32_t)-iAddend < 4096)
1933 {
1934 /* sub dst, src, uimm12 */
1935 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1936 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1937 }
1938 else
1939 {
1940 Assert(iGprSrc != iGprDst);
1941 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1942 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1943 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1944 }
1945
1946#else
1947# error "port me"
1948#endif
1949 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1950 return off;
1951}
1952
1953
1954/**
1955 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1956 */
1957DECL_INLINE_THROW(uint32_t)
1958iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1959 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1960{
1961 if (iAddend != 0)
1962 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1963 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1964}
1965
1966
1967/**
1968 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1969 * destination.
1970 */
1971DECL_FORCE_INLINE(uint32_t)
1972iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1973{
1974#ifdef RT_ARCH_AMD64
1975 /* mov reg16, r/m16 */
1976 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1977 if (idxDst >= 8 || idxSrc >= 8)
1978 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1979 pCodeBuf[off++] = 0x8b;
1980 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1981
1982#elif defined(RT_ARCH_ARM64)
1983 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1984 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1985
1986#else
1987# error "Port me!"
1988#endif
1989 return off;
1990}
1991
1992
1993/**
1994 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1995 * destination.
1996 */
1997DECL_INLINE_THROW(uint32_t)
1998iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1999{
2000#ifdef RT_ARCH_AMD64
2001 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
2002#elif defined(RT_ARCH_ARM64)
2003 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
2004#else
2005# error "Port me!"
2006#endif
2007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2008 return off;
2009}
2010
2011
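/*
 * Example (illustrative): merging CX into RAX.  AMD64 emits '66 8b c1'
 * (mov ax, cx), a 16-bit write that leaves RAX[63:16] alone; ARM64 emits the
 * 64-bit 'bfi x0, x1, #0, #16', inserting bits 15:0 of x1 into bits 15:0 of
 * x0 while preserving the remaining destination bits.
 */

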
2012#ifdef RT_ARCH_AMD64
2013/**
2014 * Common bit of iemNativeEmitLoadGprByBp and friends.
2015 */
2016DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
2017 PIEMRECOMPILERSTATE pReNativeAssert)
2018{
2019 if (offDisp < 128 && offDisp >= -128)
2020 {
2021 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
2022 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
2023 }
2024 else
2025 {
2026 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
2027 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2028 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2029 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2030 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2031 }
2032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
2033 return off;
2034}
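
/*
 * Encoding note (illustrative): iGprReg=xAX with offDisp=-8 produces the two
 * bytes '45 f8' (mod=01, reg=000, rm=101/rBP, disp8=-8); anything outside
 * -128..127 switches to mod=10 with a 4-byte little-endian disp32.
 */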
2035#elif defined(RT_ARCH_ARM64)
2036/**
2037 * Common bit of iemNativeEmitLoadGprByBp and friends.
2038 */
2039DECL_FORCE_INLINE_THROW(uint32_t)
2040iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2041 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2042{
2043 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
2044 {
2045 /* str w/ unsigned imm12 (scaled) */
2046 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2047 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
2048 }
2049 else if (offDisp >= -256 && offDisp <= 256)
2050 {
2051 /* stur w/ signed imm9 (unscaled) */
2052 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2053 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
2054 }
2055 else
2056 {
2057 /* Use temporary indexing register. */
2058 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2059 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2060 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2061 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2062 }
2063 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2064 return off;
2065}
2066#endif
2067
2068
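/*
 * Worked example for the ARM64 helper above (informal): a 64-bit load with
 * offDisp=0x20 passes the scaled test (below 4096*8 and 8-byte aligned) and
 * becomes 'ldr Xt, [x29, #0x20]' with imm12 = 0x20/8 = 4; offDisp=-16 fails
 * that test but fits the signed imm9 of 'ldur Xt, [x29, #-16]'; anything
 * else goes through IEMNATIVE_REG_FIXED_TMP0.
 */

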
2069/**
2070 * Emits a 64-bit GPR load instruction with a BP relative source address.
2071 */
2072DECL_INLINE_THROW(uint32_t)
2073iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2074{
2075#ifdef RT_ARCH_AMD64
2076 /* mov gprdst, qword [rbp + offDisp] */
2077 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2078 if (iGprDst < 8)
2079 pbCodeBuf[off++] = X86_OP_REX_W;
2080 else
2081 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2082 pbCodeBuf[off++] = 0x8b;
2083 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2084
2085#elif defined(RT_ARCH_ARM64)
2086 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2087
2088#else
2089# error "port me"
2090#endif
2091}
2092
2093
2094/**
2095 * Emits a 32-bit GPR load instruction with a BP relative source address.
2096 * @note Bits 63 thru 32 of the GPR will be cleared.
2097 */
2098DECL_INLINE_THROW(uint32_t)
2099iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2100{
2101#ifdef RT_ARCH_AMD64
2102 /* mov gprdst, dword [rbp + offDisp] */
2103 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2104 if (iGprDst >= 8)
2105 pbCodeBuf[off++] = X86_OP_REX_R;
2106 pbCodeBuf[off++] = 0x8b;
2107 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2108
2109#elif defined(RT_ARCH_ARM64)
2110 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2111
2112#else
2113# error "port me"
2114#endif
2115}
2116
2117
2118/**
2119 * Emits a 16-bit GPR load instruction with a BP relative source address.
2120 * @note Bits 63 thru 16 of the GPR will be cleared.
2121 */
2122DECL_INLINE_THROW(uint32_t)
2123iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2124{
2125#ifdef RT_ARCH_AMD64
2126 /* movzx gprdst, word [rbp + offDisp] */
2127 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2128 if (iGprDst >= 8)
2129 pbCodeBuf[off++] = X86_OP_REX_R;
2130 pbCodeBuf[off++] = 0x0f;
2131 pbCodeBuf[off++] = 0xb7;
2132 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2133
2134#elif defined(RT_ARCH_ARM64)
2135    return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2136
2137#else
2138# error "port me"
2139#endif
2140}
2141
2142
2143/**
2144 * Emits an 8-bit GPR load instruction with a BP relative source address.
2145 * @note Bits 63 thru 8 of the GPR will be cleared.
2146 */
2147DECL_INLINE_THROW(uint32_t)
2148iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2149{
2150#ifdef RT_ARCH_AMD64
2151 /* movzx gprdst, byte [rbp + offDisp] */
2152 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2153 if (iGprDst >= 8)
2154 pbCodeBuf[off++] = X86_OP_REX_R;
2155 pbCodeBuf[off++] = 0x0f;
2156 pbCodeBuf[off++] = 0xb6;
2157 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2158
2159#elif defined(RT_ARCH_ARM64)
2160    return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2161
2162#else
2163# error "port me"
2164#endif
2165}
2166
2167
2168#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2169/**
2170 * Emits a 128-bit vector register load instruction with a BP relative source address.
2171 */
2172DECL_FORCE_INLINE_THROW(uint32_t)
2173iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2174{
2175#ifdef RT_ARCH_AMD64
2176 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2177
2178 /* movdqu reg128, mem128 */
2179 pbCodeBuf[off++] = 0xf3;
2180 if (iVecRegDst >= 8)
2181 pbCodeBuf[off++] = X86_OP_REX_R;
2182 pbCodeBuf[off++] = 0x0f;
2183 pbCodeBuf[off++] = 0x6f;
2184 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2185#elif defined(RT_ARCH_ARM64)
2186 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2187#else
2188# error "port me"
2189#endif
2190}
2191
2192
2193/**
2194 * Emits a 256-bit vector register load instruction with a BP relative source address.
2195 */
2196DECL_FORCE_INLINE_THROW(uint32_t)
2197iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2198{
2199#ifdef RT_ARCH_AMD64
2200 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2201
2202 /* vmovdqu reg256, mem256 */
2203 pbCodeBuf[off++] = X86_OP_VEX2;
2204 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2205 pbCodeBuf[off++] = 0x6f;
2206 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2207#elif defined(RT_ARCH_ARM64)
2208 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2209 Assert(!(iVecRegDst & 0x1));
2210 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2211 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2212#else
2213# error "port me"
2214#endif
2215}
2216
2217#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
2218
2219
2220/**
2221 * Emits a load effective address into a GPR with a BP relative source address.
2222 */
2223DECL_INLINE_THROW(uint32_t)
2224iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2225{
2226#ifdef RT_ARCH_AMD64
2227 /* lea gprdst, [rbp + offDisp] */
2228 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2229 if (iGprDst < 8)
2230 pbCodeBuf[off++] = X86_OP_REX_W;
2231 else
2232 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2233 pbCodeBuf[off++] = 0x8d;
2234 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2235
2236#elif defined(RT_ARCH_ARM64)
2237 bool const fSub = offDisp < 0;
2238 uint32_t const offAbsDisp = (uint32_t)RT_ABS(offDisp);
2239 if (offAbsDisp <= 0xffffffU)
2240 {
2241 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2242 if (offAbsDisp <= 0xfffU)
2243 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp);
2244 else
2245 {
2246 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp >> 12,
2247 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2248 if (offAbsDisp & 0xfffU)
2249 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, offAbsDisp & 0xfff);
2250 }
2251 }
2252 else
2253 {
2254 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2255 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offAbsDisp);
2256 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2257 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2258 }
2259
2260#else
2261# error "port me"
2262#endif
2263
2264 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2265 return off;
2266}
2267
2268
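/*
 * Worked example for the ARM64 branch above (informal): offDisp = 0x12345
 * splits into 'add dst, x29, #0x12, lsl #12' followed by
 * 'add dst, dst, #0x345', while a small frame offset such as 0x18 needs just
 * the single unshifted add.
 */

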
2269/**
2270 * Emits a 64-bit GPR store with a BP relative destination address.
2271 *
2272 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2273 */
2274DECL_INLINE_THROW(uint32_t)
2275iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2276{
2277#ifdef RT_ARCH_AMD64
2278    /* mov qword [rbp + offDisp], gprsrc */
2279 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2280 if (iGprSrc < 8)
2281 pbCodeBuf[off++] = X86_OP_REX_W;
2282 else
2283 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2284 pbCodeBuf[off++] = 0x89;
2285 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2286
2287#elif defined(RT_ARCH_ARM64)
2288 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2289 {
2290 /* str w/ unsigned imm12 (scaled) */
2291 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2292 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2293 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2294 }
2295 else if (offDisp >= -256 && offDisp <= 256)
2296 {
2297 /* stur w/ signed imm9 (unscaled) */
2298 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2299 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2300 }
2301 else if ((uint32_t)-offDisp < (unsigned)_4K)
2302 {
2303 /* Use temporary indexing register w/ sub uimm12. */
2304 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2305 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2306 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2307 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2308 }
2309 else
2310 {
2311 /* Use temporary indexing register. */
2312 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2313 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2314 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2315 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2316 }
2317 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2318 return off;
2319
2320#else
2321# error "Port me!"
2322#endif
2323}
2324
2325
2326/**
2327 * Emits a 64-bit immediate store with a BP relative destination address.
2328 *
2329 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2330 */
2331DECL_INLINE_THROW(uint32_t)
2332iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2333{
2334#ifdef RT_ARCH_AMD64
2335 if ((int64_t)uImm64 == (int32_t)uImm64)
2336 {
2337 /* mov qword [rbp + offDisp], imm32 - sign extended */
2338 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2339 pbCodeBuf[off++] = X86_OP_REX_W;
2340 pbCodeBuf[off++] = 0xc7;
2341 if (offDisp < 128 && offDisp >= -128)
2342 {
2343 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2344 pbCodeBuf[off++] = (uint8_t)offDisp;
2345 }
2346 else
2347 {
2348 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2349 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2350 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2351 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2352 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2353 }
2354 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2355 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2356 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2357 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2358 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2359 return off;
2360 }
2361#endif
2362
2363 /* Load tmp0, imm64; Store tmp to bp+disp. */
2364 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2365 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2366}
2367
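/*
 * Example (illustrative): uImm64 = UINT64_C(0xffffffff80000000) satisfies
 * (int64_t)uImm64 == (int32_t)uImm64 and thus uses the compact sign-extending
 * 'mov qword [rbp+disp], imm32' form, whereas UINT64_C(0x0000000080000000)
 * does not and must round-trip through IEMNATIVE_REG_FIXED_TMP0.
 */
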
2368#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2369
2370/**
2371 * Emits a 128-bit vector register store with a BP relative destination address.
2372 *
2373 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2374 */
2375DECL_INLINE_THROW(uint32_t)
2376iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2377{
2378#ifdef RT_ARCH_AMD64
2379 /* movdqu [rbp + offDisp], vecsrc */
2380 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2381 pbCodeBuf[off++] = 0xf3;
2382 if (iVecRegSrc >= 8)
2383 pbCodeBuf[off++] = X86_OP_REX_R;
2384 pbCodeBuf[off++] = 0x0f;
2385 pbCodeBuf[off++] = 0x7f;
2386 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2387
2388#elif defined(RT_ARCH_ARM64)
2389    if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2390 {
2391 /* str w/ unsigned imm12 (scaled) */
2392 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2393 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2394                                                      ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2395 }
2396 else if (offDisp >= -256 && offDisp <= 256)
2397 {
2398 /* stur w/ signed imm9 (unscaled) */
2399 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2400 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2401 }
2402 else if ((uint32_t)-offDisp < (unsigned)_4K)
2403 {
2404 /* Use temporary indexing register w/ sub uimm12. */
2405 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2406 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2407 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2408 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2409 }
2410 else
2411 {
2412 /* Use temporary indexing register. */
2413 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2414 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2415 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2416 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2417 }
2418 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2419 return off;
2420
2421#else
2422# error "Port me!"
2423#endif
2424}
2425
2426
2427/**
2428 * Emits a 256-bit vector register store with a BP relative destination address.
2429 *
2430 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2431 */
2432DECL_INLINE_THROW(uint32_t)
2433iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2434{
2435#ifdef RT_ARCH_AMD64
2436 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2437
2438 /* vmovdqu mem256, reg256 */
2439 pbCodeBuf[off++] = X86_OP_VEX2;
2440 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2441 pbCodeBuf[off++] = 0x7f;
2442 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2443#elif defined(RT_ARCH_ARM64)
2444 Assert(!(iVecRegSrc & 0x1));
2445 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2446 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2447#else
2448# error "Port me!"
2449#endif
2450}
2451
2452#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
2453#if defined(RT_ARCH_ARM64)
2454
2455/**
2456 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2457 *
2458 * @note Odd and large @a offDisp values require a temporary, unless it's a
2459 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2460 * caller does not heed this.
2461 *
2462 * @note DON'T try this with prefetch.
2463 */
2464DECL_FORCE_INLINE_THROW(uint32_t)
2465iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2466 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2467{
2468 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2469 {
2470 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2471 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2472 }
2473 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2474 && iGprReg != iGprBase)
2475 || iGprTmp != UINT8_MAX)
2476 {
2477 /* The offset is too large, so we must load it into a register and use
2478 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2479        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2480 if (iGprTmp == UINT8_MAX)
2481 iGprTmp = iGprReg;
2482 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2483 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2484 }
2485 else
2486# ifdef IEM_WITH_THROW_CATCH
2487 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2488# else
2489 AssertReleaseFailedStmt(off = UINT32_MAX);
2490# endif
2491 return off;
2492}
2493
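/*
 * Usage sketch for the iGprTmp contract (illustrative, not from the original
 * source): a load whose destination differs from the base can recycle the
 * destination as scratch, so
 *     off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, 0x12344,
 *                                       kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
 * needs no temporary, while a store (or iGprDst == iGprBase) with such an
 * out-of-range offset must pass a real iGprTmp or the helper asserts/throws.
 */
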
2494/**
2495 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2496 */
2497DECL_FORCE_INLINE_THROW(uint32_t)
2498iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2499 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2500{
2501 /*
2502     * There are a couple of ldr variants that take an immediate offset, so
2503     * try to use those if we can, otherwise we have to use a temporary register
2504     * to help with the addressing.
2505 */
2506 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2507 {
2508 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2509 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2510 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2511 }
2512 else
2513 {
2514 /* The offset is too large, so we must load it into a register and use
2515 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2516        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2517 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2518
2519 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2520 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2521
2522 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2523 }
2524 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2525 return off;
2526}
2527
2528# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2529/**
2530 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2531 *
2532 * @note Odd and large @a offDisp values require a temporary register
2533 *       (@a iGprTmp); a vector register cannot stand in as address scratch.
2534 *       Will assert / throw if caller does not heed this.
2535 *
2536 * @note DON'T try this with prefetch.
2537 */
2538DECL_FORCE_INLINE_THROW(uint32_t)
2539iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2540 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2541{
2542 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2543 {
2544 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2545 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2546 }
2547    else if (iGprTmp != UINT8_MAX)
2549 {
2550 /* The offset is too large, so we must load it into a register and use
2551 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2552        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2553 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2554 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2555 }
2556 else
2557# ifdef IEM_WITH_THROW_CATCH
2558 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2559# else
2560 AssertReleaseFailedStmt(off = UINT32_MAX);
2561# endif
2562 return off;
2563}
2564# endif
2565
2566
2567/**
2568 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2569 */
2570DECL_FORCE_INLINE_THROW(uint32_t)
2571iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2572 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2573{
2574 /*
2575     * There are a couple of ldr variants that take an immediate offset, so
2576     * try to use those if we can, otherwise we have to use a temporary register
2577     * to help with the addressing.
2578 */
2579 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2580 {
2581 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2582 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2583 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2584 }
2585 else
2586 {
2587 /* The offset is too large, so we must load it into a register and use
2588 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2589        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2590 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2591
2592 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2593 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2594
2595 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2596 }
2597 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2598 return off;
2599}
2600#endif /* RT_ARCH_ARM64 */
2601
2602/**
2603 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2604 *
2605 * @note ARM64: Misaligned @a offDisp values and values not in the
2606 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2607 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2608 * does not heed this.
2609 */
2610DECL_FORCE_INLINE_THROW(uint32_t)
2611iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2612 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2613{
2614#ifdef RT_ARCH_AMD64
2615 /* mov reg64, mem64 */
2616 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2617 pCodeBuf[off++] = 0x8b;
2618 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2619 RT_NOREF(iGprTmp);
2620
2621#elif defined(RT_ARCH_ARM64)
2622 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2623 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2624
2625#else
2626# error "port me"
2627#endif
2628 return off;
2629}
2630
2631
2632/**
2633 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2634 */
2635DECL_INLINE_THROW(uint32_t)
2636iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2637{
2638#ifdef RT_ARCH_AMD64
2639 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2640 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2641
2642#elif defined(RT_ARCH_ARM64)
2643 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2644
2645#else
2646# error "port me"
2647#endif
2648 return off;
2649}
2650
2651
2652/**
2653 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2654 *
2655 * @note ARM64: Misaligned @a offDisp values and values not in the
2656 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2657 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2658 * caller does not heed this.
2659 *
2660 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2661 */
2662DECL_FORCE_INLINE_THROW(uint32_t)
2663iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2664 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2665{
2666#ifdef RT_ARCH_AMD64
2667 /* mov reg32, mem32 */
2668 if (iGprDst >= 8 || iGprBase >= 8)
2669 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2670 pCodeBuf[off++] = 0x8b;
2671 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2672 RT_NOREF(iGprTmp);
2673
2674#elif defined(RT_ARCH_ARM64)
2675 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2676 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2677
2678#else
2679# error "port me"
2680#endif
2681 return off;
2682}
2683
2684
2685/**
2686 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2687 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2688 */
2689DECL_INLINE_THROW(uint32_t)
2690iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2691{
2692#ifdef RT_ARCH_AMD64
2693 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2694 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2695
2696#elif defined(RT_ARCH_ARM64)
2697 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2698
2699#else
2700# error "port me"
2701#endif
2702 return off;
2703}
2704
2705
2706/**
2707 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2708 * sign-extending the value to 64 bits.
2709 *
2710 * @note ARM64: Misaligned @a offDisp values and values not in the
2711 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2712 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2713 * caller does not heed this.
2714 */
2715DECL_FORCE_INLINE_THROW(uint32_t)
2716iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2717 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2718{
2719#ifdef RT_ARCH_AMD64
2720 /* movsxd reg64, mem32 */
2721 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2722 pCodeBuf[off++] = 0x63;
2723 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2724 RT_NOREF(iGprTmp);
2725
2726#elif defined(RT_ARCH_ARM64)
2727 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2728 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2729
2730#else
2731# error "port me"
2732#endif
2733 return off;
2734}
2735
2736
2737/**
2738 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2739 *
2740 * @note ARM64: Misaligned @a offDisp values and values not in the
2741 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2742 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2743 * caller does not heed this.
2744 *
2745 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2746 */
2747DECL_FORCE_INLINE_THROW(uint32_t)
2748iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2749 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2750{
2751#ifdef RT_ARCH_AMD64
2752 /* movzx reg32, mem16 */
2753 if (iGprDst >= 8 || iGprBase >= 8)
2754 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2755 pCodeBuf[off++] = 0x0f;
2756 pCodeBuf[off++] = 0xb7;
2757 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2758 RT_NOREF(iGprTmp);
2759
2760#elif defined(RT_ARCH_ARM64)
2761 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2762 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2763
2764#else
2765# error "port me"
2766#endif
2767 return off;
2768}
2769
2770
2771/**
2772 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2773 * sign-extending the value to 64 bits.
2774 *
2775 * @note ARM64: Misaligned @a offDisp values and values not in the
2776 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2777 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2778 * caller does not heed this.
2779 */
2780DECL_FORCE_INLINE_THROW(uint32_t)
2781iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2782 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2783{
2784#ifdef RT_ARCH_AMD64
2785 /* movsx reg64, mem16 */
2786 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2787 pCodeBuf[off++] = 0x0f;
2788 pCodeBuf[off++] = 0xbf;
2789 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2790 RT_NOREF(iGprTmp);
2791
2792#elif defined(RT_ARCH_ARM64)
2793 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2794 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2795
2796#else
2797# error "port me"
2798#endif
2799 return off;
2800}
2801
2802
2803/**
2804 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2805 * sign-extending the value to 32 bits.
2806 *
2807 * @note ARM64: Misaligned @a offDisp values and values not in the
2808 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2809 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2810 * caller does not heed this.
2811 *
2812 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2813 */
2814DECL_FORCE_INLINE_THROW(uint32_t)
2815iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2816 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2817{
2818#ifdef RT_ARCH_AMD64
2819 /* movsx reg32, mem16 */
2820 if (iGprDst >= 8 || iGprBase >= 8)
2821 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2822 pCodeBuf[off++] = 0x0f;
2823 pCodeBuf[off++] = 0xbf;
2824 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2825 RT_NOREF(iGprTmp);
2826
2827#elif defined(RT_ARCH_ARM64)
2828 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2829 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2830
2831#else
2832# error "port me"
2833#endif
2834 return off;
2835}
2836
2837
2838/**
2839 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2840 *
2841 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2842 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2843 * same. Will assert / throw if caller does not heed this.
2844 *
2845 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2846 */
2847DECL_FORCE_INLINE_THROW(uint32_t)
2848iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2849 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2850{
2851#ifdef RT_ARCH_AMD64
2852 /* movzx reg32, mem8 */
2853 if (iGprDst >= 8 || iGprBase >= 8)
2854 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2855 pCodeBuf[off++] = 0x0f;
2856 pCodeBuf[off++] = 0xb6;
2857 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2858 RT_NOREF(iGprTmp);
2859
2860#elif defined(RT_ARCH_ARM64)
2861 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2862 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2863
2864#else
2865# error "port me"
2866#endif
2867 return off;
2868}
2869
2870
2871/**
2872 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2873 * sign-extending the value to 64 bits.
2874 *
2875 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2876 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2877 * same. Will assert / throw if caller does not heed this.
2878 */
2879DECL_FORCE_INLINE_THROW(uint32_t)
2880iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2881 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2882{
2883#ifdef RT_ARCH_AMD64
2884 /* movsx reg64, mem8 */
2885 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2886 pCodeBuf[off++] = 0x0f;
2887 pCodeBuf[off++] = 0xbe;
2888 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2889 RT_NOREF(iGprTmp);
2890
2891#elif defined(RT_ARCH_ARM64)
2892 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2893 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2894
2895#else
2896# error "port me"
2897#endif
2898 return off;
2899}
2900
2901
2902/**
2903 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2904 * sign-extending the value to 32 bits.
2905 *
2906 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2907 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2908 * same. Will assert / throw if caller does not heed this.
2909 *
2910 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2911 */
2912DECL_FORCE_INLINE_THROW(uint32_t)
2913iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2914 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2915{
2916#ifdef RT_ARCH_AMD64
2917 /* movsx reg32, mem8 */
2918 if (iGprDst >= 8 || iGprBase >= 8)
2919 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2920 pCodeBuf[off++] = 0x0f;
2921 pCodeBuf[off++] = 0xbe;
2922 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2923 RT_NOREF(iGprTmp);
2924
2925#elif defined(RT_ARCH_ARM64)
2926 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2927 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2928
2929#else
2930# error "port me"
2931#endif
2932 return off;
2933}
2934
2935
2936/**
2937 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2938 * sign-extending the value to 16 bits.
2939 *
2940 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2941 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2942 * same. Will assert / throw if caller does not heed this.
2943 *
2944 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2945 */
2946DECL_FORCE_INLINE_THROW(uint32_t)
2947iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2948 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2949{
2950#ifdef RT_ARCH_AMD64
2951 /* movsx reg32, mem8 */
2952 if (iGprDst >= 8 || iGprBase >= 8)
2953 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2954 pCodeBuf[off++] = 0x0f;
2955 pCodeBuf[off++] = 0xbe;
2956 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2957# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2958 /* and reg32, 0xffffh */
2959 if (iGprDst >= 8)
2960 pCodeBuf[off++] = X86_OP_REX_B;
2961 pCodeBuf[off++] = 0x81;
2962 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2963 pCodeBuf[off++] = 0xff;
2964 pCodeBuf[off++] = 0xff;
2965 pCodeBuf[off++] = 0;
2966 pCodeBuf[off++] = 0;
2967# else
2968 /* movzx reg32, reg16 */
2969 if (iGprDst >= 8)
2970 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2971 pCodeBuf[off++] = 0x0f;
2972 pCodeBuf[off++] = 0xb7;
2973 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2974# endif
2975 RT_NOREF(iGprTmp);
2976
2977#elif defined(RT_ARCH_ARM64)
2978 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2979 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2980 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2981 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
2982
2983#else
2984# error "port me"
2985#endif
2986 return off;
2987}
2988
2989
2990#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2991/**
2992 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2993 *
2994 * @note ARM64: Misaligned @a offDisp values and values not in the
2995 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2996 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2997 * does not heed this.
2998 */
2999DECL_FORCE_INLINE_THROW(uint32_t)
3000iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3001 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3002{
3003#ifdef RT_ARCH_AMD64
3004 /* movdqu reg128, mem128 */
3005 pCodeBuf[off++] = 0xf3;
3006 if (iVecRegDst >= 8 || iGprBase >= 8)
3007 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3008 pCodeBuf[off++] = 0x0f;
3009 pCodeBuf[off++] = 0x6f;
3010 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3011 RT_NOREF(iGprTmp);
3012
3013#elif defined(RT_ARCH_ARM64)
3014 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3015 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3016
3017#else
3018# error "port me"
3019#endif
3020 return off;
3021}
3022
3023
3024/**
3025 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3026 */
3027DECL_INLINE_THROW(uint32_t)
3028iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3029{
3030#ifdef RT_ARCH_AMD64
3031 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3033
3034#elif defined(RT_ARCH_ARM64)
3035 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3036
3037#else
3038# error "port me"
3039#endif
3040 return off;
3041}
3042
3043
3044/**
3045 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3046 *
3047 * @note ARM64: Misaligned @a offDisp values and values not in the
3048 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3049 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3050 * does not heed this.
3051 */
3052DECL_FORCE_INLINE_THROW(uint32_t)
3053iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3054 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3055{
3056#ifdef RT_ARCH_AMD64
3057 /* vmovdqu reg256, mem256 */
3058 pCodeBuf[off++] = X86_OP_VEX3;
3059 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3060 | X86_OP_VEX3_BYTE1_X
3061 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3062 | UINT8_C(0x01);
3063 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3064 pCodeBuf[off++] = 0x6f;
3065 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3066 RT_NOREF(iGprTmp);
3067
3068#elif defined(RT_ARCH_ARM64)
3069 Assert(!(iVecRegDst & 0x1));
3070 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3071 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3072 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3073 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3074#else
3075# error "port me"
3076#endif
3077 return off;
3078}
3079
3080
3081/**
3082 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3083 */
3084DECL_INLINE_THROW(uint32_t)
3085iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3086{
3087#ifdef RT_ARCH_AMD64
3088    off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3089 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3090
3091#elif defined(RT_ARCH_ARM64)
3092 Assert(!(iVecRegDst & 0x1));
3093 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3094 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3095 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3096 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3097
3098#else
3099# error "port me"
3100#endif
3101 return off;
3102}
3103#endif
3104
3105
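/*
 * A note on the ARM64 displacement limits quoted below (our reading of the
 * encoding, not authoritative): the unsigned-offset load/store form scales a
 * 12-bit immediate by the access size, so the limits work out as
 * 0xfff * 8 = 0x7ff8 for qwords, 0xfff * 4 = 0x3ffc for dwords,
 * 0xfff * 2 = 0x1ffe for words and 0xfff * 1 = 0xfff for bytes.
 */

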
3106/**
3107 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3108 *
3109 * @note ARM64: Misaligned @a offDisp values and values not in the
3110 *       -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3111 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3112 * does not heed this.
3113 */
3114DECL_FORCE_INLINE_THROW(uint32_t)
3115iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3116 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3117{
3118#ifdef RT_ARCH_AMD64
3119 /* mov mem64, reg64 */
3120 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3121 pCodeBuf[off++] = 0x89;
3122 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3123 RT_NOREF(iGprTmp);
3124
3125#elif defined(RT_ARCH_ARM64)
3126 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3127 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3128
3129#else
3130# error "port me"
3131#endif
3132 return off;
3133}
3134
3135
3136/**
3137 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3138 *
3139 * @note ARM64: Misaligned @a offDisp values and values not in the
3140 *       -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3141 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3142 * does not heed this.
3143 */
3144DECL_FORCE_INLINE_THROW(uint32_t)
3145iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3146 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3147{
3148#ifdef RT_ARCH_AMD64
3149 /* mov mem32, reg32 */
3150 if (iGprSrc >= 8 || iGprBase >= 8)
3151 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3152 pCodeBuf[off++] = 0x89;
3153 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3154 RT_NOREF(iGprTmp);
3155
3156#elif defined(RT_ARCH_ARM64)
3157 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3158 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3159
3160#else
3161# error "port me"
3162#endif
3163 return off;
3164}
3165
3166
3167/**
3168 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3169 *
3170 * @note ARM64: Misaligned @a offDisp values and values not in the
3171 *       -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3172 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3173 * does not heed this.
3174 */
3175DECL_FORCE_INLINE_THROW(uint32_t)
3176iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3177 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3178{
3179#ifdef RT_ARCH_AMD64
3180 /* mov mem16, reg16 */
3181 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3182 if (iGprSrc >= 8 || iGprBase >= 8)
3183 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3184 pCodeBuf[off++] = 0x89;
3185 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3186 RT_NOREF(iGprTmp);
3187
3188#elif defined(RT_ARCH_ARM64)
3189 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3190 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3191
3192#else
3193# error "port me"
3194#endif
3195 return off;
3196}
3197
3198
3199/**
3200 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3201 *
3202 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3203 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3204 * same. Will assert / throw if caller does not heed this.
3205 */
3206DECL_FORCE_INLINE_THROW(uint32_t)
3207iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3208 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3209{
3210#ifdef RT_ARCH_AMD64
3211 /* mov mem8, reg8 */
3212 if (iGprSrc >= 8 || iGprBase >= 8)
3213 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3214 else if (iGprSrc >= 4)
3215 pCodeBuf[off++] = X86_OP_REX;
3216 pCodeBuf[off++] = 0x88;
3217 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3218 RT_NOREF(iGprTmp);
3219
3220#elif defined(RT_ARCH_ARM64)
3221 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3222 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3223
3224#else
3225# error "port me"
3226#endif
3227 return off;
3228}
3229
3230
3231/**
3232 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3233 *
3234 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0); on
3235 *       AMD64 it depends on the immediate value.
3236 *
3237 * @note ARM64: Misaligned @a offDisp values and values not in the
3238 *       -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3239 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3240 * does not heed this.
3241 */
3242DECL_FORCE_INLINE_THROW(uint32_t)
3243iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3244 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3245{
3246#ifdef RT_ARCH_AMD64
3247 if ((int32_t)uImm == (int64_t)uImm)
3248 {
3249 /* mov mem64, imm32 (sign-extended) */
3250 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3251 pCodeBuf[off++] = 0xc7;
3252 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3253 pCodeBuf[off++] = RT_BYTE1(uImm);
3254 pCodeBuf[off++] = RT_BYTE2(uImm);
3255 pCodeBuf[off++] = RT_BYTE3(uImm);
3256 pCodeBuf[off++] = RT_BYTE4(uImm);
3257 }
3258 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3259 {
3260 /* require temporary register. */
3261 if (iGprImmTmp == UINT8_MAX)
3262 iGprImmTmp = iGprTmp;
3263 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3264 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3265 }
3266 else
3267# ifdef IEM_WITH_THROW_CATCH
3268 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3269# else
3270 AssertReleaseFailedStmt(off = UINT32_MAX);
3271# endif
3272
3273#elif defined(RT_ARCH_ARM64)
3274 if (uImm == 0)
3275 iGprImmTmp = ARMV8_A64_REG_XZR;
3276 else
3277 {
3278 Assert(iGprImmTmp < 31);
3279 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3280 }
3281 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3282
3283#else
3284# error "port me"
3285#endif
3286 return off;
3287}
3288
3289
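/*
 * Illustration (hypothetical values): a constant such as
 * UINT64_C(0xffffffff80000000) survives the (int32_t) round-trip test above
 * and is encoded directly as 'mov qword [base+disp], imm32', whereas
 * UINT64_C(0x0000000100000000) does not and needs @a iGprImmTmp:
 *
 *      off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x0000000100000000),
 *                                           idxRegBase, idxRegImmTmp);
 */

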
3290/**
3291 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3292 *
3293 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3294 *
3295 * @note ARM64: Misaligned @a offDisp values and values not in the
3296 *       -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3297 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3298 * does not heed this.
3299 */
3300DECL_FORCE_INLINE_THROW(uint32_t)
3301iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3302 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3303{
3304#ifdef RT_ARCH_AMD64
3305 /* mov mem32, imm32 */
3306 if (iGprBase >= 8)
3307 pCodeBuf[off++] = X86_OP_REX_B;
3308 pCodeBuf[off++] = 0xc7;
3309 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3310 pCodeBuf[off++] = RT_BYTE1(uImm);
3311 pCodeBuf[off++] = RT_BYTE2(uImm);
3312 pCodeBuf[off++] = RT_BYTE3(uImm);
3313 pCodeBuf[off++] = RT_BYTE4(uImm);
3314 RT_NOREF(iGprImmTmp, iGprTmp);
3315
3316#elif defined(RT_ARCH_ARM64)
3318 if (uImm == 0)
3319 iGprImmTmp = ARMV8_A64_REG_XZR;
3320 else
3321 {
3322 Assert(iGprImmTmp < 31);
3323 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3324 }
3325 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3326 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3327
3328#else
3329# error "port me"
3330#endif
3331 return off;
3332}
3333
3334
3335/**
3336 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3337 *
3338 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3339 *
3340 * @note ARM64: Misaligned @a offDisp values and values not in the
3341 *       -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3342 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3343 * does not heed this.
3344 */
3345DECL_FORCE_INLINE_THROW(uint32_t)
3346iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3347 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3348{
3349#ifdef RT_ARCH_AMD64
3350 /* mov mem16, imm16 */
3351 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3352 if (iGprBase >= 8)
3353 pCodeBuf[off++] = X86_OP_REX_B;
3354 pCodeBuf[off++] = 0xc7;
3355 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3356 pCodeBuf[off++] = RT_BYTE1(uImm);
3357 pCodeBuf[off++] = RT_BYTE2(uImm);
3358 RT_NOREF(iGprImmTmp, iGprTmp);
3359
3360#elif defined(RT_ARCH_ARM64)
3361 if (uImm == 0)
3362 iGprImmTmp = ARMV8_A64_REG_XZR;
3363 else
3364 {
3365 Assert(iGprImmTmp < 31);
3366 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3367 }
3368 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3369 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3370
3371#else
3372# error "port me"
3373#endif
3374 return off;
3375}
3376
3377
3378/**
3379 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3380 *
3381 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3382 *
3383 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3384 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3385 * same. Will assert / throw if caller does not heed this.
3386 */
3387DECL_FORCE_INLINE_THROW(uint32_t)
3388iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3389 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3390{
3391#ifdef RT_ARCH_AMD64
3392 /* mov mem8, imm8 */
3394 if (iGprBase >= 8)
3395 pCodeBuf[off++] = X86_OP_REX_B;
3396 pCodeBuf[off++] = 0xc6;
3397 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3398 pCodeBuf[off++] = uImm;
3399 RT_NOREF(iGprImmTmp, iGprTmp);
3400
3401#elif defined(RT_ARCH_ARM64)
3402 if (uImm == 0)
3403 iGprImmTmp = ARMV8_A64_REG_XZR;
3404 else
3405 {
3406 Assert(iGprImmTmp < 31);
3407 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3408 }
3409 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3410 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3411
3412#else
3413# error "port me"
3414#endif
3415 return off;
3416}
3417
3418
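/*
 * Design note (our observation): the immediate store emitters above
 * special-case a zero immediate on ARM64 by storing from the always-zero
 * register XZR, so a caller storing zero may leave iGprImmTmp at its
 * UINT8_MAX default.  Hypothetical example clearing a byte field:
 *
 *      off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, 0, idxRegBase);
 */

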
3419#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3420/**
3421 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3422 *
3423 * @note ARM64: Misaligned @a offDisp values and values not in the
3424 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3425 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3426 * does not heed this.
3427 */
3428DECL_FORCE_INLINE_THROW(uint32_t)
3429iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3430 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3431{
3432#ifdef RT_ARCH_AMD64
3433 /* movdqu mem128, reg128 */
3434 pCodeBuf[off++] = 0xf3;
3435 if (iVecRegDst >= 8 || iGprBase >= 8)
3436 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3437 pCodeBuf[off++] = 0x0f;
3438 pCodeBuf[off++] = 0x7f;
3439 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3440 RT_NOREF(iGprTmp);
3441
3442#elif defined(RT_ARCH_ARM64)
3443 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3444 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3445
3446#else
3447# error "port me"
3448#endif
3449 return off;
3450}
3451
3452
3453/**
3454 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3455 */
3456DECL_INLINE_THROW(uint32_t)
3457iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3458{
3459#ifdef RT_ARCH_AMD64
3460 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3461 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3462
3463#elif defined(RT_ARCH_ARM64)
3464 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3465
3466#else
3467# error "port me"
3468#endif
3469 return off;
3470}
3471
3472
3473/**
3474 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3475 *
3476 * @note ARM64: Misaligned @a offDisp values and values not in the
3477 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3478 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3479 * does not heed this.
3480 */
3481DECL_FORCE_INLINE_THROW(uint32_t)
3482iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3483 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3484{
3485#ifdef RT_ARCH_AMD64
3486 /* vmovdqu mem256, reg256 */
3487 pCodeBuf[off++] = X86_OP_VEX3;
3488 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3489 | X86_OP_VEX3_BYTE1_X
3490 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3491 | UINT8_C(0x01);
3492 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3493 pCodeBuf[off++] = 0x7f;
3494 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3495 RT_NOREF(iGprTmp);
3496
3497#elif defined(RT_ARCH_ARM64)
3498 Assert(!(iVecRegDst & 0x1));
3499 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3500 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3501 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3502 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3503#else
3504# error "port me"
3505#endif
3506 return off;
3507}
3508
3509
3510/**
3511 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3512 */
3513DECL_INLINE_THROW(uint32_t)
3514iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3515{
3516#ifdef RT_ARCH_AMD64
3517 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3518 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3519
3520#elif defined(RT_ARCH_ARM64)
3521 Assert(!(iVecRegDst & 0x1));
3522 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3523 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3524 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3525 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3526
3527#else
3528# error "port me"
3529#endif
3530 return off;
3531}
3532#endif
3533
3534
3535
3536/*********************************************************************************************************************************
3537* Subtraction and Additions *
3538*********************************************************************************************************************************/
3539
3540/**
3541 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3542 * @note The AMD64 version sets flags.
3543 */
3544DECL_INLINE_THROW(uint32_t)
3545iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3546{
3547#if defined(RT_ARCH_AMD64)
3548 /* sub Gv,Ev */
3549 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3550 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3551 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3552 pbCodeBuf[off++] = 0x2b;
3553 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3554
3555#elif defined(RT_ARCH_ARM64)
3556 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3557 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3558
3559#else
3560# error "Port me"
3561#endif
3562 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3563 return off;
3564}
3565
3566
3567/**
3568 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3569 * @note The AMD64 version sets flags.
3570 */
3571DECL_FORCE_INLINE(uint32_t)
3572iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3573{
3574#if defined(RT_ARCH_AMD64)
3575 /* sub Gv,Ev */
3576 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3577 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3578 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3579 pCodeBuf[off++] = 0x2b;
3580 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3581
3582#elif defined(RT_ARCH_ARM64)
3583 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3584
3585#else
3586# error "Port me"
3587#endif
3588 return off;
3589}
3590
3591
3592/**
3593 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3594 * @note The AMD64 version sets flags.
3595 */
3596DECL_INLINE_THROW(uint32_t)
3597iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3598{
3599#if defined(RT_ARCH_AMD64)
3600 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3601#elif defined(RT_ARCH_ARM64)
3602 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3603#else
3604# error "Port me"
3605#endif
3606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3607 return off;
3608}
3609
3610
3611/**
3612 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3613 *
3614 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3615 *
3616 * @note Larger constants will require a temporary register. Failing to specify
3617 * one when needed will trigger fatal assertion / throw.
3618 */
3619DECL_FORCE_INLINE_THROW(uint32_t)
3620iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3621 uint8_t iGprTmp = UINT8_MAX)
3622{
3623#ifdef RT_ARCH_AMD64
3624 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3625 if (iSubtrahend == 1)
3626 {
3627 /* dec r/m64 */
3628 pCodeBuf[off++] = 0xff;
3629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3630 }
3631 else if (iSubtrahend == -1)
3632 {
3633 /* inc r/m64 */
3634 pCodeBuf[off++] = 0xff;
3635 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3636 }
3637 else if ((int8_t)iSubtrahend == iSubtrahend)
3638 {
3639 /* sub r/m64, imm8 */
3640 pCodeBuf[off++] = 0x83;
3641 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3642 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3643 }
3644 else if ((int32_t)iSubtrahend == iSubtrahend)
3645 {
3646 /* sub r/m64, imm32 */
3647 pCodeBuf[off++] = 0x81;
3648 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3649 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3650 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3651 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3652 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3653 }
3654 else if (iGprTmp != UINT8_MAX)
3655 {
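        /* Loads the immediate over the REX.W prefix emitted at the top, hence the off - 1. */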
3656 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3657 /* sub r/m64, r64 */
3658 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3659 pCodeBuf[off++] = 0x29;
3660 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3661 }
3662 else
3663# ifdef IEM_WITH_THROW_CATCH
3664 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3665# else
3666 AssertReleaseFailedStmt(off = UINT32_MAX);
3667# endif
3668
3669#elif defined(RT_ARCH_ARM64)
3670 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3671 if (uAbsSubtrahend < 4096)
3672 {
3673 if (iSubtrahend >= 0)
3674 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3675 else
3676 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3677 }
3678 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3679 {
3680 if (iSubtrahend >= 0)
3681 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3682 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3683 else
3684 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3685 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3686 }
3687 else if (iGprTmp != UINT8_MAX)
3688 {
3689 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3690 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3691 }
3692 else
3693# ifdef IEM_WITH_THROW_CATCH
3694 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3695# else
3696 AssertReleaseFailedStmt(off = UINT32_MAX);
3697# endif
3698
3699#else
3700# error "Port me"
3701#endif
3702 return off;
3703}
3704
3705
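/*
 * Worked example (hypothetical constant): on ARM64 the emitter above encodes
 * subtrahends below 4096 directly and 4096-aligned ones up to 0xfff000 as a
 * single shifted-immediate instruction, e.g. for iSubtrahend=0x123000:
 *
 *      sub x3, x3, #0x123, lsl #12
 *
 * Anything else, e.g. 0x123456, falls back to loading the constant into
 * @a iGprTmp and subtracting registers, so the caller must supply one.
 */

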
3706/**
3707 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3708 *
3709 * @note Larger constants will require a temporary register. Failing to specify
3710 * one when needed will trigger fatal assertion / throw.
3711 */
3712DECL_INLINE_THROW(uint32_t)
3713iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3714 uint8_t iGprTmp = UINT8_MAX)
3715
3716{
3717#ifdef RT_ARCH_AMD64
3718 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3719#elif defined(RT_ARCH_ARM64)
3720 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3721#else
3722# error "Port me"
3723#endif
3724 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3725 return off;
3726}
3727
3728
3729/**
3730 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3731 *
3732 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3733 *
3734 * @note ARM64: Larger constants will require a temporary register. Failing to
3735 * specify one when needed will trigger fatal assertion / throw.
3736 */
3737DECL_FORCE_INLINE_THROW(uint32_t)
3738iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3739 uint8_t iGprTmp = UINT8_MAX)
3740{
3741#ifdef RT_ARCH_AMD64
3742 if (iGprDst >= 8)
3743 pCodeBuf[off++] = X86_OP_REX_B;
3744 if (iSubtrahend == 1)
3745 {
3746 /* dec r/m32 */
3747 pCodeBuf[off++] = 0xff;
3748 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3749 }
3750 else if (iSubtrahend == -1)
3751 {
3752 /* inc r/m32 */
3753 pCodeBuf[off++] = 0xff;
3754 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3755 }
3756 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3757 {
3758 /* sub r/m32, imm8 */
3759 pCodeBuf[off++] = 0x83;
3760 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3761 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3762 }
3763 else
3764 {
3765 /* sub r/m32, imm32 */
3766 pCodeBuf[off++] = 0x81;
3767 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3768 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3769 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3770 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3771 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3772 }
3773 RT_NOREF(iGprTmp);
3774
3775#elif defined(RT_ARCH_ARM64)
3776 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3777 if (uAbsSubtrahend < 4096)
3778 {
3779 if (iSubtrahend >= 0)
3780 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3781 else
3782 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3783 }
3784 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3785 {
3786 if (iSubtrahend >= 0)
3787 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3788 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3789 else
3790 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3791 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3792 }
3793 else if (iGprTmp != UINT8_MAX)
3794 {
3795 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3796 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3797 }
3798 else
3799# ifdef IEM_WITH_THROW_CATCH
3800 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3801# else
3802 AssertReleaseFailedStmt(off = UINT32_MAX);
3803# endif
3804
3805#else
3806# error "Port me"
3807#endif
3808 return off;
3809}
3810
3811
3812/**
3813 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3814 *
3815 * @note ARM64: Larger constants will require a temporary register. Failing to
3816 * specify one when needed will trigger fatal assertion / throw.
3817 */
3818DECL_INLINE_THROW(uint32_t)
3819iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3820 uint8_t iGprTmp = UINT8_MAX)
3821
3822{
3823#ifdef RT_ARCH_AMD64
3824    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3825#elif defined(RT_ARCH_ARM64)
3826    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3827#else
3828# error "Port me"
3829#endif
3830 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3831 return off;
3832}
3833
3834
3835/**
3836 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3837 *
3838 * This will optimize using DEC/INC/whatever; the ARM64 version will not set
3839 * flags, so it is not suitable as a base for conditional jumps.
3840 *
3841 * @note AMD64: Will only update the lower 16 bits of the register.
3842 * @note ARM64: Will update the entire register.
3843 * @note ARM64: Larger constants will require a temporary register. Failing to
3844 * specify one when needed will trigger fatal assertion / throw.
3845 */
3846DECL_FORCE_INLINE_THROW(uint32_t)
3847iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3848 uint8_t iGprTmp = UINT8_MAX)
3849{
3850#ifdef RT_ARCH_AMD64
3851 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3852 if (iGprDst >= 8)
3853 pCodeBuf[off++] = X86_OP_REX_B;
3854 if (iSubtrahend == 1)
3855 {
3856 /* dec r/m16 */
3857 pCodeBuf[off++] = 0xff;
3858 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3859 }
3860 else if (iSubtrahend == -1)
3861 {
3862 /* inc r/m16 */
3863 pCodeBuf[off++] = 0xff;
3864 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3865 }
3866 else if ((int8_t)iSubtrahend == iSubtrahend)
3867 {
3868 /* sub r/m16, imm8 */
3869 pCodeBuf[off++] = 0x83;
3870 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3871 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3872 }
3873 else
3874 {
3875 /* sub r/m16, imm16 */
3876 pCodeBuf[off++] = 0x81;
3877 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3878 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3879 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3880 }
3881 RT_NOREF(iGprTmp);
3882
3883#elif defined(RT_ARCH_ARM64)
3884 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3885 if (uAbsSubtrahend < 4096)
3886 {
3887 if (iSubtrahend >= 0)
3888 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3889 else
3890 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3891 }
3892 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3893 {
3894 if (iSubtrahend >= 0)
3895 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3896 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3897 else
3898 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3899 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3900 }
3901 else if (iGprTmp != UINT8_MAX)
3902 {
3903 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3904 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3905 }
3906 else
3907# ifdef IEM_WITH_THROW_CATCH
3908 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3909# else
3910 AssertReleaseFailedStmt(off = UINT32_MAX);
3911# endif
3912 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3913
3914#else
3915# error "Port me"
3916#endif
3917 return off;
3918}
3919
3920
3921/**
3922 * Emits adding a 64-bit GPR to another, storing the result in the first.
3923 * @note The AMD64 version sets flags.
3924 */
3925DECL_FORCE_INLINE(uint32_t)
3926iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3927{
3928#if defined(RT_ARCH_AMD64)
3929 /* add Gv,Ev */
3930 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3931 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3932 pCodeBuf[off++] = 0x03;
3933 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3934
3935#elif defined(RT_ARCH_ARM64)
3936 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3937
3938#else
3939# error "Port me"
3940#endif
3941 return off;
3942}
3943
3944
3945/**
3946 * Emits adding a 64-bit GPR to another, storing the result in the first.
3947 * @note The AMD64 version sets flags.
3948 */
3949DECL_INLINE_THROW(uint32_t)
3950iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3951{
3952#if defined(RT_ARCH_AMD64)
3953 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3954#elif defined(RT_ARCH_ARM64)
3955 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3956#else
3957# error "Port me"
3958#endif
3959 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3960 return off;
3961}
3962
3963
3964/**
3965 * Emits adding a 32-bit GPR to another, storing the result in the first.
3966 * @note The AMD64 version sets flags.
3967 */
3968DECL_FORCE_INLINE(uint32_t)
3969iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3970{
3971#if defined(RT_ARCH_AMD64)
3972 /* add Gv,Ev */
3973 if (iGprDst >= 8 || iGprAddend >= 8)
3974 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3975 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3976 pCodeBuf[off++] = 0x03;
3977 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3978
3979#elif defined(RT_ARCH_ARM64)
3980 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3981
3982#else
3983# error "Port me"
3984#endif
3985 return off;
3986}
3987
3988
3989/**
3990 * Emits adding a 32-bit GPR to another, storing the result in the first.
3991 * @note The AMD64 version sets flags.
3992 */
3993DECL_INLINE_THROW(uint32_t)
3994iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3995{
3996#if defined(RT_ARCH_AMD64)
3997 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3998#elif defined(RT_ARCH_ARM64)
3999 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4000#else
4001# error "Port me"
4002#endif
4003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4004 return off;
4005}
4006
4007
4008/**
4009 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4010 */
4011DECL_INLINE_THROW(uint32_t)
4012iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4013{
4014#if defined(RT_ARCH_AMD64)
4015 /* add or inc */
4016 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4017 if (iImm8 != 1)
4018 {
4019 pCodeBuf[off++] = 0x83;
4020 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4021 pCodeBuf[off++] = (uint8_t)iImm8;
4022 }
4023 else
4024 {
4025 pCodeBuf[off++] = 0xff;
4026 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4027 }
4028
4029#elif defined(RT_ARCH_ARM64)
4030 if (iImm8 >= 0)
4031 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
4032 else
4033 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
4034
4035#else
4036# error "Port me"
4037#endif
4038 return off;
4039}
4040
4041
4042/**
4043 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4044 */
4045DECL_INLINE_THROW(uint32_t)
4046iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4047{
4048#if defined(RT_ARCH_AMD64)
4049 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4050#elif defined(RT_ARCH_ARM64)
4051 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4052#else
4053# error "Port me"
4054#endif
4055 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4056 return off;
4057}
4058
4059
4060/**
4061 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4062 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4063 */
4064DECL_FORCE_INLINE(uint32_t)
4065iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4066{
4067#if defined(RT_ARCH_AMD64)
4068 /* add or inc */
4069 if (iGprDst >= 8)
4070 pCodeBuf[off++] = X86_OP_REX_B;
4071 if (iImm8 != 1)
4072 {
4073 pCodeBuf[off++] = 0x83;
4074 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4075 pCodeBuf[off++] = (uint8_t)iImm8;
4076 }
4077 else
4078 {
4079 pCodeBuf[off++] = 0xff;
4080 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4081 }
4082
4083#elif defined(RT_ARCH_ARM64)
4084 if (iImm8 >= 0)
4085 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4086 else
4087 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4088
4089#else
4090# error "Port me"
4091#endif
4092 return off;
4093}
4094
4095
4096/**
4097 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4098 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4099 */
4100DECL_INLINE_THROW(uint32_t)
4101iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4102{
4103#if defined(RT_ARCH_AMD64)
4104 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4105#elif defined(RT_ARCH_ARM64)
4106 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4107#else
4108# error "Port me"
4109#endif
4110 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4111 return off;
4112}
4113
4114
4115/**
4116 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4117 *
4118 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4119 */
4120DECL_FORCE_INLINE_THROW(uint32_t)
4121iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4122{
4123#if defined(RT_ARCH_AMD64)
4124 if ((int8_t)iAddend == iAddend)
4125 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4126
4127 if ((int32_t)iAddend == iAddend)
4128 {
4129 /* add grp, imm32 */
4130 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4131 pCodeBuf[off++] = 0x81;
4132 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4133 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4134 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4135 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4136 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4137 }
4138 else if (iGprTmp != UINT8_MAX)
4139 {
4140 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4141
4142 /* add dst, tmpreg */
4143 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4144 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4145 pCodeBuf[off++] = 0x03;
4146 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4147 }
4148 else
4149# ifdef IEM_WITH_THROW_CATCH
4150 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4151# else
4152 AssertReleaseFailedStmt(off = UINT32_MAX);
4153# endif
4154
4155#elif defined(RT_ARCH_ARM64)
4156 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4157 if (uAbsAddend <= 0xffffffU)
4158 {
4159 bool const fSub = iAddend < 0;
4160 if (uAbsAddend > 0xfffU)
4161 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4162 false /*fSetFlags*/, true /*fShift12*/);
4163 if (uAbsAddend & 0xfffU)
4164 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4165 }
4166 else if (iGprTmp != UINT8_MAX)
4167 {
4168 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4169 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4170 }
4171 else
4172# ifdef IEM_WITH_THROW_CATCH
4173 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4174# else
4175 AssertReleaseFailedStmt(off = UINT32_MAX);
4176# endif
4177
4178#else
4179# error "Port me"
4180#endif
4181 return off;
4182}
4183
4184
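/*
 * Worked example (hypothetical constant): unlike the 64-bit subtraction
 * emitter, the emitter above splits any addend of up to 24 bits into a
 * shifted and an unshifted instruction pair, so iAddend=0x123456 becomes:
 *
 *      add x3, x3, #0x123, lsl #12
 *      add x3, x3, #0x456
 */

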
4185/**
4186 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4187 */
4188DECL_INLINE_THROW(uint32_t)
4189iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4190{
4191#if defined(RT_ARCH_AMD64)
4192 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4193 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4194
4195 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4196 {
4197 /* add grp, imm32 */
4198 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4199 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4200 pbCodeBuf[off++] = 0x81;
4201 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4202 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4203 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4204 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4205 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4206 }
4207 else
4208 {
4209 /* Best to use a temporary register to deal with this in the simplest way: */
4210 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4211
4212 /* add dst, tmpreg */
4213 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4214 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4215 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4216 pbCodeBuf[off++] = 0x03;
4217 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4218
4219 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4220 }
4221
4222#elif defined(RT_ARCH_ARM64)
4223 bool const fSub = iAddend < 0;
4224 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4225 if (uAbsAddend <= 0xffffffU)
4226 {
4227 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4228 if (uAbsAddend > 0xfffU)
4229 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4230 false /*fSetFlags*/, true /*fShift12*/);
4231 if (uAbsAddend & 0xfffU)
4232 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4233 }
4234 else
4235 {
4236 /* Use temporary register for the immediate. */
4237 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4238
4239 /* add gprdst, gprdst, tmpreg */
4240 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4241 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg);
4242
4243 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4244 }
4245
4246#else
4247# error "Port me"
4248#endif
4249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4250 return off;
4251}
4252
4253
4254/**
4255 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4256 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4257 * @note For ARM64 the absolute value of @a iAddend must be in the range
4258 *       0x000000..0xffffff; negative values are allowed too and behave like a
4259 *       subtraction.  Will assert / throw if the constant does not conform.
4260 */
4261DECL_FORCE_INLINE_THROW(uint32_t)
4262iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4263{
4264#if defined(RT_ARCH_AMD64)
4265 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4266 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4267
4268 /* add grp, imm32 */
4269 if (iGprDst >= 8)
4270 pCodeBuf[off++] = X86_OP_REX_B;
4271 pCodeBuf[off++] = 0x81;
4272 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4273 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4274 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4275 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4276 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4277
4278#elif defined(RT_ARCH_ARM64)
4279 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4280 if (uAbsAddend <= 0xffffffU)
4281 {
4282 bool const fSub = iAddend < 0;
4283 if (uAbsAddend > 0xfffU)
4284 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4285 false /*fSetFlags*/, true /*fShift12*/);
4286 if (uAbsAddend & 0xfffU)
4287 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4288 }
4289 else
4290# ifdef IEM_WITH_THROW_CATCH
4291 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4292# else
4293 AssertReleaseFailedStmt(off = UINT32_MAX);
4294# endif
4295
4296#else
4297# error "Port me"
4298#endif
4299 return off;
4300}
4301
4302
4303/**
4304 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4305 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4306 */
4307DECL_INLINE_THROW(uint32_t)
4308iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4309{
4310#if defined(RT_ARCH_AMD64)
4311 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4312
4313#elif defined(RT_ARCH_ARM64)
4314 bool const fSub = iAddend < 0;
4315 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4316 if (uAbsAddend <= 0xffffffU)
4317 {
4318 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4319 if (uAbsAddend > 0xfffU)
4320 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4321 false /*fSetFlags*/, true /*fShift12*/);
4322 if (uAbsAddend & 0xfffU)
4323 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4324 }
4325 else
4326 {
4327 /* Use temporary register for the immediate. */
4328 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4329
4330 /* add gprdst, gprdst, tmpreg */
4331 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4332 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4333
4334 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4335 }
4336
4337#else
4338# error "Port me"
4339#endif
4340 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4341 return off;
4342}
4343
4344
4345/**
4346 * Emits a 16-bit GPR add with a signed immediate addend.
4347 *
4348 * This will optimize using INC/DEC/whatever; the ARM64 version will not set
4349 * flags, so it is not suitable as a base for conditional jumps.
4350 *
4351 * @note AMD64: Will only update the lower 16 bits of the register.
4352 * @note ARM64: Will update the entire register.
4353 * @sa iemNativeEmitSubGpr16ImmEx
4354 */
4355DECL_FORCE_INLINE(uint32_t)
4356iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend)
4357{
4358#ifdef RT_ARCH_AMD64
4359 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4360 if (iGprDst >= 8)
4361 pCodeBuf[off++] = X86_OP_REX_B;
4362 if (iAddend == 1)
4363 {
4364 /* inc r/m16 */
4365 pCodeBuf[off++] = 0xff;
4366 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4367 }
4368 else if (iAddend == -1)
4369 {
4370 /* dec r/m16 */
4371 pCodeBuf[off++] = 0xff;
4372 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4373 }
4374 else if ((int8_t)iAddend == iAddend)
4375 {
4376 /* add r/m16, imm8 */
4377 pCodeBuf[off++] = 0x83;
4378 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4379 pCodeBuf[off++] = (uint8_t)iAddend;
4380 }
4381 else
4382 {
4383 /* add r/m16, imm16 */
4384 pCodeBuf[off++] = 0x81;
4385 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4386 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4387 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4388 }
4389
4390#elif defined(RT_ARCH_ARM64)
4391 bool const fSub = iAddend < 0;
4392 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4393 if (uAbsAddend > 0xfffU)
4394 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4395 false /*fSetFlags*/, true /*fShift12*/);
4396 if (uAbsAddend & 0xfffU)
4397 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4398 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4399
4400#else
4401# error "Port me"
4402#endif
4403 return off;
4404}
4405
4406
4407
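/*
 * Illustration (hypothetical value) of the platform difference noted above:
 * starting from 0x8888ffff, adding 1 via the emitter above leaves
 * 0x88880000 on AMD64 (upper bits preserved, the 16-bit lane wraps) but zero
 * on ARM64, where the trailing AND with 0xffff zero-extends the 16-bit
 * result into the full register.
 */

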
4408/**
4409 * Adds two 64-bit GPRs together, storing the result in a third register.
4410 */
4411DECL_FORCE_INLINE(uint32_t)
4412iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4413{
4414#ifdef RT_ARCH_AMD64
4415 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4416 {
4417 /** @todo consider LEA */
4418 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4419 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4420 }
4421 else
4422 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4423
4424#elif defined(RT_ARCH_ARM64)
4425 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4426
4427#else
4428# error "Port me!"
4429#endif
4430 return off;
4431}
4432
4433
4434
4435/**
4436 * Adds two 32-bit GPRs together, storing the result in a third register.
4437 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4438 */
4439DECL_FORCE_INLINE(uint32_t)
4440iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4441{
4442#ifdef RT_ARCH_AMD64
4443 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4444 {
4445 /** @todo consider LEA */
4446 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4447 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4448 }
4449 else
4450 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4451
4452#elif defined(RT_ARCH_ARM64)
4453 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4454
4455#else
4456# error "Port me!"
4457#endif
4458 return off;
4459}
4460
4461
4462/**
4463 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4464 * third register.
4465 *
4466 * @note The ARM64 version does not work for non-trivial constants if the
4467 * two registers are the same. Will assert / throw exception.
4468 */
4469DECL_FORCE_INLINE_THROW(uint32_t)
4470iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4471{
4472#ifdef RT_ARCH_AMD64
4473 /** @todo consider LEA */
4474 if ((int8_t)iImmAddend == iImmAddend)
4475 {
4476 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4477 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4478 }
4479 else
4480 {
4481 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4482 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4483 }
4484
4485#elif defined(RT_ARCH_ARM64)
4486 bool const fSub = iImmAddend < 0;
4487 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4488 if (uAbsImmAddend <= 0xfffU)
4489 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend);
4490 else if (uAbsImmAddend <= 0xffffffU)
4491 {
4492 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4493 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4494 if (uAbsImmAddend & 0xfffU)
4495 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & UINT32_C(0xfff));
4496 }
4497 else if (iGprDst != iGprAddend)
4498 {
4499 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4500 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4501 }
4502 else
4503# ifdef IEM_WITH_THROW_CATCH
4504 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4505# else
4506 AssertReleaseFailedStmt(off = UINT32_MAX);
4507# endif
4508
4509#else
4510# error "Port me!"
4511#endif
4512 return off;
4513}
4514
4515
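/*
 * Usage sketch (illustrative; register indices are hypothetical): given the
 * same-register caveat above, keep the destination distinct from the addend
 * register when the constant is non-trivial:
 *
 *      off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegDst, idxRegMember, INT64_C(0x123456789a));
 */

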
4516/**
4517 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4518 * third register.
4519 *
4520 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4521 *
4522 * @note The ARM64 version does not work for non-trivial constants if the
4523 * two registers are the same. Will assert / throw exception.
4524 */
4525DECL_FORCE_INLINE_THROW(uint32_t)
4526iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4527{
4528#ifdef RT_ARCH_AMD64
4529 /** @todo consider LEA */
4530 if ((int8_t)iImmAddend == iImmAddend)
4531 {
4532 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4533 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4534 }
4535 else
4536 {
4537 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4538        off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4539 }
4540
4541#elif defined(RT_ARCH_ARM64)
4542 bool const fSub = iImmAddend < 0;
4543 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4544 if (uAbsImmAddend <= 0xfffU)
4545 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4546 else if (uAbsImmAddend <= 0xffffffU)
4547 {
4548 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4549 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4550 if (uAbsImmAddend & 0xfffU)
4551 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & 0xfff, false /*f64Bit*/);
4552 }
4553 else if (iGprDst != iGprAddend)
4554 {
4555 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4556 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4557 }
4558 else
4559# ifdef IEM_WITH_THROW_CATCH
4560 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4561# else
4562 AssertReleaseFailedStmt(off = UINT32_MAX);
4563# endif
4564
4565#else
4566# error "Port me!"
4567#endif
4568 return off;
4569}
4570
4571
4572/*********************************************************************************************************************************
4573* Unary Operations *
4574*********************************************************************************************************************************/
4575
4576/**
4577 * Emits code for two's complement negation of a 64-bit GPR.
4578 */
4579DECL_FORCE_INLINE_THROW(uint32_t)
4580iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4581{
4582#if defined(RT_ARCH_AMD64)
4583 /* neg Ev */
4584 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4585 pCodeBuf[off++] = 0xf7;
4586 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4587
4588#elif defined(RT_ARCH_ARM64)
4589 /* sub dst, xzr, dst */
4590 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4591
4592#else
4593# error "Port me"
4594#endif
4595 return off;
4596}
4597
4598
4599/**
4600 * Emits code for two's complement negation of a 64-bit GPR.
4601 */
4602DECL_INLINE_THROW(uint32_t)
4603iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4604{
4605#if defined(RT_ARCH_AMD64)
4606 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4607#elif defined(RT_ARCH_ARM64)
4608 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4609#else
4610# error "Port me"
4611#endif
4612 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4613 return off;
4614}
4615
4616
4617/**
4618 * Emits code for two's complement negation of a 32-bit GPR.
4619 * @note Bits 32 thru 63 are set to zero.
4620 */
4621DECL_FORCE_INLINE_THROW(uint32_t)
4622iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4623{
4624#if defined(RT_ARCH_AMD64)
4625 /* neg Ev */
4626 if (iGprDst >= 8)
4627 pCodeBuf[off++] = X86_OP_REX_B;
4628 pCodeBuf[off++] = 0xf7;
4629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4630
4631#elif defined(RT_ARCH_ARM64)
4632 /* sub dst, xzr, dst */
4633 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4634
4635#else
4636# error "Port me"
4637#endif
4638 return off;
4639}
4640
4641
4642/**
4643 * Emits code for two's complement negation of a 32-bit GPR.
4644 * @note Bits 32 thru 63 are set to zero.
4645 */
4646DECL_INLINE_THROW(uint32_t)
4647iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4648{
4649#if defined(RT_ARCH_AMD64)
4650 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4651#elif defined(RT_ARCH_ARM64)
4652 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4653#else
4654# error "Port me"
4655#endif
4656 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4657 return off;
4658}
4659
4660
4661
4662/*********************************************************************************************************************************
4663* Bit Operations *
4664*********************************************************************************************************************************/
4665
4666/**
4667 * Emits code for clearing bits 16 thru 63 in the GPR.
4668 */
4669DECL_INLINE_THROW(uint32_t)
4670iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4671{
4672#if defined(RT_ARCH_AMD64)
4673 /* movzx Gv,Ew */
4674 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4675 if (iGprDst >= 8)
4676 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4677 pbCodeBuf[off++] = 0x0f;
4678 pbCodeBuf[off++] = 0xb7;
4679 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4680
4681#elif defined(RT_ARCH_ARM64)
4682 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4683# if 1
4684 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4685# else
4686 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4687 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4688# endif
4689#else
4690# error "Port me"
4691#endif
4692 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4693 return off;
4694}
4695
4696
4697/**
4698 * Emits code for AND'ing two 64-bit GPRs.
4699 *
4700 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4701 * and ARM64 hosts.
4702 */
4703DECL_FORCE_INLINE(uint32_t)
4704iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4705{
4706#if defined(RT_ARCH_AMD64)
4707 /* and Gv, Ev */
4708 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4709 pCodeBuf[off++] = 0x23;
4710 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4711 RT_NOREF(fSetFlags);
4712
4713#elif defined(RT_ARCH_ARM64)
4714 if (!fSetFlags)
4715 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4716 else
4717 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4718
4719#else
4720# error "Port me"
4721#endif
4722 return off;
4723}
4724
4725
4726/**
4727 * Emits code for AND'ing two 64-bit GPRs.
4728 *
4729 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4730 * and ARM64 hosts.
4731 */
4732DECL_INLINE_THROW(uint32_t)
4733iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4734{
4735#if defined(RT_ARCH_AMD64)
4736 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4737#elif defined(RT_ARCH_ARM64)
4738 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4739#else
4740# error "Port me"
4741#endif
4742 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4743 return off;
4744}
4745
4746
4747/**
4748 * Emits code for AND'ing two 32-bit GPRs.
4749 */
4750DECL_FORCE_INLINE(uint32_t)
4751iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4752{
4753#if defined(RT_ARCH_AMD64)
4754 /* and Gv, Ev */
4755 if (iGprDst >= 8 || iGprSrc >= 8)
4756 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4757 pCodeBuf[off++] = 0x23;
4758 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4759 RT_NOREF(fSetFlags);
4760
4761#elif defined(RT_ARCH_ARM64)
4762 if (!fSetFlags)
4763 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4764 else
4765 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4766
4767#else
4768# error "Port me"
4769#endif
4770 return off;
4771}
4772
4773
4774/**
4775 * Emits code for AND'ing two 32-bit GPRs.
4776 */
4777DECL_INLINE_THROW(uint32_t)
4778iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4779{
4780#if defined(RT_ARCH_AMD64)
4781 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4782#elif defined(RT_ARCH_ARM64)
4783 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4784#else
4785# error "Port me"
4786#endif
4787 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4788 return off;
4789}
4790
4791
4792/**
4793 * Emits code for AND'ing a 64-bit GPR with a constant.
4794 *
4795 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4796 * and ARM64 hosts.
4797 */
4798DECL_INLINE_THROW(uint32_t)
4799iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4800{
4801#if defined(RT_ARCH_AMD64)
4802 if ((int64_t)uImm == (int8_t)uImm)
4803 {
4804 /* and Ev, imm8 */
4805 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4806 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4807 pbCodeBuf[off++] = 0x83;
4808 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4809 pbCodeBuf[off++] = (uint8_t)uImm;
4810 }
4811 else if ((int64_t)uImm == (int32_t)uImm)
4812 {
4813 /* and Ev, imm32 */
4814 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4815 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4816 pbCodeBuf[off++] = 0x81;
4817 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4818 pbCodeBuf[off++] = RT_BYTE1(uImm);
4819 pbCodeBuf[off++] = RT_BYTE2(uImm);
4820 pbCodeBuf[off++] = RT_BYTE3(uImm);
4821 pbCodeBuf[off++] = RT_BYTE4(uImm);
4822 }
4823 else
4824 {
4825 /* Use temporary register for the 64-bit immediate. */
4826 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4827 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4828 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4829 }
4830 RT_NOREF(fSetFlags);
4831
4832#elif defined(RT_ARCH_ARM64)
4833 uint32_t uImmR = 0;
4834 uint32_t uImmNandS = 0;
4835 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4836 {
4837 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4838 if (!fSetFlags)
4839 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4840 else
4841 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4842 }
4843 else
4844 {
4845 /* Use temporary register for the 64-bit immediate. */
4846 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4847 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4848 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4849 }
4850
4851#else
4852# error "Port me"
4853#endif
4854 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4855 return off;
4856}
4857
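/*
 * Sketch of the ARM64 immediate classification above: a mask that is a single
 * contiguous run of ones, possibly rotated and/or replicated (e.g.
 * 0x0000ffffffff0000 or 0x00ff00ff00ff00ff), converts to an immr/imms pair
 * and is AND'ed in one instruction, whereas an arbitrary constant such as
 * 0x12345 is not encodable and falls back to the temporary register path.
 */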
4858
4859/**
4860 * Emits code for AND'ing a 32-bit GPR with a constant.
4861 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4862 * @note For ARM64 this only supports @a uImm values that can be expressed using
4863 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4864 * make sure this is possible!
4865 */
4866DECL_FORCE_INLINE_THROW(uint32_t)
4867iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4868{
4869#if defined(RT_ARCH_AMD64)
4870 /* and Ev, imm */
4871 if (iGprDst >= 8)
4872 pCodeBuf[off++] = X86_OP_REX_B;
4873 if ((int32_t)uImm == (int8_t)uImm)
4874 {
4875 pCodeBuf[off++] = 0x83;
4876 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4877 pCodeBuf[off++] = (uint8_t)uImm;
4878 }
4879 else
4880 {
4881 pCodeBuf[off++] = 0x81;
4882 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4883 pCodeBuf[off++] = RT_BYTE1(uImm);
4884 pCodeBuf[off++] = RT_BYTE2(uImm);
4885 pCodeBuf[off++] = RT_BYTE3(uImm);
4886 pCodeBuf[off++] = RT_BYTE4(uImm);
4887 }
4888 RT_NOREF(fSetFlags);
4889
4890#elif defined(RT_ARCH_ARM64)
4891 uint32_t uImmR = 0;
4892 uint32_t uImmNandS = 0;
4893 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4894 {
4895 if (!fSetFlags)
4896 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4897 else
4898 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4899 }
4900 else
4901# ifdef IEM_WITH_THROW_CATCH
4902 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4903# else
4904 AssertReleaseFailedStmt(off = UINT32_MAX);
4905# endif
4906
4907#else
4908# error "Port me"
4909#endif
4910 return off;
4911}
4912
4913
4914/**
4915 * Emits code for AND'ing a 32-bit GPR with a constant.
4916 *
4917 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4918 */
4919DECL_INLINE_THROW(uint32_t)
4920iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4921{
4922#if defined(RT_ARCH_AMD64)
4923 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4924
4925#elif defined(RT_ARCH_ARM64)
4926 uint32_t uImmR = 0;
4927 uint32_t uImmNandS = 0;
4928 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4929 {
4930 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4931 if (!fSetFlags)
4932 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4933 else
4934 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4935 }
4936 else
4937 {
4938 /* Use temporary register for the 64-bit immediate. */
4939 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4940 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4941 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4942 }
4943
4944#else
4945# error "Port me"
4946#endif
4947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4948 return off;
4949}
4950
4951
4952/**
4953 * Emits code for AND'ing a 64-bit GPR with a constant.
4954 *
4955 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
4956 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
4957 * the same.
4958 */
4959DECL_FORCE_INLINE_THROW(uint32_t)
4960iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4961 bool fSetFlags = false)
4962{
4963#if defined(RT_ARCH_AMD64)
4964 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4965 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4966 RT_NOREF(fSetFlags);
4967
4968#elif defined(RT_ARCH_ARM64)
4969 uint32_t uImmR = 0;
4970 uint32_t uImmNandS = 0;
4971 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4972 {
4973 if (!fSetFlags)
4974 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4975 else
4976 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4977 }
4978 else if (iGprDst != iGprSrc)
4979 {
4980 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4981 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4982 }
4983 else
4984# ifdef IEM_WITH_THROW_CATCH
4985 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4986# else
4987 AssertReleaseFailedStmt(off = UINT32_MAX);
4988# endif
4989
4990#else
4991# error "Port me"
4992#endif
4993 return off;
4994}
4995
4996/**
4997 * Emits code for AND'ing a 32-bit GPR with a constant.
4998 *
4999 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
5000 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
5001 * the same.
5002 *
5003 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5004 */
5005DECL_FORCE_INLINE_THROW(uint32_t)
5006iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
5007 bool fSetFlags = false)
5008{
5009#if defined(RT_ARCH_AMD64)
5010 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5011 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5012 RT_NOREF(fSetFlags);
5013
5014#elif defined(RT_ARCH_ARM64)
5015 uint32_t uImmR = 0;
5016 uint32_t uImmNandS = 0;
5017 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5018 {
5019 if (!fSetFlags)
5020 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5021 else
5022 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5023 }
5024 else if (iGprDst != iGprSrc)
5025 {
5026    /* If a value of 64K or larger is no more than 16 bits wide once its
5027       trailing zeros are shifted out, we can use shifting to save an
5028       instruction. We prefer the builtin ctz here to our own, since the
5029       compiler can evaluate uImm at compile time if it is a constant value
5030       (which is often the case). This is useful for the TLB lookup code. */
5031 if (uImm > 0xffffU)
5032 {
5033# if defined(__GNUC__)
5034 unsigned cTrailingZeros = __builtin_ctz(uImm);
5035# else
5036 unsigned cTrailingZeros = ASMBitFirstSetU32(uImm) - 1;
5037# endif
5038 if ((uImm >> cTrailingZeros) <= 0xffffU)
5039 {
5040 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprDst, uImm >> cTrailingZeros);
5041 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprSrc,
5042 iGprDst, true /*f64Bit*/, cTrailingZeros, kArmv8A64InstrShift_Lsl);
5043 return off;
5044 }
5045 }
5046 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5047 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5048 }
5049 else
5050# ifdef IEM_WITH_THROW_CATCH
5051 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5052# else
5053 AssertReleaseFailedStmt(off = UINT32_MAX);
5054# endif
5055
5056#else
5057# error "Port me"
5058#endif
5059 return off;
5060}
5061
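/*
 * Worked example for the shifting trick above (register numbers made up):
 * uImm = 0x01230000 is not a valid AND bitmask immediate, but it has 16
 * trailing zeros and 0x123 fits in 16 bits, so the emitter produces roughly:
 *      movz x0, #0x123
 *      and  x0, x1, x0, lsl #16
 * rather than a two-instruction immediate load followed by a plain AND.
 */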
5062
5063/**
5064 * Emits code for OR'ing two 64-bit GPRs.
5065 */
5066DECL_FORCE_INLINE(uint32_t)
5067iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5068{
5069#if defined(RT_ARCH_AMD64)
5070 /* or Gv, Ev */
5071 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5072 pCodeBuf[off++] = 0x0b;
5073 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5074
5075#elif defined(RT_ARCH_ARM64)
5076 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5077
5078#else
5079# error "Port me"
5080#endif
5081 return off;
5082}
5083
5084
5085/**
5086 * Emits code for OR'ing two 64-bit GPRs.
5087 */
5088DECL_INLINE_THROW(uint32_t)
5089iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5090{
5091#if defined(RT_ARCH_AMD64)
5092 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5093#elif defined(RT_ARCH_ARM64)
5094 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5095#else
5096# error "Port me"
5097#endif
5098 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5099 return off;
5100}
5101
5102
5103/**
5104 * Emits code for OR'ing two 32-bit GPRs.
5105 * @note Bits 63:32 of the destination GPR will be cleared.
5106 */
5107DECL_FORCE_INLINE(uint32_t)
5108iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5109{
5110#if defined(RT_ARCH_AMD64)
5111 /* or Gv, Ev */
5112 if (iGprDst >= 8 || iGprSrc >= 8)
5113 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5114 pCodeBuf[off++] = 0x0b;
5115 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5116
5117#elif defined(RT_ARCH_ARM64)
5118 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5119
5120#else
5121# error "Port me"
5122#endif
5123 return off;
5124}
5125
5126
5127/**
5128 * Emits code for OR'ing two 32-bit GPRs.
5129 * @note Bits 63:32 of the destination GPR will be cleared.
5130 */
5131DECL_INLINE_THROW(uint32_t)
5132iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5133{
5134#if defined(RT_ARCH_AMD64)
5135 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5136#elif defined(RT_ARCH_ARM64)
5137 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5138#else
5139# error "Port me"
5140#endif
5141 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5142 return off;
5143}
5144
5145
5146/**
5147 * Emits code for OR'ing a 64-bit GPR with a constant.
5148 */
5149DECL_INLINE_THROW(uint32_t)
5150iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5151{
5152#if defined(RT_ARCH_AMD64)
5153 if ((int64_t)uImm == (int8_t)uImm)
5154 {
5155 /* or Ev, imm8 */
5156 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5157 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5158 pbCodeBuf[off++] = 0x83;
5159 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5160 pbCodeBuf[off++] = (uint8_t)uImm;
5161 }
5162 else if ((int64_t)uImm == (int32_t)uImm)
5163 {
5164 /* or Ev, imm32 */
5165 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5166 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5167 pbCodeBuf[off++] = 0x81;
5168 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5169 pbCodeBuf[off++] = RT_BYTE1(uImm);
5170 pbCodeBuf[off++] = RT_BYTE2(uImm);
5171 pbCodeBuf[off++] = RT_BYTE3(uImm);
5172 pbCodeBuf[off++] = RT_BYTE4(uImm);
5173 }
5174 else
5175 {
5176 /* Use temporary register for the 64-bit immediate. */
5177 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5178 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5179 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5180 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5181 }
5182
5183#elif defined(RT_ARCH_ARM64)
5184 uint32_t uImmR = 0;
5185 uint32_t uImmNandS = 0;
5186 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5187 {
5188 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5189 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5190 }
5191 else
5192 {
5193 /* Use temporary register for the 64-bit immediate. */
5194 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5195 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5196 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5197 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5198 }
5199
5200#else
5201# error "Port me"
5202#endif
5203 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5204 return off;
5205}
5206
5207
5208/**
5209 * Emits code for OR'ing a 32-bit GPR with a constant.
5210 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5211 * @note For ARM64 this only supports @a uImm values that can be expressed using
5212 * the two 6-bit immediates of the OR instructions. The caller must make
5213 * sure this is possible!
5214 */
5215DECL_FORCE_INLINE_THROW(uint32_t)
5216iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5217{
5218#if defined(RT_ARCH_AMD64)
5219 /* or Ev, imm */
5220 if (iGprDst >= 8)
5221 pCodeBuf[off++] = X86_OP_REX_B;
5222 if ((int32_t)uImm == (int8_t)uImm)
5223 {
5224 pCodeBuf[off++] = 0x83;
5225 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5226 pCodeBuf[off++] = (uint8_t)uImm;
5227 }
5228 else
5229 {
5230 pCodeBuf[off++] = 0x81;
5231 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5232 pCodeBuf[off++] = RT_BYTE1(uImm);
5233 pCodeBuf[off++] = RT_BYTE2(uImm);
5234 pCodeBuf[off++] = RT_BYTE3(uImm);
5235 pCodeBuf[off++] = RT_BYTE4(uImm);
5236 }
5237
5238#elif defined(RT_ARCH_ARM64)
5239 uint32_t uImmR = 0;
5240 uint32_t uImmNandS = 0;
5241 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5242 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5243 else
5244# ifdef IEM_WITH_THROW_CATCH
5245 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5246# else
5247 AssertReleaseFailedStmt(off = UINT32_MAX);
5248# endif
5249
5250#else
5251# error "Port me"
5252#endif
5253 return off;
5254}
5255
5256
5257/**
5258 * Emits code for OR'ing a 32-bit GPR with a constant.
5259 *
5260 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5261 */
5262DECL_INLINE_THROW(uint32_t)
5263iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5264{
5265#if defined(RT_ARCH_AMD64)
5266 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5267
5268#elif defined(RT_ARCH_ARM64)
5269 uint32_t uImmR = 0;
5270 uint32_t uImmNandS = 0;
5271 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5272 {
5273 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5274 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5275 }
5276 else
5277 {
5278 /* Use temporary register for the 64-bit immediate. */
5279 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5280 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5281 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5282 }
5283
5284#else
5285# error "Port me"
5286#endif
5287 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5288 return off;
5289}
5290
5291
5292
5293/**
5294 * ORs two 64-bit GPRs together, storing the result in a third register.
5295 */
5296DECL_FORCE_INLINE(uint32_t)
5297iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5298{
5299#ifdef RT_ARCH_AMD64
5300 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5301 {
5302 /** @todo consider LEA */
5303 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5304 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5305 }
5306 else
5307 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5308
5309#elif defined(RT_ARCH_ARM64)
5310 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5311
5312#else
5313# error "Port me!"
5314#endif
5315 return off;
5316}
5317
5318
5319
5320/**
5321 * ORs two 32-bit GPRs together, storing the result in a third register.
5322 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5323 */
5324DECL_FORCE_INLINE(uint32_t)
5325iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5326{
5327#ifdef RT_ARCH_AMD64
5328 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5329 {
5330 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5331 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5332 }
5333 else
5334 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5335
5336#elif defined(RT_ARCH_ARM64)
5337 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5338
5339#else
5340# error "Port me!"
5341#endif
5342 return off;
5343}
5344
5345
5346/**
5347 * Emits code for XOR'ing two 64-bit GPRs.
5348 */
5349DECL_INLINE_THROW(uint32_t)
5350iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5351{
5352#if defined(RT_ARCH_AMD64)
5353    /* xor Gv, Ev */
5354 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5355 pCodeBuf[off++] = 0x33;
5356 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5357
5358#elif defined(RT_ARCH_ARM64)
5359 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5360
5361#else
5362# error "Port me"
5363#endif
5364 return off;
5365}
5366
5367
5368/**
5369 * Emits code for XOR'ing two 64-bit GPRs.
5370 */
5371DECL_INLINE_THROW(uint32_t)
5372iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5373{
5374#if defined(RT_ARCH_AMD64)
5375 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5376#elif defined(RT_ARCH_ARM64)
5377 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5378#else
5379# error "Port me"
5380#endif
5381 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5382 return off;
5383}
5384
5385
5386/**
5387 * Emits code for XOR'ing two 32-bit GPRs.
5388 */
5389DECL_INLINE_THROW(uint32_t)
5390iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5391{
5392#if defined(RT_ARCH_AMD64)
5393    /* xor Gv, Ev */
5394 if (iGprDst >= 8 || iGprSrc >= 8)
5395 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5396 pCodeBuf[off++] = 0x33;
5397 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5398
5399#elif defined(RT_ARCH_ARM64)
5400 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5401
5402#else
5403# error "Port me"
5404#endif
5405 return off;
5406}
5407
5408
5409/**
5410 * Emits code for XOR'ing two 32-bit GPRs.
5411 */
5412DECL_INLINE_THROW(uint32_t)
5413iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5414{
5415#if defined(RT_ARCH_AMD64)
5416 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5417#elif defined(RT_ARCH_ARM64)
5418 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5419#else
5420# error "Port me"
5421#endif
5422 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5423 return off;
5424}
5425
5426
5427/**
5428 * Emits code for XOR'ing a 32-bit GPR with a constant.
5429 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5430 * @note For ARM64 this only supports @a uImm values that can be expressed using
5431 * the two 6-bit immediates of the EOR instructions. The caller must make
5432 * sure this is possible!
5433 */
5434DECL_FORCE_INLINE_THROW(uint32_t)
5435iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5436{
5437#if defined(RT_ARCH_AMD64)
5438    /* xor Ev, imm */
5439 if (iGprDst >= 8)
5440 pCodeBuf[off++] = X86_OP_REX_B;
5441 if ((int32_t)uImm == (int8_t)uImm)
5442 {
5443 pCodeBuf[off++] = 0x83;
5444 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5445 pCodeBuf[off++] = (uint8_t)uImm;
5446 }
5447 else
5448 {
5449 pCodeBuf[off++] = 0x81;
5450 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5451 pCodeBuf[off++] = RT_BYTE1(uImm);
5452 pCodeBuf[off++] = RT_BYTE2(uImm);
5453 pCodeBuf[off++] = RT_BYTE3(uImm);
5454 pCodeBuf[off++] = RT_BYTE4(uImm);
5455 }
5456
5457#elif defined(RT_ARCH_ARM64)
5458 uint32_t uImmR = 0;
5459 uint32_t uImmNandS = 0;
5460 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5461 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5462 else
5463# ifdef IEM_WITH_THROW_CATCH
5464 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5465# else
5466 AssertReleaseFailedStmt(off = UINT32_MAX);
5467# endif
5468
5469#else
5470# error "Port me"
5471#endif
5472 return off;
5473}
5474
5475
5476/**
5477 * Emits code for XOR'ing a 32-bit GPR with a constant.
5478 */
5479DECL_INLINE_THROW(uint32_t)
5480iemNativeEmitXorGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5481{
5482#if defined(RT_ARCH_AMD64)
5483 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5484#elif defined(RT_ARCH_ARM64)
5485 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, uImm);
5486#else
5487# error "Port me"
5488#endif
5489 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5490 return off;
5491}
5492
5493
5494/*********************************************************************************************************************************
5495* Shifting *
5496*********************************************************************************************************************************/
5497
5498/**
5499 * Emits code for shifting a GPR a fixed number of bits to the left.
5500 */
5501DECL_FORCE_INLINE(uint32_t)
5502iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5503{
5504 Assert(cShift > 0 && cShift < 64);
5505
5506#if defined(RT_ARCH_AMD64)
5507 /* shl dst, cShift */
5508 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5509 if (cShift != 1)
5510 {
5511 pCodeBuf[off++] = 0xc1;
5512 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5513 pCodeBuf[off++] = cShift;
5514 }
5515 else
5516 {
5517 pCodeBuf[off++] = 0xd1;
5518 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5519 }
5520
5521#elif defined(RT_ARCH_ARM64)
5522 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5523
5524#else
5525# error "Port me"
5526#endif
5527 return off;
5528}
5529
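/*
 * AMD64 encoding note for the emitter above (illustrative): a count of 1 uses
 * the short 0xd1 /4 form, everything else the 0xc1 /4 ib form.  With
 * iGprDst = 3 (rbx):
 *      cShift = 1  ->  48 d1 e3        shl rbx, 1
 *      cShift = 4  ->  48 c1 e3 04     shl rbx, 4
 */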
5530
5531/**
5532 * Emits code for shifting a GPR a fixed number of bits to the left.
5533 */
5534DECL_INLINE_THROW(uint32_t)
5535iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5536{
5537#if defined(RT_ARCH_AMD64)
5538 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5539#elif defined(RT_ARCH_ARM64)
5540 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5541#else
5542# error "Port me"
5543#endif
5544 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5545 return off;
5546}
5547
5548
5549/**
5550 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5551 */
5552DECL_FORCE_INLINE(uint32_t)
5553iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5554{
5555 Assert(cShift > 0 && cShift < 32);
5556
5557#if defined(RT_ARCH_AMD64)
5558 /* shl dst, cShift */
5559 if (iGprDst >= 8)
5560 pCodeBuf[off++] = X86_OP_REX_B;
5561 if (cShift != 1)
5562 {
5563 pCodeBuf[off++] = 0xc1;
5564 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5565 pCodeBuf[off++] = cShift;
5566 }
5567 else
5568 {
5569 pCodeBuf[off++] = 0xd1;
5570 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5571 }
5572
5573#elif defined(RT_ARCH_ARM64)
5574 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5575
5576#else
5577# error "Port me"
5578#endif
5579 return off;
5580}
5581
5582
5583/**
5584 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5585 */
5586DECL_INLINE_THROW(uint32_t)
5587iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5588{
5589#if defined(RT_ARCH_AMD64)
5590 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5591#elif defined(RT_ARCH_ARM64)
5592 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5593#else
5594# error "Port me"
5595#endif
5596 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5597 return off;
5598}
5599
5600
5601/**
5602 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5603 */
5604DECL_FORCE_INLINE(uint32_t)
5605iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5606{
5607 Assert(cShift > 0 && cShift < 64);
5608
5609#if defined(RT_ARCH_AMD64)
5610 /* shr dst, cShift */
5611 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5612 if (cShift != 1)
5613 {
5614 pCodeBuf[off++] = 0xc1;
5615 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5616 pCodeBuf[off++] = cShift;
5617 }
5618 else
5619 {
5620 pCodeBuf[off++] = 0xd1;
5621 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5622 }
5623
5624#elif defined(RT_ARCH_ARM64)
5625 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5626
5627#else
5628# error "Port me"
5629#endif
5630 return off;
5631}
5632
5633
5634/**
5635 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5636 */
5637DECL_INLINE_THROW(uint32_t)
5638iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5639{
5640#if defined(RT_ARCH_AMD64)
5641 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5642#elif defined(RT_ARCH_ARM64)
5643 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5644#else
5645# error "Port me"
5646#endif
5647 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5648 return off;
5649}
5650
5651
5652/**
5653 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5654 * right.
5655 */
5656DECL_FORCE_INLINE(uint32_t)
5657iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5658{
5659 Assert(cShift > 0 && cShift < 32);
5660
5661#if defined(RT_ARCH_AMD64)
5662 /* shr dst, cShift */
5663 if (iGprDst >= 8)
5664 pCodeBuf[off++] = X86_OP_REX_B;
5665 if (cShift != 1)
5666 {
5667 pCodeBuf[off++] = 0xc1;
5668 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5669 pCodeBuf[off++] = cShift;
5670 }
5671 else
5672 {
5673 pCodeBuf[off++] = 0xd1;
5674 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5675 }
5676
5677#elif defined(RT_ARCH_ARM64)
5678 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5679
5680#else
5681# error "Port me"
5682#endif
5683 return off;
5684}
5685
5686
5687/**
5688 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5689 * right.
5690 */
5691DECL_INLINE_THROW(uint32_t)
5692iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5693{
5694#if defined(RT_ARCH_AMD64)
5695 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5696#elif defined(RT_ARCH_ARM64)
5697 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5698#else
5699# error "Port me"
5700#endif
5701 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5702 return off;
5703}
5704
5705
5706/**
5707 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5708 * right and assigning it to a different GPR.
5709 */
5710DECL_INLINE_THROW(uint32_t)
5711iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5712{
5713 Assert(cShift > 0); Assert(cShift < 32);
5714#if defined(RT_ARCH_AMD64)
5715 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5716 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5717
5718#elif defined(RT_ARCH_ARM64)
5719 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5720
5721#else
5722# error "Port me"
5723#endif
5724 return off;
5725}
5726
5727
5728/**
5729 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5730 */
5731DECL_FORCE_INLINE(uint32_t)
5732iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5733{
5734 Assert(cShift > 0 && cShift < 64);
5735
5736#if defined(RT_ARCH_AMD64)
5737 /* sar dst, cShift */
5738 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5739 if (cShift != 1)
5740 {
5741 pCodeBuf[off++] = 0xc1;
5742 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5743 pCodeBuf[off++] = cShift;
5744 }
5745 else
5746 {
5747 pCodeBuf[off++] = 0xd1;
5748 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5749 }
5750
5751#elif defined(RT_ARCH_ARM64)
5752 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);
5753
5754#else
5755# error "Port me"
5756#endif
5757 return off;
5758}
5759
5760
5761/**
5762 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5763 */
5764DECL_INLINE_THROW(uint32_t)
5765iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5766{
5767#if defined(RT_ARCH_AMD64)
5768 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5769#elif defined(RT_ARCH_ARM64)
5770 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5771#else
5772# error "Port me"
5773#endif
5774 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5775 return off;
5776}
5777
5778
5779/**
5780 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5781 */
5782DECL_FORCE_INLINE(uint32_t)
5783iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5784{
5785    Assert(cShift > 0 && cShift < 32);
5786
5787#if defined(RT_ARCH_AMD64)
5788 /* sar dst, cShift */
5789 if (iGprDst >= 8)
5790 pCodeBuf[off++] = X86_OP_REX_B;
5791 if (cShift != 1)
5792 {
5793 pCodeBuf[off++] = 0xc1;
5794 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5795 pCodeBuf[off++] = cShift;
5796 }
5797 else
5798 {
5799 pCodeBuf[off++] = 0xd1;
5800 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5801 }
5802
5803#elif defined(RT_ARCH_ARM64)
5804 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5805
5806#else
5807# error "Port me"
5808#endif
5809 return off;
5810}
5811
5812
5813/**
5814 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5815 */
5816DECL_INLINE_THROW(uint32_t)
5817iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5818{
5819#if defined(RT_ARCH_AMD64)
5820 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5821#elif defined(RT_ARCH_ARM64)
5822 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5823#else
5824# error "Port me"
5825#endif
5826 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5827 return off;
5828}
5829
5830
5831/**
5832 * Emits code for rotating a GPR a fixed number of bits to the left.
5833 */
5834DECL_FORCE_INLINE(uint32_t)
5835iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5836{
5837 Assert(cShift > 0 && cShift < 64);
5838
5839#if defined(RT_ARCH_AMD64)
5840 /* rol dst, cShift */
5841 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5842 if (cShift != 1)
5843 {
5844 pCodeBuf[off++] = 0xc1;
5845 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5846 pCodeBuf[off++] = cShift;
5847 }
5848 else
5849 {
5850 pCodeBuf[off++] = 0xd1;
5851 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5852 }
5853
5854#elif defined(RT_ARCH_ARM64)
5855 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5856
5857#else
5858# error "Port me"
5859#endif
5860 return off;
5861}
5862
5863
5864#if defined(RT_ARCH_AMD64)
5865/**
5866 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5867 */
5868DECL_FORCE_INLINE(uint32_t)
5869iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5870{
5871 Assert(cShift > 0 && cShift < 32);
5872
5873 /* rcl dst, cShift */
5874 if (iGprDst >= 8)
5875 pCodeBuf[off++] = X86_OP_REX_B;
5876 if (cShift != 1)
5877 {
5878 pCodeBuf[off++] = 0xc1;
5879 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5880 pCodeBuf[off++] = cShift;
5881 }
5882 else
5883 {
5884 pCodeBuf[off++] = 0xd1;
5885 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5886 }
5887
5888 return off;
5889}
5890#endif /* RT_ARCH_AMD64 */
5891
5892
5893
5894/**
5895 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5896 * @note ARM64: bits 63:32 of the destination are cleared; the AMD64 16-bit rotate leaves bits 63:16 untouched.
5897 */
5898DECL_FORCE_INLINE(uint32_t)
5899iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5900{
5901#if defined(RT_ARCH_AMD64)
5902 /*
5903 * There is no bswap r16 on x86 (the encoding exists but does not work).
5904 * So just use a rol instead (gcc -O2 does the same).
5905 *
5906 * rol r16, 0x8
5907 */
5908 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5909 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5910 if (iGpr >= 8)
5911 pbCodeBuf[off++] = X86_OP_REX_B;
5912 pbCodeBuf[off++] = 0xc1;
5913 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5914 pbCodeBuf[off++] = 0x08;
5915#elif defined(RT_ARCH_ARM64)
5916 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5917
5918 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5919#else
5920# error "Port me"
5921#endif
5922
5923 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5924 return off;
5925}
5926
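/*
 * Example: with 0x1234 in the low 16 bits, the 8-bit rotate swaps the two low
 * bytes and yields 0x3412, matching what a working bswap r16 would have done.
 */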
5927
5928/**
5929 * Emits code for reversing the byte order in a 32-bit GPR.
5930 * @note Bits 63:32 of the destination GPR will be cleared.
5931 */
5932DECL_FORCE_INLINE(uint32_t)
5933iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5934{
5935#if defined(RT_ARCH_AMD64)
5936 /* bswap r32 */
5937 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5938
5939 if (iGpr >= 8)
5940 pbCodeBuf[off++] = X86_OP_REX_B;
5941 pbCodeBuf[off++] = 0x0f;
5942 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5943#elif defined(RT_ARCH_ARM64)
5944 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5945
5946 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5947#else
5948# error "Port me"
5949#endif
5950
5951 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5952 return off;
5953}
5954
5955
5956/**
5957 * Emits code for reversing the byte order in a 64-bit GPR.
5958 */
5959DECL_FORCE_INLINE(uint32_t)
5960iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5961{
5962#if defined(RT_ARCH_AMD64)
5963 /* bswap r64 */
5964 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5965
5966 if (iGpr >= 8)
5967 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5968 else
5969 pbCodeBuf[off++] = X86_OP_REX_W;
5970 pbCodeBuf[off++] = 0x0f;
5971 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5972#elif defined(RT_ARCH_ARM64)
5973 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5974
5975 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5976#else
5977# error "Port me"
5978#endif
5979
5980 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5981 return off;
5982}
5983
5984
5985/*********************************************************************************************************************************
5986* Compare and Testing *
5987*********************************************************************************************************************************/
5988
5989
5990#ifdef RT_ARCH_ARM64
5991/**
5992 * Emits an ARM64 compare instruction.
5993 */
5994DECL_INLINE_THROW(uint32_t)
5995iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5996 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5997{
5998 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5999 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
6000 f64Bit, true /*fSetFlags*/, cShift, enmShift);
6001 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6002 return off;
6003}
6004#endif
6005
6006
6007/**
6008 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6009 * with conditional instruction.
6010 */
6011DECL_FORCE_INLINE(uint32_t)
6012iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6013{
6014#ifdef RT_ARCH_AMD64
6015 /* cmp Gv, Ev */
6016 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6017 pCodeBuf[off++] = 0x3b;
6018 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6019
6020#elif defined(RT_ARCH_ARM64)
6021 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
6022
6023#else
6024# error "Port me!"
6025#endif
6026 return off;
6027}
6028
6029
6030/**
6031 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6032 * with conditional instruction.
6033 */
6034DECL_INLINE_THROW(uint32_t)
6035iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6036{
6037#ifdef RT_ARCH_AMD64
6038 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6039#elif defined(RT_ARCH_ARM64)
6040 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6041#else
6042# error "Port me!"
6043#endif
6044 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6045 return off;
6046}
6047
6048
6049/**
6050 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6051 * with conditional instruction.
6052 */
6053DECL_FORCE_INLINE(uint32_t)
6054iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6055{
6056#ifdef RT_ARCH_AMD64
6057 /* cmp Gv, Ev */
6058 if (iGprLeft >= 8 || iGprRight >= 8)
6059 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6060 pCodeBuf[off++] = 0x3b;
6061 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6062
6063#elif defined(RT_ARCH_ARM64)
6064 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
6065
6066#else
6067# error "Port me!"
6068#endif
6069 return off;
6070}
6071
6072
6073/**
6074 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6075 * with conditional instruction.
6076 */
6077DECL_INLINE_THROW(uint32_t)
6078iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6079{
6080#ifdef RT_ARCH_AMD64
6081 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6082#elif defined(RT_ARCH_ARM64)
6083 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6084#else
6085# error "Port me!"
6086#endif
6087 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6088 return off;
6089}
6090
6091
6092/**
6093 * Emits a compare of a 64-bit GPR with a constant value, setting status
6094 * flags/whatever for use with conditional instruction.
6095 */
6096DECL_INLINE_THROW(uint32_t)
6097iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
6098{
6099#ifdef RT_ARCH_AMD64
6100    if (uImm <= UINT32_C(0x7f)) /* the imm8 is sign-extended, so 0x80..0xff must take the imm32 path */
6101 {
6102 /* cmp Ev, Ib */
6103 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6104 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6105 pbCodeBuf[off++] = 0x83;
6106 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6107 pbCodeBuf[off++] = (uint8_t)uImm;
6108 }
6109 else if ((int64_t)uImm == (int32_t)uImm)
6110 {
6111 /* cmp Ev, imm */
6112 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6113 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6114 pbCodeBuf[off++] = 0x81;
6115 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6116 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6117 pbCodeBuf[off++] = RT_BYTE1(uImm);
6118 pbCodeBuf[off++] = RT_BYTE2(uImm);
6119 pbCodeBuf[off++] = RT_BYTE3(uImm);
6120 pbCodeBuf[off++] = RT_BYTE4(uImm);
6121 }
6122 else
6123 {
6124 /* Use temporary register for the immediate. */
6125 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6126 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6127 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6128 }
6129
6130#elif defined(RT_ARCH_ARM64)
6131    /** @todo guess there are clever things we can do here... */
6132 if (uImm < _4K)
6133 {
6134 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6135 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6136 true /*64Bit*/, true /*fSetFlags*/);
6137 }
6138 else if ((uImm & ~(uint64_t)0xfff000) == 0)
6139 {
6140 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6141 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6142 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6143 }
6144 else
6145 {
6146 /* Use temporary register for the immediate. */
6147 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6148 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6149 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6150 }
6151
6152#else
6153# error "Port me!"
6154#endif
6155
6156 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6157 return off;
6158}
6159
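#if 0 /* Usage sketch for the above, with a made-up host register index. */
    off = iemNativeEmitCmpGprWithImm(pReNative, off, 2 /*iGprLeft*/, UINT64_C(0x20));          /* cmp r64, imm8  */
    off = iemNativeEmitCmpGprWithImm(pReNative, off, 2 /*iGprLeft*/, UINT64_C(0x12345678));    /* cmp r64, imm32 */
    off = iemNativeEmitCmpGprWithImm(pReNative, off, 2 /*iGprLeft*/, UINT64_C(0x123456789a));  /* via temp reg   */
#endif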
6160
6161/**
6162 * Emits a compare of a 32-bit GPR with a constant value, setting status
6163 * flags/whatever for use with conditional instruction.
6164 *
6165 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6166 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
6167 * bits all zero). Will release assert or throw exception if the caller
6168 * violates this restriction.
6169 */
6170DECL_FORCE_INLINE_THROW(uint32_t)
6171iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6172{
6173#ifdef RT_ARCH_AMD64
6174 if (iGprLeft >= 8)
6175 pCodeBuf[off++] = X86_OP_REX_B;
6176 if (uImm <= UINT32_C(0x7f))
6177 {
6178 /* cmp Ev, Ib */
6179 pCodeBuf[off++] = 0x83;
6180 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6181 pCodeBuf[off++] = (uint8_t)uImm;
6182 }
6183 else
6184 {
6185 /* cmp Ev, imm */
6186 pCodeBuf[off++] = 0x81;
6187 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6188 pCodeBuf[off++] = RT_BYTE1(uImm);
6189 pCodeBuf[off++] = RT_BYTE2(uImm);
6190 pCodeBuf[off++] = RT_BYTE3(uImm);
6191 pCodeBuf[off++] = RT_BYTE4(uImm);
6192 }
6193
6194#elif defined(RT_ARCH_ARM64)
6195    /** @todo guess there are clever things we can do here... */
6196 if (uImm < _4K)
6197 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6198 false /*64Bit*/, true /*fSetFlags*/);
6199 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6200        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6201 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6202 else
6203# ifdef IEM_WITH_THROW_CATCH
6204 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6205# else
6206 AssertReleaseFailedStmt(off = UINT32_MAX);
6207# endif
6208
6209#else
6210# error "Port me!"
6211#endif
6212 return off;
6213}
6214
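/*
 * ARM64 range examples for the Ex variant above: uImm = 0xabc encodes directly
 * (SUBS wzr, wN, #0xabc); uImm = 0x123000 encodes with the 12-bit shift
 * (SUBS wzr, wN, #0x123, lsl #12); uImm = 0x1234 fits neither form and will
 * assert / throw, so such callers must use iemNativeEmitCmpGpr32WithImm.
 */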
6215
6216/**
6217 * Emits a compare of a 32-bit GPR with a constant value, setting status
6218 * flags/whatever for use with conditional instruction.
6219 */
6220DECL_INLINE_THROW(uint32_t)
6221iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6222{
6223#ifdef RT_ARCH_AMD64
6224 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6225
6226#elif defined(RT_ARCH_ARM64)
6227    /** @todo guess there are clever things we can do here... */
6228 if (uImm < _4K)
6229 {
6230 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6231 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6232 false /*64Bit*/, true /*fSetFlags*/);
6233 }
6234 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6235 {
6236 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6237        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6238 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6239 }
6240 else
6241 {
6242 /* Use temporary register for the immediate. */
6243 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6244 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6245 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6246 }
6247
6248#else
6249# error "Port me!"
6250#endif
6251
6252 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6253 return off;
6254}
6255
6256
6257/**
6258 * Emits a compare of a 16-bit GPR with a constant value, setting status
6259 * flags/whatever for use with conditional instruction.
6260 *
6261 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
6262 *       16-bit value from @a iGprLeft.
6263 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6264 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
6265 * bits all zero). Will release assert or throw exception if the caller
6266 * violates this restriction.
6267 */
6268DECL_FORCE_INLINE_THROW(uint32_t)
6269iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6270 uint8_t idxTmpReg = UINT8_MAX)
6271{
6272#ifdef RT_ARCH_AMD64
6273 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6274 if (iGprLeft >= 8)
6275 pCodeBuf[off++] = X86_OP_REX_B;
6276 if (uImm <= UINT32_C(0x7f))
6277 {
6278 /* cmp Ev, Ib */
6279 pCodeBuf[off++] = 0x83;
6280 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6281 pCodeBuf[off++] = (uint8_t)uImm;
6282 }
6283 else
6284 {
6285 /* cmp Ev, imm */
6286 pCodeBuf[off++] = 0x81;
6287 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6288 pCodeBuf[off++] = RT_BYTE1(uImm);
6289 pCodeBuf[off++] = RT_BYTE2(uImm);
6290 }
6291 RT_NOREF(idxTmpReg);
6292
6293#elif defined(RT_ARCH_ARM64)
6294# ifdef IEM_WITH_THROW_CATCH
6295 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6296# else
6297 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6298# endif
6299 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6300 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6301 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6302
6303#else
6304# error "Port me!"
6305#endif
6306 return off;
6307}
6308
6309
6310/**
6311 * Emits a compare of a 16-bit GPR with a constant value, setting status
6312 * flags/whatever for use with conditional instructions.
6313 *
6314 * @note ARM64: Helper register is required (idxTmpReg).
6315 */
6316DECL_INLINE_THROW(uint32_t)
6317iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6318 uint8_t idxTmpReg = UINT8_MAX)
6319{
6320#ifdef RT_ARCH_AMD64
6321 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6322#elif defined(RT_ARCH_ARM64)
6323 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6324#else
6325# error "Port me!"
6326#endif
6327 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6328 return off;
6329}
6330
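/*
 * Illustrative sketch: on ARM64 the 16-bit compare needs a scratch register
 * for the zero-extended value, so a caller would typically bracket it along
 * these lines.  iemNativeRegAllocTmp and iemNativeRegFreeTmp are assumed to
 * be the plain temporary-register helpers declared elsewhere in the
 * recompiler.
 *
 * @code
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off); // assumed helper
 *      off = iemNativeEmitCmpGpr16WithImm(pReNative, off, idxReg, 0x123, idxTmpReg);
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);                       // assumed helper
 * @endcode
 */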
6331
6332
6333/*********************************************************************************************************************************
6334* Branching *
6335*********************************************************************************************************************************/
6336
6337/**
6338 * Emits a JMP rel32 / B imm26 to the given label.
6339 */
6340DECL_FORCE_INLINE_THROW(uint32_t)
6341iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6342{
6343 Assert(idxLabel < pReNative->cLabels);
6344
6345#ifdef RT_ARCH_AMD64
6346 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6347 {
6348 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6349 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6350 {
6351 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6352 pCodeBuf[off++] = (uint8_t)offRel;
6353 }
6354 else
6355 {
6356 offRel -= 3;
6357 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6358 pCodeBuf[off++] = RT_BYTE1(offRel);
6359 pCodeBuf[off++] = RT_BYTE2(offRel);
6360 pCodeBuf[off++] = RT_BYTE3(offRel);
6361 pCodeBuf[off++] = RT_BYTE4(offRel);
6362 }
6363 }
6364 else
6365 {
6366 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6367 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6368 pCodeBuf[off++] = 0xfe;
6369 pCodeBuf[off++] = 0xff;
6370 pCodeBuf[off++] = 0xff;
6371 pCodeBuf[off++] = 0xff;
6372 }
6373 pCodeBuf[off++] = 0xcc; /* int3 poison */
6374
6375#elif defined(RT_ARCH_ARM64)
6376 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6377 {
6378 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6379 off++;
6380 }
6381 else
6382 {
6383 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6384 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6385 }
6386
6387#else
6388# error "Port me!"
6389#endif
6390 return off;
6391}
6392
6393
6394/**
6395 * Emits a JMP rel32 / B imm26 to the given label.
6396 */
6397DECL_INLINE_THROW(uint32_t)
6398iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6399{
6400#ifdef RT_ARCH_AMD64
6401 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6402#elif defined(RT_ARCH_ARM64)
6403 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6404#else
6405# error "Port me!"
6406#endif
6407 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6408 return off;
6409}
6410
6411
6412/**
6413 * Emits a JMP rel32 / B imm26 to a new undefined label.
6414 */
6415DECL_INLINE_THROW(uint32_t)
6416iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6417{
6418 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6419 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6420}
6421
6422/** Condition type. */
6423#ifdef RT_ARCH_AMD64
6424typedef enum IEMNATIVEINSTRCOND : uint8_t
6425{
6426 kIemNativeInstrCond_o = 0,
6427 kIemNativeInstrCond_no,
6428 kIemNativeInstrCond_c,
6429 kIemNativeInstrCond_nc,
6430 kIemNativeInstrCond_e,
6431 kIemNativeInstrCond_z = kIemNativeInstrCond_e,
6432 kIemNativeInstrCond_ne,
6433 kIemNativeInstrCond_nz = kIemNativeInstrCond_ne,
6434 kIemNativeInstrCond_be,
6435 kIemNativeInstrCond_nbe,
6436 kIemNativeInstrCond_s,
6437 kIemNativeInstrCond_ns,
6438 kIemNativeInstrCond_p,
6439 kIemNativeInstrCond_np,
6440 kIemNativeInstrCond_l,
6441 kIemNativeInstrCond_nl,
6442 kIemNativeInstrCond_le,
6443 kIemNativeInstrCond_nle
6444} IEMNATIVEINSTRCOND;
6445#elif defined(RT_ARCH_ARM64)
6446typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6447# define kIemNativeInstrCond_o todo_conditional_codes
6448# define kIemNativeInstrCond_no todo_conditional_codes
6449# define kIemNativeInstrCond_c todo_conditional_codes
6450# define kIemNativeInstrCond_nc todo_conditional_codes
6451# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6452# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6453# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6454# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6455# define kIemNativeInstrCond_s todo_conditional_codes
6456# define kIemNativeInstrCond_ns todo_conditional_codes
6457# define kIemNativeInstrCond_p todo_conditional_codes
6458# define kIemNativeInstrCond_np todo_conditional_codes
6459# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6460# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6461# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6462# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6463#else
6464# error "Port me!"
6465#endif
6466
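/*
 * Usage note: the generic kIemNativeInstrCond_* names above resolve to the
 * native condition encoding, so a single call site like the following
 * yields JE on AMD64 and B.EQ on ARM64:
 *
 * @code
 *      off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
 * @endcode
 *
 * The entries mapped to todo_conditional_codes have no ARM64 equivalent
 * defined yet and will not compile there.
 */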
6467
6468/**
6469 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6470 */
6471DECL_FORCE_INLINE_THROW(uint32_t)
6472iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6473 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6474{
6475 Assert(idxLabel < pReNative->cLabels);
6476
6477 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6478#ifdef RT_ARCH_AMD64
6479 if (offLabel >= off)
6480 {
6481 /* jcc rel32 */
6482 pCodeBuf[off++] = 0x0f;
6483 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6484 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6485 pCodeBuf[off++] = 0x00;
6486 pCodeBuf[off++] = 0x00;
6487 pCodeBuf[off++] = 0x00;
6488 pCodeBuf[off++] = 0x00;
6489 }
6490 else
6491 {
6492 int32_t offDisp = offLabel - (off + 2);
6493 if ((int8_t)offDisp == offDisp)
6494 {
6495 /* jcc rel8 */
6496 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6497 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6498 }
6499 else
6500 {
6501 /* jcc rel32 */
6502 offDisp -= 4;
6503 pCodeBuf[off++] = 0x0f;
6504 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6505 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6506 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6507 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6508 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6509 }
6510 }
6511
6512#elif defined(RT_ARCH_ARM64)
6513 if (offLabel >= off)
6514 {
6515 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6516 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6517 }
6518 else
6519 {
6520 Assert(off - offLabel <= 0x3ffffU);
6521 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6522 off++;
6523 }
6524
6525#else
6526# error "Port me!"
6527#endif
6528 return off;
6529}
6530
6531
6532/**
6533 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6534 */
6535DECL_INLINE_THROW(uint32_t)
6536iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6537{
6538#ifdef RT_ARCH_AMD64
6539 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6540#elif defined(RT_ARCH_ARM64)
6541 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6542#else
6543# error "Port me!"
6544#endif
6545 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6546 return off;
6547}
6548
6549
6550/**
6551 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6552 */
6553DECL_INLINE_THROW(uint32_t)
6554iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6555 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6556{
6557 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6558 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6559}
6560
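/*
 * Illustrative sketch of the usual label dance: create an undefined label,
 * branch to it, emit the in-between code, and finally pin the label at the
 * current offset so the fixup recorded by the branch emitter gets resolved.
 * iemNativeLabelDefine is assumed to be the defining counterpart of
 * iemNativeLabelCreate declared elsewhere in the recompiler; the label type
 * name is likewise just an example.
 *
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX, 0);
 *      off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
 *      // ... code that is skipped when the condition is met ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off); // assumed helper
 * @endcode
 */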
6561
6562/**
6563 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6564 */
6565DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6566{
6567#ifdef RT_ARCH_AMD64
6568 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6569#elif defined(RT_ARCH_ARM64)
6570 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6571#else
6572# error "Port me!"
6573#endif
6574}
6575
6576/**
6577 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6578 */
6579DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6580 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6581{
6582#ifdef RT_ARCH_AMD64
6583 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6584#elif defined(RT_ARCH_ARM64)
6585 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6586#else
6587# error "Port me!"
6588#endif
6589}
6590
6591
6592/**
6593 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6594 */
6595DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6596{
6597#ifdef RT_ARCH_AMD64
6598 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6599#elif defined(RT_ARCH_ARM64)
6600 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6601#else
6602# error "Port me!"
6603#endif
6604}
6605
6606/**
6607 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6608 */
6609DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6610 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6611{
6612#ifdef RT_ARCH_AMD64
6613 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6614#elif defined(RT_ARCH_ARM64)
6615 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6616#else
6617# error "Port me!"
6618#endif
6619}
6620
6621
6622/**
6623 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6624 */
6625DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6626{
6627#ifdef RT_ARCH_AMD64
6628 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6629#elif defined(RT_ARCH_ARM64)
6630 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6631#else
6632# error "Port me!"
6633#endif
6634}
6635
6636/**
6637 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6638 */
6639DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6640 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6641{
6642#ifdef RT_ARCH_AMD64
6643 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6644#elif defined(RT_ARCH_ARM64)
6645 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6646#else
6647# error "Port me!"
6648#endif
6649}
6650
6651
6652/**
6653 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6654 */
6655DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6656{
6657#ifdef RT_ARCH_AMD64
6658 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6659#elif defined(RT_ARCH_ARM64)
6660 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6661#else
6662# error "Port me!"
6663#endif
6664}
6665
6666/**
6667 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6668 */
6669DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6670 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6671{
6672#ifdef RT_ARCH_AMD64
6673 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6674#elif defined(RT_ARCH_ARM64)
6675 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6676#else
6677# error "Port me!"
6678#endif
6679}
6680
6681
6682/**
6683 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6684 */
6685DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6686{
6687#ifdef RT_ARCH_AMD64
6688 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6689#elif defined(RT_ARCH_ARM64)
6690 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6691#else
6692# error "Port me!"
6693#endif
6694}
6695
6696/**
6697 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6698 */
6699DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6700 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6701{
6702#ifdef RT_ARCH_AMD64
6703 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6704#elif defined(RT_ARCH_ARM64)
6705 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6706#else
6707# error "Port me!"
6708#endif
6709}
6710
6711
6712/**
6713 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6714 *
6715 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6716 *
6717 * Only use hardcoded jumps forward when emitting for exactly one
6718 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6719 * the right target address on all platforms!
6720 *
6721 * Please also note that on x86 it is necessary to pass off + 256 or
6722 * higher for @a offTarget if one believes the intervening code is more
6723 * than 127 bytes long.
6724 */
6725DECL_FORCE_INLINE(uint32_t)
6726iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6727{
6728#ifdef RT_ARCH_AMD64
6729 /* jcc rel8 / rel32 */
6730 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6731 if (offDisp < 128 && offDisp >= -128)
6732 {
6733 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6734 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6735 }
6736 else
6737 {
6738 offDisp -= 4;
6739 pCodeBuf[off++] = 0x0f;
6740 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6741 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6742 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6743 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6744 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6745 }
6746
6747#elif defined(RT_ARCH_ARM64)
6748 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6749 off++;
6750#else
6751# error "Port me!"
6752#endif
6753 return off;
6754}
6755
6756
6757/**
6758 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6759 *
6760 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6761 *
6762 * Only use hardcoded jumps forward when emitting for exactly one
6763 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6764 * the right target address on all platforms!
6765 *
6766 * Please also note that on x86 it is necessary to pass off + 256 or
6767 * higher for @a offTarget if one believes the intervening code is more
6768 * than 127 bytes long.
6769 */
6770DECL_INLINE_THROW(uint32_t)
6771iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6772{
6773#ifdef RT_ARCH_AMD64
6774 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6775#elif defined(RT_ARCH_ARM64)
6776 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6777#else
6778# error "Port me!"
6779#endif
6780 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6781 return off;
6782}
6783
6784
6785/**
6786 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6787 *
6788 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6789 */
6790DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6791{
6792#ifdef RT_ARCH_AMD64
6793 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6794#elif defined(RT_ARCH_ARM64)
6795 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6796#else
6797# error "Port me!"
6798#endif
6799}
6800
6801
6802/**
6803 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6804 *
6805 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6806 */
6807DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6808{
6809#ifdef RT_ARCH_AMD64
6810 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6811#elif defined(RT_ARCH_ARM64)
6812 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6813#else
6814# error "Port me!"
6815#endif
6816}
6817
6818
6819/**
6820 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6821 *
6822 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6823 */
6824DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6825{
6826#ifdef RT_ARCH_AMD64
6827 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6828#elif defined(RT_ARCH_ARM64)
6829 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6830#else
6831# error "Port me!"
6832#endif
6833}
6834
6835
6836/**
6837 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6838 *
6839 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6840 */
6841DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6842{
6843#ifdef RT_ARCH_AMD64
6844 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6845#elif defined(RT_ARCH_ARM64)
6846 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6847#else
6848# error "Port me!"
6849#endif
6850}
6851
6852
6853/**
6854 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6855 *
6856 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6857 */
6858DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6859{
6860#ifdef RT_ARCH_AMD64
6861 /* jmp rel8 or rel32 */
6862 int32_t offDisp = offTarget - (off + 2);
6863 if (offDisp < 128 && offDisp >= -128)
6864 {
6865 pCodeBuf[off++] = 0xeb;
6866 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6867 }
6868 else
6869 {
6870 offDisp -= 3;
6871 pCodeBuf[off++] = 0xe9;
6872 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6873 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6874 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6875 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6876 }
6877
6878#elif defined(RT_ARCH_ARM64)
6879 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6880 off++;
6881
6882#else
6883# error "Port me!"
6884#endif
6885 return off;
6886}
6887
6888
6889/**
6890 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6891 *
6892 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6893 */
6894DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6895{
6896#ifdef RT_ARCH_AMD64
6897 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6898#elif defined(RT_ARCH_ARM64)
6899 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6900#else
6901# error "Port me!"
6902#endif
6903 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6904 return off;
6905}
6906
6907
6908/**
6909 * Fixes up a fixed jump (JMP/Jcc rel8/rel32, B/B.cc) to the given target.
6910 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6911 * iemNativeEmitJzToFixed, ...
6912 */
6913DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6914{
6915#ifdef RT_ARCH_AMD64
6916 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6917 uint8_t const bOpcode = pbCodeBuf[offFixup];
6918 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6919 {
6920 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6921 AssertStmt((int8_t)pbCodeBuf[offFixup + 1] == (int32_t)(offTarget - (offFixup + 2)),
6922 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6923 }
6924 else
6925 {
6926 if (bOpcode != 0x0f)
6927 Assert(bOpcode == 0xe9);
6928 else
6929 {
6930 offFixup += 1;
6931 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
6932 }
6933 uint32_t const offRel32 = offTarget - (offFixup + 5);
6934 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6935 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6936 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6937 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6938 }
6939
6940#elif defined(RT_ARCH_ARM64)
6941 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6942 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6943 {
6944 /* B.COND + BC.COND */
6945 int32_t const offDisp = offTarget - offFixup;
6946 Assert(offDisp >= -262144 && offDisp < 262144);
6947 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6948 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6949 }
6950 else
6951 {
6952 /* B imm26 */
6953 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6954 int32_t const offDisp = offTarget - offFixup;
6955 Assert(offDisp >= -33554432 && offDisp < 33554432);
6956 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6957 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6958 }
6959
6960#else
6961# error "Port me!"
6962#endif
6963}
6964
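/*
 * Illustrative sketch of the fixed-jump pattern the fixup helper above
 * serves: emit a forward Jcc with a placeholder target (off + 256 forces
 * the rel32 encoding on x86, see the notes on iemNativeEmitJccToFixed),
 * remember where it was, and patch it once the real target is known:
 *
 * @code
 *      uint32_t const offFixup = off;
 *      off = iemNativeEmitJccToFixed(pReNative, off, off + 256, kIemNativeInstrCond_e); // placeholder target
 *      // ... code that is skipped when the condition is met ...
 *      iemNativeFixupFixedJump(pReNative, offFixup, off);
 * @endcode
 */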
6965
6966#ifdef RT_ARCH_AMD64
6967/**
6968 * For doing bt on a register.
6969 */
6970DECL_INLINE_THROW(uint32_t)
6971iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6972{
6973 Assert(iBitNo < 64);
6974 /* bt Ev, imm8 */
6975 if (iBitNo >= 32)
6976 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6977 else if (iGprSrc >= 8)
6978 pCodeBuf[off++] = X86_OP_REX_B;
6979 pCodeBuf[off++] = 0x0f;
6980 pCodeBuf[off++] = 0xba;
6981 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6982 pCodeBuf[off++] = iBitNo;
6983 return off;
6984}
6985#endif /* RT_ARCH_AMD64 */
6986
6987
6988/**
6989 * Internal helper, don't call directly.
6990 */
6991DECL_INLINE_THROW(uint32_t)
6992iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
6993 uint32_t offTarget, uint32_t *poffFixup, bool fJmpIfSet)
6994{
6995 Assert(iBitNo < 64);
6996#ifdef RT_ARCH_AMD64
6997 if (iBitNo < 8)
6998 {
6999 /* test Eb, imm8 */
7000 if (iGprSrc >= 4)
7001 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7002 pCodeBuf[off++] = 0xf6;
7003 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7004 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7005 if (poffFixup)
7006 *poffFixup = off;
7007 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7008 }
7009 else
7010 {
7011 /* bt Ev, imm8 */
7012 if (iBitNo >= 32)
7013 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7014 else if (iGprSrc >= 8)
7015 pCodeBuf[off++] = X86_OP_REX_B;
7016 pCodeBuf[off++] = 0x0f;
7017 pCodeBuf[off++] = 0xba;
7018 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7019 pCodeBuf[off++] = iBitNo;
7020 if (poffFixup)
7021 *poffFixup = off;
7022 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7023 }
7024
7025#elif defined(RT_ARCH_ARM64)
7026 /* Just use the TBNZ instruction here. */
7027 if (poffFixup)
7028 *poffFixup = off;
7029 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, (int32_t)(offTarget - off), iGprSrc, iBitNo);
7030
7031#else
7032# error "Port me!"
7033#endif
7034 return off;
7035}
7036
7037
7038/**
7039 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _set_
7040 * in @a iGprSrc.
7041 */
7042DECL_INLINE_THROW(uint32_t)
7043iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7044 uint32_t offTarget, uint32_t *poffFixup)
7045{
7046 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, true /*fJmpIfSet*/);
7047}
7048
7049
7050/**
7051 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _not_
7052 * _set_ in @a iGprSrc.
7053 */
7054DECL_INLINE_THROW(uint32_t)
7055iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7056 uint32_t offTarget, uint32_t *poffFixup)
7057{
7058 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, false /*fJmpIfSet*/);
7059}
7060
7061
7062
7063/**
7064 * Internal helper, don't call directly.
7065 */
7066DECL_INLINE_THROW(uint32_t)
7067iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7068 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7069{
7070 Assert(iBitNo < 64);
7071#ifdef RT_ARCH_AMD64
7072 if (iBitNo < 8)
7073 {
7074 /* test Eb, imm8 */
7075 if (iGprSrc >= 4)
7076 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7077 pCodeBuf[off++] = 0xf6;
7078 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7079 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7080 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7081 fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7082 }
7083 else
7084 {
7085 /* bt Ev, imm8 */
7086 if (iBitNo >= 32)
7087 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7088 else if (iGprSrc >= 8)
7089 pCodeBuf[off++] = X86_OP_REX_B;
7090 pCodeBuf[off++] = 0x0f;
7091 pCodeBuf[off++] = 0xba;
7092 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7093 pCodeBuf[off++] = iBitNo;
7094 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7095 fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7096 }
7097
7098#elif defined(RT_ARCH_ARM64)
7099 /* Use the TBNZ instruction here. */
7100 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
7101 {
7102 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
7103 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
7104 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
7105 //if (offLabel == UINT32_MAX)
7106 {
7107 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
7108 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
7109 }
7110 //else
7111 //{
7112 // RT_BREAKPOINT();
7113 // Assert(off - offLabel <= 0x1fffU);
7114 // pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
7115 //
7116 //}
7117 }
7118 else
7119 {
7120 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
7121 pCodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
7122 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7123 pCodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
7124 }
7125
7126#else
7127# error "Port me!"
7128#endif
7129 return off;
7130}
7131
7132
7133/**
7134 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7135 * @a iGprSrc.
7136 */
7137DECL_INLINE_THROW(uint32_t)
7138iemNativeEmitTestBitInGprAndJmpToLabelIfSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7139 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7140{
7141 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7142}
7143
7144
7145/**
7146 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7147 * _set_ in @a iGprSrc.
7148 */
7149DECL_INLINE_THROW(uint32_t)
7150iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7151 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7152{
7153 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7154}
7155
7156
7157/**
7158 * Internal helper, don't call directly.
7159 */
7160DECL_INLINE_THROW(uint32_t)
7161iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7162 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7163{
7164#ifdef RT_ARCH_AMD64
7165 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 5+6), off,
7166 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7167#elif defined(RT_ARCH_ARM64)
7168 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 2), off,
7169 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7170#else
7171# error "Port me!"
7172#endif
7173 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7174 return off;
7175}
7176
7177
7178/**
7179 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7180 * @a iGprSrc.
7181 */
7182DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7183 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7184{
7185 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7186}
7187
7188
7189/**
7190 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7191 * _set_ in @a iGprSrc.
7192 */
7193DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7194 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7195{
7196 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7197}
7198
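/*
 * Illustrative sketch: testing a single flag bit and branching to an
 * existing label, e.g. on the zero flag of a guest EFLAGS shadow register.
 * X86_EFL_ZF_BIT is assumed to be the usual VBox constant for that bit
 * position.
 *
 * @code
 *      off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, X86_EFL_ZF_BIT, idxLabel);
 * @endcode
 */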
7199
7200/**
7201 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7202 * flags accordingly.
7203 */
7204DECL_INLINE_THROW(uint32_t)
7205iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7206{
7207 Assert(fBits != 0);
7208#ifdef RT_ARCH_AMD64
7209
7210 if (fBits >= UINT32_MAX)
7211 {
7212 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7213
7214 /* test Ev,Gv */
7215 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7216 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7217 pbCodeBuf[off++] = 0x85;
7218 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7219
7220 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7221 }
7222 else if (fBits <= UINT32_MAX)
7223 {
7224 /* test Eb, imm8 or test Ev, imm32 */
7225 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7226 if (fBits <= UINT8_MAX)
7227 {
7228 if (iGprSrc >= 4)
7229 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7230 pbCodeBuf[off++] = 0xf6;
7231 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7232 pbCodeBuf[off++] = (uint8_t)fBits;
7233 }
7234 else
7235 {
7236 if (iGprSrc >= 8)
7237 pbCodeBuf[off++] = X86_OP_REX_B;
7238 pbCodeBuf[off++] = 0xf7;
7239 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7240 pbCodeBuf[off++] = RT_BYTE1(fBits);
7241 pbCodeBuf[off++] = RT_BYTE2(fBits);
7242 pbCodeBuf[off++] = RT_BYTE3(fBits);
7243 pbCodeBuf[off++] = RT_BYTE4(fBits);
7244 }
7245 }
7246 /** @todo implement me. */
7247 else
7248 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7249
7250#elif defined(RT_ARCH_ARM64)
7251 uint32_t uImmR = 0;
7252 uint32_t uImmNandS = 0;
7253 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7254 {
7255 /* ands xzr, iGprSrc, #fBits */
7256 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7257 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7258 }
7259 else
7260 {
7261 /* ands xzr, iGprSrc, iTmpReg */
7262 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7263 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7264 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7265 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7266 }
7267
7268#else
7269# error "Port me!"
7270#endif
7271 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7272 return off;
7273}
7274
7275
7276/**
7277 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7278 * @a iGprSrc, setting CPU flags accordingly.
7279 *
7280 * @note For ARM64 this only supports @a fBits values that can be expressed
7281 * using the two 6-bit immediates of the ANDS instruction. The caller
7282 * must make sure this is possible!
7283 */
7284DECL_FORCE_INLINE_THROW(uint32_t)
7285iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
7286{
7287 Assert(fBits != 0);
7288
7289#ifdef RT_ARCH_AMD64
7290 if (fBits <= UINT8_MAX)
7291 {
7292 /* test Eb, imm8 */
7293 if (iGprSrc >= 4)
7294 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7295 pCodeBuf[off++] = 0xf6;
7296 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7297 pCodeBuf[off++] = (uint8_t)fBits;
7298 }
7299 else
7300 {
7301 /* test Ev, imm32 */
7302 if (iGprSrc >= 8)
7303 pCodeBuf[off++] = X86_OP_REX_B;
7304 pCodeBuf[off++] = 0xf7;
7305 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7306 pCodeBuf[off++] = RT_BYTE1(fBits);
7307 pCodeBuf[off++] = RT_BYTE2(fBits);
7308 pCodeBuf[off++] = RT_BYTE3(fBits);
7309 pCodeBuf[off++] = RT_BYTE4(fBits);
7310 }
7311
7312#elif defined(RT_ARCH_ARM64)
7313 /* ands xzr, src, #fBits */
7314 uint32_t uImmR = 0;
7315 uint32_t uImmNandS = 0;
7316 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7317 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7318 else
7319# ifdef IEM_WITH_THROW_CATCH
7320 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7321# else
7322 AssertReleaseFailedStmt(off = UINT32_MAX);
7323# endif
7324
7325#else
7326# error "Port me!"
7327#endif
7328 return off;
7329}
7330
7331
7332
7333/**
7334 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7335 * @a iGprSrc, setting CPU flags accordingly.
7336 *
7337 * @note For ARM64 this only supports @a fBits values that can be expressed
7338 * using the two 6-bit immediates of the ANDS instruction. The caller
7339 * must make sure this is possible!
7340 */
7341DECL_FORCE_INLINE_THROW(uint32_t)
7342iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7343{
7344 Assert(fBits != 0);
7345
7346#ifdef RT_ARCH_AMD64
7347 /* test Eb, imm8 */
7348 if (iGprSrc >= 4)
7349 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7350 pCodeBuf[off++] = 0xf6;
7351 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7352 pCodeBuf[off++] = fBits;
7353
7354#elif defined(RT_ARCH_ARM64)
7355 /* ands xzr, src, #fBits */
7356 uint32_t uImmR = 0;
7357 uint32_t uImmNandS = 0;
7358 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7359 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7360 else
7361# ifdef IEM_WITH_THROW_CATCH
7362 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7363# else
7364 AssertReleaseFailedStmt(off = UINT32_MAX);
7365# endif
7366
7367#else
7368# error "Port me!"
7369#endif
7370 return off;
7371}
7372
7373
7374/**
7375 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7376 * @a iGprSrc, setting CPU flags accordingly.
7377 */
7378DECL_INLINE_THROW(uint32_t)
7379iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7380{
7381 Assert(fBits != 0);
7382
7383#ifdef RT_ARCH_AMD64
7384 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7385
7386#elif defined(RT_ARCH_ARM64)
7387 /* ands xzr, src, [tmp|#imm] */
7388 uint32_t uImmR = 0;
7389 uint32_t uImmNandS = 0;
7390 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7391 {
7392 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7393 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7394 }
7395 else
7396 {
7397 /* Use a temporary register for the immediate. */
7398 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7399 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7400 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7401 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7402 }
7403
7404#else
7405# error "Port me!"
7406#endif
7407 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7408 return off;
7409}
7410
7411
7412/**
7413 * Emits a jump to @a idxLabel on the condition that _any_ of the bits in
7414 * @a fBits are set in @a iGprSrc.
7415 */
7416DECL_INLINE_THROW(uint32_t)
7417iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7418 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7419{
7420 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7421
7422 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7423 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7424
7425 return off;
7426}
7427
7428
7429/**
7430 * Emits a jump to @a idxLabel on the condition that _none_ of the bits in
7431 * @a fBits are set in @a iGprSrc.
7432 */
7433DECL_INLINE_THROW(uint32_t)
7434iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7435 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7436{
7437 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7438
7439 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7440 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7441
7442 return off;
7443}
7444
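/*
 * Illustrative sketch: the two helpers above compose the flag test with the
 * branch, e.g. for bailing out to an existing label when any bit of an
 * example mask is set.  Note the !RT_IS_POWER_OF_TWO assertion: for a
 * single bit the bit-test emitters further up are the better fit.
 *
 * @code
 *      off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFlagsReg,
 *                                                               RT_BIT_64(3) | RT_BIT_64(9), idxLabel);
 * @endcode
 */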
7445
7446/**
7447 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero,
7448 * as selected by @a fJmpIfNotZero.
7449 * The operand size is given by @a f64Bit.
7450 */
7451DECL_FORCE_INLINE_THROW(uint32_t)
7452iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7453 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7454{
7455 Assert(idxLabel < pReNative->cLabels);
7456
7457#ifdef RT_ARCH_AMD64
7458 /* test reg32,reg32 / test reg64,reg64 */
7459 if (f64Bit)
7460 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7461 else if (iGprSrc >= 8)
7462 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7463 pCodeBuf[off++] = 0x85;
7464 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7465
7466 /* jnz idxLabel */
7467 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7468 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7469
7470#elif defined(RT_ARCH_ARM64)
7471 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7472 {
7473 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7474 iGprSrc, f64Bit);
7475 off++;
7476 }
7477 else
7478 {
7479 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7480 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7481 }
7482
7483#else
7484# error "Port me!"
7485#endif
7486 return off;
7487}
7488
7489
7490/**
7491 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero,
7492 * as selected by @a fJmpIfNotZero.
7493 * The operand size is given by @a f64Bit.
7494 */
7495DECL_FORCE_INLINE_THROW(uint32_t)
7496iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7497 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7498{
7499#ifdef RT_ARCH_AMD64
7500 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7501 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7502#elif defined(RT_ARCH_ARM64)
7503 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7504 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7505#else
7506# error "Port me!"
7507#endif
7508 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7509 return off;
7510}
7511
7512
7513/**
7514 * Emits code that jumps to @a offTarget if @a iGprSrc is zero or not zero,
7515 * as selected by @a fJmpIfNotZero.
7516 * The operand size is given by @a f64Bit.
7517 */
7518DECL_FORCE_INLINE_THROW(uint32_t)
7519iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7520 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7521{
7522#ifdef RT_ARCH_AMD64
7523 /* test reg32,reg32 / test reg64,reg64 */
7524 if (f64Bit)
7525 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7526 else if (iGprSrc >= 8)
7527 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7528 pCodeBuf[off++] = 0x85;
7529 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7530
7531 /* jnz idxLabel */
7532 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget,
7533 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7534
7535#elif defined(RT_ARCH_ARM64)
7536 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(offTarget - off), iGprSrc, f64Bit);
7537 off++;
7538
7539#else
7540# error "Port me!"
7541#endif
7542 return off;
7543}
7544
7545
7546/**
7547 * Emits code that jumps to @a offTarget if @a iGprSrc is zero or not zero,
7548 * as selected by @a fJmpIfNotZero.
7549 * The operand size is given by @a f64Bit.
7550 */
7551DECL_FORCE_INLINE_THROW(uint32_t)
7552iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7553 bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7554{
7555#ifdef RT_ARCH_AMD64
7556 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7557 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7558#elif defined(RT_ARCH_ARM64)
7559 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1),
7560 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7561#else
7562# error "Port me!"
7563#endif
7564 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7565 return off;
7566}
7567
7568
7569/* if (Gpr1 == 0) Jmp idxLabel; */
7570
7571/**
7572 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7573 *
7574 * The operand size is given by @a f64Bit.
7575 */
7576DECL_FORCE_INLINE_THROW(uint32_t)
7577iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7578 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7579{
7580 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7581 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7582}
7583
7584
7585/**
7586 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7587 *
7588 * The operand size is given by @a f64Bit.
7589 */
7590DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7591 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7592{
7593 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7594}
7595
7596
7597/**
7598 * Emits code that jumps to a new label if @a iGprSrc is zero.
7599 *
7600 * The operand size is given by @a f64Bit.
7601 */
7602DECL_INLINE_THROW(uint32_t)
7603iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7604 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7605{
7606 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7607 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7608}
7609
7610
7611/**
7612 * Emits code that jumps to @a offTarget if @a iGprSrc is zero.
7613 *
7614 * The operand size is given by @a f64Bit.
7615 */
7616DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7617 uint8_t iGprSrc, bool f64Bit, uint32_t offTarget)
7618{
7619 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, offTarget);
7620}
7621
7622
7623/* if (Gpr1 != 0) Jmp idxLabel; */
7624
7625/**
7626 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7627 *
7628 * The operand size is given by @a f64Bit.
7629 */
7630DECL_FORCE_INLINE_THROW(uint32_t)
7631iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7632 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7633{
7634 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7635 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7636}
7637
7638
7639/**
7640 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7641 *
7642 * The operand size is given by @a f64Bit.
7643 */
7644DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7645 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7646{
7647 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7648}
7649
7650
7651/**
7652 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7653 *
7654 * The operand size is given by @a f64Bit.
7655 */
7656DECL_INLINE_THROW(uint32_t)
7657iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7658 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7659{
7660 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7661 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7662}
7663
7664
7665/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7666
7667/**
7668 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7669 * differ.
7670 */
7671DECL_INLINE_THROW(uint32_t)
7672iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7673 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7674{
7675 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7676 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7677 return off;
7678}
7679
7680
7681/**
7682 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
7683 */
7684DECL_INLINE_THROW(uint32_t)
7685iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7686 uint8_t iGprLeft, uint8_t iGprRight,
7687 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7688{
7689 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7690 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7691}
7692
7693
7694/* if (Gpr != Imm) Jmp idxLabel; */
7695
7696/**
7697 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7698 */
7699DECL_INLINE_THROW(uint32_t)
7700iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7701 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7702{
7703 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7704 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7705 return off;
7706}
7707
7708
7709/**
7710 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7711 */
7712DECL_INLINE_THROW(uint32_t)
7713iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7714 uint8_t iGprSrc, uint64_t uImm,
7715 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7716{
7717 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7718 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7719}
7720
7721
7722/**
7723 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7724 * @a uImm.
7725 */
7726DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7727 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7728{
7729 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7730 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7731 return off;
7732}
7733
7734
7735/**
7736 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7737 * @a uImm.
7738 */
7739DECL_INLINE_THROW(uint32_t)
7740iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7741 uint8_t iGprSrc, uint32_t uImm,
7742 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7743{
7744 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7745 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7746}
7747
7748
7749/**
7750 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7751 * @a uImm.
7752 */
7753DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7754 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7755{
7756 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7757 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7758 return off;
7759}
7760
7761
7762/**
7763 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7764 * @a uImm.
7765 */
7766DECL_INLINE_THROW(uint32_t)
7767iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7768 uint8_t iGprSrc, uint16_t uImm,
7769 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7770{
7771 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7772 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7773}
7774
7775
7776/* if (Gpr == Imm) Jmp idxLabel; */
7777
7778/**
7779 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7780 */
7781DECL_INLINE_THROW(uint32_t)
7782iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7783 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7784{
7785 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7786 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7787 return off;
7788}
7789
7790
7791/**
7792 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7793 */
7794DECL_INLINE_THROW(uint32_t)
7795iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7796 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7797{
7798 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7799 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7800}
7801
7802
7803/**
7804 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7805 */
7806DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7807 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7808{
7809 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7810 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7811 return off;
7812}
7813
7814
7815/**
7816 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7817 */
7818DECL_INLINE_THROW(uint32_t)
7819iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7820 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7821{
7822 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7823 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7824}
7825
7826
7827/**
7828 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7829 *
7830 * @note ARM64: Helper register is required (idxTmpReg).
7831 */
7832DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7833 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7834 uint8_t idxTmpReg = UINT8_MAX)
7835{
7836 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7837 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7838 return off;
7839}
7840
7841
7842/**
7843 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7844 *
7845 * @note ARM64: Helper register is required (idxTmpReg).
7846 */
7847DECL_INLINE_THROW(uint32_t)
7848iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7849 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7850 uint8_t idxTmpReg = UINT8_MAX)
7851{
7852 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7853 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7854}
7855
7856
7857
7858/*********************************************************************************************************************************
7859* Indirect Jumps. *
7860*********************************************************************************************************************************/
7861
7862/**
7863 * Emits an indirect jump to a 64-bit address in a GPR.
7864 */
7865DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpViaGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc)
7866{
7867#ifdef RT_ARCH_AMD64
7868 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7869 if (iGprSrc >= 8)
7870 pCodeBuf[off++] = X86_OP_REX_B;
7871 pCodeBuf[off++] = 0xff;
7872 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7873
7874#elif defined(RT_ARCH_ARM64)
7875 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7876 pCodeBuf[off++] = Armv8A64MkInstrBr(iGprSrc);
7877
7878#else
7879# error "port me"
7880#endif
7881 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7882 return off;
7883}
7884
7885
7886/**
7887 * Emits an indirect jump to an immediate 64-bit address (uses the temporary GPR).
7888 */
7889DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7890{
7891 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7892 return iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP0);
7893}
7894
7895
7896/*********************************************************************************************************************************
7897* Calls. *
7898*********************************************************************************************************************************/
7899
7900/**
7901 * Emits a call to a 64-bit address.
7902 */
7903DECL_FORCE_INLINE(uint32_t) iemNativeEmitCallImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uintptr_t uPfn,
7904#ifdef RT_ARCH_AMD64
7905 uint8_t idxRegTmp = X86_GREG_xAX
7906#elif defined(RT_ARCH_ARM64)
7907 uint8_t idxRegTmp = IEMNATIVE_REG_FIXED_TMP0
7908#else
7909# error "Port me"
7910#endif
7911 )
7912{
7913 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegTmp, uPfn);
7914
7915#ifdef RT_ARCH_AMD64
7916 /* call idxRegTmp */
7917 if (idxRegTmp >= 8)
7918 pCodeBuf[off++] = X86_OP_REX_B;
7919 pCodeBuf[off++] = 0xff;
7920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, idxRegTmp & 7);
7921
7922#elif defined(RT_ARCH_ARM64)
7923 pCodeBuf[off++] = Armv8A64MkInstrBlr(idxRegTmp);
7924
7925#else
7926# error "port me"
7927#endif
7928 return off;
7929}
7930
7931
7932/**
7933 * Emits a call to a 64-bit address.
7934 */
7935DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7936{
7937#ifdef RT_ARCH_AMD64
7938 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7939
7940 /* call rax */
7941 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7942 pbCodeBuf[off++] = 0xff;
7943 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7944
7945#elif defined(RT_ARCH_ARM64)
7946 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7947
7948 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7949 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7950
7951#else
7952# error "port me"
7953#endif
7954 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7955 return off;
7956}
7957
7958
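/*
 * Usage sketch (illustrative only): call a C helper that takes pVCpu as its
 * sole argument.  IEMNATIVE_CALL_ARG0_GREG and IEMNATIVE_REG_FIXED_PVMCPU are
 * assumed to come from IEMN8veRecompiler.h; pfnHelper is whatever the caller
 * wants invoked.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleCallHelperWithVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnHelper)
{
    /* Load the first call argument register with pVCpu, then do the call. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    return iemNativeEmitCallImm(pReNative, off, pfnHelper);
}

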
7959/**
7960 * Emits code to load a stack variable into an argument GPR.
7961 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7962 */
7963DECL_FORCE_INLINE_THROW(uint32_t)
7964iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7965 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7966 bool fSpilledVarsInVolatileRegs = false)
7967{
7968 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7969 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7970 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7971
7972 uint8_t const idxRegVar = pVar->idxReg;
7973 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7974 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7975 || !fSpilledVarsInVolatileRegs ))
7976 {
7977 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7978 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7979 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7980 if (!offAddend)
7981 {
7982 if (idxRegArg != idxRegVar)
7983 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7984 }
7985 else
7986 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7987 }
7988 else
7989 {
7990 uint8_t const idxStackSlot = pVar->idxStackSlot;
7991 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7992 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7993 if (offAddend)
7994 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7995 }
7996 return off;
7997}
7998
7999
8000/**
8001 * Emits code to load a stack or immediate variable value into an argument GPR,
8002 * optionally with an addend.
8003 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
8004 */
8005DECL_FORCE_INLINE_THROW(uint32_t)
8006iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8007 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
8008 bool fSpilledVarsInVolatileRegs = false)
8009{
8010 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8011 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8012 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8013 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
8014 else
8015 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
8016 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
8017 return off;
8018}
8019
8020
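/*
 * Usage sketch (illustrative only): marshal two recompiler variables into the
 * second and third call argument registers ahead of a helper call; the
 * variable indexes are whatever the caller allocated earlier, and the
 * IEMNATIVE_CALL_ARG1_GREG/ARG2_GREG constants are assumed to come from
 * IEMN8veRecompiler.h.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleLoadTwoArgVars(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar1, uint8_t idxVar2)
{
    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVar1);
    return iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVar2);
}

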
8021/**
8022 * Emits code to load the variable address into an argument GPR.
8023 *
8024 * This only works for uninitialized and stack variables.
8025 */
8026DECL_FORCE_INLINE_THROW(uint32_t)
8027iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8028 bool fFlushShadows)
8029{
8030 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8031 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8032 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8033 || pVar->enmKind == kIemNativeVarKind_Stack,
8034 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8035 AssertStmt(!pVar->fSimdReg,
8036 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8037
8038 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8039 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8040
8041 uint8_t const idxRegVar = pVar->idxReg;
8042 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
8043 {
8044 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
8045 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
8046 Assert(pVar->idxReg == UINT8_MAX);
8047 }
8048 Assert( pVar->idxStackSlot != UINT8_MAX
8049 && pVar->idxReg == UINT8_MAX);
8050
8051 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8052}
8053
8054
8055/*********************************************************************************************************************************
8056* TB exiting helpers. *
8057*********************************************************************************************************************************/
8058
8059/**
8060 * Emits a Jcc rel32 / B.cc imm19 to the TB exit with the given reason (fixup applied later).
8061 */
8062DECL_FORCE_INLINE_THROW(uint32_t)
8063iemNativeEmitJccTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8064 IEMNATIVELABELTYPE enmExitReason, IEMNATIVEINSTRCOND enmCond)
8065{
8066 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8067#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8068 /* jcc rel32 */
8069 pCodeBuf[off++] = 0x0f;
8070 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
8071 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8072 pCodeBuf[off++] = 0x00;
8073 pCodeBuf[off++] = 0x00;
8074 pCodeBuf[off++] = 0x00;
8075 pCodeBuf[off++] = 0x00;
8076
8077#else
8078 /* ARM64 doesn't have the necessary jump range, so we jump via local label
8079 just like when we keep everything local. */
8080 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8081 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel, enmCond);
8082#endif
8083 return off;
8084}
8085
8086
8087/**
8088 * Emits a Jcc rel32 / B.cc imm19 to the TB exit with the given reason.
8089 */
8090DECL_INLINE_THROW(uint32_t)
8091iemNativeEmitJccTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason, IEMNATIVEINSTRCOND enmCond)
8092{
8093 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8094#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8095# ifdef RT_ARCH_AMD64
8096 off = iemNativeEmitJccTbExitEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, enmExitReason, enmCond);
8097# elif defined(RT_ARCH_ARM64)
8098 off = iemNativeEmitJccTbExitEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 2), off, enmExitReason, enmCond);
8099# else
8100# error "Port me!"
8101# endif
8102 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8103 return off;
8104#else
8105 return iemNativeEmitJccToNewLabel(pReNative, off, enmExitReason, 0 /*uData*/, enmCond);
8106#endif
8107}
8108
8109
8110/**
8111 * Emits a JNZ/JNE rel32 / B.NE imm19 to the TB exit routine with the given reason.
8112 */
8113DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8114{
8115#ifdef RT_ARCH_AMD64
8116 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_ne);
8117#elif defined(RT_ARCH_ARM64)
8118 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Ne);
8119#else
8120# error "Port me!"
8121#endif
8122}
8123
8124
8125/**
8126 * Emits a JZ/JE rel32 / B.EQ imm19 to the TB exit routine with the given reason.
8127 */
8128DECL_INLINE_THROW(uint32_t) iemNativeEmitJzTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8129{
8130#ifdef RT_ARCH_AMD64
8131 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_e);
8132#elif defined(RT_ARCH_ARM64)
8133 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Eq);
8134#else
8135# error "Port me!"
8136#endif
8137}
8138
8139
8140/**
8141 * Emits a JA/JNBE rel32 / B.HI imm19 to the TB exit.
8142 */
8143DECL_INLINE_THROW(uint32_t) iemNativeEmitJaTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8144{
8145#ifdef RT_ARCH_AMD64
8146 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_nbe);
8147#elif defined(RT_ARCH_ARM64)
8148 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Hi);
8149#else
8150# error "Port me!"
8151#endif
8152}
8153
8154
8155/**
8156 * Emits a JL/JNGE rel32 / B.LT imm19 to the TB exit with the given reason.
8157 */
8158DECL_INLINE_THROW(uint32_t) iemNativeEmitJlTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8159{
8160#ifdef RT_ARCH_AMD64
8161 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_l);
8162#elif defined(RT_ARCH_ARM64)
8163 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Lt);
8164#else
8165# error "Port me!"
8166#endif
8167}
8168
8169
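/*
 * Usage sketch (illustrative only): compare a value GPR against a limit GPR
 * and take the given TB exit when strictly above it (unsigned compare).
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleTbExitIfAbove(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprValue, uint8_t iGprLimit,
                                  IEMNATIVELABELTYPE enmExitReason)
{
    off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprValue, iGprLimit);
    return iemNativeEmitJaTbExit(pReNative, off, enmExitReason);
}

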
8170DECL_INLINE_THROW(uint32_t)
8171iemNativeEmitTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8172{
8173 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8174#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8175# ifdef RT_ARCH_AMD64
8176 /* jmp rel32 */
8177 pCodeBuf[off++] = 0xe9;
8178 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8179 pCodeBuf[off++] = 0xfe;
8180 pCodeBuf[off++] = 0xff;
8181 pCodeBuf[off++] = 0xff;
8182 pCodeBuf[off++] = 0xff;
8183
8184# elif defined(RT_ARCH_ARM64)
8185 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8186 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8187
8188# else
8189# error "Port me!"
8190# endif
8191 return off;
8192
8193#else
8194 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8195 return iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabel);
8196#endif
8197}
8198
8199
8200DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8201{
8202 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8203#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8204# ifdef RT_ARCH_AMD64
8205 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8206
8207 /* jmp rel32 */
8208 pCodeBuf[off++] = 0xe9;
8209 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8210 pCodeBuf[off++] = 0xfe;
8211 pCodeBuf[off++] = 0xff;
8212 pCodeBuf[off++] = 0xff;
8213 pCodeBuf[off++] = 0xff;
8214
8215# elif defined(RT_ARCH_ARM64)
8216 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8217 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8218 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8219
8220# else
8221# error "Port me!"
8222# endif
8223 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8224 return off;
8225
8226#else
8227 return iemNativeEmitJmpToNewLabel(pReNative, off, enmExitReason);
8228#endif
8229}
8230
8231
8232/**
8233 * Emits a jump to the TB exit with @a enmExitReason on the condition _any_ of the bits in @a fBits
8234 * are set in @a iGprSrc.
8235 */
8236DECL_INLINE_THROW(uint32_t)
8237iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8238 uint8_t iGprSrc, uint64_t fBits, IEMNATIVELABELTYPE enmExitReason)
8239{
8240 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8241
8242 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8243 return iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8244}
8245
8246
8247/**
8248 * Emits a jump to the TB exit with @a enmExitReason on the condition _none_
8249 * of the bits in @a fBits are set in @a iGprSrc.
8250 */
8251DECL_INLINE_THROW(uint32_t)
8252iemNativeEmitTestAnyBitsInGprAndTbExitIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8253 uint8_t iGprSrc, uint64_t fBits, IEMNATIVELABELTYPE enmExitReason)
8254{
8255 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8256
8257 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8258 return iemNativeEmitJzTbExit(pReNative, off, enmExitReason);
8259}
8260
8261
8262/**
8263 * Emits code that exits the TB with the given reason if @a iGprLeft and @a iGprRight
8264 * differs.
8265 */
8266DECL_INLINE_THROW(uint32_t)
8267iemNativeEmitTestIfGprNotEqualGprAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8268 uint8_t iGprLeft, uint8_t iGprRight, IEMNATIVELABELTYPE enmExitReason)
8269{
8270 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
8271 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8272 return off;
8273}
8274
8275
8276/**
8277 * Emits code that exits the current TB with the given reason if 32-bit
8278 * @a iGprSrc differs from @a uImm.
8279 */
8280DECL_INLINE_THROW(uint32_t)
8281iemNativeEmitTestIfGpr32NotEqualImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8282 uint8_t iGprSrc, uint32_t uImm, IEMNATIVELABELTYPE enmExitReason)
8283{
8284 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8285 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8286 return off;
8287}
8288
8289
8290/**
8291 * Emits code that exits the current TB if @a iGprSrc differs from @a uImm.
8292 */
8293DECL_INLINE_THROW(uint32_t)
8294iemNativeEmitTestIfGprNotEqualImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8295 uint8_t iGprSrc, uint64_t uImm, IEMNATIVELABELTYPE enmExitReason)
8296{
8297 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8298 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8299 return off;
8300}
8301
8302
8303/**
8304 * Emits code that exits the current TB with the given reason if 32-bit @a iGprSrc equals @a uImm.
8305 */
8306DECL_INLINE_THROW(uint32_t)
8307iemNativeEmitTestIfGpr32EqualsImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8308 uint8_t iGprSrc, uint32_t uImm, IEMNATIVELABELTYPE enmExitReason)
8309{
8310 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8311 off = iemNativeEmitJzTbExit(pReNative, off, enmExitReason);
8312 return off;
8313}
8314
8315
8316/**
8317 * Emits code to exit the current TB with the reason @a enmExitReason on the condition that bit @a iBitNo _is_ _set_ in
8318 * @a iGprSrc.
8319 *
8320 * @note On ARM64 the range is only +/-8191 instructions.
8321 */
8322DECL_INLINE_THROW(uint32_t)
8323iemNativeEmitTestBitInGprAndTbExitIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8324 uint8_t iGprSrc, uint8_t iBitNo, IEMNATIVELABELTYPE enmExitReason)
8325{
8326 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8327#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8328 Assert(iBitNo < 64);
8329 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8330 if (iBitNo < 8)
8331 {
8332 /* test Eb, imm8 */
8333 if (iGprSrc >= 4)
8334 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
8335 pbCodeBuf[off++] = 0xf6;
8336 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
8337 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
8338 off = iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_ne);
8339 }
8340 else
8341 {
8342 /* bt Ev, imm8 */
8343 if (iBitNo >= 32)
8344 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8345 else if (iGprSrc >= 8)
8346 pbCodeBuf[off++] = X86_OP_REX_B;
8347 pbCodeBuf[off++] = 0x0f;
8348 pbCodeBuf[off++] = 0xba;
8349 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8350 pbCodeBuf[off++] = iBitNo;
8351 off = iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_c);
8352 }
8353 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8354 return off;
8355
8356#else
8357 /* ARM64 doesn't have the necessary jump range, so we jump via local label
8358 just like when we keep everything local. */
8359 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8360 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
8361#endif
8362}
8363
8364
8365/**
8366 * Emits code that exits the current TB with @a enmExitReason if @a iGprSrc is not zero.
8367 *
8368 * The operand size is given by @a f64Bit.
8369 */
8370DECL_FORCE_INLINE_THROW(uint32_t)
8371iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8372 uint8_t iGprSrc, bool f64Bit, IEMNATIVELABELTYPE enmExitReason)
8373{
8374 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8375#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8376 /* test reg32,reg32 / test reg64,reg64 */
8377 if (f64Bit)
8378 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8379 else if (iGprSrc >= 8)
8380 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8381 pCodeBuf[off++] = 0x85;
8382 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8383
8384 /* jnz idxLabel */
8385 return iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, enmExitReason, kIemNativeInstrCond_ne);
8386
8387#else
8388 /* ARM64 doesn't have the necessary jump range, so we jump via local label
8389 just like when we keep everything local. */
8390 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8391 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8392 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8393#endif
8394}
8395
8396
8397/**
8398 * Emits code to exit the current TB with the given reason @a enmExitReason if @a iGprSrc is not zero.
8399 *
8400 * The operand size is given by @a f64Bit.
8401 */
8402DECL_INLINE_THROW(uint32_t)
8403iemNativeEmitTestIfGprIsNotZeroAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8404 uint8_t iGprSrc, bool f64Bit, IEMNATIVELABELTYPE enmExitReason)
8405{
8406#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8407 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
8408 off, iGprSrc, f64Bit, enmExitReason);
8409 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8410 return off;
8411#else
8412 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8413 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8414#endif
8415}
8416
8417
8418#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8419/*********************************************************************************************************************************
8420* SIMD helpers. *
8421*********************************************************************************************************************************/
8422
8423
8424/**
8425 * Emits code to load the variable address into an argument GPR.
8426 *
8427 * This is a special variant intended for SIMD variables only, and it is only
8428 * called by the TLB miss path in the memory fetch/store code, because there the
8429 * value is passed by reference and both the register and the stack copy are
8430 * needed, depending on which path ends up being taken (TLB hit vs. miss).
8431 */
8432DECL_FORCE_INLINE_THROW(uint32_t)
8433iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8434 bool fSyncRegWithStack = true)
8435{
8436 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8437 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8438 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8439 || pVar->enmKind == kIemNativeVarKind_Stack,
8440 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8441 AssertStmt(pVar->fSimdReg,
8442 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8443 Assert( pVar->idxStackSlot != UINT8_MAX
8444 && pVar->idxReg != UINT8_MAX);
8445
8446 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8447 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8448
8449 uint8_t const idxRegVar = pVar->idxReg;
8450 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8451 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
8452
8453 if (fSyncRegWithStack)
8454 {
8455 if (pVar->cbVar == sizeof(RTUINT128U))
8456 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
8457 else
8458 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
8459 }
8460
8461 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8462}
8463
8464
8465/**
8466 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
8467 *
8468 * This is a special helper that is only called by the TLB miss path in the
8469 * memory fetch/store code, because there the value is passed by reference and
8470 * ends up on the stack after a TLB miss, so the assigned host register must be
8471 * re-synced from the stack copy afterwards.
8472 */
8473DECL_FORCE_INLINE_THROW(uint32_t)
8474iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
8475{
8476 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8477 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8478 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8479 || pVar->enmKind == kIemNativeVarKind_Stack,
8480 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8481 AssertStmt(pVar->fSimdReg,
8482 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8483 Assert( pVar->idxStackSlot != UINT8_MAX
8484 && pVar->idxReg != UINT8_MAX);
8485
8486 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8487 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8488
8489 uint8_t const idxRegVar = pVar->idxReg;
8490 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8491 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
8492
8493 if (pVar->cbVar == sizeof(RTUINT128U))
8494 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
8495 else
8496 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
8497
8498 return off;
8499}
8500
8501
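/*
 * Usage sketch (illustrative only): TLB miss path for a by-reference 128-bit
 * memory access - sync the SIMD variable to its stack slot, pass the slot's
 * address to a helper, then pull the helper's result back into the assigned
 * host register.  The argument register choice and pfnHelper are assumptions
 * made for the example.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleSimdByRefTlbMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uintptr_t pfnHelper)
{
    off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVar);
    off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
    return iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVar);
}

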
8502/**
8503 * Emits a gprdst = ~gprsrc store.
8504 */
8505DECL_FORCE_INLINE_THROW(uint32_t)
8506iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
8507{
8508#ifdef RT_ARCH_AMD64
8509 if (iGprDst != iGprSrc)
8510 {
8511 /* mov gprdst, gprsrc. */
8512 if (f64Bit)
8513 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
8514 else
8515 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
8516 }
8517
8518 /* not gprdst */
8519 if (f64Bit || iGprDst >= 8)
8520 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
8521 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
8522 pCodeBuf[off++] = 0xf7;
8523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
8524#elif defined(RT_ARCH_ARM64)
8525 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
8526#else
8527# error "port me"
8528#endif
8529 return off;
8530}
8531
8532
8533/**
8534 * Emits a gprdst = ~gprsrc store.
8535 */
8536DECL_INLINE_THROW(uint32_t)
8537iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
8538{
8539#ifdef RT_ARCH_AMD64
8540 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
8541#elif defined(RT_ARCH_ARM64)
8542 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
8543#else
8544# error "port me"
8545#endif
8546 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8547 return off;
8548}
8549
8550
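/*
 * Usage sketch (illustrative only): invert a mask GPR in place, 32-bit
 * variant; bits 63:32 of the destination end up zero on both targets.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleInvertMask32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprMask)
{
    return iemNativeEmitInvBitsGpr(pReNative, off, iGprMask, iGprMask, false /*f64Bit*/);
}

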
8551/**
8552 * Emits a 128-bit vector register store to a VCpu value.
8553 */
8554DECL_FORCE_INLINE_THROW(uint32_t)
8555iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8556{
8557#ifdef RT_ARCH_AMD64
8558 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
8559 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8560 if (iVecReg >= 8)
8561 pCodeBuf[off++] = X86_OP_REX_R;
8562 pCodeBuf[off++] = 0x0f;
8563 pCodeBuf[off++] = 0x7f;
8564 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8565#elif defined(RT_ARCH_ARM64)
8566 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
8567
8568#else
8569# error "port me"
8570#endif
8571 return off;
8572}
8573
8574
8575/**
8576 * Emits a 128-bit vector register store to a VCpu value.
8577 */
8578DECL_INLINE_THROW(uint32_t)
8579iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8580{
8581#ifdef RT_ARCH_AMD64
8582 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
8583#elif defined(RT_ARCH_ARM64)
8584 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
8585#else
8586# error "port me"
8587#endif
8588 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8589 return off;
8590}
8591
8592
8593/**
8594 * Emits a high 128-bit vector register store to a VCpu value.
8595 */
8596DECL_FORCE_INLINE_THROW(uint32_t)
8597iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8598{
8599#ifdef RT_ARCH_AMD64
8600 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
8601 pCodeBuf[off++] = X86_OP_VEX3;
8602 if (iVecReg >= 8)
8603 pCodeBuf[off++] = 0x63;
8604 else
8605 pCodeBuf[off++] = 0xe3;
8606 pCodeBuf[off++] = 0x7d;
8607 pCodeBuf[off++] = 0x39;
8608 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8609 pCodeBuf[off++] = 0x01; /* Immediate */
8610#elif defined(RT_ARCH_ARM64)
8611 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
8612#else
8613# error "port me"
8614#endif
8615 return off;
8616}
8617
8618
8619/**
8620 * Emits a high 128-bit vector register store to a VCpu value.
8621 */
8622DECL_INLINE_THROW(uint32_t)
8623iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8624{
8625#ifdef RT_ARCH_AMD64
8626 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8627#elif defined(RT_ARCH_ARM64)
8628 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8629 Assert(!(iVecReg & 0x1));
8630 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8631#else
8632# error "port me"
8633#endif
8634 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8635 return off;
8636}
8637
8638
8639/**
8640 * Emits a 128-bit vector register load of a VCpu value.
8641 */
8642DECL_FORCE_INLINE_THROW(uint32_t)
8643iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8644{
8645#ifdef RT_ARCH_AMD64
8646 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
8647 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8648 if (iVecReg >= 8)
8649 pCodeBuf[off++] = X86_OP_REX_R;
8650 pCodeBuf[off++] = 0x0f;
8651 pCodeBuf[off++] = 0x6f;
8652 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8653#elif defined(RT_ARCH_ARM64)
8654 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8655
8656#else
8657# error "port me"
8658#endif
8659 return off;
8660}
8661
8662
8663/**
8664 * Emits a 128-bit vector register load of a VCpu value.
8665 */
8666DECL_INLINE_THROW(uint32_t)
8667iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8668{
8669#ifdef RT_ARCH_AMD64
8670 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
8671#elif defined(RT_ARCH_ARM64)
8672 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
8673#else
8674# error "port me"
8675#endif
8676 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8677 return off;
8678}
8679
8680
8681/**
8682 * Emits a high 128-bit vector register load of a VCpu value.
8683 */
8684DECL_FORCE_INLINE_THROW(uint32_t)
8685iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8686{
8687#ifdef RT_ARCH_AMD64
8688 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
8689 pCodeBuf[off++] = X86_OP_VEX3;
8690 if (iVecReg >= 8)
8691 pCodeBuf[off++] = 0x63;
8692 else
8693 pCodeBuf[off++] = 0xe3;
8694 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8695 pCodeBuf[off++] = 0x38;
8696 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8697 pCodeBuf[off++] = 0x01; /* Immediate */
8698#elif defined(RT_ARCH_ARM64)
8699 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8700#else
8701# error "port me"
8702#endif
8703 return off;
8704}
8705
8706
8707/**
8708 * Emits a high 128-bit vector register load of a VCpu value.
8709 */
8710DECL_INLINE_THROW(uint32_t)
8711iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8712{
8713#ifdef RT_ARCH_AMD64
8714 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8715#elif defined(RT_ARCH_ARM64)
8716 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8717 Assert(!(iVecReg & 0x1));
8718 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8719#else
8720# error "port me"
8721#endif
8722 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8723 return off;
8724}
8725
8726
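/*
 * Usage sketch (illustrative only): load a full 256-bit guest value into a
 * host SIMD register (an adjacent even/odd register pair on ARM64), low half
 * first.  The two offsets are placeholders; real callers pass RT_UOFFSETOF
 * expressions into the guest CPU context.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleLoadU256FromVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
                                     uint32_t offVCpuLow, uint32_t offVCpuHigh)
{
    off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, iVecReg, offVCpuLow);
    return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, iVecReg, offVCpuHigh);
}

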
8727/**
8728 * Emits a vecdst = vecsrc load.
8729 */
8730DECL_FORCE_INLINE(uint32_t)
8731iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8732{
8733#ifdef RT_ARCH_AMD64
8734 /* movdqu vecdst, vecsrc */
8735 pCodeBuf[off++] = 0xf3;
8736
8737 if ((iVecRegDst | iVecRegSrc) >= 8)
8738 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
8739 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
8740 : X86_OP_REX_R;
8741 pCodeBuf[off++] = 0x0f;
8742 pCodeBuf[off++] = 0x6f;
8743 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8744
8745#elif defined(RT_ARCH_ARM64)
8746 /* mov dst, src; alias for: orr dst, src, src */
8747 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
8748
8749#else
8750# error "port me"
8751#endif
8752 return off;
8753}
8754
8755
8756/**
8757 * Emits a vecdst = vecsrc load, 128-bit.
8758 */
8759DECL_INLINE_THROW(uint32_t)
8760iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8761{
8762#ifdef RT_ARCH_AMD64
8763 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8764#elif defined(RT_ARCH_ARM64)
8765 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8766#else
8767# error "port me"
8768#endif
8769 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8770 return off;
8771}
8772
8773
8774/**
8775 * Emits a vecdst[128:255] = vecsrc[128:255] load.
8776 */
8777DECL_FORCE_INLINE_THROW(uint32_t)
8778iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8779{
8780#ifdef RT_ARCH_AMD64
8781 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
8782 pCodeBuf[off++] = X86_OP_VEX3;
8783 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8784 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8785 pCodeBuf[off++] = 0x46;
8786 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8787 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
8788
8789#elif defined(RT_ARCH_ARM64)
8790 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8791
8792 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
8793# ifdef IEM_WITH_THROW_CATCH
8794 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8795# else
8796 AssertReleaseFailedStmt(off = UINT32_MAX);
8797# endif
8798#else
8799# error "port me"
8800#endif
8801 return off;
8802}
8803
8804
8805/**
8806 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
8807 */
8808DECL_INLINE_THROW(uint32_t)
8809iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8810{
8811#ifdef RT_ARCH_AMD64
8812 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8813#elif defined(RT_ARCH_ARM64)
8814 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8815 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
8816#else
8817# error "port me"
8818#endif
8819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8820 return off;
8821}
8822
8823
8824/**
8825 * Emits a vecdst[0:127] = vecsrc[128:255] load.
8826 */
8827DECL_FORCE_INLINE_THROW(uint32_t)
8828iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8829{
8830#ifdef RT_ARCH_AMD64
8831 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
8832 pCodeBuf[off++] = X86_OP_VEX3;
8833 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
8834 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8835 pCodeBuf[off++] = 0x39;
8836 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
8837 pCodeBuf[off++] = 0x1;
8838
8839#elif defined(RT_ARCH_ARM64)
8840 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8841
8842 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
8843# ifdef IEM_WITH_THROW_CATCH
8844 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8845# else
8846 AssertReleaseFailedStmt(off = UINT32_MAX);
8847# endif
8848#else
8849# error "port me"
8850#endif
8851 return off;
8852}
8853
8854
8855/**
8856 * Emits a vecdst[0:127] = vecsrc[128:255] load, high 128-bit.
8857 */
8858DECL_INLINE_THROW(uint32_t)
8859iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8860{
8861#ifdef RT_ARCH_AMD64
8862 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8863#elif defined(RT_ARCH_ARM64)
8864 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8865 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
8866#else
8867# error "port me"
8868#endif
8869 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8870 return off;
8871}
8872
8873
8874/**
8875 * Emits a vecdst = vecsrc load, 256-bit.
8876 */
8877DECL_INLINE_THROW(uint32_t)
8878iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8879{
8880#ifdef RT_ARCH_AMD64
8881 /* vmovdqa ymm, ymm */
8882 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8883 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
8884 {
8885 pbCodeBuf[off++] = X86_OP_VEX3;
8886 pbCodeBuf[off++] = 0x41;
8887 pbCodeBuf[off++] = 0x7d;
8888 pbCodeBuf[off++] = 0x6f;
8889 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8890 }
8891 else
8892 {
8893 pbCodeBuf[off++] = X86_OP_VEX2;
8894 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
8895 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
8896 pbCodeBuf[off++] = iVecRegSrc >= 8
8897 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
8898 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8899 }
8900#elif defined(RT_ARCH_ARM64)
8901 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8902 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
8903 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
8904 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
8905#else
8906# error "port me"
8907#endif
8908 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8909 return off;
8910}
8911
8912
8913/**
8914 * Emits a vecdst[128:255] = vecsrc[0:127] load.
8915 */
8916DECL_FORCE_INLINE(uint32_t)
8917iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8918{
8919#ifdef RT_ARCH_AMD64
8920 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
8921 pCodeBuf[off++] = X86_OP_VEX3;
8922 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8923 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8924 pCodeBuf[off++] = 0x38;
8925 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8926 pCodeBuf[off++] = 0x01; /* Immediate */
8927
8928#elif defined(RT_ARCH_ARM64)
8929 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8930 /* mov dst, src; alias for: orr dst, src, src */
8931 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
8932
8933#else
8934# error "port me"
8935#endif
8936 return off;
8937}
8938
8939
8940/**
8941 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
8942 */
8943DECL_INLINE_THROW(uint32_t)
8944iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8945{
8946#ifdef RT_ARCH_AMD64
8947 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8948#elif defined(RT_ARCH_ARM64)
8949 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8950#else
8951# error "port me"
8952#endif
8953 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8954 return off;
8955}
8956
8957
8958/**
8959 * Emits a gprdst = vecsrc[x] load, 64-bit.
8960 */
8961DECL_FORCE_INLINE(uint32_t)
8962iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8963{
8964#ifdef RT_ARCH_AMD64
8965 if (iQWord >= 2)
8966 {
8967 /*
8968 * vpextrq doesn't work on the upper 128-bits.
8969 * So we use the following sequence:
8970 * vextracti128 vectmp0, vecsrc, 1
8971 * pextrq gpr, vectmp0, #(iQWord - 2)
8972 */
8973 /* vextracti128 */
8974 pCodeBuf[off++] = X86_OP_VEX3;
8975 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
8976 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8977 pCodeBuf[off++] = 0x39;
8978 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8979 pCodeBuf[off++] = 0x1;
8980
8981 /* pextrq */
8982 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8983 pCodeBuf[off++] = X86_OP_REX_W
8984 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8985 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8986 pCodeBuf[off++] = 0x0f;
8987 pCodeBuf[off++] = 0x3a;
8988 pCodeBuf[off++] = 0x16;
8989 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
8990 pCodeBuf[off++] = iQWord - 2;
8991 }
8992 else
8993 {
8994 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
8995 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8996 pCodeBuf[off++] = X86_OP_REX_W
8997 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8998 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8999 pCodeBuf[off++] = 0x0f;
9000 pCodeBuf[off++] = 0x3a;
9001 pCodeBuf[off++] = 0x16;
9002 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9003 pCodeBuf[off++] = iQWord;
9004 }
9005#elif defined(RT_ARCH_ARM64)
9006 /* umov gprdst, vecsrc[iQWord] */
9007 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9008#else
9009# error "port me"
9010#endif
9011 return off;
9012}
9013
9014
9015/**
9016 * Emits a gprdst = vecsrc[x] load, 64-bit.
9017 */
9018DECL_INLINE_THROW(uint32_t)
9019iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
9020{
9021 Assert(iQWord <= 3);
9022
9023#ifdef RT_ARCH_AMD64
9024 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iVecRegSrc, iQWord);
9025#elif defined(RT_ARCH_ARM64)
9026 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9027 Assert(!(iVecRegSrc & 0x1));
9028 /* Need to access the "high" 128-bit vector register. */
9029 if (iQWord >= 2)
9030 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
9031 else
9032 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
9033#else
9034# error "port me"
9035#endif
9036 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9037 return off;
9038}
9039
9040
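/*
 * Usage sketch (illustrative only): fetch qword 2 (bits 191:128) of a 256-bit
 * value into a GPR; on ARM64 this transparently reads from the odd (high)
 * register of the pair, so iVecRegSrc must be the even low half.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleLoadThirdQWord(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc)
{
    return iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, iGprDst, iVecRegSrc, 2 /*iQWord*/);
}

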
9041/**
9042 * Emits a gprdst = vecsrc[x] load, 32-bit.
9043 */
9044DECL_FORCE_INLINE(uint32_t)
9045iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
9046{
9047#ifdef RT_ARCH_AMD64
9048 if (iDWord >= 4)
9049 {
9050 /*
9051 * vpextrd doesn't work on the upper 128-bits.
9052 * So we use the following sequence:
9053 * vextracti128 vectmp0, vecsrc, 1
9054 * pextrd gpr, vectmp0, #(iDWord - 4)
9055 */
9056 /* vextracti128 */
9057 pCodeBuf[off++] = X86_OP_VEX3;
9058 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
9059 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9060 pCodeBuf[off++] = 0x39;
9061 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9062 pCodeBuf[off++] = 0x1;
9063
9064 /* pextrd gpr, vectmp0, #(iDWord - 4) (ASSUMES SSE4.1). */
9065 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9066 if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
9067 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9068 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9069 pCodeBuf[off++] = 0x0f;
9070 pCodeBuf[off++] = 0x3a;
9071 pCodeBuf[off++] = 0x16;
9072 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
9073 pCodeBuf[off++] = iDWord - 4;
9074 }
9075 else
9076 {
9077 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
9078 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9079 if (iGprDst >= 8 || iVecRegSrc >= 8)
9080 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9081 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9082 pCodeBuf[off++] = 0x0f;
9083 pCodeBuf[off++] = 0x3a;
9084 pCodeBuf[off++] = 0x16;
9085 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9086 pCodeBuf[off++] = iDWord;
9087 }
9088#elif defined(RT_ARCH_ARM64)
9089 Assert(iDWord < 4);
9090
9091 /* umov gprdst, vecsrc[iDWord] */
9092 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
9093#else
9094# error "port me"
9095#endif
9096 return off;
9097}
9098
9099
9100/**
9101 * Emits a gprdst = vecsrc[x] load, 32-bit.
9102 */
9103DECL_INLINE_THROW(uint32_t)
9104iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
9105{
9106 Assert(iDWord <= 7);
9107
9108#ifdef RT_ARCH_AMD64
9109 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
9110#elif defined(RT_ARCH_ARM64)
9111 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9112 Assert(!(iVecRegSrc & 0x1));
9113 /* Need to access the "high" 128-bit vector register. */
9114 if (iDWord >= 4)
9115 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
9116 else
9117 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
9118#else
9119# error "port me"
9120#endif
9121 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9122 return off;
9123}
9124
9125
9126/**
9127 * Emits a gprdst = vecsrc[x] load, 16-bit.
9128 */
9129DECL_FORCE_INLINE(uint32_t)
9130iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9131{
9132#ifdef RT_ARCH_AMD64
9133 if (iWord >= 8)
9134 {
9135 /** @todo Currently not used. */
9136 AssertReleaseFailed();
9137 }
9138 else
9139 {
9140 /* pextrw gpr, vecsrc, #iWord */
9141 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9142 if (iGprDst >= 8 || iVecRegSrc >= 8)
9143 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
9144 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
9145 pCodeBuf[off++] = 0x0f;
9146 pCodeBuf[off++] = 0xc5;
9147 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
9148 pCodeBuf[off++] = iWord;
9149 }
9150#elif defined(RT_ARCH_ARM64)
9151 /* umov gprdst, vecsrc[iWord] */
9152 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
9153#else
9154# error "port me"
9155#endif
9156 return off;
9157}
9158
9159
9160/**
9161 * Emits a gprdst = vecsrc[x] load, 16-bit.
9162 */
9163DECL_INLINE_THROW(uint32_t)
9164iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9165{
9166 Assert(iWord <= 15);
9167
9168#ifdef RT_ARCH_AMD64
9169 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
9170#elif defined(RT_ARCH_ARM64)
9171 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9172 Assert(!(iVecRegSrc & 0x1));
9173 /* Need to access the "high" 128-bit vector register. */
9174 if (iWord >= 8)
9175 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
9176 else
9177 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
9178#else
9179# error "port me"
9180#endif
9181 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9182 return off;
9183}
9184
9185
9186/**
9187 * Emits a gprdst = vecsrc[x] load, 8-bit.
9188 */
9189DECL_FORCE_INLINE(uint32_t)
9190iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9191{
9192#ifdef RT_ARCH_AMD64
9193 if (iByte >= 16)
9194 {
9195 /** @todo Currently not used. */
9196 AssertReleaseFailed();
9197 }
9198 else
9199 {
9200 /* pextrb gpr, vecsrc, #iByte */
9201 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9202 if (iGprDst >= 8 || iVecRegSrc >= 8)
9203 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9204 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9205 pCodeBuf[off++] = 0x0f;
9206 pCodeBuf[off++] = 0x3a;
9207 pCodeBuf[off++] = 0x14;
9208 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9209 pCodeBuf[off++] = iByte;
9210 }
9211#elif defined(RT_ARCH_ARM64)
9212 /* umov gprdst, vecsrc[iByte] */
9213 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
9214#else
9215# error "port me"
9216#endif
9217 return off;
9218}
9219
9220
9221/**
9222 * Emits a gprdst = vecsrc[x] load, 8-bit.
9223 */
9224DECL_INLINE_THROW(uint32_t)
9225iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9226{
9227 Assert(iByte <= 31);
9228
9229#ifdef RT_ARCH_AMD64
9230 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
9231#elif defined(RT_ARCH_ARM64)
9232 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9233 Assert(!(iVecRegSrc & 0x1));
9234 /* Need to access the "high" 128-bit vector register. */
9235 if (iByte >= 16)
9236 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
9237 else
9238 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
9239#else
9240# error "port me"
9241#endif
9242 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9243 return off;
9244}
9245
9246
9247/**
9248 * Emits a vecdst[x] = gprsrc store, 64-bit.
9249 */
9250DECL_FORCE_INLINE(uint32_t)
9251iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9252{
9253#ifdef RT_ARCH_AMD64
9254 if (iQWord >= 2)
9255 {
9256 /*
9257 * vpinsrq doesn't work on the upper 128-bits.
9258 * So we use the following sequence:
9259 * vextracti128 vectmp0, vecdst, 1
9260 * pinsrq vectmp0, gpr, #(iQWord - 2)
9261 * vinserti128 vecdst, vectmp0, 1
9262 */
9263 /* vextracti128 */
9264 pCodeBuf[off++] = X86_OP_VEX3;
9265 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9266 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9267 pCodeBuf[off++] = 0x39;
9268 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9269 pCodeBuf[off++] = 0x1;
9270
9271 /* pinsrq */
9272 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9273 pCodeBuf[off++] = X86_OP_REX_W
9274 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9275 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9276 pCodeBuf[off++] = 0x0f;
9277 pCodeBuf[off++] = 0x3a;
9278 pCodeBuf[off++] = 0x22;
9279 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9280 pCodeBuf[off++] = iQWord - 2;
9281
9282 /* vinserti128 */
9283 pCodeBuf[off++] = X86_OP_VEX3;
9284 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9285 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9286 pCodeBuf[off++] = 0x38;
9287 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9288 pCodeBuf[off++] = 0x01; /* Immediate */
9289 }
9290 else
9291 {
9292 /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
9293 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9294 pCodeBuf[off++] = X86_OP_REX_W
9295 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9296 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9297 pCodeBuf[off++] = 0x0f;
9298 pCodeBuf[off++] = 0x3a;
9299 pCodeBuf[off++] = 0x22;
9300 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9301 pCodeBuf[off++] = iQWord;
9302 }
9303#elif defined(RT_ARCH_ARM64)
9304 /* ins vecdst[iQWord], gpr */
9305 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9306#else
9307# error "port me"
9308#endif
9309 return off;
9310}
9311
9312
9313/**
9314 * Emits a vecdst[x] = gprsrc store, 64-bit.
9315 */
9316DECL_INLINE_THROW(uint32_t)
9317iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9318{
9319 Assert(iQWord <= 3);
9320
9321#ifdef RT_ARCH_AMD64
9322 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
9323#elif defined(RT_ARCH_ARM64)
9324 Assert(!(iVecRegDst & 0x1));
9325 if (iQWord >= 2)
9326 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
9327 else
9328 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
9329#else
9330# error "port me"
9331#endif
9332 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9333 return off;
9334}
9335
9336
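/*
 * Usage sketch (illustrative only): broadcast a GPR into both 64-bit lanes of
 * the low 128 bits of a host SIMD register using the insert emitter above.
 * On ARM64, iVecRegDst must be the even low register of a pair.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleSplatGprToU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc)
{
    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, iVecRegDst, iGprSrc, 0 /*iQWord*/);
    return iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, iVecRegDst, iGprSrc, 1 /*iQWord*/);
}

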
9337/**
9338 * Emits a vecdst[x] = gprsrc store, 32-bit.
9339 */
9340DECL_FORCE_INLINE(uint32_t)
9341iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9342{
9343#ifdef RT_ARCH_AMD64
9344 if (iDWord >= 4)
9345 {
9346 /*
9347 * vpinsrd doesn't work on the upper 128-bits.
9348 * So we use the following sequence:
9349 * vextracti128 vectmp0, vecdst, 1
9350 * pinsrd vectmp0, gpr, #(iDWord - 4)
9351 * vinserti128 vecdst, vectmp0, 1
9352 */
9353 /* vextracti128 */
9354 pCodeBuf[off++] = X86_OP_VEX3;
9355 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9356 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9357 pCodeBuf[off++] = 0x39;
9358 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9359 pCodeBuf[off++] = 0x1;
9360
9361 /* pinsrd */
9362 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9363 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
9364 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9365 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9366 pCodeBuf[off++] = 0x0f;
9367 pCodeBuf[off++] = 0x3a;
9368 pCodeBuf[off++] = 0x22;
9369 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9370 pCodeBuf[off++] = iDWord - 4;
9371
9372 /* vinserti128 */
9373 pCodeBuf[off++] = X86_OP_VEX3;
9374 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9375 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9376 pCodeBuf[off++] = 0x38;
9377 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9378 pCodeBuf[off++] = 0x01; /* Immediate */
9379 }
9380 else
9381 {
9382 /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
9383 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9384 if (iVecRegDst >= 8 || iGprSrc >= 8)
9385 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9386 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9387 pCodeBuf[off++] = 0x0f;
9388 pCodeBuf[off++] = 0x3a;
9389 pCodeBuf[off++] = 0x22;
9390 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9391 pCodeBuf[off++] = iDWord;
9392 }
9393#elif defined(RT_ARCH_ARM64)
9394 /* ins vecdst[iDWord], gpr */
9395 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
9396#else
9397# error "port me"
9398#endif
9399 return off;
9400}
9401
9402
9403/**
9404 * Emits a vecdst[x] = gprsrc store, 32-bit.
9405 */
9406DECL_INLINE_THROW(uint32_t)
9407iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9408{
9409 Assert(iDWord <= 7);
9410
9411#ifdef RT_ARCH_AMD64
9412 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
9413#elif defined(RT_ARCH_ARM64)
9414 Assert(!(iVecRegDst & 0x1));
9415 if (iDWord >= 4)
9416 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
9417 else
9418 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
9419#else
9420# error "port me"
9421#endif
9422 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9423 return off;
9424}
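
/*
 * Usage sketch (hypothetical indices): dwords 4 through 7 live in the upper
 * 128 bits, so on AMD64 this takes the vextracti128/pinsrd/vinserti128
 * detour while ARM64 simply targets the odd register of the pair:
 *
 *     off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, 8, idxRegSrc, 5);
 */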
9425
9426
9427/**
9428 * Emits a vecdst[x] = gprsrc store, 16-bit.
9429 */
9430DECL_FORCE_INLINE(uint32_t)
9431iemNativeEmitSimdStoreGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9432{
9433#ifdef RT_ARCH_AMD64
9434    /* pinsrw vecdst, gpr, #iWord. */
9435 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9436 if (iVecRegDst >= 8 || iGprSrc >= 8)
9437 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9438 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9439 pCodeBuf[off++] = 0x0f;
9440 pCodeBuf[off++] = 0xc4;
9441 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9442 pCodeBuf[off++] = iWord;
9443#elif defined(RT_ARCH_ARM64)
9444    /* ins vecdst[iWord], gpr */
9445 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iWord, kArmv8InstrUmovInsSz_U16);
9446#else
9447# error "port me"
9448#endif
9449 return off;
9450}
9451
9452
9453/**
9454 * Emits a vecdst[x] = gprsrc store, 16-bit.
9455 */
9456DECL_INLINE_THROW(uint32_t)
9457iemNativeEmitSimdStoreGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9458{
9459 Assert(iWord <= 15);
9460
9461#ifdef RT_ARCH_AMD64
9462 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iGprSrc, iWord);
9463#elif defined(RT_ARCH_ARM64)
9464 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iWord);
9465#else
9466# error "port me"
9467#endif
9468 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9469 return off;
9470}
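
/*
 * Unlike the byte/dword/qword inserts, pinsrw on XMM registers is baseline
 * SSE2, hence no SSE4.1 assumption above.  Usage sketch with hypothetical
 * register indices:
 *
 *     off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxVecReg, idxRegSrc, 7);
 */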
9471
9472
9473/**
9474 * Emits a vecdst[x] = gprsrc store, 8-bit.
9475 */
9476DECL_FORCE_INLINE(uint32_t)
9477iemNativeEmitSimdStoreGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
9478{
9479#ifdef RT_ARCH_AMD64
9480    /* pinsrb vecdst, gpr, #iByte (ASSUMES SSE4.1). */
9481 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9482 if (iVecRegDst >= 8 || iGprSrc >= 8)
9483 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9484 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9485 pCodeBuf[off++] = 0x0f;
9486 pCodeBuf[off++] = 0x3a;
9487 pCodeBuf[off++] = 0x20;
9488 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9489 pCodeBuf[off++] = iByte;
9490#elif defined(RT_ARCH_ARM64)
9491    /* ins vecdst[iByte], gpr */
9492 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iByte, kArmv8InstrUmovInsSz_U8);
9493#else
9494# error "port me"
9495#endif
9496 return off;
9497}
9498
9499
9500/**
9501 * Emits a vecdst[x] = gprsrc store, 8-bit.
9502 */
9503DECL_INLINE_THROW(uint32_t)
9504iemNativeEmitSimdStoreGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
9505{
9506 Assert(iByte <= 15);
9507
9508#ifdef RT_ARCH_AMD64
9509 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iByte);
9510#elif defined(RT_ARCH_ARM64)
9511 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iByte);
9512#else
9513# error "port me"
9514#endif
9515 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9516 return off;
9517}
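
/*
 * Usage sketch (hypothetical indices): the byte and word stores only
 * address the low 128 bits, e.g. the last byte of an XMM register:
 *
 *     off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxVecReg, idxRegSrc, 15);
 *
 * which assembles to "pinsrb xmm, r32, 15" / "ins v.b[15], w".
 */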
9518
9519
9520/**
9521 * Emits a vecdst.au32[iDWord] = 0 store.
9522 */
9523DECL_FORCE_INLINE(uint32_t)
9524iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
9525{
9526 Assert(iDWord <= 7);
9527
9528#ifdef RT_ARCH_AMD64
9529 /*
9530 * xor tmp0, tmp0
9531 * pinsrd xmm, tmp0, iDword
9532 */
9533 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
9534 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
9535 pCodeBuf[off++] = 0x33;
9536 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
9537 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
9538#elif defined(RT_ARCH_ARM64)
9539 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9540 Assert(!(iVecReg & 0x1));
9541    /* ins vecreg[iDWord], wzr */
9542 if (iDWord >= 4)
9543 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
9544 else
9545 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
9546#else
9547# error "port me"
9548#endif
9549 return off;
9550}
9551
9552
9553/**
9554 * Emits a vecdst.au32[iDWord] = 0 store.
9555 */
9556DECL_INLINE_THROW(uint32_t)
9557iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
9558{
9560#ifdef RT_ARCH_AMD64
9561 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 22), off, iVecReg, iDWord);
9562#elif defined(RT_ARCH_ARM64)
9563 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
9564#else
9565# error "port me"
9566#endif
9567 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9568 return off;
9569}
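
/*
 * Usage sketch (hypothetical index): clearing dword 1 costs two AMD64
 * instructions (xor plus a pinsrd from the zeroed tmp0), but only a single
 * ARM64 one, since inserting from WZR writes zero directly:
 *
 *     off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxVecReg, 1);
 */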
9570
9571
9572/**
9573 * Emits a vecdst[0:127] = 0 store.
9574 */
9575DECL_FORCE_INLINE(uint32_t)
9576iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9577{
9578#ifdef RT_ARCH_AMD64
9579 /* pxor xmm, xmm */
9580 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9581 if (iVecReg >= 8)
9582 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
9583 pCodeBuf[off++] = 0x0f;
9584 pCodeBuf[off++] = 0xef;
9585 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9586#elif defined(RT_ARCH_ARM64)
9587 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9588 Assert(!(iVecReg & 0x1));
9589 /* eor vecreg, vecreg, vecreg */
9590 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
9591#else
9592# error "port me"
9593#endif
9594 return off;
9595}
9596
9597
9598/**
9599 * Emits a vecdst[0:127] = 0 store.
9600 */
9601DECL_INLINE_THROW(uint32_t)
9602iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9603{
9604#ifdef RT_ARCH_AMD64
9605 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
9606#elif defined(RT_ARCH_ARM64)
9607 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
9608#else
9609# error "port me"
9610#endif
9611 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9612 return off;
9613}
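
/*
 * The non-VEX pxor encoding is deliberate here: legacy SSE instructions
 * leave ymm[255:128] untouched, so only the low 128 bits get cleared,
 * matching the single eor on the even register of the ARM64 pair.  Usage
 * sketch:
 *
 *     off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxVecReg);
 */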
9614
9615
9616/**
9617 * Emits a vecdst[128:255] = 0 store.
9618 */
9619DECL_FORCE_INLINE(uint32_t)
9620iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9621{
9622#ifdef RT_ARCH_AMD64
9623 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
9624 if (iVecReg < 8)
9625 {
9626 pCodeBuf[off++] = X86_OP_VEX2;
9627 pCodeBuf[off++] = 0xf9;
9628 }
9629 else
9630 {
9631 pCodeBuf[off++] = X86_OP_VEX3;
9632 pCodeBuf[off++] = 0x41;
9633 pCodeBuf[off++] = 0x79;
9634 }
9635 pCodeBuf[off++] = 0x6f;
9636 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9637#elif defined(RT_ARCH_ARM64)
9638 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9639 Assert(!(iVecReg & 0x1));
9640 /* eor vecreg, vecreg, vecreg */
9641 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9642#else
9643# error "port me"
9644#endif
9645 return off;
9646}
9647
9648
9649/**
9650 * Emits a vecdst[128:255] = 0 store.
9651 */
9652DECL_INLINE_THROW(uint32_t)
9653iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9654{
9655#ifdef RT_ARCH_AMD64
9656 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
9657#elif defined(RT_ARCH_ARM64)
9658 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
9659#else
9660# error "port me"
9661#endif
9662 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9663 return off;
9664}
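
/*
 * Conversely, the VEX.128 self-move works because VEX-encoded 128-bit
 * instructions zero ymm[255:128] while leaving the low half intact.  Usage
 * sketch:
 *
 *     off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxVecReg);
 */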
9665
9666
9667/**
9668 * Emits a vecdst[0:255] = 0 store.
9669 */
9670DECL_FORCE_INLINE(uint32_t)
9671iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9672{
9673#ifdef RT_ARCH_AMD64
9674 /* vpxor ymm, ymm, ymm */
9675 if (iVecReg < 8)
9676 {
9677 pCodeBuf[off++] = X86_OP_VEX2;
9678 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9679 }
9680 else
9681 {
9682 pCodeBuf[off++] = X86_OP_VEX3;
9683 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
9684 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9685 }
9686 pCodeBuf[off++] = 0xef;
9687 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9688#elif defined(RT_ARCH_ARM64)
9689 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9690 Assert(!(iVecReg & 0x1));
9691 /* eor vecreg, vecreg, vecreg */
9692 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
9693 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9694#else
9695# error "port me"
9696#endif
9697 return off;
9698}
9699
9700
9701/**
9702 * Emits a vecdst[0:255] = 0 store.
9703 */
9704DECL_INLINE_THROW(uint32_t)
9705iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9706{
9707#ifdef RT_ARCH_AMD64
9708 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
9709#elif defined(RT_ARCH_ARM64)
9710 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
9711#else
9712# error "port me"
9713#endif
9714 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9715 return off;
9716}
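
/*
 * Usage sketch: a single "vpxor ymm, ymm, ymm" clears all 256 bits on
 * AMD64, whereas ARM64 needs one eor per 128-bit half of the register pair:
 *
 *     off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxVecReg);
 */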
9717
9718
9719/**
9720 * Emits a vecdst = gprsrc broadcast, 8-bit.
9721 */
9722DECL_FORCE_INLINE(uint32_t)
9723iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9724{
9725#ifdef RT_ARCH_AMD64
9726 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE 4.1) */
9727 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9728 if (iVecRegDst >= 8 || iGprSrc >= 8)
9729 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9730 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9731 pCodeBuf[off++] = 0x0f;
9732 pCodeBuf[off++] = 0x3a;
9733 pCodeBuf[off++] = 0x20;
9734 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9735 pCodeBuf[off++] = 0x00;
9736
9737 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
9738 pCodeBuf[off++] = X86_OP_VEX3;
9739 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9740 | 0x02 /* opcode map. */
9741 | ( iVecRegDst >= 8
9742 ? 0
9743 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9744 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9745 pCodeBuf[off++] = 0x78;
9746 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9747#elif defined(RT_ARCH_ARM64)
9748 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9749 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9750
9751    /* dup vecdst, gpr */
9752 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
9753 if (f256Bit)
9754 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
9755#else
9756# error "port me"
9757#endif
9758 return off;
9759}
9760
9761
9762/**
9763 * Emits a vecdst = gprsrc broadcast, 8-bit.
9764 */
9765DECL_INLINE_THROW(uint32_t)
9766iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9767{
9768#ifdef RT_ARCH_AMD64
9769 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9770#elif defined(RT_ARCH_ARM64)
9771 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9772#else
9773# error "port me"
9774#endif
9775 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9776 return off;
9777}
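
/*
 * Usage sketch (hypothetical indices): replicating a byte to all 32 lanes
 * of a 256-bit register.  AMD64 deposits it in lane 0 via pinsrb and then
 * has vpbroadcastb fan it out; ARM64 issues one dup per 128-bit half.  The
 * final argument is f256Bit:
 *
 *     off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxVecReg, idxRegSrc, true);
 */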
9778
9779
9780/**
9781 * Emits a vecdst = gprsrc broadcast, 16-bit.
9782 */
9783DECL_FORCE_INLINE(uint32_t)
9784iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9785{
9786#ifdef RT_ARCH_AMD64
9787 /* pinsrw vecdst, gpr, #0 */
9788 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9789 if (iVecRegDst >= 8 || iGprSrc >= 8)
9790 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9791 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9792 pCodeBuf[off++] = 0x0f;
9793 pCodeBuf[off++] = 0xc4;
9794 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9795 pCodeBuf[off++] = 0x00;
9796
9797    /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
9798 pCodeBuf[off++] = X86_OP_VEX3;
9799 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9800 | 0x02 /* opcode map. */
9801 | ( iVecRegDst >= 8
9802 ? 0
9803 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9804 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9805 pCodeBuf[off++] = 0x79;
9806 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9807#elif defined(RT_ARCH_ARM64)
9808 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9809 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9810
9811    /* dup vecdst, gpr */
9812 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
9813 if (f256Bit)
9814 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
9815#else
9816# error "port me"
9817#endif
9818 return off;
9819}
9820
9821
9822/**
9823 * Emits a vecdst = gprsrc broadcast, 16-bit.
9824 */
9825DECL_INLINE_THROW(uint32_t)
9826iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9827{
9828#ifdef RT_ARCH_AMD64
9829 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9830#elif defined(RT_ARCH_ARM64)
9831 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9832#else
9833# error "port me"
9834#endif
9835 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9836 return off;
9837}
9838
9839
9840/**
9841 * Emits a vecdst = gprsrc broadcast, 32-bit.
9842 */
9843DECL_FORCE_INLINE(uint32_t)
9844iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9845{
9846#ifdef RT_ARCH_AMD64
9847    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears;
9848 * vbroadcast needs a memory operand or another xmm register to work... */
9849
9850 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
9851 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9852 if (iVecRegDst >= 8 || iGprSrc >= 8)
9853 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9854 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9855 pCodeBuf[off++] = 0x0f;
9856 pCodeBuf[off++] = 0x3a;
9857 pCodeBuf[off++] = 0x22;
9858 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9859 pCodeBuf[off++] = 0x00;
9860
9861 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
9862 pCodeBuf[off++] = X86_OP_VEX3;
9863 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9864 | 0x02 /* opcode map. */
9865 | ( iVecRegDst >= 8
9866 ? 0
9867 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9868 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9869 pCodeBuf[off++] = 0x58;
9870 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9871#elif defined(RT_ARCH_ARM64)
9872 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9873 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9874
9875    /* dup vecdst, gpr */
9876 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
9877 if (f256Bit)
9878 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
9879#else
9880# error "port me"
9881#endif
9882 return off;
9883}
9884
9885
9886/**
9887 * Emits a vecdst = gprsrc broadcast, 32-bit.
9888 */
9889DECL_INLINE_THROW(uint32_t)
9890iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9891{
9892#ifdef RT_ARCH_AMD64
9893 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9894#elif defined(RT_ARCH_ARM64)
9895 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9896#else
9897# error "port me"
9898#endif
9899 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9900 return off;
9901}
9902
9903
9904/**
9905 * Emits a vecdst = gprsrc broadcast, 64-bit.
9906 */
9907DECL_FORCE_INLINE(uint32_t)
9908iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9909{
9910#ifdef RT_ARCH_AMD64
9911    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears;
9912 * vbroadcast needs a memory operand or another xmm register to work... */
9913
9914 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
9915 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9916 pCodeBuf[off++] = X86_OP_REX_W
9917 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9918 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9919 pCodeBuf[off++] = 0x0f;
9920 pCodeBuf[off++] = 0x3a;
9921 pCodeBuf[off++] = 0x22;
9922 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9923 pCodeBuf[off++] = 0x00;
9924
9925 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
9926 pCodeBuf[off++] = X86_OP_VEX3;
9927 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9928 | 0x02 /* opcode map. */
9929 | ( iVecRegDst >= 8
9930 ? 0
9931 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9932 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9933 pCodeBuf[off++] = 0x59;
9934 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9935#elif defined(RT_ARCH_ARM64)
9936 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9937 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9938
9939    /* dup vecdst, gpr */
9940 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
9941 if (f256Bit)
9942 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
9943#else
9944# error "port me"
9945#endif
9946 return off;
9947}
9948
9949
9950/**
9951 * Emits a vecdst = gprsrc broadcast, 64-bit.
9952 */
9953DECL_INLINE_THROW(uint32_t)
9954iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9955{
9956#ifdef RT_ARCH_AMD64
9957 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
9958#elif defined(RT_ARCH_ARM64)
9959 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9960#else
9961# error "port me"
9962#endif
9963 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9964 return off;
9965}
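
/*
 * Same pattern as the narrower broadcasts, except the pinsrq always carries
 * a REX.W prefix while the 32-bit variant only emits REX for extended
 * registers.  Usage sketch, passing false for f256Bit to keep it to the
 * low 128 bits:
 *
 *     off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxVecReg, idxRegSrc, false);
 */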
9966
9967
9968/**
9969 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9970 */
9971DECL_FORCE_INLINE(uint32_t)
9972iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9973{
9974#ifdef RT_ARCH_AMD64
9975 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
9976
9977 /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
9978 pCodeBuf[off++] = X86_OP_VEX3;
9979 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9980 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9981 pCodeBuf[off++] = 0x38;
9982 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9983 pCodeBuf[off++] = 0x01; /* Immediate */
9984#elif defined(RT_ARCH_ARM64)
9985 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9986 Assert(!(iVecRegDst & 0x1));
9987
9988 /* mov dst, src; alias for: orr dst, src, src */
9989 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9990 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
9991#else
9992# error "port me"
9993#endif
9994 return off;
9995}
9996
9997
9998/**
9999 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
10000 */
10001DECL_INLINE_THROW(uint32_t)
10002iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
10003{
10004#ifdef RT_ARCH_AMD64
10005 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
10006#elif defined(RT_ARCH_ARM64)
10007 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
10008#else
10009# error "port me"
10010#endif
10011 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10012 return off;
10013}
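
/*
 * Usage sketch (hypothetical indices): vbroadcasti128-style replication of
 * a 128-bit source into both halves of a 256-bit destination:
 *
 *     off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxVecRegDst, idxVecRegSrc);
 */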
10014
10015#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
10016
10017/** @} */
10018
10019#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
10020