source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@104984

Last change on this file was r104984, checked in by vboxsync, 8 months ago:

VMM/IEM: Relax alignment restrictions in native code TLB lookup, avoid the fallback/tlbmiss code path for most accesses as long as they're within the same page. bugref:10687

/* $Id: IEMN8veRecompilerEmit.h 104984 2024-06-20 14:07:04Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline    Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    if (uInfo == 0)
        pu32CodeBuf[off++] = ARMV8_A64_INSTR_NOP;
    else
        pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(ARMV8_A64_REG_XZR, (uint16_t)uInfo);

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

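/*
 * Illustrative sketch (editor's addition, not part of the original header):
 * how a caller in the recompiler loop might use the marker emitter to tag the
 * start of a recompiled instruction so it stands out when disassembling a
 * translation block.  The wrapper function and the 0xcafe info value are made
 * up for the example.
 */
#if 0 /* example only */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleEmitTaggedStart(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* On AMD64 this shows up as 'nop dword [0xcafe]', on ARM64 as 'movz xzr, #0xcafe'. */
    off = iemNativeEmitMarker(pReNative, off, 0xcafe);
    /* ... emit the actual instruction code here ... */
    return off;
}
#endif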

/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */

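/*
 * Illustrative sketch (editor's addition, not part of the original header):
 * the number of ModRM/SIB/displacement bytes iemNativeEmitGprByGprDisp emits
 * for a given base register and displacement, mirroring the three cases above.
 */
#if 0 /* example only */
static unsigned iemNativeExampleGprByGprDispSize(uint8_t iGprBase, int32_t offDisp)
{
    unsigned const cbSib = (iGprBase & 7) == X86_GREG_xSP ? 1 : 0; /* RSP/R12 always need a SIB byte. */
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP)
        return 1 + cbSib;       /* ModRM (+ SIB), no displacement. */
    if (offDisp == (int8_t)offDisp)
        return 1 + cbSib + 1;   /* ModRM (+ SIB) + disp8. */
    return 1 + cbSib + 4;       /* ModRM (+ SIB) + disp32. */
}
#endif
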
/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 6 instruction bytes.
 *      - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}

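/*
 * Illustrative note (editor's addition, not part of the original header):
 * which single ARM64 instruction iemNativeEmitLoadGpr32ImmEx picks for a few
 * sample 32-bit constants, following the special cases above:
 *
 *      0x00001234  ->  movz w0, #0x1234            (high half zero)
 *      0x56780000  ->  movz w0, #0x5678, lsl #16   (low half zero)
 *      0x1234ffff  ->  movn w0, #0xedcb, lsl #16   (low half all ones)
 *      0xffff5678  ->  movn w0, #0xa987            (high half all ones)
 *      0x12345678  ->  movz w0, #0x5678 + movk w0, #0x1234, lsl #16
 */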

/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 10 instruction bytes.
 *      - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * Quick simplification: Do 32-bit load if top half is zero.
     */
    if (uImm64 <= UINT32_MAX)
        return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);

    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29).  So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so that we switch to movk
     * for the remainder.
     */
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = ( uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

#else
# error "port me"
#endif
    return off;
}

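/*
 * Worked example (editor's addition, not part of the original header): for
 * uImm64 = 0xffffffff1234ffff three of the four half-words are all ones, so
 * the movn strategy wins and a single instruction suffices:
 *
 *      movn x0, #0xedcb, lsl #16   ; x0 = ~(0xedcb << 16) = 0xffffffff1234ffff
 *
 * whereas the movz/movk strategy would have needed four instructions, one per
 * half-word.
 */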

/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * we try to use those if we can, otherwise we have to use a temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * we try to use those if we can, otherwise we have to use a temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */

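/*
 * Illustrative note (editor's addition, not part of the original header): the
 * scaled 12-bit immediate form encodes offVCpu / cbData, so the directly
 * reachable VMCPU range grows with the access size:
 */
#if 0 /* example only */
AssertCompile(_4K * sizeof(uint8_t)  == 0x1000);    /* ldrb:  offsets 0..4095 */
AssertCompile(_4K * sizeof(uint32_t) == 0x4000);    /* ldr w: offsets 0..16380, 4-byte aligned */
AssertCompile(_4K * sizeof(uint64_t) == 0x8000);    /* ldr x: offsets 0..32760, 8-byte aligned */
#endif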


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    if (iGpr >= 8)
        pCodeBuf[off++] = X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 64-bit VCpu field.
 *
 * @note Will allocate temporary registers on both ARM64 and AMD64.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* Load the immediate into a temporary register and do a 'mov mem64, reg64'. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxRegImm, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}

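/*
 * Usage note (editor's addition, not part of the original header): on ARM64,
 * storing zero goes through XZR and thus needs no temporary register at all.
 * A hypothetical call site, with the field name made up for the example:
 */
#if 0 /* example only */
    off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, 0 /*uImm*/,
                                         RT_UOFFSETOF(VMCPU, iem.s.uSomeField) /* hypothetical field */);
#endif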

/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: @a idxTmp1 is always required!  Whether @a idxTmp2 is needed
 *       depends on whether the offset can be encoded as an immediate or not.
 *       The @a offVCpu immediate range is 0..8190 bytes from VMCPU and the
 *       same from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load effective address to a GPR of a VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else if (offVCpu <= 0xffffffU)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu >> 12,
                                                   true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
        if (offVCpu & 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, offVCpu & 0xfff);
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

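/*
 * Illustrative note (editor's addition, not part of the original header): the
 * ARM64 path builds the effective address with ADD instead of x86 LEA.  For a
 * hypothetical offVCpu of 0x12345 that misses the first two single-ADD cases,
 * the third case would emit:
 *
 *      add xDst, xPVMCPU, #0x12, lsl #12
 *      add xDst, xDst, #0x345
 */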

/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

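/*
 * Usage sketch (editor's addition, not part of the original header): bumping
 * a native-TB statistics counter.  The counter member name is made up for the
 * example; the temp register helpers are the ones used elsewhere in this file.
 */
#if 0 /* example only */
    uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
    uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
                                            RT_UOFFSETOF(VMCPU, iem.s.StatSomeCounter) /* hypothetical */);
    iemNativeRegFreeTmp(pReNative, idxTmp2);
    iemNativeRegFreeTmp(pReNative, idxTmp1);
#endif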

/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to 16-bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const         idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const            fShifted    = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

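/*
 * Illustrative note (editor's addition, not part of the original header):
 * whether the mask can be baked into the ORR/AND instruction depends on the
 * A64 logical-immediate encoding (a rotated, replicated run of ones), e.g.:
 *
 *      fMask = 0x000000f0  ->  encodable, 'orr w1, w1, #0xf0' (no temp register)
 *      fMask = 0x00012005  ->  not encodable, needs iemNativeRegAllocTmpImm
 */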
1319
1320/**
1321 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1322 *
1323 * @note May allocate temporary registers (not AMD64).
1324 */
1325DECL_FORCE_INLINE(uint32_t)
1326iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1327{
1328 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1329#ifdef RT_ARCH_AMD64
1330 /* and dword [pVCpu + offVCpu], imm8/32 */
1331 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1332 if (fMask < 0x80)
1333 {
1334 pCodeBuf[off++] = 0x83;
1335 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1336 pCodeBuf[off++] = (uint8_t)fMask;
1337 }
1338 else
1339 {
1340 pCodeBuf[off++] = 0x81;
1341 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1342 pCodeBuf[off++] = RT_BYTE1(fMask);
1343 pCodeBuf[off++] = RT_BYTE2(fMask);
1344 pCodeBuf[off++] = RT_BYTE3(fMask);
1345 pCodeBuf[off++] = RT_BYTE4(fMask);
1346 }
1347
1348#elif defined(RT_ARCH_ARM64)
1349 /* If the constant is unwieldy we'll need a register to hold it as well. */
1350 uint32_t uImmSizeLen, uImmRotate;
1351 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1352 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1353
1354 /* We need a temp register for holding the member value we're modifying. */
1355 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1356
1357 /* Determine how we're to access pVCpu first. */
1358 uint32_t const cbData = sizeof(uint32_t);
1359 if (offVCpu < (unsigned)(_4K * cbData))
1360 {
1361 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1362 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1363 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1364 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1365 if (idxTmpMask == UINT8_MAX)
1366 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1367 else
1368 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1369 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1370 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1371 }
1372 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1373 {
1374 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1375 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1376 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1377 if (idxTmpMask == UINT8_MAX)
1378 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1379 else
1380 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1381 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1382 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1383 }
1384 else
1385 {
1386 /* The offset is too large, so we must load it into a register and use
1387 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try use the 'LSL, #2' feature
1388 of the instruction if that'll reduce the constant to 16-bits. */
1389 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1390 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1391 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1392 if (fShifted)
1393 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1394 else
1395 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1396
1397 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1398 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1399
1400 if (idxTmpMask == UINT8_MAX)
1401 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1402 else
1403 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1404
1405 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1406 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1407 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1408 }
1409 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1410 if (idxTmpMask != UINT8_MAX)
1411 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1412
1413#else
1414# error "port me"
1415#endif
1416 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1417 return off;
1418}
1419
1420
1421/**
1422 * Emits a gprdst = gprsrc load.
1423 */
1424DECL_FORCE_INLINE(uint32_t)
1425iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1426{
1427#ifdef RT_ARCH_AMD64
1428 /* mov gprdst, gprsrc */
1429 if ((iGprDst | iGprSrc) >= 8)
1430 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1431 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1432 : X86_OP_REX_W | X86_OP_REX_R;
1433 else
1434 pCodeBuf[off++] = X86_OP_REX_W;
1435 pCodeBuf[off++] = 0x8b;
1436 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1437
1438#elif defined(RT_ARCH_ARM64)
1439 /* mov dst, src; alias for: orr dst, xzr, src */
1440 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1441
1442#else
1443# error "port me"
1444#endif
1445 return off;
1446}
1447
1448
1449/**
1450 * Emits a gprdst = gprsrc load.
1451 */
1452DECL_INLINE_THROW(uint32_t)
1453iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1454{
1455#ifdef RT_ARCH_AMD64
1456 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1457#elif defined(RT_ARCH_ARM64)
1458 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1459#else
1460# error "port me"
1461#endif
1462 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1463 return off;
1464}
1465
1466
1467/**
1468 * Emits a gprdst = gprsrc[31:0] load.
1469 * @note Bits 63 thru 32 are cleared.
1470 */
1471DECL_FORCE_INLINE(uint32_t)
1472iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1473{
1474#ifdef RT_ARCH_AMD64
1475 /* mov gprdst, gprsrc */
1476 if ((iGprDst | iGprSrc) >= 8)
1477 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1478 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1479 : X86_OP_REX_R;
1480 pCodeBuf[off++] = 0x8b;
1481 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1482
1483#elif defined(RT_ARCH_ARM64)
1484 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1485 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1486
1487#else
1488# error "port me"
1489#endif
1490 return off;
1491}
1492
1493
1494/**
1495 * Emits a gprdst = gprsrc[31:0] load.
1496 * @note Bits 63 thru 32 are cleared.
1497 */
1498DECL_INLINE_THROW(uint32_t)
1499iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1500{
1501#ifdef RT_ARCH_AMD64
1502 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1503#elif defined(RT_ARCH_ARM64)
1504 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1505#else
1506# error "port me"
1507#endif
1508 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1509 return off;
1510}
1511
1512
1513/**
1514 * Emits a gprdst = gprsrc[15:0] load.
1515 * @note Bits 63 thru 15 are cleared.
1516 */
1517DECL_INLINE_THROW(uint32_t)
1518iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1519{
1520#ifdef RT_ARCH_AMD64
1521 /* movzx Gv,Ew */
1522 if ((iGprDst | iGprSrc) >= 8)
1523 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1524 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1525 : X86_OP_REX_R;
1526 pCodeBuf[off++] = 0x0f;
1527 pCodeBuf[off++] = 0xb7;
1528 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1529
1530#elif defined(RT_ARCH_ARM64)
1531 /* and gprdst, gprsrc, #0xffff */
1532# if 1
1533 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1534 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1535# else
1536 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1537 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1538# endif
1539
1540#else
1541# error "port me"
1542#endif
1543 return off;
1544}
1545
1546
1547/**
1548 * Emits a gprdst = gprsrc[15:0] load.
1549 * @note Bits 63 thru 16 are cleared.
1550 */
1551DECL_INLINE_THROW(uint32_t)
1552iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1553{
1554#ifdef RT_ARCH_AMD64
1555 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1556#elif defined(RT_ARCH_ARM64)
1557 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1558#else
1559# error "port me"
1560#endif
1561 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1562 return off;
1563}
1564
1565
1566/**
1567 * Emits a gprdst = gprsrc[7:0] load.
1568 * @note Bits 63 thru 8 are cleared.
1569 */
1570DECL_FORCE_INLINE(uint32_t)
1571iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1572{
1573#ifdef RT_ARCH_AMD64
1574 /* movzx Gv,Eb */
1575 if (iGprDst >= 8 || iGprSrc >= 8)
1576 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1577 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1578 : X86_OP_REX_R;
1579 else if (iGprSrc >= 4)
1580 pCodeBuf[off++] = X86_OP_REX;
1581 pCodeBuf[off++] = 0x0f;
1582 pCodeBuf[off++] = 0xb6;
1583 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1584
1585#elif defined(RT_ARCH_ARM64)
1586 /* and gprdst, gprsrc, #0xff */
1587 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1588 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1589
1590#else
1591# error "port me"
1592#endif
1593 return off;
1594}
1595
1596
1597/**
1598 * Emits a gprdst = gprsrc[7:0] load.
1599 * @note Bits 63 thru 8 are cleared.
1600 */
1601DECL_INLINE_THROW(uint32_t)
1602iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1603{
1604#ifdef RT_ARCH_AMD64
1605 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1606#elif defined(RT_ARCH_ARM64)
1607 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1608#else
1609# error "port me"
1610#endif
1611 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1612 return off;
1613}
1614
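/*
 * Usage sketch (hypothetical helper, illustrative only): the narrowing copies
 * above all zero-extend, so the destination's upper bits are well defined
 * afterwards.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleEmitZeroExtendingCopies(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, 1, 0); /* r1 = r0[31:0], bits 63:32 zeroed */
    off = iemNativeEmitLoadGprFromGpr16(pReNative, off, 2, 0); /* r2 = r0[15:0], bits 63:16 zeroed */
    off = iemNativeEmitLoadGprFromGpr8( pReNative, off, 3, 0); /* r3 = r0[7:0],  bits 63:8  zeroed */
    return off;
}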
1615
1616/**
1617 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1618 * @note Bits 63 thru 8 are cleared.
1619 */
1620DECL_INLINE_THROW(uint32_t)
1621iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1622{
1623#ifdef RT_ARCH_AMD64
1624 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1625
1626 /* movzx Gv,Ew */
1627 if ((iGprDst | iGprSrc) >= 8)
1628 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1629 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1630 : X86_OP_REX_R;
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xb7;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635 /* shr Ev,8 */
1636 if (iGprDst >= 8)
1637 pbCodeBuf[off++] = X86_OP_REX_B;
1638 pbCodeBuf[off++] = 0xc1;
1639 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1640 pbCodeBuf[off++] = 8;
1641
1642#elif defined(RT_ARCH_ARM64)
1643 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1644 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1645 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1646
1647#else
1648# error "port me"
1649#endif
1650 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1651 return off;
1652}
1653
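/*
 * Usage sketch (hypothetical helper, illustrative only): fetching an x86
 * high-byte register (e.g. AH) from a host register shadowing the guest RAX.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleEmitFetchHighByte(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprRax)
{
    /* iGprDst = iGprRax[15:8]; movzx+shr on AMD64, a single ubfx on ARM64. */
    return iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, iGprDst, iGprRax);
}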
1654
1655/**
1656 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1657 */
1658DECL_INLINE_THROW(uint32_t)
1659iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1660{
1661#ifdef RT_ARCH_AMD64
1662 /* movsxd r64, r/m32 */
1663 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1664 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1665 pbCodeBuf[off++] = 0x63;
1666 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1667
1668#elif defined(RT_ARCH_ARM64)
1669 /* sxtw dst, src */
1670 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1671 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1672
1673#else
1674# error "port me"
1675#endif
1676 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1677 return off;
1678}
1679
1680
1681/**
1682 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1683 */
1684DECL_INLINE_THROW(uint32_t)
1685iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1686{
1687#ifdef RT_ARCH_AMD64
1688 /* movsx r64, r/m16 */
1689 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1690 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1691 pbCodeBuf[off++] = 0x0f;
1692 pbCodeBuf[off++] = 0xbf;
1693 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1694
1695#elif defined(RT_ARCH_ARM64)
1696 /* sxth dst, src */
1697 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1698 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1699
1700#else
1701# error "port me"
1702#endif
1703 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1704 return off;
1705}
1706
1707
1708/**
1709 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1710 */
1711DECL_INLINE_THROW(uint32_t)
1712iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1713{
1714#ifdef RT_ARCH_AMD64
1715 /* movsx r32, r/m16 */
1716 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1717 if (iGprDst >= 8 || iGprSrc >= 8)
1718 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1719 pbCodeBuf[off++] = 0x0f;
1720 pbCodeBuf[off++] = 0xbf;
1721 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1722
1723#elif defined(RT_ARCH_ARM64)
1724 /* sxth dst32, src */
1725 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1726 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1727
1728#else
1729# error "port me"
1730#endif
1731 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1732 return off;
1733}
1734
1735
1736/**
1737 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1738 */
1739DECL_INLINE_THROW(uint32_t)
1740iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1741{
1742#ifdef RT_ARCH_AMD64
1743 /* movsx r64, r/m8 */
1744 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1745 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1746 pbCodeBuf[off++] = 0x0f;
1747 pbCodeBuf[off++] = 0xbe;
1748 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1749
1750#elif defined(RT_ARCH_ARM64)
1751 /* sxtb dst, src */
1752 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1753 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1754
1755#else
1756# error "port me"
1757#endif
1758 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1759 return off;
1760}
1761
1762
1763/**
1764 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1765 * @note Bits 63 thru 32 are cleared.
1766 */
1767DECL_INLINE_THROW(uint32_t)
1768iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1769{
1770#ifdef RT_ARCH_AMD64
1771 /* movsx r32, r/m8 */
1772 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1773 if (iGprDst >= 8 || iGprSrc >= 8)
1774 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1775 else if (iGprSrc >= 4)
1776 pbCodeBuf[off++] = X86_OP_REX;
1777 pbCodeBuf[off++] = 0x0f;
1778 pbCodeBuf[off++] = 0xbe;
1779 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1780
1781#elif defined(RT_ARCH_ARM64)
1782 /* sxtb dst32, src32 */
1783 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1784 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1785
1786#else
1787# error "port me"
1788#endif
1789 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1790 return off;
1791}
1792
1793
1794/**
1795 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1796 * @note Bits 63 thru 16 are cleared.
1797 */
1798DECL_INLINE_THROW(uint32_t)
1799iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1800{
1801#ifdef RT_ARCH_AMD64
1802 /* movsx r16, r/m8 */
1803 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1804 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1805 if (iGprDst >= 8 || iGprSrc >= 8)
1806 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1807 else if (iGprSrc >= 4)
1808 pbCodeBuf[off++] = X86_OP_REX;
1809 pbCodeBuf[off++] = 0x0f;
1810 pbCodeBuf[off++] = 0xbe;
1811 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1812
1813 /* movzx r32, r/m16 */
1814 if (iGprDst >= 8)
1815 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1816 pbCodeBuf[off++] = 0x0f;
1817 pbCodeBuf[off++] = 0xb7;
1818 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1819
1820#elif defined(RT_ARCH_ARM64)
1821 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1822 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1823 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1824 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1825 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1826
1827#else
1828# error "port me"
1829#endif
1830 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1831 return off;
1832}
1833
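/*
 * Usage sketch (hypothetical helper, illustrative only): the sign-extending
 * counterparts of the copies above, widening into a 64-bit destination.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleEmitSignExtendingCopies(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, 1, 0); /* movsxd / sxtw */
    off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, 2, 0); /* movsx  / sxth */
    off = iemNativeEmitLoadGprSignExtendedFromGpr8( pReNative, off, 3, 0); /* movsx  / sxtb */
    return off;
}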
1834
1835/**
1836 * Emits a gprdst = gprsrc + addend load.
1837 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1838 */
1839#ifdef RT_ARCH_AMD64
1840DECL_INLINE_THROW(uint32_t)
1841iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1842 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1843{
1844 Assert(iAddend != 0);
1845
1846 /* lea gprdst, [gprsrc + iAddend] */
1847 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1848 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1849 pbCodeBuf[off++] = 0x8d;
1850 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1851 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1852 return off;
1853}
1854
1855#elif defined(RT_ARCH_ARM64)
1856DECL_INLINE_THROW(uint32_t)
1857iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1858 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1859{
1860 if ((uint32_t)iAddend < 4096)
1861 {
1862 /* add dst, src, uimm12 */
1863 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1864 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1865 }
1866 else if ((uint32_t)-iAddend < 4096)
1867 {
1868 /* sub dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1871 }
1872 else
1873 {
1874 Assert(iGprSrc != iGprDst);
1875 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1876 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1877 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1878 }
1879 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1880 return off;
1881}
1882#else
1883# error "port me"
1884#endif
1885
1886/**
1887 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1888 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1889 */
1890#ifdef RT_ARCH_AMD64
1891DECL_INLINE_THROW(uint32_t)
1892iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1893 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1894#else
1895DECL_INLINE_THROW(uint32_t)
1896iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1897 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1898#endif
1899{
1900 if (iAddend != 0)
1901 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1902 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1903}
1904
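/*
 * Usage sketch (hypothetical helper, illustrative only): computing
 * dst = src + constant without clobbering the source register, as used for
 * effective address calculations.  An addend of zero degenerates into the
 * plain register copy.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleEmitBasePlusDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
    /* 'lea dst, [src + 16]' on AMD64; 'add dst, src, #16' on ARM64. */
    return iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, iGprDst, iGprSrc, 16 /*iAddend*/);
}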
1905
1906/**
1907 * Emits a gprdst = gprsrc32 + addend load.
1908 * @note Bits 63 thru 32 are cleared.
1909 */
1910DECL_INLINE_THROW(uint32_t)
1911iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1912 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1913{
1914 Assert(iAddend != 0);
1915
1916#ifdef RT_ARCH_AMD64
1917 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1918 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1919 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1920 if ((iGprDst | iGprSrc) >= 8)
1921 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1922 pbCodeBuf[off++] = 0x8d;
1923 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1924
1925#elif defined(RT_ARCH_ARM64)
1926 if ((uint32_t)iAddend < 4096)
1927 {
1928 /* add dst, src, uimm12 */
1929 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1930 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1931 }
1932 else if ((uint32_t)-iAddend < 4096)
1933 {
1934 /* sub dst, src, uimm12 */
1935 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1936 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1937 }
1938 else
1939 {
1940 Assert(iGprSrc != iGprDst);
1941 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1942 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1943 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1944 }
1945
1946#else
1947# error "port me"
1948#endif
1949 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1950 return off;
1951}
1952
1953
1954/**
1955 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1956 */
1957DECL_INLINE_THROW(uint32_t)
1958iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1959 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1960{
1961 if (iAddend != 0)
1962 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1963 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1964}
1965
1966
1967/**
1968 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1969 * destination.
1970 */
1971DECL_FORCE_INLINE(uint32_t)
1972iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1973{
1974#ifdef RT_ARCH_AMD64
1975 /* mov reg16, r/m16 */
1976 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1977 if (idxDst >= 8 || idxSrc >= 8)
1978 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1979 pCodeBuf[off++] = 0x8b;
1980 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1981
1982#elif defined(RT_ARCH_ARM64)
1983 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1984 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1985
1986#else
1987# error "Port me!"
1988#endif
1989 return off;
1990}
1991
1992
1993/**
1994 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1995 * destination.
1996 */
1997DECL_INLINE_THROW(uint32_t)
1998iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1999{
2000#ifdef RT_ARCH_AMD64
2001 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
2002#elif defined(RT_ARCH_ARM64)
2003 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
2004#else
2005# error "Port me!"
2006#endif
2007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2008 return off;
2009}
2010
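/*
 * Usage sketch (hypothetical helper, illustrative only): an x86 instruction
 * writing AX must leave RAX[63:16] untouched, so a 16-bit result is merged
 * into the shadow register rather than copied.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleEmitWriteAx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprRax, uint8_t iGprResult)
{
    /* 'mov ax, result16' on AMD64; 'bfi rax_shadow, result, 0, 16' on ARM64. */
    return iemNativeEmitGprMergeInGpr16(pReNative, off, iGprRax, iGprResult);
}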
2011
2012#ifdef RT_ARCH_AMD64
2013/**
2014 * Common bit of iemNativeEmitLoadGprByBp and friends.
2015 */
2016DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
2017 PIEMRECOMPILERSTATE pReNativeAssert)
2018{
2019 if (offDisp < 128 && offDisp >= -128)
2020 {
2021 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
2022 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
2023 }
2024 else
2025 {
2026 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
2027 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2028 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2029 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2030 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2031 }
2032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
2033 return off;
2034}
2035#elif defined(RT_ARCH_ARM64)
2036/**
2037 * Common bit of iemNativeEmitLoadGprByBp and friends.
2038 */
2039DECL_FORCE_INLINE_THROW(uint32_t)
2040iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2041 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2042{
2043 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
2044 {
2045 /* str w/ unsigned imm12 (scaled) */
2046 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2047 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
2048 }
2049 else if (offDisp >= -256 && offDisp <= 256)
2050 {
2051 /* stur w/ signed imm9 (unscaled) */
2052 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2053 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
2054 }
2055 else
2056 {
2057 /* Use temporary indexing register. */
2058 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2059 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2060 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2061 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2062 }
2063 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2064 return off;
2065}
2066#endif
2067
2068
2069/**
2070 * Emits a 64-bit GPR load instruction with a BP relative source address.
2071 */
2072DECL_INLINE_THROW(uint32_t)
2073iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2074{
2075#ifdef RT_ARCH_AMD64
2076 /* mov gprdst, qword [rbp + offDisp] */
2077 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2078 if (iGprDst < 8)
2079 pbCodeBuf[off++] = X86_OP_REX_W;
2080 else
2081 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2082 pbCodeBuf[off++] = 0x8b;
2083 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2084
2085#elif defined(RT_ARCH_ARM64)
2086 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2087
2088#else
2089# error "port me"
2090#endif
2091}
2092
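/*
 * Usage sketch (hypothetical helper, illustrative only): reloading a value
 * spilled to the stack frame.  The displacement here is made up; real callers
 * use the frame layout offsets from IEMN8veRecompiler.h.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleEmitReloadSpilledGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
{
    /* 'mov dst, [rbp - 32]' on AMD64; 'ldur dst, [bp, #-32]' on ARM64. */
    return iemNativeEmitLoadGprByBp(pReNative, off, iGprDst, -32 /*offDisp*/);
}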
2093
2094/**
2095 * Emits a 32-bit GPR load instruction with a BP relative source address.
2096 * @note Bits 63 thru 32 of the GPR will be cleared.
2097 */
2098DECL_INLINE_THROW(uint32_t)
2099iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2100{
2101#ifdef RT_ARCH_AMD64
2102 /* mov gprdst, dword [rbp + offDisp] */
2103 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2104 if (iGprDst >= 8)
2105 pbCodeBuf[off++] = X86_OP_REX_R;
2106 pbCodeBuf[off++] = 0x8b;
2107 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2108
2109#elif defined(RT_ARCH_ARM64)
2110 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2111
2112#else
2113# error "port me"
2114#endif
2115}
2116
2117
2118/**
2119 * Emits a 16-bit GPR load instruction with a BP relative source address.
2120 * @note Bits 63 thru 16 of the GPR will be cleared.
2121 */
2122DECL_INLINE_THROW(uint32_t)
2123iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2124{
2125#ifdef RT_ARCH_AMD64
2126 /* movzx gprdst, word [rbp + offDisp] */
2127 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2128 if (iGprDst >= 8)
2129 pbCodeBuf[off++] = X86_OP_REX_R;
2130 pbCodeBuf[off++] = 0x0f;
2131 pbCodeBuf[off++] = 0xb7;
2132 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2133
2134#elif defined(RT_ARCH_ARM64)
2135 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2136
2137#else
2138# error "port me"
2139#endif
2140}
2141
2142
2143/**
2144 * Emits an 8-bit GPR load instruction with a BP relative source address.
2145 * @note Bits 63 thru 8 of the GPR will be cleared.
2146 */
2147DECL_INLINE_THROW(uint32_t)
2148iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2149{
2150#ifdef RT_ARCH_AMD64
2151 /* movzx gprdst, byte [rbp + offDisp] */
2152 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2153 if (iGprDst >= 8)
2154 pbCodeBuf[off++] = X86_OP_REX_R;
2155 pbCodeBuf[off++] = 0x0f;
2156 pbCodeBuf[off++] = 0xb6;
2157 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2158
2159#elif defined(RT_ARCH_ARM64)
2160 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2161
2162#else
2163# error "port me"
2164#endif
2165}
2166
2167
2168#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2169/**
2170 * Emits a 128-bit vector register load instruction with a BP relative source address.
2171 */
2172DECL_FORCE_INLINE_THROW(uint32_t)
2173iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2174{
2175#ifdef RT_ARCH_AMD64
2176 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2177
2178 /* movdqu reg128, mem128 */
2179 pbCodeBuf[off++] = 0xf3;
2180 if (iVecRegDst >= 8)
2181 pbCodeBuf[off++] = X86_OP_REX_R;
2182 pbCodeBuf[off++] = 0x0f;
2183 pbCodeBuf[off++] = 0x6f;
2184 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2185#elif defined(RT_ARCH_ARM64)
2186 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2187#else
2188# error "port me"
2189#endif
2190}
2191
2192
2193/**
2194 * Emits a 256-bit vector register load instruction with a BP relative source address.
2195 */
2196DECL_FORCE_INLINE_THROW(uint32_t)
2197iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2198{
2199#ifdef RT_ARCH_AMD64
2200 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2201
2202 /* vmovdqu reg256, mem256 */
2203 pbCodeBuf[off++] = X86_OP_VEX2;
2204 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2205 pbCodeBuf[off++] = 0x6f;
2206 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2207#elif defined(RT_ARCH_ARM64)
2208 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2209 Assert(!(iVecRegDst & 0x1));
2210 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2211 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2212#else
2213# error "port me"
2214#endif
2215}
2216
2217#endif
2218
2219
2220/**
2221 * Emits a load effective address into a GPR with a BP relative source address.
2222 */
2223DECL_INLINE_THROW(uint32_t)
2224iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2225{
2226#ifdef RT_ARCH_AMD64
2227 /* lea gprdst, [rbp + offDisp] */
2228 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2229 if (iGprDst < 8)
2230 pbCodeBuf[off++] = X86_OP_REX_W;
2231 else
2232 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2233 pbCodeBuf[off++] = 0x8d;
2234 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2235
2236#elif defined(RT_ARCH_ARM64)
2237 bool const fSub = offDisp < 0;
2238 uint32_t const offAbsDisp = (uint32_t)RT_ABS(offDisp);
2239 if (offAbsDisp <= 0xffffffU)
2240 {
2241 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2242 if (offAbsDisp <= 0xfffU)
2243 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp);
2244 else
2245 {
2246 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp >> 12,
2247 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2248 if (offAbsDisp & 0xfffU)
2249 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, offAbsDisp & 0xfff);
2250 }
2251 }
2252 else
2253 {
2254 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2255 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offAbsDisp);
2256 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2257 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2258 }
2259
2260#else
2261# error "port me"
2262#endif
2263
2264 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2265 return off;
2266}
2267
2268
2269/**
2270 * Emits a 64-bit GPR store with a BP relative destination address.
2271 *
2272 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2273 */
2274DECL_INLINE_THROW(uint32_t)
2275iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2276{
2277#ifdef RT_ARCH_AMD64
2278 /* mov qword [rbp + offDisp], gprdst */
2279 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2280 if (iGprSrc < 8)
2281 pbCodeBuf[off++] = X86_OP_REX_W;
2282 else
2283 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2284 pbCodeBuf[off++] = 0x89;
2285 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2286
2287#elif defined(RT_ARCH_ARM64)
2288 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2289 {
2290 /* str w/ unsigned imm12 (scaled) */
2291 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2292 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2293 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2294 }
2295 else if (offDisp >= -256 && offDisp <= 256)
2296 {
2297 /* stur w/ signed imm9 (unscaled) */
2298 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2299 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2300 }
2301 else if ((uint32_t)-offDisp < (unsigned)_4K)
2302 {
2303 /* Use temporary indexing register w/ sub uimm12. */
2304 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2305 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2306 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2307 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2308 }
2309 else
2310 {
2311 /* Use temporary indexing register. */
2312 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2313 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2314 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2315 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2316 }
2317 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2318 return off;
2319
2320#else
2321# error "Port me!"
2322#endif
2323}
2324
2325
2326/**
2327 * Emits a 64-bit immediate store with a BP relative destination address.
2328 *
2329 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2330 */
2331DECL_INLINE_THROW(uint32_t)
2332iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2333{
2334#ifdef RT_ARCH_AMD64
2335 if ((int64_t)uImm64 == (int32_t)uImm64)
2336 {
2337 /* mov qword [rbp + offDisp], imm32 - sign extended */
2338 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2339 pbCodeBuf[off++] = X86_OP_REX_W;
2340 pbCodeBuf[off++] = 0xc7;
2341 if (offDisp < 128 && offDisp >= -128)
2342 {
2343 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2344 pbCodeBuf[off++] = (uint8_t)offDisp;
2345 }
2346 else
2347 {
2348 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2349 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2350 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2351 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2352 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2353 }
2354 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2355 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2356 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2357 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2358 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2359 return off;
2360 }
2361#endif
2362
2363 /* Load tmp0, imm64; Store tmp to bp+disp. */
2364 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2365 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2366}
2367
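/*
 * Usage sketch (hypothetical helper, illustrative only): initializing a
 * 64-bit frame slot with a constant.  A sign-extendable constant becomes a
 * single 'mov qword [rbp+disp], imm32' on AMD64; anything else (and always
 * on ARM64) is staged through IEMNATIVE_REG_FIXED_TMP0.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleEmitInitFrameSlot(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    return iemNativeEmitStoreImm64ByBp(pReNative, off, -40 /*offDisp*/, UINT64_C(42));
}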
2368#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2369
2370/**
2371 * Emits a 128-bit vector register store with a BP relative destination address.
2372 *
2373 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2374 */
2375DECL_INLINE_THROW(uint32_t)
2376iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2377{
2378#ifdef RT_ARCH_AMD64
2379 /* movdqu [rbp + offDisp], vecsrc */
2380 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2381 pbCodeBuf[off++] = 0xf3;
2382 if (iVecRegSrc >= 8)
2383 pbCodeBuf[off++] = X86_OP_REX_R;
2384 pbCodeBuf[off++] = 0x0f;
2385 pbCodeBuf[off++] = 0x7f;
2386 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2387
2388#elif defined(RT_ARCH_ARM64)
2389 if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2390 {
2391 /* str w/ unsigned imm12 (scaled) */
2392 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2393 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2394 ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2395 }
2396 else if (offDisp >= -256 && offDisp <= 256)
2397 {
2398 /* stur w/ signed imm9 (unscaled) */
2399 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2400 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2401 }
2402 else if ((uint32_t)-offDisp < (unsigned)_4K)
2403 {
2404 /* Use temporary indexing register w/ sub uimm12. */
2405 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2406 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2407 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2408 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2409 }
2410 else
2411 {
2412 /* Use temporary indexing register. */
2413 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2414 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2415 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2416 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2417 }
2418 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2419 return off;
2420
2421#else
2422# error "Port me!"
2423#endif
2424}
2425
2426
2427/**
2428 * Emits a 256-bit vector register store with a BP relative destination address.
2429 *
2430 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2431 */
2432DECL_INLINE_THROW(uint32_t)
2433iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2434{
2435#ifdef RT_ARCH_AMD64
2436 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2437
2438 /* vmovdqu mem256, reg256 */
2439 pbCodeBuf[off++] = X86_OP_VEX2;
2440 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2441 pbCodeBuf[off++] = 0x7f;
2442 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2443#elif defined(RT_ARCH_ARM64)
2444 Assert(!(iVecRegSrc & 0x1));
2445 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2446 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2447#else
2448# error "Port me!"
2449#endif
2450}
2451
2452#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
2453#if defined(RT_ARCH_ARM64)
2454
2455/**
2456 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2457 *
2458 * @note Odd and large @a offDisp values require a temporary, unless it's a
2459 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2460 * caller does not heed this.
2461 *
2462 * @note DON'T try this with prefetch.
2463 */
2464DECL_FORCE_INLINE_THROW(uint32_t)
2465iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2466 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2467{
2468 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2469 {
2470 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2471 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2472 }
2473 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2474 && iGprReg != iGprBase)
2475 || iGprTmp != UINT8_MAX)
2476 {
2477 /* The offset is too large, so we must load it into a register and use
2478 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2479 /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2480 if (iGprTmp == UINT8_MAX)
2481 iGprTmp = iGprReg;
2482 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2483 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2484 }
2485 else
2486# ifdef IEM_WITH_THROW_CATCH
2487 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2488# else
2489 AssertReleaseFailedStmt(off = UINT32_MAX);
2490# endif
2491 return off;
2492}
2493
2494/**
2495 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2496 */
2497DECL_FORCE_INLINE_THROW(uint32_t)
2498iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2499 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2500{
2501 /*
2502 * There are a couple of ldr variants that take an immediate offset, so
2503 * try to use those if we can, otherwise we have to use a temporary
2504 * register to help with the addressing.
2505 */
2506 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2507 {
2508 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2509 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2510 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2511 }
2512 else
2513 {
2514 /* The offset is too large, so we must load it into a register and use
2515 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2516 /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2517 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2518
2519 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2520 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2521
2522 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2523 }
2524 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2525 return off;
2526}
2527
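/*
 * Worked example (informal): with cbData = 8 (64-bit access) the scaled
 * unsigned imm12 form above covers byte offsets 0..4095*8 = 0x7ff8, which is
 * where the "-0x7ff8...0x7ff8" remarks on the callers below come from;
 * negative, misaligned or larger offsets fall back to the register-indexed
 * form via a temporary.
 */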
2528# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2529/**
2530 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2531 *
2532 * @note Odd and large @a offDisp values require a temporary (@a iGprTmp), as
2533 * a vector register cannot double as the address temporary the way a
2534 * GPR can. Will assert / throw if the caller does not heed this.
2535 *
2536 * @note DON'T try this with prefetch.
2537 */
2538DECL_FORCE_INLINE_THROW(uint32_t)
2539iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2540 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2541{
2542 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2543 {
2544 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2545 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2546 }
2547 else if (iGprTmp != UINT8_MAX) /* unlike the GPR variant above, a vector
2548 register cannot double as the address temporary */
2549 {
2550 /* The offset is too large, so we must load it into a register and use
2551 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2552 /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2553 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2554 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2555 }
2556 else
2557# ifdef IEM_WITH_THROW_CATCH
2558 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2559# else
2560 AssertReleaseFailedStmt(off = UINT32_MAX);
2561# endif
2562 return off;
2563}
2564# endif
2565
2566
2567/**
2568 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2569 */
2570DECL_FORCE_INLINE_THROW(uint32_t)
2571iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2572 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2573{
2574 /*
2575 * There are a couple of ldr variants that take an immediate offset, so
2576 * try to use those if we can, otherwise we have to use a temporary
2577 * register to help with the addressing.
2578 */
2579 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2580 {
2581 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2582 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2583 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2584 }
2585 else
2586 {
2587 /* The offset is too large, so we must load it into a register and use
2588 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2589 /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2590 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2591
2592 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2593 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2594
2595 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2596 }
2597 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2598 return off;
2599}
2600#endif /* RT_ARCH_ARM64 */
2601
2602/**
2603 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2604 *
2605 * @note ARM64: Misaligned @a offDisp values and values not in the
2606 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2607 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2608 * does not heed this.
2609 */
2610DECL_FORCE_INLINE_THROW(uint32_t)
2611iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2612 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2613{
2614#ifdef RT_ARCH_AMD64
2615 /* mov reg64, mem64 */
2616 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2617 pCodeBuf[off++] = 0x8b;
2618 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2619 RT_NOREF(iGprTmp);
2620
2621#elif defined(RT_ARCH_ARM64)
2622 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2623 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2624
2625#else
2626# error "port me"
2627#endif
2628 return off;
2629}
2630
2631
2632/**
2633 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2634 */
2635DECL_INLINE_THROW(uint32_t)
2636iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2637{
2638#ifdef RT_ARCH_AMD64
2639 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2640 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2641
2642#elif defined(RT_ARCH_ARM64)
2643 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2644
2645#else
2646# error "port me"
2647#endif
2648 return off;
2649}
2650
2651
2652/**
2653 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2654 *
2655 * @note ARM64: Misaligned @a offDisp values and values not in the
2656 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2657 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2658 * caller does not heed this.
2659 *
2660 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2661 */
2662DECL_FORCE_INLINE_THROW(uint32_t)
2663iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2664 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2665{
2666#ifdef RT_ARCH_AMD64
2667 /* mov reg32, mem32 */
2668 if (iGprDst >= 8 || iGprBase >= 8)
2669 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2670 pCodeBuf[off++] = 0x8b;
2671 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2672 RT_NOREF(iGprTmp);
2673
2674#elif defined(RT_ARCH_ARM64)
2675 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2676 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2677
2678#else
2679# error "port me"
2680#endif
2681 return off;
2682}
2683
2684
2685/**
2686 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2687 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2688 */
2689DECL_INLINE_THROW(uint32_t)
2690iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2691{
2692#ifdef RT_ARCH_AMD64
2693 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2694 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2695
2696#elif defined(RT_ARCH_ARM64)
2697 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2698
2699#else
2700# error "port me"
2701#endif
2702 return off;
2703}
2704
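/*
 * Usage sketch (hypothetical helper, illustrative only): the Ex variants let
 * a caller ensure code buffer space once and then emit several instructions
 * back to back.  The offsets below are small and naturally aligned, so no
 * temporary register (iGprTmp) is needed on ARM64.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleEmitLoadPair(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprBase)
{
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
    off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, 2 /*iGprDst*/, iGprBase, 0 /*offDisp*/);
    off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, 3 /*iGprDst*/, iGprBase, 8 /*offDisp*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}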
2705
2706/**
2707 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2708 * sign-extending the value to 64 bits.
2709 *
2710 * @note ARM64: Misaligned @a offDisp values and values not in the
2711 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2712 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2713 * caller does not heed this.
2714 */
2715DECL_FORCE_INLINE_THROW(uint32_t)
2716iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2717 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2718{
2719#ifdef RT_ARCH_AMD64
2720 /* movsxd reg64, mem32 */
2721 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2722 pCodeBuf[off++] = 0x63;
2723 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2724 RT_NOREF(iGprTmp);
2725
2726#elif defined(RT_ARCH_ARM64)
2727 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2728 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2729
2730#else
2731# error "port me"
2732#endif
2733 return off;
2734}
2735
2736
2737/**
2738 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2739 *
2740 * @note ARM64: Misaligned @a offDisp values and values not in the
2741 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2742 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2743 * caller does not heed this.
2744 *
2745 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2746 */
2747DECL_FORCE_INLINE_THROW(uint32_t)
2748iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2749 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2750{
2751#ifdef RT_ARCH_AMD64
2752 /* movzx reg32, mem16 */
2753 if (iGprDst >= 8 || iGprBase >= 8)
2754 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2755 pCodeBuf[off++] = 0x0f;
2756 pCodeBuf[off++] = 0xb7;
2757 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2758 RT_NOREF(iGprTmp);
2759
2760#elif defined(RT_ARCH_ARM64)
2761 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2762 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2763
2764#else
2765# error "port me"
2766#endif
2767 return off;
2768}
2769
2770
2771/**
2772 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2773 * sign-extending the value to 64 bits.
2774 *
2775 * @note ARM64: Misaligned @a offDisp values and values not in the
2776 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2777 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2778 * caller does not heed this.
2779 */
2780DECL_FORCE_INLINE_THROW(uint32_t)
2781iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2782 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2783{
2784#ifdef RT_ARCH_AMD64
2785 /* movsx reg64, mem16 */
2786 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2787 pCodeBuf[off++] = 0x0f;
2788 pCodeBuf[off++] = 0xbf;
2789 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2790 RT_NOREF(iGprTmp);
2791
2792#elif defined(RT_ARCH_ARM64)
2793 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2794 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2795
2796#else
2797# error "port me"
2798#endif
2799 return off;
2800}
2801
2802
2803/**
2804 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2805 * sign-extending the value to 32 bits.
2806 *
2807 * @note ARM64: Misaligned @a offDisp values and values not in the
2808 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2809 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2810 * caller does not heed this.
2811 *
2812 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2813 */
2814DECL_FORCE_INLINE_THROW(uint32_t)
2815iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2816 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2817{
2818#ifdef RT_ARCH_AMD64
2819 /* movsx reg32, mem16 */
2820 if (iGprDst >= 8 || iGprBase >= 8)
2821 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2822 pCodeBuf[off++] = 0x0f;
2823 pCodeBuf[off++] = 0xbf;
2824 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2825 RT_NOREF(iGprTmp);
2826
2827#elif defined(RT_ARCH_ARM64)
2828 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2829 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2830
2831#else
2832# error "port me"
2833#endif
2834 return off;
2835}
2836
2837
2838/**
2839 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2840 *
2841 * @note ARM64: @a offDisp values not in the 0...0xfff range will require a
2842 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2843 * same. Will assert / throw if caller does not heed this.
2844 *
2845 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2846 */
2847DECL_FORCE_INLINE_THROW(uint32_t)
2848iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2849 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2850{
2851#ifdef RT_ARCH_AMD64
2852 /* movzx reg32, mem8 */
2853 if (iGprDst >= 8 || iGprBase >= 8)
2854 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2855 pCodeBuf[off++] = 0x0f;
2856 pCodeBuf[off++] = 0xb6;
2857 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2858 RT_NOREF(iGprTmp);
2859
2860#elif defined(RT_ARCH_ARM64)
2861 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2862 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2863
2864#else
2865# error "port me"
2866#endif
2867 return off;
2868}
2869
2870
2871/**
2872 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2873 * sign-extending the value to 64 bits.
2874 *
2875 * @note ARM64: @a offDisp values not in the 0...0xfff range will require a
2876 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2877 * same. Will assert / throw if caller does not heed this.
2878 */
2879DECL_FORCE_INLINE_THROW(uint32_t)
2880iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2881 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2882{
2883#ifdef RT_ARCH_AMD64
2884 /* movsx reg64, mem8 */
2885 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2886 pCodeBuf[off++] = 0x0f;
2887 pCodeBuf[off++] = 0xbe;
2888 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2889 RT_NOREF(iGprTmp);
2890
2891#elif defined(RT_ARCH_ARM64)
2892 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2893 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2894
2895#else
2896# error "port me"
2897#endif
2898 return off;
2899}
2900
2901
2902/**
2903 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2904 * sign-extending the value to 32 bits.
2905 *
2906 * @note ARM64: @a offDisp values not in the 0...0xfff range will require a
2907 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2908 * same. Will assert / throw if caller does not heed this.
2909 *
2910 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2911 */
2912DECL_FORCE_INLINE_THROW(uint32_t)
2913iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2914 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2915{
2916#ifdef RT_ARCH_AMD64
2917 /* movsx reg32, mem8 */
2918 if (iGprDst >= 8 || iGprBase >= 8)
2919 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2920 pCodeBuf[off++] = 0x0f;
2921 pCodeBuf[off++] = 0xbe;
2922 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2923 RT_NOREF(iGprTmp);
2924
2925#elif defined(RT_ARCH_ARM64)
2926 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2927 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2928
2929#else
2930# error "port me"
2931#endif
2932 return off;
2933}
2934
2935
2936/**
2937 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2938 * sign-extending the value to 16 bits.
2939 *
2940 * @note ARM64: @a offDisp values not in the 0...0xfff range will require a
2941 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2942 * same. Will assert / throw if caller does not heed this.
2943 *
2944 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2945 */
2946DECL_FORCE_INLINE_THROW(uint32_t)
2947iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2948 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2949{
2950#ifdef RT_ARCH_AMD64
2951 /* movsx reg32, mem8 */
2952 if (iGprDst >= 8 || iGprBase >= 8)
2953 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2954 pCodeBuf[off++] = 0x0f;
2955 pCodeBuf[off++] = 0xbe;
2956 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2957# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2958 /* and reg32, 0xffff */
2959 if (iGprDst >= 8)
2960 pCodeBuf[off++] = X86_OP_REX_B;
2961 pCodeBuf[off++] = 0x81;
2962 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2963 pCodeBuf[off++] = 0xff;
2964 pCodeBuf[off++] = 0xff;
2965 pCodeBuf[off++] = 0;
2966 pCodeBuf[off++] = 0;
2967# else
2968 /* movzx reg32, reg16 */
2969 if (iGprDst >= 8)
2970 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2971 pCodeBuf[off++] = 0x0f;
2972 pCodeBuf[off++] = 0xb7;
2973 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2974# endif
2975 RT_NOREF(iGprTmp);
2976
2977#elif defined(RT_ARCH_ARM64)
2978 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2979 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2980 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2981 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2982
2983#else
2984# error "port me"
2985#endif
2986 return off;
2987}
2988
2989
2990#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2991/**
2992 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2993 *
2994 * @note ARM64: Misaligned @a offDisp values and values not in the
2995 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2996 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2997 * does not heed this.
2998 */
2999DECL_FORCE_INLINE_THROW(uint32_t)
3000iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3001 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3002{
3003#ifdef RT_ARCH_AMD64
3004 /* movdqu reg128, mem128 */
3005 pCodeBuf[off++] = 0xf3;
3006 if (iVecRegDst >= 8 || iGprBase >= 8)
3007 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3008 pCodeBuf[off++] = 0x0f;
3009 pCodeBuf[off++] = 0x6f;
3010 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3011 RT_NOREF(iGprTmp);
3012
3013#elif defined(RT_ARCH_ARM64)
3014 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3015 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3016
3017#else
3018# error "port me"
3019#endif
3020 return off;
3021}
3022
3023
3024/**
3025 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3026 */
3027DECL_INLINE_THROW(uint32_t)
3028iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3029{
3030#ifdef RT_ARCH_AMD64
3031 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3033
3034#elif defined(RT_ARCH_ARM64)
3035 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3036
3037#else
3038# error "port me"
3039#endif
3040 return off;
3041}
3042
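/*
 * Usage sketch, assuming idxVecRegDst and idxRegBase came from the register
 * allocator (names hypothetical): load 16 bytes from [base+0x20] into a host
 * vector register; no particular alignment is required on either host:
 *
 *      off = iemNativeEmitLoadVecRegByGprU128(pReNative, off, idxVecRegDst,
 *                                             idxRegBase, 0x20);
 */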
3043
3044/**
3045 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3046 *
3047 * @note ARM64: Misaligned @a offDisp values and values not in the
3048 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3049 * @a iVecRegDst and @a iGprBase are the same. Will assert / throw if caller
3050 * does not heed this.
3051 */
3052DECL_FORCE_INLINE_THROW(uint32_t)
3053iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3054 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3055{
3056#ifdef RT_ARCH_AMD64
3057 /* vmovdqu reg256, mem256 */
3058 pCodeBuf[off++] = X86_OP_VEX3;
3059 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3060 | X86_OP_VEX3_BYTE1_X
3061 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3062 | UINT8_C(0x01);
3063 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3064 pCodeBuf[off++] = 0x6f;
3065 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3066 RT_NOREF(iGprTmp);
3067
3068#elif defined(RT_ARCH_ARM64)
3069 Assert(!(iVecRegDst & 0x1));
3070 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3071 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3072 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3073 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3074#else
3075# error "port me"
3076#endif
3077 return off;
3078}
3079
3080
3081/**
3082 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3083 */
3084DECL_INLINE_THROW(uint32_t)
3085iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3086{
3087#ifdef RT_ARCH_AMD64
3088 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3089 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3090
3091#elif defined(RT_ARCH_ARM64)
3092 Assert(!(iVecRegDst & 0x1));
3093 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3094 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3095 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3096 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3097
3098#else
3099# error "port me"
3100#endif
3101 return off;
3102}
3103#endif
3104
3105
3106/**
3107 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3108 *
3109 * @note ARM64: Misaligned @a offDisp values and values not in the
3110 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3111 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3112 * does not heed this.
3113 */
3114DECL_FORCE_INLINE_THROW(uint32_t)
3115iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3116 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3117{
3118#ifdef RT_ARCH_AMD64
3119 /* mov mem64, reg64 */
3120 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3121 pCodeBuf[off++] = 0x89;
3122 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3123 RT_NOREF(iGprTmp);
3124
3125#elif defined(RT_ARCH_ARM64)
3126 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3127 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3128
3129#else
3130# error "port me"
3131#endif
3132 return off;
3133}
3134
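/*
 * Usage sketch (hypothetical register indexes 3=value, 4=base, 5=temporary):
 * a displacement like 0x12340 is outside the ARM64 scaled STR immediate
 * range, so a temporary register is passed for the emitter to fall back on:
 *
 *      off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, 3, 4, 0x12340, 5);
 */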
3135
3136/**
3137 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3138 *
3139 * @note ARM64: Misaligned @a offDisp values and values not in the
3140 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3141 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3142 * does not heed this.
3143 */
3144DECL_FORCE_INLINE_THROW(uint32_t)
3145iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3146 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3147{
3148#ifdef RT_ARCH_AMD64
3149 /* mov mem32, reg32 */
3150 if (iGprSrc >= 8 || iGprBase >= 8)
3151 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3152 pCodeBuf[off++] = 0x89;
3153 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3154 RT_NOREF(iGprTmp);
3155
3156#elif defined(RT_ARCH_ARM64)
3157 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3158 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3159
3160#else
3161# error "port me"
3162#endif
3163 return off;
3164}
3165
3166
3167/**
3168 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3169 *
3170 * @note ARM64: Misaligned @a offDisp values and values not in the
3171 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3172 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3173 * does not heed this.
3174 */
3175DECL_FORCE_INLINE_THROW(uint32_t)
3176iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3177 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3178{
3179#ifdef RT_ARCH_AMD64
3180 /* mov mem16, reg16 */
3181 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3182 if (iGprSrc >= 8 || iGprBase >= 8)
3183 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3184 pCodeBuf[off++] = 0x89;
3185 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3186 RT_NOREF(iGprTmp);
3187
3188#elif defined(RT_ARCH_ARM64)
3189 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3190 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3191
3192#else
3193# error "port me"
3194#endif
3195 return off;
3196}
3197
3198
3199/**
3200 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3201 *
3202 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3203 * temporary register (@a iGprTmp) if @a iGprSrc and @a iGprBase are the
3204 * same. Will assert / throw if caller does not heed this.
3205 */
3206DECL_FORCE_INLINE_THROW(uint32_t)
3207iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3208 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3209{
3210#ifdef RT_ARCH_AMD64
3211 /* mov mem8, reg8 */
3212 if (iGprSrc >= 8 || iGprBase >= 8)
3213 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3214 else if (iGprSrc >= 4)
3215 pCodeBuf[off++] = X86_OP_REX;
3216 pCodeBuf[off++] = 0x88;
3217 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3218 RT_NOREF(iGprTmp);
3219
3220#elif defined(RT_ARCH_ARM64)
3221 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3222 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3223
3224#else
3225# error "port me"
3226#endif
3227 return off;
3228}
3229
3230
3231/**
3232 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3233 *
3234 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0), on
3235 * AMD64 it depends on the immediate value.
3236 *
3237 * @note ARM64: Misaligned @a offDisp values and values not in the
3238 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3239 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3240 * does not heed this.
3241 */
3242DECL_FORCE_INLINE_THROW(uint32_t)
3243iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3244 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3245{
3246#ifdef RT_ARCH_AMD64
3247 if ((int32_t)uImm == (int64_t)uImm)
3248 {
3249 /* mov mem64, imm32 (sign-extended) */
3250 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3251 pCodeBuf[off++] = 0xc7;
3252 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3253 pCodeBuf[off++] = RT_BYTE1(uImm);
3254 pCodeBuf[off++] = RT_BYTE2(uImm);
3255 pCodeBuf[off++] = RT_BYTE3(uImm);
3256 pCodeBuf[off++] = RT_BYTE4(uImm);
3257 }
3258 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3259 {
3260 /* require temporary register. */
3261 if (iGprImmTmp == UINT8_MAX)
3262 iGprImmTmp = iGprTmp;
3263 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3264 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3265 }
3266 else
3267# ifdef IEM_WITH_THROW_CATCH
3268 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3269# else
3270 AssertReleaseFailedStmt(off = UINT32_MAX);
3271# endif
3272
3273#elif defined(RT_ARCH_ARM64)
3274 if (uImm == 0)
3275 iGprImmTmp = ARMV8_A64_REG_XZR;
3276 else
3277 {
3278 Assert(iGprImmTmp < 31);
3279 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3280 }
3281 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3282
3283#else
3284# error "port me"
3285#endif
3286 return off;
3287}
3288
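/*
 * Worked example (hypothetical register indexes): an immediate that survives
 * the sign-extended 32-bit round trip, e.g. 0xffffffff80000000, takes the
 * AMD64 mov-imm32 fast path with no temporary; a wider constant such as the
 * one below needs @a iGprImmTmp to be materialized first:
 *
 *      off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off,
 *                                           UINT64_C(0x1234567890), 4, 5);
 */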
3289
3290/**
3291 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3292 *
3293 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3294 *
3295 * @note ARM64: Misaligned @a offDisp values and values not in the
3296 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3297 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3298 * does not heed this.
3299 */
3300DECL_FORCE_INLINE_THROW(uint32_t)
3301iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3302 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3303{
3304#ifdef RT_ARCH_AMD64
3305 /* mov mem32, imm32 */
3306 if (iGprBase >= 8)
3307 pCodeBuf[off++] = X86_OP_REX_B;
3308 pCodeBuf[off++] = 0xc7;
3309 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3310 pCodeBuf[off++] = RT_BYTE1(uImm);
3311 pCodeBuf[off++] = RT_BYTE2(uImm);
3312 pCodeBuf[off++] = RT_BYTE3(uImm);
3313 pCodeBuf[off++] = RT_BYTE4(uImm);
3314 RT_NOREF(iGprImmTmp, iGprTmp);
3315
3316#elif defined(RT_ARCH_ARM64)
3318 if (uImm == 0)
3319 iGprImmTmp = ARMV8_A64_REG_XZR;
3320 else
3321 {
3322 Assert(iGprImmTmp < 31);
3323 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3324 }
3325 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3326 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3327
3328#else
3329# error "port me"
3330#endif
3331 return off;
3332}
3333
3334
3335/**
3336 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3337 *
3338 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3339 *
3340 * @note ARM64: Misaligned @a offDisp values and values not in the
3341 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3342 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3343 * does not heed this.
3344 */
3345DECL_FORCE_INLINE_THROW(uint32_t)
3346iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3347 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3348{
3349#ifdef RT_ARCH_AMD64
3350 /* mov mem16, imm16 */
3351 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3352 if (iGprBase >= 8)
3353 pCodeBuf[off++] = X86_OP_REX_B;
3354 pCodeBuf[off++] = 0xc7;
3355 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3356 pCodeBuf[off++] = RT_BYTE1(uImm);
3357 pCodeBuf[off++] = RT_BYTE2(uImm);
3358 RT_NOREF(iGprImmTmp, iGprTmp);
3359
3360#elif defined(RT_ARCH_ARM64)
3361 if (uImm == 0)
3362 iGprImmTmp = ARMV8_A64_REG_XZR;
3363 else
3364 {
3365 Assert(iGprImmTmp < 31);
3366 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3367 }
3368 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3369 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3370
3371#else
3372# error "port me"
3373#endif
3374 return off;
3375}
3376
3377
3378/**
3379 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3380 *
3381 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3382 *
3383 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3384 * temporary register (@a iGprTmp) if @a iGprImmTmp and @a iGprBase are the
3385 * same. Will assert / throw if caller does not heed this.
3386 */
3387DECL_FORCE_INLINE_THROW(uint32_t)
3388iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3389 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3390{
3391#ifdef RT_ARCH_AMD64
3392 /* mov mem8, imm8 (no operand size prefix needed for byte stores) */
3394 if (iGprBase >= 8)
3395 pCodeBuf[off++] = X86_OP_REX_B;
3396 pCodeBuf[off++] = 0xc6;
3397 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3398 pCodeBuf[off++] = uImm;
3399 RT_NOREF(iGprImmTmp, iGprTmp);
3400
3401#elif defined(RT_ARCH_ARM64)
3402 if (uImm == 0)
3403 iGprImmTmp = ARMV8_A64_REG_XZR;
3404 else
3405 {
3406 Assert(iGprImmTmp < 31);
3407 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3408 }
3409 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3410 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3411
3412#else
3413# error "port me"
3414#endif
3415 return off;
3416}
3417
3418
3419#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3420/**
3421 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3422 *
3423 * @note ARM64: Misaligned @a offDisp values and values not in the
3424 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3425 * @a iVecRegDst and @a iGprBase are the same. Will assert / throw if caller
3426 * does not heed this.
3427 */
3428DECL_FORCE_INLINE_THROW(uint32_t)
3429iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3430 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3431{
3432#ifdef RT_ARCH_AMD64
3433 /* movdqu mem128, reg128 */
3434 pCodeBuf[off++] = 0xf3;
3435 if (iVecRegDst >= 8 || iGprBase >= 8)
3436 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3437 pCodeBuf[off++] = 0x0f;
3438 pCodeBuf[off++] = 0x7f;
3439 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3440 RT_NOREF(iGprTmp);
3441
3442#elif defined(RT_ARCH_ARM64)
3443 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3444 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3445
3446#else
3447# error "port me"
3448#endif
3449 return off;
3450}
3451
3452
3453/**
3454 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3455 */
3456DECL_INLINE_THROW(uint32_t)
3457iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3458{
3459#ifdef RT_ARCH_AMD64
3460 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3461 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3462
3463#elif defined(RT_ARCH_ARM64)
3464 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3465
3466#else
3467# error "port me"
3468#endif
3469 return off;
3470}
3471
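/*
 * Usage sketch (hypothetical indexes): spill host vector register 2 to
 * [base+0x10]. The U256 variants below work the same way, except that on
 * ARM64 they expect an even vector register and store the pair as two
 * 128-bit halves:
 *
 *      off = iemNativeEmitStoreVecRegByGprU128(pReNative, off, 2, 7, 0x10);
 */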
3472
3473/**
3474 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3475 *
3476 * @note ARM64: Misaligned @a offDisp values and values not in the
3477 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3478 * @a iVecRegDst and @a iGprBase are the same. Will assert / throw if caller
3479 * does not heed this.
3480 */
3481DECL_FORCE_INLINE_THROW(uint32_t)
3482iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3483 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3484{
3485#ifdef RT_ARCH_AMD64
3486 /* vmovdqu mem256, reg256 */
3487 pCodeBuf[off++] = X86_OP_VEX3;
3488 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3489 | X86_OP_VEX3_BYTE1_X
3490 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3491 | UINT8_C(0x01);
3492 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3493 pCodeBuf[off++] = 0x7f;
3494 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3495 RT_NOREF(iGprTmp);
3496
3497#elif defined(RT_ARCH_ARM64)
3498 Assert(!(iVecRegDst & 0x1));
3499 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3500 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3501 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3502 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3503#else
3504# error "port me"
3505#endif
3506 return off;
3507}
3508
3509
3510/**
3511 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3512 */
3513DECL_INLINE_THROW(uint32_t)
3514iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3515{
3516#ifdef RT_ARCH_AMD64
3517 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3518 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3519
3520#elif defined(RT_ARCH_ARM64)
3521 Assert(!(iVecRegDst & 0x1));
3522 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3523 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3524 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3525 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3526
3527#else
3528# error "port me"
3529#endif
3530 return off;
3531}
3532#endif
3533
3534
3535
3536/*********************************************************************************************************************************
3537* Subtractions and Additions                                                                                                  *
3538*********************************************************************************************************************************/
3539
3540/**
3541 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3542 * @note The AMD64 version sets flags.
3543 */
3544DECL_INLINE_THROW(uint32_t)
3545iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3546{
3547#if defined(RT_ARCH_AMD64)
3548 /* sub Gv,Ev */
3549 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3550 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3551 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3552 pbCodeBuf[off++] = 0x2b;
3553 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3554
3555#elif defined(RT_ARCH_ARM64)
3556 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3557 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3558
3559#else
3560# error "Port me"
3561#endif
3562 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3563 return off;
3564}
3565
3566
3567/**
3568 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3569 * @note The AMD64 version sets flags.
3570 */
3571DECL_FORCE_INLINE(uint32_t)
3572iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3573{
3574#if defined(RT_ARCH_AMD64)
3575 /* sub Gv,Ev */
3576 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3577 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3578 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3579 pCodeBuf[off++] = 0x2b;
3580 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3581
3582#elif defined(RT_ARCH_ARM64)
3583 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3584
3585#else
3586# error "Port me"
3587#endif
3588 return off;
3589}
3590
3591
3592/**
3593 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3594 * @note The AMD64 version sets flags.
3595 */
3596DECL_INLINE_THROW(uint32_t)
3597iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3598{
3599#if defined(RT_ARCH_AMD64)
3600 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3601#elif defined(RT_ARCH_ARM64)
3602 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3603#else
3604# error "Port me"
3605#endif
3606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3607 return off;
3608}
3609
3610
3611/**
3612 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3613 *
3614 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3615 *
3616 * @note Larger constants will require a temporary register. Failing to specify
3617 * one when needed will trigger fatal assertion / throw.
3618 */
3619DECL_FORCE_INLINE_THROW(uint32_t)
3620iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3621 uint8_t iGprTmp = UINT8_MAX)
3622{
3623#ifdef RT_ARCH_AMD64
3624 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3625 if (iSubtrahend == 1)
3626 {
3627 /* dec r/m64 */
3628 pCodeBuf[off++] = 0xff;
3629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3630 }
3631 else if (iSubtrahend == -1)
3632 {
3633 /* inc r/m64 */
3634 pCodeBuf[off++] = 0xff;
3635 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3636 }
3637 else if ((int8_t)iSubtrahend == iSubtrahend)
3638 {
3639 /* sub r/m64, imm8 */
3640 pCodeBuf[off++] = 0x83;
3641 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3642 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3643 }
3644 else if ((int32_t)iSubtrahend == iSubtrahend)
3645 {
3646 /* sub r/m64, imm32 */
3647 pCodeBuf[off++] = 0x81;
3648 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3649 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3650 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3651 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3652 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3653 }
3654 else if (iGprTmp != UINT8_MAX)
3655 {
3656 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend); /* overwrites the REX.W byte emitted above */
3657 /* sub r/m64, r64 */
3658 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3659 pCodeBuf[off++] = 0x29;
3660 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3661 }
3662 else
3663# ifdef IEM_WITH_THROW_CATCH
3664 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3665# else
3666 AssertReleaseFailedStmt(off = UINT32_MAX);
3667# endif
3668
3669#elif defined(RT_ARCH_ARM64)
3670 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3671 if (uAbsSubtrahend < 4096)
3672 {
3673 if (iSubtrahend >= 0)
3674 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3675 else
3676 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3677 }
3678 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3679 {
3680 if (iSubtrahend >= 0)
3681 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3682 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3683 else
3684 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3685 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3686 }
3687 else if (iGprTmp != UINT8_MAX)
3688 {
3689 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3690 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3691 }
3692 else
3693# ifdef IEM_WITH_THROW_CATCH
3694 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3695# else
3696 AssertReleaseFailedStmt(off = UINT32_MAX);
3697# endif
3698
3699#else
3700# error "Port me"
3701#endif
3702 return off;
3703}
3704
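/*
 * Worked example of the AMD64 encoding choices (hypothetical register 3,
 * i.e. rbx):
 *
 *      off = iemNativeEmitSubGprImmEx(pCodeBuf, off, 3, 1);        -> dec rbx
 *      off = iemNativeEmitSubGprImmEx(pCodeBuf, off, 3, 0x20);     -> sub rbx, 20h    (imm8)
 *      off = iemNativeEmitSubGprImmEx(pCodeBuf, off, 3, 0x12345);  -> sub rbx, 12345h (imm32)
 */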
3705
3706/**
3707 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3708 *
3709 * @note Larger constants will require a temporary register. Failing to specify
3710 * one when needed will trigger fatal assertion / throw.
3711 */
3712DECL_INLINE_THROW(uint32_t)
3713iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3714 uint8_t iGprTmp = UINT8_MAX)
3715
3716{
3717#ifdef RT_ARCH_AMD64
3718 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3719#elif defined(RT_ARCH_ARM64)
3720 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3721#else
3722# error "Port me"
3723#endif
3724 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3725 return off;
3726}
3727
3728
3729/**
3730 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3731 *
3732 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3733 *
3734 * @note ARM64: Larger constants will require a temporary register. Failing to
3735 * specify one when needed will trigger fatal assertion / throw.
3736 */
3737DECL_FORCE_INLINE_THROW(uint32_t)
3738iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3739 uint8_t iGprTmp = UINT8_MAX)
3740{
3741#ifdef RT_ARCH_AMD64
3742 if (iGprDst >= 8)
3743 pCodeBuf[off++] = X86_OP_REX_B;
3744 if (iSubtrahend == 1)
3745 {
3746 /* dec r/m32 */
3747 pCodeBuf[off++] = 0xff;
3748 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3749 }
3750 else if (iSubtrahend == -1)
3751 {
3752 /* inc r/m32 */
3753 pCodeBuf[off++] = 0xff;
3754 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3755 }
3756 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3757 {
3758 /* sub r/m32, imm8 */
3759 pCodeBuf[off++] = 0x83;
3760 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3761 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3762 }
3763 else
3764 {
3765 /* sub r/m32, imm32 */
3766 pCodeBuf[off++] = 0x81;
3767 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3768 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3769 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3770 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3771 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3772 }
3773 RT_NOREF(iGprTmp);
3774
3775#elif defined(RT_ARCH_ARM64)
3776 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3777 if (uAbsSubtrahend < 4096)
3778 {
3779 if (iSubtrahend >= 0)
3780 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3781 else
3782 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3783 }
3784 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3785 {
3786 if (iSubtrahend >= 0)
3787 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3788 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3789 else
3790 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3791 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3792 }
3793 else if (iGprTmp != UINT8_MAX)
3794 {
3795 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3796 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3797 }
3798 else
3799# ifdef IEM_WITH_THROW_CATCH
3800 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3801# else
3802 AssertReleaseFailedStmt(off = UINT32_MAX);
3803# endif
3804
3805#else
3806# error "Port me"
3807#endif
3808 return off;
3809}
3810
3811
3812/**
3813 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3814 *
3815 * @note ARM64: Larger constants will require a temporary register. Failing to
3816 * specify one when needed will trigger fatal assertion / throw.
3817 */
3818DECL_INLINE_THROW(uint32_t)
3819iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3820 uint8_t iGprTmp = UINT8_MAX)
3821
3822{
3823#ifdef RT_ARCH_AMD64
3824 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3825#elif defined(RT_ARCH_ARM64)
3826 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3827#else
3828# error "Port me"
3829#endif
3830 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3831 return off;
3832}
3833
3834
3835/**
3836 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3837 *
3838 * This will optimize using DEC/INC/whatever, and the ARM64 version will not
3839 * set flags, so it is not suitable as a basis for conditional jumps.
3840 *
3841 * @note AMD64: Will only update the lower 16 bits of the register.
3842 * @note ARM64: Will update the entire register.
3843 * @note ARM64: Larger constants will require a temporary register. Failing to
3844 * specify one when needed will trigger fatal assertion / throw.
3845 */
3846DECL_FORCE_INLINE_THROW(uint32_t)
3847iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3848 uint8_t iGprTmp = UINT8_MAX)
3849{
3850#ifdef RT_ARCH_AMD64
3851 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3852 if (iGprDst >= 8)
3853 pCodeBuf[off++] = X86_OP_REX_B;
3854 if (iSubtrahend == 1)
3855 {
3856 /* dec r/m16 */
3857 pCodeBuf[off++] = 0xff;
3858 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3859 }
3860 else if (iSubtrahend == -1)
3861 {
3862 /* inc r/m16 */
3863 pCodeBuf[off++] = 0xff;
3864 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3865 }
3866 else if ((int8_t)iSubtrahend == iSubtrahend)
3867 {
3868 /* sub r/m16, imm8 */
3869 pCodeBuf[off++] = 0x83;
3870 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3871 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3872 }
3873 else
3874 {
3875 /* sub r/m16, imm16 */
3876 pCodeBuf[off++] = 0x81;
3877 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3878 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3879 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3880 }
3881 RT_NOREF(iGprTmp);
3882
3883#elif defined(RT_ARCH_ARM64)
3884 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3885 if (uAbsSubtrahend < 4096)
3886 {
3887 if (iSubtrahend >= 0)
3888 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3889 else
3890 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3891 }
3892 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3893 {
3894 if (iSubtrahend >= 0)
3895 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3896 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3897 else
3898 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3899 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3900 }
3901 else if (iGprTmp != UINT8_MAX)
3902 {
3903 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3904 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3905 }
3906 else
3907# ifdef IEM_WITH_THROW_CATCH
3908 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3909# else
3910 AssertReleaseFailedStmt(off = UINT32_MAX);
3911# endif
3912 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3913
3914#else
3915# error "Port me"
3916#endif
3917 return off;
3918}
3919
3920
3921/**
3922 * Emits adding a 64-bit GPR to another, storing the result in the first.
3923 * @note The AMD64 version sets flags.
3924 */
3925DECL_FORCE_INLINE(uint32_t)
3926iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3927{
3928#if defined(RT_ARCH_AMD64)
3929 /* add Gv,Ev */
3930 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3931 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3932 pCodeBuf[off++] = 0x03;
3933 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3934
3935#elif defined(RT_ARCH_ARM64)
3936 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3937
3938#else
3939# error "Port me"
3940#endif
3941 return off;
3942}
3943
3944
3945/**
3946 * Emits adding a 64-bit GPR to another, storing the result in the first.
3947 * @note The AMD64 version sets flags.
3948 */
3949DECL_INLINE_THROW(uint32_t)
3950iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3951{
3952#if defined(RT_ARCH_AMD64)
3953 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3954#elif defined(RT_ARCH_ARM64)
3955 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3956#else
3957# error "Port me"
3958#endif
3959 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3960 return off;
3961}
3962
3963
3964/**
3965 * Emits adding a 32-bit GPR to another, storing the result in the first.
3966 * @note The AMD64 version sets flags.
3967 */
3968DECL_FORCE_INLINE(uint32_t)
3969iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3970{
3971#if defined(RT_ARCH_AMD64)
3972 /* add Gv,Ev */
3973 if (iGprDst >= 8 || iGprAddend >= 8)
3974 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3975 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3976 pCodeBuf[off++] = 0x03;
3977 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3978
3979#elif defined(RT_ARCH_ARM64)
3980 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3981
3982#else
3983# error "Port me"
3984#endif
3985 return off;
3986}
3987
3988
3989/**
3990 * Emits adding a 32-bit GPR to another, storing the result in the first.
3991 * @note The AMD64 version sets flags.
3992 */
3993DECL_INLINE_THROW(uint32_t)
3994iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3995{
3996#if defined(RT_ARCH_AMD64)
3997 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3998#elif defined(RT_ARCH_ARM64)
3999 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4000#else
4001# error "Port me"
4002#endif
4003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4004 return off;
4005}
4006
4007
4008/**
4009 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4010 */
4011DECL_INLINE_THROW(uint32_t)
4012iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4013{
4014#if defined(RT_ARCH_AMD64)
4015 /* add or inc */
4016 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4017 if (iImm8 != 1)
4018 {
4019 pCodeBuf[off++] = 0x83;
4020 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4021 pCodeBuf[off++] = (uint8_t)iImm8;
4022 }
4023 else
4024 {
4025 pCodeBuf[off++] = 0xff;
4026 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4027 }
4028
4029#elif defined(RT_ARCH_ARM64)
4030 if (iImm8 >= 0)
4031 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
4032 else
4033 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
4034
4035#else
4036# error "Port me"
4037#endif
4038 return off;
4039}
4040
4041
4042/**
4043 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4044 */
4045DECL_INLINE_THROW(uint32_t)
4046iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4047{
4048#if defined(RT_ARCH_AMD64)
4049 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4050#elif defined(RT_ARCH_ARM64)
4051 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4052#else
4053# error "Port me"
4054#endif
4055 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4056 return off;
4057}
4058
4059
4060/**
4061 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4062 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4063 */
4064DECL_FORCE_INLINE(uint32_t)
4065iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4066{
4067#if defined(RT_ARCH_AMD64)
4068 /* add or inc */
4069 if (iGprDst >= 8)
4070 pCodeBuf[off++] = X86_OP_REX_B;
4071 if (iImm8 != 1)
4072 {
4073 pCodeBuf[off++] = 0x83;
4074 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4075 pCodeBuf[off++] = (uint8_t)iImm8;
4076 }
4077 else
4078 {
4079 pCodeBuf[off++] = 0xff;
4080 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4081 }
4082
4083#elif defined(RT_ARCH_ARM64)
4084 if (iImm8 >= 0)
4085 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4086 else
4087 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4088
4089#else
4090# error "Port me"
4091#endif
4092 return off;
4093}
4094
4095
4096/**
4097 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4098 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4099 */
4100DECL_INLINE_THROW(uint32_t)
4101iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4102{
4103#if defined(RT_ARCH_AMD64)
4104 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4105#elif defined(RT_ARCH_ARM64)
4106 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4107#else
4108# error "Port me"
4109#endif
4110 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4111 return off;
4112}
4113
4114
4115/**
4116 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4117 *
4118 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4119 */
4120DECL_FORCE_INLINE_THROW(uint32_t)
4121iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4122{
4123#if defined(RT_ARCH_AMD64)
4124 if ((int8_t)iAddend == iAddend)
4125 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4126
4127 if ((int32_t)iAddend == iAddend)
4128 {
4129 /* add grp, imm32 */
4130 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4131 pCodeBuf[off++] = 0x81;
4132 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4133 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4134 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4135 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4136 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4137 }
4138 else if (iGprTmp != UINT8_MAX)
4139 {
4140 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4141
4142 /* add dst, tmpreg */
4143 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4144 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4145 pCodeBuf[off++] = 0x03;
4146 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4147 }
4148 else
4149# ifdef IEM_WITH_THROW_CATCH
4150 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4151# else
4152 AssertReleaseFailedStmt(off = UINT32_MAX);
4153# endif
4154
4155#elif defined(RT_ARCH_ARM64)
4156 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4157 if (uAbsAddend <= 0xffffffU)
4158 {
4159 bool const fSub = iAddend < 0;
4160 if (uAbsAddend > 0xfffU)
4161 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4162 false /*fSetFlags*/, true /*fShift12*/);
4163 if (uAbsAddend & 0xfffU)
4164 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4165 }
4166 else if (iGprTmp != UINT8_MAX)
4167 {
4168 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4169 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4170 }
4171 else
4172# ifdef IEM_WITH_THROW_CATCH
4173 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4174# else
4175 AssertReleaseFailedStmt(off = UINT32_MAX);
4176# endif
4177
4178#else
4179# error "Port me"
4180#endif
4181 return off;
4182}
4183
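/*
 * Worked ARM64 example: an addend of 0x123456 fits the 24-bit immediate
 * scheme and is split into two instructions, while anything wider requires
 * @a iGprTmp:
 *
 *      add x3, x3, #0x123, lsl #12
 *      add x3, x3, #0x456
 */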
4184
4185/**
4186 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4187 */
4188DECL_INLINE_THROW(uint32_t)
4189iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4190{
4191#if defined(RT_ARCH_AMD64)
4192 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4193 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4194
4195 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4196 {
4197 /* add grp, imm32 */
4198 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4199 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4200 pbCodeBuf[off++] = 0x81;
4201 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4202 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4203 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4204 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4205 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4206 }
4207 else
4208 {
4209 /* Best to use a temporary register to deal with this in the simplest way: */
4210 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4211
4212 /* add dst, tmpreg */
4213 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4214 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4215 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4216 pbCodeBuf[off++] = 0x03;
4217 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4218
4219 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4220 }
4221
4222#elif defined(RT_ARCH_ARM64)
4223 bool const fSub = iAddend < 0;
4224 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4225 if (uAbsAddend <= 0xffffffU)
4226 {
4227 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4228 if (uAbsAddend > 0xfffU)
4229 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4230 false /*fSetFlags*/, true /*fShift12*/);
4231 if (uAbsAddend & 0xfffU)
4232 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4233 }
4234 else
4235 {
4236 /* Use temporary register for the immediate. */
4237 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4238
4239 /* add gprdst, gprdst, tmpreg */
4240 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4241 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg);
4242
4243 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4244 }
4245
4246#else
4247# error "Port me"
4248#endif
4249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4250 return off;
4251}
4252
4253
4254/**
4255 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4256 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4257 * @note For ARM64 the iAddend value must be in the range -0xffffff..0xffffff,
4258 * where a negative value turns the operation into a subtraction.
4259 * Constants outside that range will assert / throw.
4260 */
4261DECL_FORCE_INLINE_THROW(uint32_t)
4262iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4263{
4264#if defined(RT_ARCH_AMD64)
4265 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4266 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4267
4268 /* add grp, imm32 */
4269 if (iGprDst >= 8)
4270 pCodeBuf[off++] = X86_OP_REX_B;
4271 pCodeBuf[off++] = 0x81;
4272 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4273 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4274 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4275 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4276 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4277
4278#elif defined(RT_ARCH_ARM64)
4279 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4280 if (uAbsAddend <= 0xffffffU)
4281 {
4282 bool const fSub = iAddend < 0;
4283 if (uAbsAddend > 0xfffU)
4284 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4285 false /*fSetFlags*/, true /*fShift12*/);
4286 if (uAbsAddend & 0xfffU)
4287 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4288 }
4289 else
4290# ifdef IEM_WITH_THROW_CATCH
4291 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4292# else
4293 AssertReleaseFailedStmt(off = UINT32_MAX);
4294# endif
4295
4296#else
4297# error "Port me"
4298#endif
4299 return off;
4300}
4301
4302
4303/**
4304 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4305 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4306 */
4307DECL_INLINE_THROW(uint32_t)
4308iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4309{
4310#if defined(RT_ARCH_AMD64)
4311 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4312
4313#elif defined(RT_ARCH_ARM64)
4314 bool const fSub = iAddend < 0;
4315 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4316 if (uAbsAddend <= 0xffffffU)
4317 {
4318 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4319 if (uAbsAddend > 0xfffU)
4320 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4321 false /*fSetFlags*/, true /*fShift12*/);
4322 if (uAbsAddend & 0xfffU)
4323 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4324 }
4325 else
4326 {
4327 /* Use temporary register for the immediate. */
4328 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4329
4330 /* add gprdst, gprdst, tmpreg */
4331 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4332 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4333
4334 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4335 }
4336
4337#else
4338# error "Port me"
4339#endif
4340 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4341 return off;
4342}
4343
4344
4345/**
4346 * Emits a 16-bit GPR add with a signed immediate addend.
4347 *
4348 * This will optimize using INC/DEC/whatever, and the ARM64 version will not
4349 * set flags, so it is not suitable as a basis for conditional jumps.
4350 *
4351 * @note AMD64: Will only update the lower 16 bits of the register.
4352 * @note ARM64: Will update the entire register.
4353 * @sa iemNativeEmitSubGpr16ImmEx
4354 */
4355DECL_FORCE_INLINE(uint32_t)
4356iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend)
4357{
4358#ifdef RT_ARCH_AMD64
4359 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4360 if (iGprDst >= 8)
4361 pCodeBuf[off++] = X86_OP_REX_B;
4362 if (iAddend == 1)
4363 {
4364 /* inc r/m16 */
4365 pCodeBuf[off++] = 0xff;
4366 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4367 }
4368 else if (iAddend == -1)
4369 {
4370 /* dec r/m16 */
4371 pCodeBuf[off++] = 0xff;
4372 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4373 }
4374 else if ((int8_t)iAddend == iAddend)
4375 {
4376 /* add r/m16, imm8 */
4377 pCodeBuf[off++] = 0x83;
4378 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4379 pCodeBuf[off++] = (uint8_t)iAddend;
4380 }
4381 else
4382 {
4383 /* add r/m16, imm16 */
4384 pCodeBuf[off++] = 0x81;
4385 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4386 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4387 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4388 }
4389
4390#elif defined(RT_ARCH_ARM64)
4391 bool const fSub = iAddend < 0;
4392 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4393 if (uAbsAddend > 0xfffU)
4394 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4395 false /*fSetFlags*/, true /*fShift12*/);
4396 if (uAbsAddend & 0xfffU)
4397 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4398 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4399
4400#else
4401# error "Port me"
4402#endif
4403 return off;
4404}
4405
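/*
 * Worked example of the differing upper-bit behaviour noted above, with the
 * register initially holding 0x1234ffff and an addend of 1:
 *      AMD64: 16-bit add -> low word wraps to 0, register becomes 0x12340000.
 *      ARM64: add + uxth -> register becomes 0, bits 16 thru 63 cleared.
 */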
4406
4407
4408/**
4409 * Adds two 64-bit GPRs together, storing the result in a third register.
4410 */
4411DECL_FORCE_INLINE(uint32_t)
4412iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4413{
4414#ifdef RT_ARCH_AMD64
4415 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4416 {
4417 /** @todo consider LEA */
4418 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4419 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4420 }
4421 else
4422 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4423
4424#elif defined(RT_ARCH_ARM64)
4425 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4426
4427#else
4428# error "Port me!"
4429#endif
4430 return off;
4431}
4432
4433
4434
4435/**
4436 * Adds two 32-bit GPRs together, storing the result in a third register.
4437 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4438 */
4439DECL_FORCE_INLINE(uint32_t)
4440iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4441{
4442#ifdef RT_ARCH_AMD64
4443 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4444 {
4445 /** @todo consider LEA */
4446 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4447 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4448 }
4449 else
4450 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4451
4452#elif defined(RT_ARCH_ARM64)
4453 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4454
4455#else
4456# error "Port me!"
4457#endif
4458 return off;
4459}
4460
4461
4462/**
4463 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4464 * third register.
4465 *
4466 * @note Neither version works for non-trivial constants if @a iGprDst and
4467 * @a iGprAddend are the same; the ARM64 version will assert / throw.
4468 */
4469DECL_FORCE_INLINE_THROW(uint32_t)
4470iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4471{
4472#ifdef RT_ARCH_AMD64
4473 /** @todo consider LEA */
4474 if ((int8_t)iImmAddend == iImmAddend)
4475 {
4476 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4477 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4478 }
4479 else
4480 {
4481 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4482 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4483 }
4484
4485#elif defined(RT_ARCH_ARM64)
4486 bool const fSub = iImmAddend < 0;
4487 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4488 if (uAbsImmAddend <= 0xfffU)
4489 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend);
4490 else if (uAbsImmAddend <= 0xffffffU)
4491 {
4492 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4493 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4494 if (uAbsImmAddend & 0xfffU)
4495 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & UINT32_C(0xfff));
4496 }
4497 else if (iGprDst != iGprAddend)
4498 {
4499 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4500 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4501 }
4502 else
4503# ifdef IEM_WITH_THROW_CATCH
4504 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4505# else
4506 AssertReleaseFailedStmt(off = UINT32_MAX);
4507# endif
4508
4509#else
4510# error "Port me!"
4511#endif
4512 return off;
4513}
4514
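/*
 * Usage sketch (hypothetical indexes 2 and 3): compute dst = addend + 0x1234
 * in one go; per the note above, keep the two registers distinct for
 * constants this large:
 *
 *      off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, 2, 3, 0x1234);
 */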
4515
4516/**
4517 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4518 * third register.
4519 *
4520 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4521 *
4522 * @note Neither version works for non-trivial constants if @a iGprDst and
4523 * @a iGprAddend are the same; the ARM64 version will assert / throw.
4524 */
4525DECL_FORCE_INLINE_THROW(uint32_t)
4526iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4527{
4528#ifdef RT_ARCH_AMD64
4529 /** @todo consider LEA */
4530 if ((int8_t)iImmAddend == iImmAddend)
4531 {
4532 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4533 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4534 }
4535 else
4536 {
4537 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4538 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4539 }
4540
4541#elif defined(RT_ARCH_ARM64)
4542 bool const fSub = iImmAddend < 0;
4543 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4544 if (uAbsImmAddend <= 0xfffU)
4545 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4546 else if (uAbsImmAddend <= 0xffffffU)
4547 {
4548 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4549 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4550 if (uAbsImmAddend & 0xfffU)
4551 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & 0xfff, false /*f64Bit*/);
4552 }
4553 else if (iGprDst != iGprAddend)
4554 {
4555 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4556 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4557 }
4558 else
4559# ifdef IEM_WITH_THROW_CATCH
4560 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4561# else
4562 AssertReleaseFailedStmt(off = UINT32_MAX);
4563# endif
4564
4565#else
4566# error "Port me!"
4567#endif
4568 return off;
4569}
4570
4571
4572/*********************************************************************************************************************************
4573* Unary Operations *
4574*********************************************************************************************************************************/
4575
4576/**
4577 * Emits code for two's complement negation of a 64-bit GPR.
4578 */
4579DECL_FORCE_INLINE_THROW(uint32_t)
4580iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4581{
4582#if defined(RT_ARCH_AMD64)
4583 /* neg Ev */
4584 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4585 pCodeBuf[off++] = 0xf7;
4586 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4587
4588#elif defined(RT_ARCH_ARM64)
4589 /* sub dst, xzr, dst */
4590 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4591
4592#else
4593# error "Port me"
4594#endif
4595 return off;
4596}
4597
4598
4599/**
4600 * Emits code for two's complement negation of a 64-bit GPR.
4601 */
4602DECL_INLINE_THROW(uint32_t)
4603iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4604{
4605#if defined(RT_ARCH_AMD64)
4606 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4607#elif defined(RT_ARCH_ARM64)
4608 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4609#else
4610# error "Port me"
4611#endif
4612 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4613 return off;
4614}
4615
4616
4617/**
4618 * Emits code for two's complement negation of a 32-bit GPR.
4619 * @note Bits 32 thru 63 are set to zero.
4620 */
4621DECL_FORCE_INLINE_THROW(uint32_t)
4622iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4623{
4624#if defined(RT_ARCH_AMD64)
4625 /* neg Ev */
4626 if (iGprDst >= 8)
4627 pCodeBuf[off++] = X86_OP_REX_B;
4628 pCodeBuf[off++] = 0xf7;
4629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4630
4631#elif defined(RT_ARCH_ARM64)
4632 /* sub dst, xzr, dst */
4633 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4634
4635#else
4636# error "Port me"
4637#endif
4638 return off;
4639}
4640
4641
4642/**
4643 * Emits code for two's complement negation of a 32-bit GPR.
4644 * @note Bits 32 thru 63 are set to zero.
4645 */
4646DECL_INLINE_THROW(uint32_t)
4647iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4648{
4649#if defined(RT_ARCH_AMD64)
4650 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4651#elif defined(RT_ARCH_ARM64)
4652 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4653#else
4654# error "Port me"
4655#endif
4656 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4657 return off;
4658}
4659
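/* Sanity sketch (illustrative, hypothetical register index): two's complement
   negation computes 0 - value, hence the SUB from XZR/WZR on ARM64 and NEG r/m
   on AMD64. For the 32-bit variant, 0x00000001 becomes 0xffffffff with bits
   63:32 cleared:
       off = iemNativeEmitNegGpr32(pReNative, off, idxReg);
*/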
4660
4661
4662/*********************************************************************************************************************************
4663* Bit Operations *
4664*********************************************************************************************************************************/
4665
4666/**
4667 * Emits code for clearing bits 16 thru 63 in the GPR.
4668 */
4669DECL_INLINE_THROW(uint32_t)
4670iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4671{
4672#if defined(RT_ARCH_AMD64)
4673 /* movzx Gv,Ew */
4674 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4675 if (iGprDst >= 8)
4676 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4677 pbCodeBuf[off++] = 0x0f;
4678 pbCodeBuf[off++] = 0xb7;
4679 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4680
4681#elif defined(RT_ARCH_ARM64)
4682 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4683# if 1
4684 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4685# else
4686 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4687 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4688# endif
4689#else
4690# error "Port me"
4691#endif
4692 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4693 return off;
4694}
4695
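/* Illustrative equivalent (hypothetical register index): the emitted code has
   the effect of 'uReg &= 0xffff', i.e. a zero-extending truncation to 16 bits.
   AMD64 gets there via movzx reg32, reg16 (which also zeroes bits 63:32),
   ARM64 via UXTH:
       off = iemNativeEmitClear16UpGpr(pReNative, off, idxReg);
*/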
4696
4697/**
4698 * Emits code for AND'ing two 64-bit GPRs.
4699 *
4700 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4701 * and ARM64 hosts.
4702 */
4703DECL_FORCE_INLINE(uint32_t)
4704iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4705{
4706#if defined(RT_ARCH_AMD64)
4707 /* and Gv, Ev */
4708 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4709 pCodeBuf[off++] = 0x23;
4710 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4711 RT_NOREF(fSetFlags);
4712
4713#elif defined(RT_ARCH_ARM64)
4714 if (!fSetFlags)
4715 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4716 else
4717 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4718
4719#else
4720# error "Port me"
4721#endif
4722 return off;
4723}
4724
4725
4726/**
4727 * Emits code for AND'ing two 64-bit GPRs.
4728 *
4729 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4730 * and ARM64 hosts.
4731 */
4732DECL_INLINE_THROW(uint32_t)
4733iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4734{
4735#if defined(RT_ARCH_AMD64)
4736 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4737#elif defined(RT_ARCH_ARM64)
4738 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4739#else
4740# error "Port me"
4741#endif
4742 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4743 return off;
4744}
4745
4746
4747/**
4748 * Emits code for AND'ing two 32-bit GPRs.
4749 */
4750DECL_FORCE_INLINE(uint32_t)
4751iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4752{
4753#if defined(RT_ARCH_AMD64)
4754 /* and Gv, Ev */
4755 if (iGprDst >= 8 || iGprSrc >= 8)
4756 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4757 pCodeBuf[off++] = 0x23;
4758 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4759 RT_NOREF(fSetFlags);
4760
4761#elif defined(RT_ARCH_ARM64)
4762 if (!fSetFlags)
4763 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4764 else
4765 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4766
4767#else
4768# error "Port me"
4769#endif
4770 return off;
4771}
4772
4773
4774/**
4775 * Emits code for AND'ing two 32-bit GPRs.
4776 */
4777DECL_INLINE_THROW(uint32_t)
4778iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4779{
4780#if defined(RT_ARCH_AMD64)
4781 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4782#elif defined(RT_ARCH_ARM64)
4783 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4784#else
4785# error "Port me"
4786#endif
4787 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4788 return off;
4789}
4790
4791
4792/**
4793 * Emits code for AND'ing a 64-bit GPR with a constant.
4794 *
4795 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4796 * and ARM64 hosts.
4797 */
4798DECL_INLINE_THROW(uint32_t)
4799iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4800{
4801#if defined(RT_ARCH_AMD64)
4802 if ((int64_t)uImm == (int8_t)uImm)
4803 {
4804 /* and Ev, imm8 */
4805 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4806 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4807 pbCodeBuf[off++] = 0x83;
4808 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4809 pbCodeBuf[off++] = (uint8_t)uImm;
4810 }
4811 else if ((int64_t)uImm == (int32_t)uImm)
4812 {
4813 /* and Ev, imm32 */
4814 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4815 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4816 pbCodeBuf[off++] = 0x81;
4817 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4818 pbCodeBuf[off++] = RT_BYTE1(uImm);
4819 pbCodeBuf[off++] = RT_BYTE2(uImm);
4820 pbCodeBuf[off++] = RT_BYTE3(uImm);
4821 pbCodeBuf[off++] = RT_BYTE4(uImm);
4822 }
4823 else
4824 {
4825 /* Use temporary register for the 64-bit immediate. */
4826 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4827 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4828 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4829 }
4830 RT_NOREF(fSetFlags);
4831
4832#elif defined(RT_ARCH_ARM64)
4833 uint32_t uImmR = 0;
4834 uint32_t uImmNandS = 0;
4835 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4836 {
4837 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4838 if (!fSetFlags)
4839 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4840 else
4841 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4842 }
4843 else
4844 {
4845 /* Use temporary register for the 64-bit immediate. */
4846 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4847 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4848 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4849 }
4850
4851#else
4852# error "Port me"
4853#endif
4854 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4855 return off;
4856}
4857
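/* Illustrative note on the ARM64 path (constants are examples only): a 64-bit
   mask is encodable as a logical immediate when it is a (possibly rotated and
   repeated) contiguous run of set bits, so this stays a single instruction:
       off = iemNativeEmitAndGprByImm(pReNative, off, idxReg, UINT64_C(0x0000ffffffff0000));
   whereas an irregular mask like UINT64_C(0x123456789abcdef0) is not encodable
   and goes through iemNativeRegAllocTmpImm + AND-by-register instead. */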
4858
4859/**
4860 * Emits code for AND'ing a 32-bit GPR with a constant.
4861 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4862 * @note For ARM64 this only supports @a uImm values that can be expressed using
4863 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4864 * make sure this is possible!
4865 */
4866DECL_FORCE_INLINE_THROW(uint32_t)
4867iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4868{
4869#if defined(RT_ARCH_AMD64)
4870 /* and Ev, imm */
4871 if (iGprDst >= 8)
4872 pCodeBuf[off++] = X86_OP_REX_B;
4873 if ((int32_t)uImm == (int8_t)uImm)
4874 {
4875 pCodeBuf[off++] = 0x83;
4876 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4877 pCodeBuf[off++] = (uint8_t)uImm;
4878 }
4879 else
4880 {
4881 pCodeBuf[off++] = 0x81;
4882 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4883 pCodeBuf[off++] = RT_BYTE1(uImm);
4884 pCodeBuf[off++] = RT_BYTE2(uImm);
4885 pCodeBuf[off++] = RT_BYTE3(uImm);
4886 pCodeBuf[off++] = RT_BYTE4(uImm);
4887 }
4888 RT_NOREF(fSetFlags);
4889
4890#elif defined(RT_ARCH_ARM64)
4891 uint32_t uImmR = 0;
4892 uint32_t uImmNandS = 0;
4893 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4894 {
4895 if (!fSetFlags)
4896 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4897 else
4898 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4899 }
4900 else
4901# ifdef IEM_WITH_THROW_CATCH
4902 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4903# else
4904 AssertReleaseFailedStmt(off = UINT32_MAX);
4905# endif
4906
4907#else
4908# error "Port me"
4909#endif
4910 return off;
4911}
4912
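/* Caller-side sketch (illustrative, hypothetical names): with this Ex variant
   the caller owns the ARM64 encodability check, so a mask of unknown shape
   should be pre-checked the same way the emitter does internally:
       uint32_t uImmR, uImmNandS;
       if (Armv8A64ConvertMask32ToImmRImmS(uMask, &uImmNandS, &uImmR))
           off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxReg, uMask);
       else
           off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxReg, uMask); // temp-register fallback
*/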
4913
4914/**
4915 * Emits code for AND'ing a 32-bit GPR with a constant.
4916 *
4917 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4918 */
4919DECL_INLINE_THROW(uint32_t)
4920iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4921{
4922#if defined(RT_ARCH_AMD64)
4923 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4924
4925#elif defined(RT_ARCH_ARM64)
4926 uint32_t uImmR = 0;
4927 uint32_t uImmNandS = 0;
4928 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4929 {
4930 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4931 if (!fSetFlags)
4932 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4933 else
4934 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4935 }
4936 else
4937 {
4938 /* Use temporary register for the 64-bit immediate. */
4939 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4940 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4941 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4942 }
4943
4944#else
4945# error "Port me"
4946#endif
4947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4948 return off;
4949}
4950
4951
4952/**
4953 * Emits code for AND'ing a 64-bit GPR with a constant, storing the result in
4954 * a (possibly different) destination GPR.
4955 * @note For ARM64, immediates without an AND/ANDS-compatible logical-immediate
4956 *       encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
4957 *       are the same.
4958 */
4959DECL_FORCE_INLINE_THROW(uint32_t)
4960iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4961 bool fSetFlags = false)
4962{
4963#if defined(RT_ARCH_AMD64)
4964 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4965 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4966 RT_NOREF(fSetFlags);
4967
4968#elif defined(RT_ARCH_ARM64)
4969 uint32_t uImmR = 0;
4970 uint32_t uImmNandS = 0;
4971 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4972 {
4973 if (!fSetFlags)
4974 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4975 else
4976 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4977 }
4978 else if (iGprDst != iGprSrc)
4979 {
4980 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4981 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4982 }
4983 else
4984# ifdef IEM_WITH_THROW_CATCH
4985 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4986# else
4987 AssertReleaseFailedStmt(off = UINT32_MAX);
4988# endif
4989
4990#else
4991# error "Port me"
4992#endif
4993 return off;
4994}
4995
4996/**
4997 * Emits code for AND'ing a 32-bit GPR with a constant, storing the result in
4998 * a (possibly different) destination GPR.
4999 * @note For ARM64, immediates without an AND/ANDS-compatible logical-immediate
5000 *       encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
5001 *       are the same.
5002 *
5003 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5004 */
5005DECL_FORCE_INLINE_THROW(uint32_t)
5006iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
5007 bool fSetFlags = false)
5008{
5009#if defined(RT_ARCH_AMD64)
5010 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5011 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5012 RT_NOREF(fSetFlags);
5013
5014#elif defined(RT_ARCH_ARM64)
5015 uint32_t uImmR = 0;
5016 uint32_t uImmNandS = 0;
5017 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5018 {
5019 if (!fSetFlags)
5020 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5021 else
5022 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5023 }
5024 else if (iGprDst != iGprSrc)
5025 {
5026 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5027 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5028 }
5029 else
5030# ifdef IEM_WITH_THROW_CATCH
5031 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5032# else
5033 AssertReleaseFailedStmt(off = UINT32_MAX);
5034# endif
5035
5036#else
5037# error "Port me"
5038#endif
5039 return off;
5040}
5041
5042
5043/**
5044 * Emits code for OR'ing two 64-bit GPRs.
5045 */
5046DECL_FORCE_INLINE(uint32_t)
5047iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5048{
5049#if defined(RT_ARCH_AMD64)
5050 /* or Gv, Ev */
5051 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5052 pCodeBuf[off++] = 0x0b;
5053 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5054
5055#elif defined(RT_ARCH_ARM64)
5056 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5057
5058#else
5059# error "Port me"
5060#endif
5061 return off;
5062}
5063
5064
5065/**
5066 * Emits code for OR'ing two 64-bit GPRs.
5067 */
5068DECL_INLINE_THROW(uint32_t)
5069iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5070{
5071#if defined(RT_ARCH_AMD64)
5072 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5073#elif defined(RT_ARCH_ARM64)
5074 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5075#else
5076# error "Port me"
5077#endif
5078 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5079 return off;
5080}
5081
5082
5083/**
5084 * Emits code for OR'ing two 32-bit GPRs.
5085 * @note Bits 63:32 of the destination GPR will be cleared.
5086 */
5087DECL_FORCE_INLINE(uint32_t)
5088iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5089{
5090#if defined(RT_ARCH_AMD64)
5091 /* or Gv, Ev */
5092 if (iGprDst >= 8 || iGprSrc >= 8)
5093 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5094 pCodeBuf[off++] = 0x0b;
5095 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5096
5097#elif defined(RT_ARCH_ARM64)
5098 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5099
5100#else
5101# error "Port me"
5102#endif
5103 return off;
5104}
5105
5106
5107/**
5108 * Emits code for OR'ing two 32-bit GPRs.
5109 * @note Bits 63:32 of the destination GPR will be cleared.
5110 */
5111DECL_INLINE_THROW(uint32_t)
5112iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5113{
5114#if defined(RT_ARCH_AMD64)
5115 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5116#elif defined(RT_ARCH_ARM64)
5117 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5118#else
5119# error "Port me"
5120#endif
5121 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5122 return off;
5123}
5124
5125
5126/**
5127 * Emits code for OR'ing a 64-bit GPR with a constant.
5128 */
5129DECL_INLINE_THROW(uint32_t)
5130iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5131{
5132#if defined(RT_ARCH_AMD64)
5133 if ((int64_t)uImm == (int8_t)uImm)
5134 {
5135 /* or Ev, imm8 */
5136 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5137 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5138 pbCodeBuf[off++] = 0x83;
5139 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5140 pbCodeBuf[off++] = (uint8_t)uImm;
5141 }
5142 else if ((int64_t)uImm == (int32_t)uImm)
5143 {
5144 /* or Ev, imm32 */
5145 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5146 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5147 pbCodeBuf[off++] = 0x81;
5148 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5149 pbCodeBuf[off++] = RT_BYTE1(uImm);
5150 pbCodeBuf[off++] = RT_BYTE2(uImm);
5151 pbCodeBuf[off++] = RT_BYTE3(uImm);
5152 pbCodeBuf[off++] = RT_BYTE4(uImm);
5153 }
5154 else
5155 {
5156 /* Use temporary register for the 64-bit immediate. */
5157 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5158 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5159 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5160 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5161 }
5162
5163#elif defined(RT_ARCH_ARM64)
5164 uint32_t uImmR = 0;
5165 uint32_t uImmNandS = 0;
5166 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5167 {
5168 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5169 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5170 }
5171 else
5172 {
5173 /* Use temporary register for the 64-bit immediate. */
5174 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5175 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5176 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5177 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5178 }
5179
5180#else
5181# error "Port me"
5182#endif
5183 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5184 return off;
5185}
5186
5187
5188/**
5189 * Emits code for OR'ing a 32-bit GPR with a constant.
5190 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5191 * @note For ARM64 this only supports @a uImm values that can be expressed using
5192 *       the two 6-bit immediates of the ORR instruction. The caller must make
5193 * sure this is possible!
5194 */
5195DECL_FORCE_INLINE_THROW(uint32_t)
5196iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5197{
5198#if defined(RT_ARCH_AMD64)
5199 /* or Ev, imm */
5200 if (iGprDst >= 8)
5201 pCodeBuf[off++] = X86_OP_REX_B;
5202 if ((int32_t)uImm == (int8_t)uImm)
5203 {
5204 pCodeBuf[off++] = 0x83;
5205 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5206 pCodeBuf[off++] = (uint8_t)uImm;
5207 }
5208 else
5209 {
5210 pCodeBuf[off++] = 0x81;
5211 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5212 pCodeBuf[off++] = RT_BYTE1(uImm);
5213 pCodeBuf[off++] = RT_BYTE2(uImm);
5214 pCodeBuf[off++] = RT_BYTE3(uImm);
5215 pCodeBuf[off++] = RT_BYTE4(uImm);
5216 }
5217
5218#elif defined(RT_ARCH_ARM64)
5219 uint32_t uImmR = 0;
5220 uint32_t uImmNandS = 0;
5221 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5222 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5223 else
5224# ifdef IEM_WITH_THROW_CATCH
5225 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5226# else
5227 AssertReleaseFailedStmt(off = UINT32_MAX);
5228# endif
5229
5230#else
5231# error "Port me"
5232#endif
5233 return off;
5234}
5235
5236
5237/**
5238 * Emits code for OR'ing a 32-bit GPR with a constant.
5239 *
5240 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5241 */
5242DECL_INLINE_THROW(uint32_t)
5243iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5244{
5245#if defined(RT_ARCH_AMD64)
5246 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5247
5248#elif defined(RT_ARCH_ARM64)
5249 uint32_t uImmR = 0;
5250 uint32_t uImmNandS = 0;
5251 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5252 {
5253 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5254 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5255 }
5256 else
5257 {
5258 /* Use temporary register for the 64-bit immediate. */
5259 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5260 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5261 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5262 }
5263
5264#else
5265# error "Port me"
5266#endif
5267 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5268 return off;
5269}
5270
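/* Illustrative example (register index hypothetical): setting a single status
   bit. RT_BIT_32(10) = 0x400 is one contiguous set bit, so ARM64 encodes it as
   a logical immediate; AMD64 emits 'or Ev, imm32' because 0x400 is outside the
   sign-extended imm8 range:
       off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxReg, RT_BIT_32(10));
*/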
5271
5272
5273/**
5274 * ORs two 64-bit GPRs together, storing the result in a third register.
5275 */
5276DECL_FORCE_INLINE(uint32_t)
5277iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5278{
5279#ifdef RT_ARCH_AMD64
5280 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5281 {
5282 /** @todo consider LEA */
5283 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5284 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5285 }
5286 else
5287 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5288
5289#elif defined(RT_ARCH_ARM64)
5290 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5291
5292#else
5293# error "Port me!"
5294#endif
5295 return off;
5296}
5297
5298
5299
5300/**
5301 * ORs two 32-bit GPRs together, storing the result in a third register.
5302 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5303 */
5304DECL_FORCE_INLINE(uint32_t)
5305iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5306{
5307#ifdef RT_ARCH_AMD64
5308 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5309 {
5310 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5311 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5312 }
5313 else
5314 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5315
5316#elif defined(RT_ARCH_ARM64)
5317 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5318
5319#else
5320# error "Port me!"
5321#endif
5322 return off;
5323}
5324
5325
5326/**
5327 * Emits code for XOR'ing two 64-bit GPRs.
5328 */
5329DECL_INLINE_THROW(uint32_t)
5330iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5331{
5332#if defined(RT_ARCH_AMD64)
5333    /* xor Gv, Ev */
5334 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5335 pCodeBuf[off++] = 0x33;
5336 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5337
5338#elif defined(RT_ARCH_ARM64)
5339 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5340
5341#else
5342# error "Port me"
5343#endif
5344 return off;
5345}
5346
5347
5348/**
5349 * Emits code for XOR'ing two 64-bit GPRs.
5350 */
5351DECL_INLINE_THROW(uint32_t)
5352iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5353{
5354#if defined(RT_ARCH_AMD64)
5355 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5356#elif defined(RT_ARCH_ARM64)
5357 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5358#else
5359# error "Port me"
5360#endif
5361 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5362 return off;
5363}
5364
5365
5366/**
5367 * Emits code for XOR'ing two 32-bit GPRs.
5368 */
5369DECL_INLINE_THROW(uint32_t)
5370iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5371{
5372#if defined(RT_ARCH_AMD64)
5373    /* xor Gv, Ev */
5374 if (iGprDst >= 8 || iGprSrc >= 8)
5375 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5376 pCodeBuf[off++] = 0x33;
5377 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5378
5379#elif defined(RT_ARCH_ARM64)
5380 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5381
5382#else
5383# error "Port me"
5384#endif
5385 return off;
5386}
5387
5388
5389/**
5390 * Emits code for XOR'ing two 32-bit GPRs.
5391 */
5392DECL_INLINE_THROW(uint32_t)
5393iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5394{
5395#if defined(RT_ARCH_AMD64)
5396 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5397#elif defined(RT_ARCH_ARM64)
5398 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5399#else
5400# error "Port me"
5401#endif
5402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5403 return off;
5404}
5405
5406
5407/**
5408 * Emits code for XOR'ing a 32-bit GPR with a constant.
5409 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5410 * @note For ARM64 this only supports @a uImm values that can be expressed using
5411 *       the two 6-bit immediates of the EOR instruction. The caller must make
5412 * sure this is possible!
5413 */
5414DECL_FORCE_INLINE_THROW(uint32_t)
5415iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5416{
5417#if defined(RT_ARCH_AMD64)
5418    /* xor Ev, imm */
5419 if (iGprDst >= 8)
5420 pCodeBuf[off++] = X86_OP_REX_B;
5421 if ((int32_t)uImm == (int8_t)uImm)
5422 {
5423 pCodeBuf[off++] = 0x83;
5424 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5425 pCodeBuf[off++] = (uint8_t)uImm;
5426 }
5427 else
5428 {
5429 pCodeBuf[off++] = 0x81;
5430 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5431 pCodeBuf[off++] = RT_BYTE1(uImm);
5432 pCodeBuf[off++] = RT_BYTE2(uImm);
5433 pCodeBuf[off++] = RT_BYTE3(uImm);
5434 pCodeBuf[off++] = RT_BYTE4(uImm);
5435 }
5436
5437#elif defined(RT_ARCH_ARM64)
5438 uint32_t uImmR = 0;
5439 uint32_t uImmNandS = 0;
5440 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5441 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5442 else
5443# ifdef IEM_WITH_THROW_CATCH
5444 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5445# else
5446 AssertReleaseFailedStmt(off = UINT32_MAX);
5447# endif
5448
5449#else
5450# error "Port me"
5451#endif
5452 return off;
5453}
5454
5455
5456/**
5457 * Emits code for XOR'ing a 32-bit GPR with a constant.
5458 */
5459DECL_INLINE_THROW(uint32_t)
5460iemNativeEmitXorGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5461{
5462#if defined(RT_ARCH_AMD64)
5463 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5464#elif defined(RT_ARCH_ARM64)
5465 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, uImm);
5466#else
5467# error "Port me"
5468#endif
5469 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5470 return off;
5471}
5472
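/* Illustrative example (register index hypothetical): toggling the low bit.
   The constant 1 fits the sign-extended imm8 form on AMD64 and is a valid
   single-bit logical immediate on ARM64, so both hosts get one short instruction:
       off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxReg, 1);
*/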
5473
5474/*********************************************************************************************************************************
5475* Shifting *
5476*********************************************************************************************************************************/
5477
5478/**
5479 * Emits code for shifting a GPR a fixed number of bits to the left.
5480 */
5481DECL_FORCE_INLINE(uint32_t)
5482iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5483{
5484 Assert(cShift > 0 && cShift < 64);
5485
5486#if defined(RT_ARCH_AMD64)
5487 /* shl dst, cShift */
5488 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5489 if (cShift != 1)
5490 {
5491 pCodeBuf[off++] = 0xc1;
5492 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5493 pCodeBuf[off++] = cShift;
5494 }
5495 else
5496 {
5497 pCodeBuf[off++] = 0xd1;
5498 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5499 }
5500
5501#elif defined(RT_ARCH_ARM64)
5502 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5503
5504#else
5505# error "Port me"
5506#endif
5507 return off;
5508}
5509
5510
5511/**
5512 * Emits code for shifting a GPR a fixed number of bits to the left.
5513 */
5514DECL_INLINE_THROW(uint32_t)
5515iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5516{
5517#if defined(RT_ARCH_AMD64)
5518 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5519#elif defined(RT_ARCH_ARM64)
5520 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5521#else
5522# error "Port me"
5523#endif
5524 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5525 return off;
5526}
5527
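/* Encoding note with an example (illustrative, hypothetical register index):
   on AMD64 a shift count of 1 takes the shorter 0xd1 /4 form while other
   counts take 0xc1 /4 ib; ARM64 encodes an immediate LSL as one instruction
   either way. E.g. scaling an index by 8 (idxReg <<= 3):
       off = iemNativeEmitShiftGprLeft(pReNative, off, idxReg, 3);
*/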
5528
5529/**
5530 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5531 */
5532DECL_FORCE_INLINE(uint32_t)
5533iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5534{
5535 Assert(cShift > 0 && cShift < 32);
5536
5537#if defined(RT_ARCH_AMD64)
5538 /* shl dst, cShift */
5539 if (iGprDst >= 8)
5540 pCodeBuf[off++] = X86_OP_REX_B;
5541 if (cShift != 1)
5542 {
5543 pCodeBuf[off++] = 0xc1;
5544 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5545 pCodeBuf[off++] = cShift;
5546 }
5547 else
5548 {
5549 pCodeBuf[off++] = 0xd1;
5550 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5551 }
5552
5553#elif defined(RT_ARCH_ARM64)
5554 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5555
5556#else
5557# error "Port me"
5558#endif
5559 return off;
5560}
5561
5562
5563/**
5564 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5565 */
5566DECL_INLINE_THROW(uint32_t)
5567iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5568{
5569#if defined(RT_ARCH_AMD64)
5570 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5571#elif defined(RT_ARCH_ARM64)
5572 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5573#else
5574# error "Port me"
5575#endif
5576 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5577 return off;
5578}
5579
5580
5581/**
5582 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5583 */
5584DECL_FORCE_INLINE(uint32_t)
5585iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5586{
5587 Assert(cShift > 0 && cShift < 64);
5588
5589#if defined(RT_ARCH_AMD64)
5590 /* shr dst, cShift */
5591 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5592 if (cShift != 1)
5593 {
5594 pCodeBuf[off++] = 0xc1;
5595 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5596 pCodeBuf[off++] = cShift;
5597 }
5598 else
5599 {
5600 pCodeBuf[off++] = 0xd1;
5601 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5602 }
5603
5604#elif defined(RT_ARCH_ARM64)
5605 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5606
5607#else
5608# error "Port me"
5609#endif
5610 return off;
5611}
5612
5613
5614/**
5615 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5616 */
5617DECL_INLINE_THROW(uint32_t)
5618iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5619{
5620#if defined(RT_ARCH_AMD64)
5621 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5622#elif defined(RT_ARCH_ARM64)
5623 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5624#else
5625# error "Port me"
5626#endif
5627 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5628 return off;
5629}
5630
5631
5632/**
5633 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5634 * right.
5635 */
5636DECL_FORCE_INLINE(uint32_t)
5637iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5638{
5639 Assert(cShift > 0 && cShift < 32);
5640
5641#if defined(RT_ARCH_AMD64)
5642 /* shr dst, cShift */
5643 if (iGprDst >= 8)
5644 pCodeBuf[off++] = X86_OP_REX_B;
5645 if (cShift != 1)
5646 {
5647 pCodeBuf[off++] = 0xc1;
5648 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5649 pCodeBuf[off++] = cShift;
5650 }
5651 else
5652 {
5653 pCodeBuf[off++] = 0xd1;
5654 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5655 }
5656
5657#elif defined(RT_ARCH_ARM64)
5658 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5659
5660#else
5661# error "Port me"
5662#endif
5663 return off;
5664}
5665
5666
5667/**
5668 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5669 * right.
5670 */
5671DECL_INLINE_THROW(uint32_t)
5672iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5673{
5674#if defined(RT_ARCH_AMD64)
5675 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5676#elif defined(RT_ARCH_ARM64)
5677 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5678#else
5679# error "Port me"
5680#endif
5681 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5682 return off;
5683}
5684
5685
5686/**
5687 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5688 * right and assigning it to a different GPR.
5689 */
5690DECL_INLINE_THROW(uint32_t)
5691iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5692{
5693 Assert(cShift > 0); Assert(cShift < 32);
5694#if defined(RT_ARCH_AMD64)
5695 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5696 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5697
5698#elif defined(RT_ARCH_ARM64)
5699 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5700
5701#else
5702# error "Port me"
5703#endif
5704 return off;
5705}
5706
5707
5708/**
5709 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5710 */
5711DECL_FORCE_INLINE(uint32_t)
5712iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5713{
5714 Assert(cShift > 0 && cShift < 64);
5715
5716#if defined(RT_ARCH_AMD64)
5717 /* sar dst, cShift */
5718 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5719 if (cShift != 1)
5720 {
5721 pCodeBuf[off++] = 0xc1;
5722 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5723 pCodeBuf[off++] = cShift;
5724 }
5725 else
5726 {
5727 pCodeBuf[off++] = 0xd1;
5728 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5729 }
5730
5731#elif defined(RT_ARCH_ARM64)
5732 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);
5733
5734#else
5735# error "Port me"
5736#endif
5737 return off;
5738}
5739
5740
5741/**
5742 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5743 */
5744DECL_INLINE_THROW(uint32_t)
5745iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5746{
5747#if defined(RT_ARCH_AMD64)
5748 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5749#elif defined(RT_ARCH_ARM64)
5750 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5751#else
5752# error "Port me"
5753#endif
5754 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5755 return off;
5756}
5757
5758
5759/**
5760 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5761 */
5762DECL_FORCE_INLINE(uint32_t)
5763iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5764{
5765    Assert(cShift > 0 && cShift < 32);
5766
5767#if defined(RT_ARCH_AMD64)
5768 /* sar dst, cShift */
5769 if (iGprDst >= 8)
5770 pCodeBuf[off++] = X86_OP_REX_B;
5771 if (cShift != 1)
5772 {
5773 pCodeBuf[off++] = 0xc1;
5774 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5775 pCodeBuf[off++] = cShift;
5776 }
5777 else
5778 {
5779 pCodeBuf[off++] = 0xd1;
5780 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5781 }
5782
5783#elif defined(RT_ARCH_ARM64)
5784 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5785
5786#else
5787# error "Port me"
5788#endif
5789 return off;
5790}
5791
5792
5793/**
5794 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5795 */
5796DECL_INLINE_THROW(uint32_t)
5797iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5798{
5799#if defined(RT_ARCH_AMD64)
5800 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5801#elif defined(RT_ARCH_ARM64)
5802 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5803#else
5804# error "Port me"
5805#endif
5806 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5807 return off;
5808}
5809
5810
5811/**
5812 * Emits code for rotating a GPR a fixed number of bits to the left.
5813 */
5814DECL_FORCE_INLINE(uint32_t)
5815iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5816{
5817 Assert(cShift > 0 && cShift < 64);
5818
5819#if defined(RT_ARCH_AMD64)
5820 /* rol dst, cShift */
5821 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5822 if (cShift != 1)
5823 {
5824 pCodeBuf[off++] = 0xc1;
5825 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5826 pCodeBuf[off++] = cShift;
5827 }
5828 else
5829 {
5830 pCodeBuf[off++] = 0xd1;
5831 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5832 }
5833
5834#elif defined(RT_ARCH_ARM64)
5835 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5836
5837#else
5838# error "Port me"
5839#endif
5840 return off;
5841}
5842
5843
5844#if defined(RT_ARCH_AMD64)
5845/**
5846 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5847 */
5848DECL_FORCE_INLINE(uint32_t)
5849iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5850{
5851 Assert(cShift > 0 && cShift < 32);
5852
5853 /* rcl dst, cShift */
5854 if (iGprDst >= 8)
5855 pCodeBuf[off++] = X86_OP_REX_B;
5856 if (cShift != 1)
5857 {
5858 pCodeBuf[off++] = 0xc1;
5859 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5860 pCodeBuf[off++] = cShift;
5861 }
5862 else
5863 {
5864 pCodeBuf[off++] = 0xd1;
5865 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5866 }
5867
5868 return off;
5869}
5870#endif /* RT_ARCH_AMD64 */
5871
5872
5873
5874/**
5875 * Emits code for reversing the byte order of the 16-bit value in a 32-bit GPR.
5876 * @note ARM64 clears bits 63:32 of the destination GPR; the AMD64 16-bit rotate leaves bits 63:16 unchanged.
5877 */
5878DECL_FORCE_INLINE(uint32_t)
5879iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5880{
5881#if defined(RT_ARCH_AMD64)
5882 /*
5883 * There is no bswap r16 on x86 (the encoding exists but does not work).
5884 * So just use a rol (gcc -O2 does the same).
5885 *
5886 * rol r16, 0x8
5887 */
5888 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5889 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5890 if (iGpr >= 8)
5891 pbCodeBuf[off++] = X86_OP_REX_B;
5892 pbCodeBuf[off++] = 0xc1;
5893 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5894 pbCodeBuf[off++] = 0x08;
5895#elif defined(RT_ARCH_ARM64)
5896 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5897
5898 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5899#else
5900# error "Port me"
5901#endif
5902
5903 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5904 return off;
5905}
5906
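/* Byte-order sketch (illustrative, hypothetical register index): for a GPR
   holding 0x1234 in bits 15:0,
       off = iemNativeEmitBswapGpr16(pReNative, off, idxReg);
   leaves 0x3412 in bits 15:0 - the AMD64 'rol r16, 8' and ARM64 REV16 agree
   on those bits; see the note above regarding the upper bits. */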
5907
5908/**
5909 * Emits code for reversing the byte order in a 32-bit GPR.
5910 * @note Bits 63:32 of the destination GPR will be cleared.
5911 */
5912DECL_FORCE_INLINE(uint32_t)
5913iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5914{
5915#if defined(RT_ARCH_AMD64)
5916 /* bswap r32 */
5917 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5918
5919 if (iGpr >= 8)
5920 pbCodeBuf[off++] = X86_OP_REX_B;
5921 pbCodeBuf[off++] = 0x0f;
5922 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5923#elif defined(RT_ARCH_ARM64)
5924 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5925
5926 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5927#else
5928# error "Port me"
5929#endif
5930
5931 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5932 return off;
5933}
5934
5935
5936/**
5937 * Emits code for reversing the byte order in a 64-bit GPR.
5938 */
5939DECL_FORCE_INLINE(uint32_t)
5940iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5941{
5942#if defined(RT_ARCH_AMD64)
5943 /* bswap r64 */
5944 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5945
5946 if (iGpr >= 8)
5947 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5948 else
5949 pbCodeBuf[off++] = X86_OP_REX_W;
5950 pbCodeBuf[off++] = 0x0f;
5951 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5952#elif defined(RT_ARCH_ARM64)
5953 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5954
5955 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5956#else
5957# error "Port me"
5958#endif
5959
5960 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5961 return off;
5962}
5963
5964
5965/*********************************************************************************************************************************
5966* Compare and Testing *
5967*********************************************************************************************************************************/
5968
5969
5970#ifdef RT_ARCH_ARM64
5971/**
5972 * Emits an ARM64 compare instruction.
5973 */
5974DECL_INLINE_THROW(uint32_t)
5975iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5976 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5977{
5978 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5979 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5980 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5981 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5982 return off;
5983}
5984#endif
5985
5986
5987/**
5988 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5989 * with conditional instructions.
5990 */
5991DECL_FORCE_INLINE(uint32_t)
5992iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5993{
5994#ifdef RT_ARCH_AMD64
5995 /* cmp Gv, Ev */
5996 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5997 pCodeBuf[off++] = 0x3b;
5998 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5999
6000#elif defined(RT_ARCH_ARM64)
6001 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
6002
6003#else
6004# error "Port me!"
6005#endif
6006 return off;
6007}
6008
6009
6010/**
6011 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6012 * with conditional instructions.
6013 */
6014DECL_INLINE_THROW(uint32_t)
6015iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6016{
6017#ifdef RT_ARCH_AMD64
6018 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6019#elif defined(RT_ARCH_ARM64)
6020 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6021#else
6022# error "Port me!"
6023#endif
6024 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6025 return off;
6026}
6027
6028
6029/**
6030 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6031 * with conditional instructions.
6032 */
6033DECL_FORCE_INLINE(uint32_t)
6034iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6035{
6036#ifdef RT_ARCH_AMD64
6037 /* cmp Gv, Ev */
6038 if (iGprLeft >= 8 || iGprRight >= 8)
6039 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6040 pCodeBuf[off++] = 0x3b;
6041 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6042
6043#elif defined(RT_ARCH_ARM64)
6044 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
6045
6046#else
6047# error "Port me!"
6048#endif
6049 return off;
6050}
6051
6052
6053/**
6054 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6055 * with conditional instructions.
6056 */
6057DECL_INLINE_THROW(uint32_t)
6058iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6059{
6060#ifdef RT_ARCH_AMD64
6061 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6062#elif defined(RT_ARCH_ARM64)
6063 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6064#else
6065# error "Port me!"
6066#endif
6067 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6068 return off;
6069}
6070
6071
6072/**
6073 * Emits a compare of a 64-bit GPR with a constant value, setting status
6074 * flags/whatever for use with conditional instructions.
6075 */
6076DECL_INLINE_THROW(uint32_t)
6077iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
6078{
6079#ifdef RT_ARCH_AMD64
6080    if (uImm <= UINT32_C(0x7f))
6081 {
6082 /* cmp Ev, Ib */
6083 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6084 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6085 pbCodeBuf[off++] = 0x83;
6086 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6087 pbCodeBuf[off++] = (uint8_t)uImm;
6088 }
6089 else if ((int64_t)uImm == (int32_t)uImm)
6090 {
6091 /* cmp Ev, imm */
6092 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6093 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6094 pbCodeBuf[off++] = 0x81;
6095 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6096 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6097 pbCodeBuf[off++] = RT_BYTE1(uImm);
6098 pbCodeBuf[off++] = RT_BYTE2(uImm);
6099 pbCodeBuf[off++] = RT_BYTE3(uImm);
6100 pbCodeBuf[off++] = RT_BYTE4(uImm);
6101 }
6102 else
6103 {
6104 /* Use temporary register for the immediate. */
6105 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6106 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6107 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6108 }
6109
6110#elif defined(RT_ARCH_ARM64)
6111    /** @todo guess there are clever things we can do here... */
6112 if (uImm < _4K)
6113 {
6114 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6115 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6116 true /*64Bit*/, true /*fSetFlags*/);
6117 }
6118 else if ((uImm & ~(uint64_t)0xfff000) == 0)
6119 {
6120 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6121 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6122 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6123 }
6124 else
6125 {
6126 /* Use temporary register for the immediate. */
6127 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6128 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6129 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6130 }
6131
6132#else
6133# error "Port me!"
6134#endif
6135
6136 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6137 return off;
6138}
6139
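/* Illustrative immediate handling on the ARM64 path (register hypothetical):
       off = iemNativeEmitCmpGprWithImm(pReNative, off, idxReg, 0xfff);    // single CMP (SUBS) imm12
       off = iemNativeEmitCmpGprWithImm(pReNative, off, idxReg, 0xabc000); // CMP imm12, LSL #12
       off = iemNativeEmitCmpGprWithImm(pReNative, off, idxReg, 0xabc123); // temporary register + CMP reg
*/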
6140
6141/**
6142 * Emits a compare of a 32-bit GPR with a constant value, setting status
6143 * flags/whatever for use with conditional instructions.
6144 *
6145 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6146 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
6147 * bits all zero). Will release assert or throw exception if the caller
6148 * violates this restriction.
6149 */
6150DECL_FORCE_INLINE_THROW(uint32_t)
6151iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6152{
6153#ifdef RT_ARCH_AMD64
6154 if (iGprLeft >= 8)
6155 pCodeBuf[off++] = X86_OP_REX_B;
6156 if (uImm <= UINT32_C(0x7f))
6157 {
6158 /* cmp Ev, Ib */
6159 pCodeBuf[off++] = 0x83;
6160 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6161 pCodeBuf[off++] = (uint8_t)uImm;
6162 }
6163 else
6164 {
6165 /* cmp Ev, imm */
6166 pCodeBuf[off++] = 0x81;
6167 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6168 pCodeBuf[off++] = RT_BYTE1(uImm);
6169 pCodeBuf[off++] = RT_BYTE2(uImm);
6170 pCodeBuf[off++] = RT_BYTE3(uImm);
6171 pCodeBuf[off++] = RT_BYTE4(uImm);
6172 }
6173
6174#elif defined(RT_ARCH_ARM64)
6175    /** @todo guess there are clever things we can do here... */
6176 if (uImm < _4K)
6177 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6178 false /*64Bit*/, true /*fSetFlags*/);
6179 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6180        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6181 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6182 else
6183# ifdef IEM_WITH_THROW_CATCH
6184 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6185# else
6186 AssertReleaseFailedStmt(off = UINT32_MAX);
6187# endif
6188
6189#else
6190# error "Port me!"
6191#endif
6192 return off;
6193}
6194
6195
6196/**
6197 * Emits a compare of a 32-bit GPR with a constant value, setting status
6198 * flags/whatever for use with conditional instructions.
6199 */
6200DECL_INLINE_THROW(uint32_t)
6201iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6202{
6203#ifdef RT_ARCH_AMD64
6204 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6205
6206#elif defined(RT_ARCH_ARM64)
6207    /** @todo guess there are clever things we can do here... */
6208 if (uImm < _4K)
6209 {
6210 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6211 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6212 false /*64Bit*/, true /*fSetFlags*/);
6213 }
6214 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6215 {
6216 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6217        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6218 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6219 }
6220 else
6221 {
6222 /* Use temporary register for the immediate. */
6223 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6224 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6225 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6226 }
6227
6228#else
6229# error "Port me!"
6230#endif
6231
6232 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6233 return off;
6234}
6235
6236
6237/**
6238 * Emits a compare of a 16-bit GPR with a constant value, setting status
6239 * flags/whatever for use with conditional instructions.
6240 *
6241 * @note ARM64: A helper register is required (@a idxTmpReg) for isolating the
6242 *       16-bit value from @a iGprLeft.
6243 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6244 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
6245 * bits all zero). Will release assert or throw exception if the caller
6246 * violates this restriction.
6247 */
6248DECL_FORCE_INLINE_THROW(uint32_t)
6249iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6250 uint8_t idxTmpReg = UINT8_MAX)
6251{
6252#ifdef RT_ARCH_AMD64
6253 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6254 if (iGprLeft >= 8)
6255 pCodeBuf[off++] = X86_OP_REX_B;
6256 if (uImm <= UINT32_C(0x7f))
6257 {
6258 /* cmp Ev, Ib */
6259 pCodeBuf[off++] = 0x83;
6260 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6261 pCodeBuf[off++] = (uint8_t)uImm;
6262 }
6263 else
6264 {
6265 /* cmp Ev, imm */
6266 pCodeBuf[off++] = 0x81;
6267 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6268 pCodeBuf[off++] = RT_BYTE1(uImm);
6269 pCodeBuf[off++] = RT_BYTE2(uImm);
6270 }
6271 RT_NOREF(idxTmpReg);
6272
6273#elif defined(RT_ARCH_ARM64)
6274# ifdef IEM_WITH_THROW_CATCH
6275 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6276# else
6277 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6278# endif
6279 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6280 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6281 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6282
6283#else
6284# error "Port me!"
6285#endif
6286 return off;
6287}
6288
6289
6290/**
6291 * Emits a compare of a 16-bit GPR with a constant value, setting status
6292 * flags/whatever for use with conditional instructions.
6293 *
6294 * @note ARM64: A helper register is required (@a idxTmpReg).
6295 */
6296DECL_INLINE_THROW(uint32_t)
6297iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6298 uint8_t idxTmpReg = UINT8_MAX)
6299{
6300#ifdef RT_ARCH_AMD64
6301 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6302#elif defined(RT_ARCH_ARM64)
6303 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6304#else
6305# error "Port me!"
6306#endif
6307 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6308 return off;
6309}
6310
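/* Illustrative ARM64 flow (hypothetical register indexes): the 16-bit compare
   first masks the operand with AND idxTmpReg, iGprLeft, #0xffff and then does
   a 32-bit CMP, so a scratch register has to be supplied:
       uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
       off = iemNativeEmitCmpGpr16WithImm(pReNative, off, idxReg, 0x1234, idxTmpReg);
       iemNativeRegFreeTmp(pReNative, idxTmpReg);
   (iemNativeRegAllocTmp/iemNativeRegFreeTmp are assumed to exist alongside the
   Imm variants used elsewhere in this file.) */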
6311
6312
6313/*********************************************************************************************************************************
6314* Branching *
6315*********************************************************************************************************************************/
6316
6317/**
6318 * Emits a JMP rel32 / B imm26 to the given label.
6319 */
6320DECL_FORCE_INLINE_THROW(uint32_t)
6321iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6322{
6323 Assert(idxLabel < pReNative->cLabels);
6324
6325#ifdef RT_ARCH_AMD64
6326 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6327 {
6328 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6329 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6330 {
6331 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6332 pCodeBuf[off++] = (uint8_t)offRel;
6333 }
6334 else
6335 {
6336 offRel -= 3;
6337 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6338 pCodeBuf[off++] = RT_BYTE1(offRel);
6339 pCodeBuf[off++] = RT_BYTE2(offRel);
6340 pCodeBuf[off++] = RT_BYTE3(offRel);
6341 pCodeBuf[off++] = RT_BYTE4(offRel);
6342 }
6343 }
6344 else
6345 {
6346 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6347 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6348 pCodeBuf[off++] = 0xfe;
6349 pCodeBuf[off++] = 0xff;
6350 pCodeBuf[off++] = 0xff;
6351 pCodeBuf[off++] = 0xff;
6352 }
6353 pCodeBuf[off++] = 0xcc; /* int3 poison */
6354
6355#elif defined(RT_ARCH_ARM64)
6356 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6357 {
6358 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6359 off++;
6360 }
6361 else
6362 {
6363 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6364 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6365 }
6366
6367#else
6368# error "Port me!"
6369#endif
6370 return off;
6371}
6372
6373
6374/**
6375 * Emits a JMP rel32 / B imm26 to the given label.
6376 */
6377DECL_INLINE_THROW(uint32_t)
6378iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6379{
6380#ifdef RT_ARCH_AMD64
6381 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6382#elif defined(RT_ARCH_ARM64)
6383 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6384#else
6385# error "Port me!"
6386#endif
6387 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6388 return off;
6389}
6390
6391
6392/**
6393 * Emits a JMP rel32 / B imm26 to a new undefined label.
6394 */
6395DECL_INLINE_THROW(uint32_t)
6396iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6397{
6398 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6399 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6400}
6401
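/*
 * A sketch of the typical forward-jump pattern with labels (illustrative;
 * enmLabelType stands in for whatever IEMNATIVELABELTYPE value applies, and
 * iemNativeLabelDefine() is assumed to be the label-resolution counterpart
 * declared alongside iemNativeLabelCreate()):
 *
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, 0);
 *      off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel);    // records a fixup
 *      ...                                                         // code that is jumped over
 *      iemNativeLabelDefine(pReNative, idxLabel, off);             // resolves the fixup
 * @endcode
 */
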
6402/** Condition type. */
6403#ifdef RT_ARCH_AMD64
6404typedef enum IEMNATIVEINSTRCOND : uint8_t
6405{
6406 kIemNativeInstrCond_o = 0,
6407 kIemNativeInstrCond_no,
6408 kIemNativeInstrCond_c,
6409 kIemNativeInstrCond_nc,
6410 kIemNativeInstrCond_e,
6411 kIemNativeInstrCond_z = kIemNativeInstrCond_e,
6412 kIemNativeInstrCond_ne,
6413 kIemNativeInstrCond_nz = kIemNativeInstrCond_ne,
6414 kIemNativeInstrCond_be,
6415 kIemNativeInstrCond_nbe,
6416 kIemNativeInstrCond_s,
6417 kIemNativeInstrCond_ns,
6418 kIemNativeInstrCond_p,
6419 kIemNativeInstrCond_np,
6420 kIemNativeInstrCond_l,
6421 kIemNativeInstrCond_nl,
6422 kIemNativeInstrCond_le,
6423 kIemNativeInstrCond_nle
6424} IEMNATIVEINSTRCOND;
6425#elif defined(RT_ARCH_ARM64)
6426typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6427# define kIemNativeInstrCond_o todo_conditional_codes
6428# define kIemNativeInstrCond_no todo_conditional_codes
6429# define kIemNativeInstrCond_c todo_conditional_codes
6430# define kIemNativeInstrCond_nc todo_conditional_codes
6431# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6432# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6433# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6434# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6435# define kIemNativeInstrCond_s todo_conditional_codes
6436# define kIemNativeInstrCond_ns todo_conditional_codes
6437# define kIemNativeInstrCond_p todo_conditional_codes
6438# define kIemNativeInstrCond_np todo_conditional_codes
6439# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6440# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6441# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6442# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6443#else
6444# error "Port me!"
6445#endif
6446
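/*
 * The condition type is architecture specific, so portable call sites either
 * use the Jz/Jnz/Jbe/Ja style wrappers below or pick the enumerator per
 * architecture, e.g. (sketch, assuming the usual emitter state):
 *
 * @code
 * #ifdef RT_ARCH_AMD64
 *      off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
 * #elif defined(RT_ARCH_ARM64)
 *      off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
 * #endif
 * @endcode
 */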
6447
6448/**
6449 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6450 */
6451DECL_FORCE_INLINE_THROW(uint32_t)
6452iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6453 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6454{
6455 Assert(idxLabel < pReNative->cLabels);
6456
6457 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6458#ifdef RT_ARCH_AMD64
6459 if (offLabel >= off)
6460 {
6461 /* jcc rel32 */
6462 pCodeBuf[off++] = 0x0f;
6463 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6464 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6465 pCodeBuf[off++] = 0x00;
6466 pCodeBuf[off++] = 0x00;
6467 pCodeBuf[off++] = 0x00;
6468 pCodeBuf[off++] = 0x00;
6469 }
6470 else
6471 {
6472 int32_t offDisp = offLabel - (off + 2);
6473 if ((int8_t)offDisp == offDisp)
6474 {
6475 /* jcc rel8 */
6476 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6477 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6478 }
6479 else
6480 {
6481 /* jcc rel32 */
6482 offDisp -= 4;
6483 pCodeBuf[off++] = 0x0f;
6484 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6485 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6486 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6487 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6488 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6489 }
6490 }
6491
6492#elif defined(RT_ARCH_ARM64)
6493 if (offLabel >= off)
6494 {
6495 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6496 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6497 }
6498 else
6499 {
6500 Assert(off - offLabel <= 0x3ffffU);
6501 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6502 off++;
6503 }
6504
6505#else
6506# error "Port me!"
6507#endif
6508 return off;
6509}
6510
6511
6512/**
6513 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6514 */
6515DECL_INLINE_THROW(uint32_t)
6516iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6517{
6518#ifdef RT_ARCH_AMD64
6519 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6520#elif defined(RT_ARCH_ARM64)
6521 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6522#else
6523# error "Port me!"
6524#endif
6525 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6526 return off;
6527}
6528
6529
6530/**
6531 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6532 */
6533DECL_INLINE_THROW(uint32_t)
6534iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6535 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6536{
6537 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6538 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6539}
6540
6541
6542/**
6543 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6544 */
6545DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6546{
6547#ifdef RT_ARCH_AMD64
6548 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6549#elif defined(RT_ARCH_ARM64)
6550 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6551#else
6552# error "Port me!"
6553#endif
6554}
6555
6556/**
6557 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6558 */
6559DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6560 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6561{
6562#ifdef RT_ARCH_AMD64
6563 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6564#elif defined(RT_ARCH_ARM64)
6565 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6566#else
6567# error "Port me!"
6568#endif
6569}
6570
6571
6572/**
6573 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6574 */
6575DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6576{
6577#ifdef RT_ARCH_AMD64
6578 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6579#elif defined(RT_ARCH_ARM64)
6580 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6581#else
6582# error "Port me!"
6583#endif
6584}
6585
6586/**
6587 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6588 */
6589DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6590 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6591{
6592#ifdef RT_ARCH_AMD64
6593 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6594#elif defined(RT_ARCH_ARM64)
6595 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6596#else
6597# error "Port me!"
6598#endif
6599}
6600
6601
6602/**
6603 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6604 */
6605DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6606{
6607#ifdef RT_ARCH_AMD64
6608 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6609#elif defined(RT_ARCH_ARM64)
6610 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6611#else
6612# error "Port me!"
6613#endif
6614}
6615
6616/**
6617 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6618 */
6619DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6620 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6621{
6622#ifdef RT_ARCH_AMD64
6623 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6624#elif defined(RT_ARCH_ARM64)
6625 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6626#else
6627# error "Port me!"
6628#endif
6629}
6630
6631
6632/**
6633 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6634 */
6635DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6636{
6637#ifdef RT_ARCH_AMD64
6638 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6639#elif defined(RT_ARCH_ARM64)
6640 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6641#else
6642# error "Port me!"
6643#endif
6644}
6645
6646/**
6647 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6648 */
6649DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6650 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6651{
6652#ifdef RT_ARCH_AMD64
6653 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6654#elif defined(RT_ARCH_ARM64)
6655 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6656#else
6657# error "Port me!"
6658#endif
6659}
6660
6661
6662/**
6663 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6664 */
6665DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6666{
6667#ifdef RT_ARCH_AMD64
6668 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6669#elif defined(RT_ARCH_ARM64)
6670 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6671#else
6672# error "Port me!"
6673#endif
6674}
6675
6676/**
6677 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6678 */
6679DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6680 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6681{
6682#ifdef RT_ARCH_AMD64
6683 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6684#elif defined(RT_ARCH_ARM64)
6685 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6686#else
6687# error "Port me!"
6688#endif
6689}
6690
6691
6692/**
6693 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6694 *
6695 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6696 *
6697 * Only use hardcoded jumps forward when emitting for exactly one
6698 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6699 * the right target address on all platforms!
6700 *
6701 * Please also note that on x86 it is necessary to pass off + 256 or higher
6702 * for @a offTarget if one believes the intervening code is more than 127
6703 * bytes long.
6704 */
6705DECL_FORCE_INLINE(uint32_t)
6706iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6707{
6708#ifdef RT_ARCH_AMD64
6709 /* jcc rel8 / rel32 */
6710 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6711 if (offDisp < 128 && offDisp >= -128)
6712 {
6713 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6714 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6715 }
6716 else
6717 {
6718 offDisp -= 4;
6719 pCodeBuf[off++] = 0x0f;
6720 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6721 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6722 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6723 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6724 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6725 }
6726
6727#elif defined(RT_ARCH_ARM64)
6728 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6729 off++;
6730#else
6731# error "Port me!"
6732#endif
6733 return off;
6734}
6735
6736
6737/**
6738 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6739 *
6740 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6741 *
6742 * Only use hardcoded jumps forward when emitting for exactly one
6743 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6744 * the right target address on all platforms!
6745 *
6746 * Please also note that on x86 it is necessary to pass off + 256 or higher
6747 * for @a offTarget if one believes the intervening code is more than 127
6748 * bytes long.
6749 */
6750DECL_INLINE_THROW(uint32_t)
6751iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6752{
6753#ifdef RT_ARCH_AMD64
6754 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6755#elif defined(RT_ARCH_ARM64)
6756 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6757#else
6758# error "Port me!"
6759#endif
6760 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6761 return off;
6762}
6763
6764
6765/**
6766 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6767 *
6768 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6769 */
6770DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6771{
6772#ifdef RT_ARCH_AMD64
6773 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6774#elif defined(RT_ARCH_ARM64)
6775 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6776#else
6777# error "Port me!"
6778#endif
6779}
6780
6781
6782/**
6783 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6784 *
6785 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6786 */
6787DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6788{
6789#ifdef RT_ARCH_AMD64
6790 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6791#elif defined(RT_ARCH_ARM64)
6792 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6793#else
6794# error "Port me!"
6795#endif
6796}
6797
6798
6799/**
6800 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6801 *
6802 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6803 */
6804DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6805{
6806#ifdef RT_ARCH_AMD64
6807 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6808#elif defined(RT_ARCH_ARM64)
6809 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6810#else
6811# error "Port me!"
6812#endif
6813}
6814
6815
6816/**
6817 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6818 *
6819 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6820 */
6821DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6822{
6823#ifdef RT_ARCH_AMD64
6824 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6825#elif defined(RT_ARCH_ARM64)
6826 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6827#else
6828# error "Port me!"
6829#endif
6830}
6831
6832
6833/**
6834 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6835 *
6836 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6837 */
6838DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6839{
6840#ifdef RT_ARCH_AMD64
6841 /* jmp rel8 or rel32 */
6842 int32_t offDisp = offTarget - (off + 2);
6843 if (offDisp < 128 && offDisp >= -128)
6844 {
6845 pCodeBuf[off++] = 0xeb;
6846 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6847 }
6848 else
6849 {
6850 offDisp -= 3;
6851 pCodeBuf[off++] = 0xe9;
6852 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6853 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6854 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6855 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6856 }
6857
6858#elif defined(RT_ARCH_ARM64)
6859 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6860 off++;
6861
6862#else
6863# error "Port me!"
6864#endif
6865 return off;
6866}
6867
6868
6869/**
6870 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6871 *
6872 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6873 */
6874DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6875{
6876#ifdef RT_ARCH_AMD64
6877 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6878#elif defined(RT_ARCH_ARM64)
6879 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6880#else
6881# error "Port me!"
6882#endif
6883 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6884 return off;
6885}
6886
6887
6888/**
6889 * Fixes up a conditional jump to a fixed label.
6890 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6891 * iemNativeEmitJzToFixed, ...
6892 */
6893DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6894{
6895#ifdef RT_ARCH_AMD64
6896 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6897 uint8_t const bOpcode = pbCodeBuf[offFixup];
6898 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6899 {
6900 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6901 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6902 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6903 }
6904 else
6905 {
6906 if (bOpcode != 0x0f)
6907 Assert(bOpcode == 0xe9);
6908 else
6909 {
6910 offFixup += 1;
6911 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
6912 }
6913 uint32_t const offRel32 = offTarget - (offFixup + 5);
6914 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6915 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6916 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6917 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6918 }
6919
6920#elif defined(RT_ARCH_ARM64)
6921 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6922 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6923 {
6924 /* B.COND + BC.COND */
6925 int32_t const offDisp = offTarget - offFixup;
6926 Assert(offDisp >= -262144 && offDisp < 262144);
6927 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6928 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6929 }
6930 else
6931 {
6932 /* B imm26 */
6933 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6934 int32_t const offDisp = offTarget - offFixup;
6935 Assert(offDisp >= -33554432 && offDisp < 33554432);
6936 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6937 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6938 }
6939
6940#else
6941# error "Port me!"
6942#endif
6943}
6944
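/*
 * Sketch of the fixed-jump pattern the helpers above are meant for
 * (illustrative; the off + 256 placeholder forces the rel32 form on AMD64 as
 * per the notes on iemNativeEmitJccToFixed):
 *
 * @code
 *      uint32_t const offFixup = off;
 *      off = iemNativeEmitJnzToFixed(pReNative, off, off + 256);   // placeholder target
 *      ...                                                         // the code being skipped
 *      iemNativeFixupFixedJump(pReNative, offFixup, off);          // patch the real target in
 * @endcode
 */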
6945
6946#ifdef RT_ARCH_AMD64
6947/**
6948 * For doing bt on a register.
6949 */
6950DECL_INLINE_THROW(uint32_t)
6951iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6952{
6953 Assert(iBitNo < 64);
6954 /* bt Ev, imm8 */
6955 if (iBitNo >= 32)
6956 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6957 else if (iGprSrc >= 8)
6958 pCodeBuf[off++] = X86_OP_REX_B;
6959 pCodeBuf[off++] = 0x0f;
6960 pCodeBuf[off++] = 0xba;
6961 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6962 pCodeBuf[off++] = iBitNo;
6963 return off;
6964}
6965#endif /* RT_ARCH_AMD64 */
6966
6967
6968/**
6969 * Internal helper, don't call directly.
6970 */
6971DECL_INLINE_THROW(uint32_t)
6972iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
6973 uint32_t offTarget, uint32_t *poffFixup, bool fJmpIfSet)
6974{
6975 Assert(iBitNo < 64);
6976#ifdef RT_ARCH_AMD64
6977 if (iBitNo < 8)
6978 {
6979 /* test Eb, imm8 */
6980 if (iGprSrc >= 4)
6981 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6982 pCodeBuf[off++] = 0xf6;
6983 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6984 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
6985 if (poffFixup)
6986 *poffFixup = off;
6987 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6988 }
6989 else
6990 {
6991 /* bt Ev, imm8 */
6992 if (iBitNo >= 32)
6993 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6994 else if (iGprSrc >= 8)
6995 pCodeBuf[off++] = X86_OP_REX_B;
6996 pCodeBuf[off++] = 0x0f;
6997 pCodeBuf[off++] = 0xba;
6998 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6999 pCodeBuf[off++] = iBitNo;
7000 if (poffFixup)
7001 *poffFixup = off;
7002 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7003 }
7004
7005#elif defined(RT_ARCH_ARM64)
7006 /* Just use the TBNZ instruction here. */
7007 if (poffFixup)
7008 *poffFixup = off;
7009 pCodeBuf[off] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, (int32_t)(offTarget - off), iGprSrc, iBitNo); off++;
7010
7011#else
7012# error "Port me!"
7013#endif
7014 return off;
7015}
7016
7017
7018/**
7019 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _set_
7020 * in @a iGprSrc.
7021 */
7022DECL_INLINE_THROW(uint32_t)
7023iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7024 uint32_t offTarget, uint32_t *poffFixup)
7025{
7026 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, true /*fJmpIfSet*/);
7027}
7028
7029
7030/**
7031 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _not_
7032 * _set_ in @a iGprSrc.
7033 */
7034DECL_INLINE_THROW(uint32_t)
7035iemNativeEmitTestBitInGprAndJmpToFixedIfNotSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7036 uint32_t offTarget, uint32_t *poffFixup)
7037{
7038 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, false /*fJmpIfSet*/);
7039}
7040
7041
7042
7043/**
7044 * Internal helper, don't call directly.
7045 */
7046DECL_INLINE_THROW(uint32_t)
7047iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7048 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7049{
7050 Assert(iBitNo < 64);
7051#ifdef RT_ARCH_AMD64
7052 if (iBitNo < 8)
7053 {
7054 /* test Eb, imm8 */
7055 if (iGprSrc >= 4)
7056 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7057 pCodeBuf[off++] = 0xf6;
7058 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7059 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7060 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7061 fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7062 }
7063 else
7064 {
7065 /* bt Ev, imm8 */
7066 if (iBitNo >= 32)
7067 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7068 else if (iGprSrc >= 8)
7069 pCodeBuf[off++] = X86_OP_REX_B;
7070 pCodeBuf[off++] = 0x0f;
7071 pCodeBuf[off++] = 0xba;
7072 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7073 pCodeBuf[off++] = iBitNo;
7074 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7075 fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7076 }
7077
7078#elif defined(RT_ARCH_ARM64)
7079 /* Use the TBNZ instruction here. */
7080 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
7081 {
7082 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
7083 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
7084 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
7085 //if (offLabel == UINT32_MAX)
7086 {
7087 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
7088 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
7089 }
7090 //else
7091 //{
7092 // RT_BREAKPOINT();
7093 // Assert(off - offLabel <= 0x1fffU);
7094 // pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
7095 //
7096 //}
7097 }
7098 else
7099 {
7100 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
7101 pCodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
7102 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7103 pCodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
7104 }
7105
7106#else
7107# error "Port me!"
7108#endif
7109 return off;
7110}
7111
7112
7113/**
7114 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7115 * @a iGprSrc.
7116 */
7117DECL_INLINE_THROW(uint32_t)
7118iemNativeEmitTestBitInGprAndJmpToLabelIfSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7119 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7120{
7121 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7122}
7123
7124
7125/**
7126 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7127 * _set_ in @a iGprSrc.
7128 */
7129DECL_INLINE_THROW(uint32_t)
7130iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7131 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7132{
7133 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7134}
7135
7136
7137/**
7138 * Internal helper, don't call directly.
7139 */
7140DECL_INLINE_THROW(uint32_t)
7141iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7142 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7143{
7144#ifdef RT_ARCH_AMD64
7145 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 5+6), off,
7146 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7147#elif defined(RT_ARCH_ARM64)
7148 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 2), off,
7149 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7150#else
7151# error "Port me!"
7152#endif
7153 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7154 return off;
7155}
7156
7157
7158/**
7159 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7160 * @a iGprSrc.
7161 */
7162DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7163 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7164{
7165 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7166}
7167
7168
7169/**
7170 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7171 * _set_ in @a iGprSrc.
7172 */
7173DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7174 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7175{
7176 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7177}
7178
7179
7180/**
7181 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7182 * flags accordingly.
7183 */
7184DECL_INLINE_THROW(uint32_t)
7185iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7186{
7187 Assert(fBits != 0);
7188#ifdef RT_ARCH_AMD64
7189
7190 if (fBits >= UINT32_MAX)
7191 {
7192 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7193
7194 /* test Ev,Gv */
7195 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7196 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7197 pbCodeBuf[off++] = 0x85;
7198 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7199
7200 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7201 }
7202 else if (fBits <= UINT32_MAX)
7203 {
7204 /* test Eb, imm8 or test Ev, imm32 */
7205 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7206 if (fBits <= UINT8_MAX)
7207 {
7208 if (iGprSrc >= 4)
7209 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7210 pbCodeBuf[off++] = 0xf6;
7211 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7212 pbCodeBuf[off++] = (uint8_t)fBits;
7213 }
7214 else
7215 {
7216 if (iGprSrc >= 8)
7217 pbCodeBuf[off++] = X86_OP_REX_B;
7218 pbCodeBuf[off++] = 0xf7;
7219 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7220 pbCodeBuf[off++] = RT_BYTE1(fBits);
7221 pbCodeBuf[off++] = RT_BYTE2(fBits);
7222 pbCodeBuf[off++] = RT_BYTE3(fBits);
7223 pbCodeBuf[off++] = RT_BYTE4(fBits);
7224 }
7225 }
7226 /** @todo implement me. */
7227 else
7228 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7229
7230#elif defined(RT_ARCH_ARM64)
7231 uint32_t uImmR = 0;
7232 uint32_t uImmNandS = 0;
7233 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7234 {
7235 /* ands xzr, iGprSrc, #fBits */
7236 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7237 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7238 }
7239 else
7240 {
7241 /* ands xzr, iGprSrc, iTmpReg */
7242 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7243 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7244 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7245 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7246 }
7247
7248#else
7249# error "Port me!"
7250#endif
7251 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7252 return off;
7253}
7254
7255
7256/**
7257 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7258 * @a iGprSrc, setting CPU flags accordingly.
7259 *
7260 * @note For ARM64 this only supports @a fBits values that can be expressed
7261 * using the two 6-bit immediates of the ANDS instruction. The caller
7262 * must make sure this is possible!
7263 */
7264DECL_FORCE_INLINE_THROW(uint32_t)
7265iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
7266{
7267 Assert(fBits != 0);
7268
7269#ifdef RT_ARCH_AMD64
7270 if (fBits <= UINT8_MAX)
7271 {
7272 /* test Eb, imm8 */
7273 if (iGprSrc >= 4)
7274 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7275 pCodeBuf[off++] = 0xf6;
7276 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7277 pCodeBuf[off++] = (uint8_t)fBits;
7278 }
7279 else
7280 {
7281 /* test Ev, imm32 */
7282 if (iGprSrc >= 8)
7283 pCodeBuf[off++] = X86_OP_REX_B;
7284 pCodeBuf[off++] = 0xf7;
7285 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7286 pCodeBuf[off++] = RT_BYTE1(fBits);
7287 pCodeBuf[off++] = RT_BYTE2(fBits);
7288 pCodeBuf[off++] = RT_BYTE3(fBits);
7289 pCodeBuf[off++] = RT_BYTE4(fBits);
7290 }
7291
7292#elif defined(RT_ARCH_ARM64)
7293 /* ands xzr, src, #fBits */
7294 uint32_t uImmR = 0;
7295 uint32_t uImmNandS = 0;
7296 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7297 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7298 else
7299# ifdef IEM_WITH_THROW_CATCH
7300 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7301# else
7302 AssertReleaseFailedStmt(off = UINT32_MAX);
7303# endif
7304
7305#else
7306# error "Port me!"
7307#endif
7308 return off;
7309}
7310
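/*
 * For reference: Armv8A64ConvertMask32ToImmRImmS() only succeeds for masks
 * forming a single, possibly rotated and repeated, run of set bits.  Sketch
 * of the caller-side contract (mask values purely illustrative):
 *
 * @code
 *      off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxReg, UINT32_C(0x0000fff0)); // encodable on ARM64
 *      off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, UINT64_C(0x5));           // any mask; falls back to a tmp reg
 * @endcode
 */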
7311
7312
7313/**
7314 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7315 * @a iGprSrc, setting CPU flags accordingly.
7316 *
7317 * @note For ARM64 this only supports @a fBits values that can be expressed
7318 * using the two 6-bit immediates of the ANDS instruction. The caller
7319 * must make sure this is possible!
7320 */
7321DECL_FORCE_INLINE_THROW(uint32_t)
7322iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7323{
7324 Assert(fBits != 0);
7325
7326#ifdef RT_ARCH_AMD64
7327 /* test Eb, imm8 */
7328 if (iGprSrc >= 4)
7329 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7330 pCodeBuf[off++] = 0xf6;
7331 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7332 pCodeBuf[off++] = fBits;
7333
7334#elif defined(RT_ARCH_ARM64)
7335 /* ands xzr, src, #fBits */
7336 uint32_t uImmR = 0;
7337 uint32_t uImmNandS = 0;
7338 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7339 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7340 else
7341# ifdef IEM_WITH_THROW_CATCH
7342 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7343# else
7344 AssertReleaseFailedStmt(off = UINT32_MAX);
7345# endif
7346
7347#else
7348# error "Port me!"
7349#endif
7350 return off;
7351}
7352
7353
7354/**
7355 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7356 * @a iGprSrc, setting CPU flags accordingly.
7357 */
7358DECL_INLINE_THROW(uint32_t)
7359iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7360{
7361 Assert(fBits != 0);
7362
7363#ifdef RT_ARCH_AMD64
7364 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7365
7366#elif defined(RT_ARCH_ARM64)
7367 /* ands xzr, src, [tmp|#imm] */
7368 uint32_t uImmR = 0;
7369 uint32_t uImmNandS = 0;
7370 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7371 {
7372 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7373 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7374 }
7375 else
7376 {
7377 /* Use temporary register for the immediate. */
7378 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7379 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7380 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7381 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7382 }
7383
7384#else
7385# error "Port me!"
7386#endif
7387 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7388 return off;
7389}
7390
7391
7392/**
7393 * Emits a jump to @a idxLabel on the condition that _any_ of the bits in @a fBits
7394 * are set in @a iGprSrc.
7395 */
7396DECL_INLINE_THROW(uint32_t)
7397iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7398 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7399{
7400 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7401
7402 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7403 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7404
7405 return off;
7406}
7407
7408
7409/**
7410 * Emits a jump to @a idxLabel on the condition that _none_ of the bits in @a fBits
7411 * are set in @a iGprSrc.
7412 */
7413DECL_INLINE_THROW(uint32_t)
7414iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7415 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7416{
7417 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7418
7419 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7420 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7421
7422 return off;
7423}
7424
7425
7426/**
7427 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as selected by @a fJmpIfNotZero.
7428 *
7429 * The operand size is given by @a f64Bit.
7430 */
7431DECL_FORCE_INLINE_THROW(uint32_t)
7432iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7433 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7434{
7435 Assert(idxLabel < pReNative->cLabels);
7436
7437#ifdef RT_ARCH_AMD64
7438 /* test reg32,reg32 / test reg64,reg64 */
7439 if (f64Bit)
7440 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7441 else if (iGprSrc >= 8)
7442 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7443 pCodeBuf[off++] = 0x85;
7444 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7445
7446 /* jnz idxLabel */
7447 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7448 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7449
7450#elif defined(RT_ARCH_ARM64)
7451 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7452 {
7453 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7454 iGprSrc, f64Bit);
7455 off++;
7456 }
7457 else
7458 {
7459 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7460 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7461 }
7462
7463#else
7464# error "Port me!"
7465#endif
7466 return off;
7467}
7468
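/*
 * Design note: the ARM64 path uses CBZ/CBNZ, which encodes the zero test and
 * the branch in a single instruction and neither reads nor writes the
 * condition flags, whereas the AMD64 path has to emit a TEST (clobbering
 * EFLAGS) followed by a Jcc.
 */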
7469
7470/**
7471 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as selected by @a fJmpIfNotZero.
7472 *
7473 * The operand size is given by @a f64Bit.
7474 */
7475DECL_FORCE_INLINE_THROW(uint32_t)
7476iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7477 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7478{
7479#ifdef RT_ARCH_AMD64
7480 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7481 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7482#elif defined(RT_ARCH_ARM64)
7483 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7484 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7485#else
7486# error "Port me!"
7487#endif
7488 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7489 return off;
7490}
7491
7492
7493/* if (Gpr1 == 0) Jmp idxLabel; */
7494
7495/**
7496 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7497 *
7498 * The operand size is given by @a f64Bit.
7499 */
7500DECL_FORCE_INLINE_THROW(uint32_t)
7501iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7502 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7503{
7504 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7505 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7506}
7507
7508
7509/**
7510 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7511 *
7512 * The operand size is given by @a f64Bit.
7513 */
7514DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7515 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7516{
7517 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7518}
7519
7520
7521/**
7522 * Emits code that jumps to a new label if @a iGprSrc is zero.
7523 *
7524 * The operand size is given by @a f64Bit.
7525 */
7526DECL_INLINE_THROW(uint32_t)
7527iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7528 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7529{
7530 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7531 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7532}
7533
7534
7535/* if (Gpr1 != 0) Jmp idxLabel; */
7536
7537/**
7538 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7539 *
7540 * The operand size is given by @a f64Bit.
7541 */
7542DECL_FORCE_INLINE_THROW(uint32_t)
7543iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7544 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7545{
7546 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7547 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7548}
7549
7550
7551/**
7552 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7553 *
7554 * The operand size is given by @a f64Bit.
7555 */
7556DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7557 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7558{
7559 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7560}
7561
7562
7563/**
7564 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7565 *
7566 * The operand size is given by @a f64Bit.
7567 */
7568DECL_INLINE_THROW(uint32_t)
7569iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7570 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7571{
7572 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7573 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7574}
7575
7576
7577/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7578
7579/**
7580 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7581 * differ.
7582 */
7583DECL_INLINE_THROW(uint32_t)
7584iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7585 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7586{
7587 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7588 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7589 return off;
7590}
7591
7592
7593/**
7594 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
7595 */
7596DECL_INLINE_THROW(uint32_t)
7597iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7598 uint8_t iGprLeft, uint8_t iGprRight,
7599 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7600{
7601 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7602 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7603}
7604
7605
7606/* if (Gpr != Imm) Jmp idxLabel; */
7607
7608/**
7609 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7610 */
7611DECL_INLINE_THROW(uint32_t)
7612iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7613 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7614{
7615 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7616 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7617 return off;
7618}
7619
7620
7621/**
7622 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7623 */
7624DECL_INLINE_THROW(uint32_t)
7625iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7626 uint8_t iGprSrc, uint64_t uImm,
7627 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7628{
7629 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7630 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7631}
7632
7633
7634/**
7635 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7636 * @a uImm.
7637 */
7638DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7639 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7640{
7641 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7642 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7643 return off;
7644}
7645
7646
7647/**
7648 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7649 * @a uImm.
7650 */
7651DECL_INLINE_THROW(uint32_t)
7652iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7653 uint8_t iGprSrc, uint32_t uImm,
7654 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7655{
7656 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7657 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7658}
7659
7660
7661/**
7662 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7663 * @a uImm.
7664 */
7665DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7666 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7667{
7668 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7669 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7670 return off;
7671}
7672
7673
7674/**
7675 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7676 * @a uImm.
7677 */
7678DECL_INLINE_THROW(uint32_t)
7679iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7680 uint8_t iGprSrc, uint16_t uImm,
7681 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7682{
7683 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7684 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7685}
7686
7687
7688/* if (Gpr == Imm) Jmp idxLabel; */
7689
7690/**
7691 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7692 */
7693DECL_INLINE_THROW(uint32_t)
7694iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7695 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7696{
7697 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7698 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7699 return off;
7700}
7701
7702
7703/**
7704 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7705 */
7706DECL_INLINE_THROW(uint32_t)
7707iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7708 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7709{
7710 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7711 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7712}
7713
7714
7715/**
7716 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7717 */
7718DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7719 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7720{
7721 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7722 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7723 return off;
7724}
7725
7726
7727/**
7728 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7729 */
7730DECL_INLINE_THROW(uint32_t)
7731iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7732 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7733{
7734 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7735 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7736}
7737
7738
7739/**
7740 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7741 *
7742 * @note ARM64: Helper register is required (idxTmpReg).
7743 */
7744DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7745 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7746 uint8_t idxTmpReg = UINT8_MAX)
7747{
7748 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7749 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7750 return off;
7751}
7752
7753
7754/**
7755 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7756 *
7757 * @note ARM64: Helper register is required (idxTmpReg).
7758 */
7759DECL_INLINE_THROW(uint32_t)
7760iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7761 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7762 uint8_t idxTmpReg = UINT8_MAX)
7763{
7764 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7765 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7766}
7767
7768
7769
7770/*********************************************************************************************************************************
7771* Indirect Jumps. *
7772*********************************************************************************************************************************/
7773
7774/**
7775 * Emits an indirect jump to a 64-bit address in a GPR.
7776 */
7777DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpViaGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc)
7778{
7779#ifdef RT_ARCH_AMD64
7780 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7781 if (iGprSrc >= 8)
7782 pCodeBuf[off++] = X86_OP_REX_B;
7783 pCodeBuf[off++] = 0xff;
7784 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7785
7786#elif defined(RT_ARCH_ARM64)
7787 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7788 pCodeBuf[off++] = Armv8A64MkInstrBr(iGprSrc);
7789
7790#else
7791# error "port me"
7792#endif
7793 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7794 return off;
7795}
7796
7797
7798/**
7799 * Emits an indirect jump to an immediate 64-bit address (uses the temporary GPR).
7800 */
7801DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7802{
7803 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7804 return iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP0);
7805}
7806
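/*
 * Sketch: tail-jump to code at a fixed host address (pfnTarget is a
 * hypothetical function pointer).  Note that this clobbers
 * IEMNATIVE_REG_FIXED_TMP0, which holds the loaded address:
 *
 * @code
 *      off = iemNativeEmitJmpImm(pReNative, off, (uintptr_t)pfnTarget);
 * @endcode
 */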
7807
7808/*********************************************************************************************************************************
7809* Calls. *
7810*********************************************************************************************************************************/
7811
7812/**
7813 * Emits a call to a 64-bit address.
7814 */
7815DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7816{
7817#ifdef RT_ARCH_AMD64
7818 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7819
7820 /* call rax */
7821 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7822 pbCodeBuf[off++] = 0xff;
7823 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7824
7825#elif defined(RT_ARCH_ARM64)
7826 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7827
7828 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7829 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7830
7831#else
7832# error "port me"
7833#endif
7834 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7835 return off;
7836}
7837
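/*
 * Sketch: call a C helper taking pVCpu as its only argument (assumes the
 * usual IEMNATIVE_CALL_ARG0_GREG / IEMNATIVE_REG_FIXED_PVMCPU defines from
 * IEMN8veRecompiler.h; iemNativeHlpSomething is a hypothetical helper):
 *
 * @code
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpSomething);
 * @endcode
 */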
7838
7839/**
7840 * Emits code to load a stack variable into an argument GPR.
7841 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7842 */
7843DECL_FORCE_INLINE_THROW(uint32_t)
7844iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7845 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7846 bool fSpilledVarsInVolatileRegs = false)
7847{
7848 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7849 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7850 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7851
7852 uint8_t const idxRegVar = pVar->idxReg;
7853 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7854 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7855 || !fSpilledVarsInVolatileRegs ))
7856 {
7857 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7858 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7859 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7860 if (!offAddend)
7861 {
7862 if (idxRegArg != idxRegVar)
7863 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7864 }
7865 else
7866 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7867 }
7868 else
7869 {
7870 uint8_t const idxStackSlot = pVar->idxStackSlot;
7871 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7872 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7873 if (offAddend)
7874 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7875 }
7876 return off;
7877}
7878
7879
7880/**
7881 * Emits code to load a stack or immediate variable value into an argument GPR,
7882 * optionally with an addend.
7883 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7884 */
7885DECL_FORCE_INLINE_THROW(uint32_t)
7886iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7887 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7888 bool fSpilledVarsInVolatileRegs = false)
7889{
7890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7891 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7892 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7893 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7894 else
7895 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7896 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7897 return off;
7898}
7899
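/*
 * Sketch: marshal a translation-block variable into the second call argument
 * with a +1 addend (illustrative; idxVar is a caller-managed variable index
 * and IEMNATIVE_CALL_ARG1_GREG comes from IEMN8veRecompiler.h):
 *
 * @code
 *      off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
 *                                                      idxVar, 1); // offAddend = 1
 * @endcode
 */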
7900
7901/**
7902 * Emits code to load the variable address into an argument GPR.
7903 *
7904 * This only works for uninitialized and stack variables.
7905 */
7906DECL_FORCE_INLINE_THROW(uint32_t)
7907iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7908 bool fFlushShadows)
7909{
7910 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7911 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7912 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7913 || pVar->enmKind == kIemNativeVarKind_Stack,
7914 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7915 AssertStmt(!pVar->fSimdReg,
7916 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7917
7918 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7919 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7920
7921 uint8_t const idxRegVar = pVar->idxReg;
7922 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7923 {
7924 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7925 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7926 Assert(pVar->idxReg == UINT8_MAX);
7927 }
7928 Assert( pVar->idxStackSlot != UINT8_MAX
7929 && pVar->idxReg == UINT8_MAX);
7930
7931 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7932}
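
/*
 * Usage sketch with assumed names: pass a stack variable by reference to a C
 * helper.  IEMNATIVE_CALL_ARG2_GREG is assumed from IEMN8veRecompiler.h and
 * idxVarMem is hypothetical; the final 'false' (fFlushShadows) keeps the
 * guest register shadows when the variable's host register is freed:
 *
 *      off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off,
 *                                                IEMNATIVE_CALL_ARG2_GREG,
 *                                                idxVarMem, false);
 */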
7933
7934
7935/*********************************************************************************************************************************
7936* TB exiting helpers. *
7937*********************************************************************************************************************************/
7938
7939
7940/**
7941 * Emits a Jcc rel32 / B.cc imm19 to the epilog.
7942 */
7943DECL_INLINE_THROW(uint32_t)
7944iemNativeEmitJccTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7945 IEMNATIVEEXITREASON enmExitReason, IEMNATIVEINSTRCOND enmCond)
7946{
7947 return iemNativeEmitJccToNewLabel(pReNative, off, (IEMNATIVELABELTYPE)enmExitReason, 0 /*uData*/, enmCond);
7948}
7949
7950
7951/**
7952 * Emits a JNZ/JNE rel32 / B.NE imm19 to the TB exit routine with the given reason.
7953 */
7954DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7955 IEMNATIVEEXITREASON enmExitReason)
7956{
7957#ifdef RT_ARCH_AMD64
7958 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_ne);
7959#elif defined(RT_ARCH_ARM64)
7960 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Ne);
7961#else
7962# error "Port me!"
7963#endif
7964}
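
/*
 * Typical pattern, sketched with hypothetical values (idxRegTmp, uExpected)
 * and assuming kIemNativeExitReason_RaiseGp0 is among the IEMNATIVEEXITREASON
 * values: compare and leave the TB when the values differ:
 *
 *      off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, uExpected);
 *      off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeExitReason_RaiseGp0);
 */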
7965
7966
7967/**
7968 * Emits a JZ/JE rel32 / B.EQ imm19 to the TB exit routine with the given reason.
7969 */
7970DECL_INLINE_THROW(uint32_t) iemNativeEmitJzTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7971 IEMNATIVEEXITREASON enmExitReason)
7972{
7973#ifdef RT_ARCH_AMD64
7974 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_e);
7975#elif defined(RT_ARCH_ARM64)
7976 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Eq);
7977#else
7978# error "Port me!"
7979#endif
7980}
7981
7982
7983/**
7984 * Emits a JA/JNBE rel32 / B.HI imm19 to the TB exit.
7985 */
7986DECL_INLINE_THROW(uint32_t) iemNativeEmitJaTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7987 IEMNATIVEEXITREASON enmExitReason)
7988{
7989#ifdef RT_ARCH_AMD64
7990 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_nbe);
7991#elif defined(RT_ARCH_ARM64)
7992 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Hi);
7993#else
7994# error "Port me!"
7995#endif
7996}
7997
7998
7999/**
8000 * Emits a JL/JNGE rel32 / B.LT imm19 to the TB exit with the given reason.
8001 */
8002DECL_INLINE_THROW(uint32_t) iemNativeEmitJlTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8003 IEMNATIVEEXITREASON enmExitReason)
8004{
8005#ifdef RT_ARCH_AMD64
8006 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_l);
8007#elif defined(RT_ARCH_ARM64)
8008 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Lt);
8009#else
8010# error "Port me!"
8011#endif
8012}
8013
8014
/**
 * Emits an unconditional jump to the TB exit routine with the given reason.
 */
8015DECL_INLINE_THROW(uint32_t)
8016iemNativeEmitTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEEXITREASON enmExitReason)
8017{
8018 return iemNativeEmitJmpToNewLabel(pReNative, off, (IEMNATIVELABELTYPE)enmExitReason);
8019}
8020
8021
/**
 * Emits an unconditional jump to the TB exit routine with the given reason,
 * extended version taking a caller supplied instruction buffer.
 */
8022DECL_INLINE_THROW(uint32_t)
8023iemNativeEmitTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, IEMNATIVEEXITREASON enmExitReason)
8024{
8025 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, (IEMNATIVELABELTYPE)enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8026 return iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabel);
8027}
8028
8029
8030/**
8031 * Emits a jump to the TB exit with @a enmExitReason on the condition _any_ of the bits in @a fBits
8032 * are set in @a iGprSrc.
8033 */
8034DECL_INLINE_THROW(uint32_t)
8035iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8036 uint8_t iGprSrc, uint64_t fBits, IEMNATIVEEXITREASON enmExitReason)
8037{
8038 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8039
8040 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8041 return iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8042}
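
/*
 * Usage sketch (idxRegEfl is hypothetical and the exit reason value is
 * assumed to exist in IEMNATIVEEXITREASON): leave the TB when TF or RF is
 * pending in the register holding the guest EFLAGS:
 *
 *      off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegEfl,
 *                                                           X86_EFL_TF | X86_EFL_RF,
 *                                                           kIemNativeExitReason_ReturnWithFlags);
 */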
8043
8044
8045/**
8046 * Emits a jump to the TB exit with @a enmExitReason on the condition _none_ of
8047 * the bits in @a fBits are set in @a iGprSrc.
8048 */
8049DECL_INLINE_THROW(uint32_t)
8050iemNativeEmitTestAnyBitsInGprAndTbExitIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8051 uint8_t iGprSrc, uint64_t fBits, IEMNATIVEEXITREASON enmExitReason)
8052{
8053 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8054
8055 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8056 return iemNativeEmitJzTbExit(pReNative, off, enmExitReason);
8057}
8058
8059
8060/**
8061 * Emits code that exits the TB with the given reason if @a iGprLeft and
8062 * @a iGprRight differ.
8063 */
8064DECL_INLINE_THROW(uint32_t)
8065iemNativeEmitTestIfGprNotEqualGprAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8066 uint8_t iGprLeft, uint8_t iGprRight, IEMNATIVEEXITREASON enmExitReason)
8067{
8068 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
8069 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8070 return off;
8071}
8072
8073
8074/**
8075 * Emits code that exits the current TB with the given reason if 32-bit
8076 * @a iGprSrc differs from @a uImm.
8077 */
8078DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8079 uint8_t iGprSrc, uint32_t uImm, IEMNATIVEEXITREASON enmExitReason)
8080{
8081 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8082 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8083 return off;
8084}
8085
8086
8087/**
8088 * Emits code that exits the current TB if @a iGprSrc differs from @a uImm.
8089 */
8090DECL_INLINE_THROW(uint32_t)
8091iemNativeEmitTestIfGprNotEqualImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8092 uint8_t iGprSrc, uint64_t uImm, IEMNATIVEEXITREASON enmExitReason)
8093{
8094 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8095 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8096 return off;
8097}
8098
8099
8100/**
8101 * Emits code that exits the current TB with the given reason if 32-bit @a iGprSrc equals @a uImm.
8102 */
8103DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8104 uint8_t iGprSrc, uint32_t uImm, IEMNATIVEEXITREASON enmExitReason)
8105{
8106 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8107 off = iemNativeEmitJzTbExit(pReNative, off, enmExitReason);
8108 return off;
8109}
8110
8111
8112/**
8113 * Emits code to exit the current TB with the reason @a enmExitReason on the condition that bit @a iBitNo _is_ _set_ in
8114 * @a iGprSrc.
8115 *
8116 * @note On ARM64 the range is only +/-8191 instructions.
8117 */
8118DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndTbExitIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8119 uint8_t iGprSrc, uint8_t iBitNo, IEMNATIVEEXITREASON enmExitReason)
8120{
8121 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, (IEMNATIVELABELTYPE)enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8122 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
8123}
8124
8125
8126/**
8127 * Emits code that exits the current TB with @a enmExitReason if @a iGprSrc is not zero.
8128 *
8129 * The operand size is given by @a f64Bit.
8130 */
8131DECL_FORCE_INLINE_THROW(uint32_t)
8132iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8133 uint8_t iGprSrc, bool f64Bit, IEMNATIVEEXITREASON enmExitReason)
8134{
8135 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, (IEMNATIVELABELTYPE)enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8136 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8137 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8138}
8139
8140
8141/**
8142 * Emits code to exit the current TB on the given condition.
8143 */
8144DECL_INLINE_THROW(uint32_t)
8145iemNativeEmitJccTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, IEMNATIVEEXITREASON enmExitReason, IEMNATIVEINSTRCOND enmCond)
8146{
8147 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, (IEMNATIVELABELTYPE)enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8148#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
8149    off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel, enmCond);
8150#else
8151# error "Port me!"
8152#endif
8155 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8156 return off;
8157}
8158
8159
8160/**
8161 * Emits code to exit the current TB with the given reason @a enmExitReason if @a iGprSrc is not zero.
8162 *
8163 * The operand size is given by @a f64Bit.
8164 */
8165DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8166 uint8_t iGprSrc, bool f64Bit, IEMNATIVEEXITREASON enmExitReason)
8167{
8168 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, (IEMNATIVELABELTYPE)enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8169 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8170}
8171
8172
8173#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8174/*********************************************************************************************************************************
8175* SIMD helpers. *
8176*********************************************************************************************************************************/
8177
8178
8179/**
8180 * Emits code to load the variable address into an argument GPR.
8181 *
8182 * This is a special variant intended for SIMD variables only, and it is only
8183 * called by the TLB miss path in the memory fetch/store code: there the value
8184 * is passed by reference, so both the register and the stack copy are needed
8185 * depending on which path is taken (TLB hit vs. miss).
8186 */
8187DECL_FORCE_INLINE_THROW(uint32_t)
8188iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8189 bool fSyncRegWithStack = true)
8190{
8191 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8192 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8193 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8194 || pVar->enmKind == kIemNativeVarKind_Stack,
8195 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8196 AssertStmt(pVar->fSimdReg,
8197 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8198 Assert( pVar->idxStackSlot != UINT8_MAX
8199 && pVar->idxReg != UINT8_MAX);
8200
8201 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8202 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8203
8204 uint8_t const idxRegVar = pVar->idxReg;
8205 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8206 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
8207
8208 if (fSyncRegWithStack)
8209 {
8210 if (pVar->cbVar == sizeof(RTUINT128U))
8211 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
8212 else
8213 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
8214 }
8215
8216 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8217}
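
/*
 * Sketch of the intended TLB-miss flow for a fetch (pfnFetchHelper and
 * idxVarDst are hypothetical): sync the register copy to the stack, pass the
 * stack slot address to the C helper, then refresh the host register from the
 * stack afterwards using iemNativeEmitSimdVarSyncStackToRegister below:
 *
 *      off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off,
 *                                                                IEMNATIVE_CALL_ARG1_GREG,
 *                                                                idxVarDst);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnFetchHelper);
 *      off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarDst);
 */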
8218
8219
8220/**
8221 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
8222 *
8223 * This is a special helper used only by the TLB miss path in the memory
8224 * fetch/store code: the value is passed by reference and ends up on the
8225 * stack after a TLB miss, so the assigned host register must be reloaded
8226 * from the stack copy afterwards.
8227 */
8228DECL_FORCE_INLINE_THROW(uint32_t)
8229iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
8230{
8231 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8232 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8233 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8234 || pVar->enmKind == kIemNativeVarKind_Stack,
8235 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8236 AssertStmt(pVar->fSimdReg,
8237 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8238 Assert( pVar->idxStackSlot != UINT8_MAX
8239 && pVar->idxReg != UINT8_MAX);
8240
8241 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8242 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8243
8244 uint8_t const idxRegVar = pVar->idxReg;
8245 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8246 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
8247
8248 if (pVar->cbVar == sizeof(RTUINT128U))
8249 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
8250 else
8251 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
8252
8253 return off;
8254}
8255
8256
8257/**
8258 * Emits a gprdst = ~gprsrc (bitwise NOT) operation.
8259 */
8260DECL_FORCE_INLINE_THROW(uint32_t)
8261iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
8262{
8263#ifdef RT_ARCH_AMD64
8264 if (iGprDst != iGprSrc)
8265 {
8266 /* mov gprdst, gprsrc. */
8267 if (f64Bit)
8268 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
8269 else
8270 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
8271 }
8272
8273 /* not gprdst */
8274 if (f64Bit || iGprDst >= 8)
8275 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
8276 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
8277 pCodeBuf[off++] = 0xf7;
8278 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
8279#elif defined(RT_ARCH_ARM64)
8280 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
8281#else
8282# error "port me"
8283#endif
8284 return off;
8285}
8286
8287
8288/**
8289 * Emits a gprdst = ~gprsrc (bitwise NOT) operation.
8290 */
8291DECL_INLINE_THROW(uint32_t)
8292iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
8293{
8294#ifdef RT_ARCH_AMD64
8295 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
8296#elif defined(RT_ARCH_ARM64)
8297 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
8298#else
8299# error "port me"
8300#endif
8301 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8302 return off;
8303}
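
/*
 * Illustrative encodings, hand-assembled (verify with a disassembler):
 *      AMD64:  not rax      -> 48 F7 D0
 *      ARM64:  mvn x0, x1   -> alias of: orn x0, xzr, x1
 */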
8304
8305
8306/**
8307 * Emits a 128-bit vector register store to a VCpu value.
8308 */
8309DECL_FORCE_INLINE_THROW(uint32_t)
8310iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8311{
8312#ifdef RT_ARCH_AMD64
8313 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
8314 pCodeBuf[off++] = 0x66;
8315 if (iVecReg >= 8)
8316 pCodeBuf[off++] = X86_OP_REX_R;
8317 pCodeBuf[off++] = 0x0f;
8318 pCodeBuf[off++] = 0x7f;
8319 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8320#elif defined(RT_ARCH_ARM64)
8321 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
8322
8323#else
8324# error "port me"
8325#endif
8326 return off;
8327}
8328
8329
8330/**
8331 * Emits a 128-bit vector register store to a VCpu value.
8332 */
8333DECL_INLINE_THROW(uint32_t)
8334iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8335{
8336#ifdef RT_ARCH_AMD64
8337 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
8338#elif defined(RT_ARCH_ARM64)
8339 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
8340#else
8341# error "port me"
8342#endif
8343 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8344 return off;
8345}
8346
8347
8348/**
8349 * Emits a high 128-bit vector register store to a VCpu value.
8350 */
8351DECL_FORCE_INLINE_THROW(uint32_t)
8352iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8353{
8354#ifdef RT_ARCH_AMD64
8355 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
8356 pCodeBuf[off++] = X86_OP_VEX3;
8357 if (iVecReg >= 8)
8358 pCodeBuf[off++] = 0x63;
8359 else
8360 pCodeBuf[off++] = 0xe3;
8361 pCodeBuf[off++] = 0x7d;
8362 pCodeBuf[off++] = 0x39;
8363 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8364 pCodeBuf[off++] = 0x01; /* Immediate */
8365#elif defined(RT_ARCH_ARM64)
8366 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
8367#else
8368# error "port me"
8369#endif
8370 return off;
8371}
8372
8373
8374/**
8375 * Emits a high 128-bit vector register store to a VCpu value.
8376 */
8377DECL_INLINE_THROW(uint32_t)
8378iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8379{
8380#ifdef RT_ARCH_AMD64
8381 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8382#elif defined(RT_ARCH_ARM64)
8383 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8384 Assert(!(iVecReg & 0x1));
8385 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8386#else
8387# error "port me"
8388#endif
8389 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8390 return off;
8391}
8392
8393
8394/**
8395 * Emits a 128-bit vector register load of a VCpu value.
8396 */
8397DECL_FORCE_INLINE_THROW(uint32_t)
8398iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8399{
8400#ifdef RT_ARCH_AMD64
8401 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
8402 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8403 if (iVecReg >= 8)
8404 pCodeBuf[off++] = X86_OP_REX_R;
8405 pCodeBuf[off++] = 0x0f;
8406 pCodeBuf[off++] = 0x6f;
8407 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8408#elif defined(RT_ARCH_ARM64)
8409 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8410
8411#else
8412# error "port me"
8413#endif
8414 return off;
8415}
8416
8417
8418/**
8419 * Emits a 128-bit vector register load of a VCpu value.
8420 */
8421DECL_INLINE_THROW(uint32_t)
8422iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8423{
8424#ifdef RT_ARCH_AMD64
8425 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
8426#elif defined(RT_ARCH_ARM64)
8427 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
8428#else
8429# error "port me"
8430#endif
8431 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8432 return off;
8433}
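
/*
 * Usage sketch (the guest context field path is assumed): load the guest
 * XMM0 value into the host SIMD register idxSimdReg:
 *
 *      off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxSimdReg,
 *                                                       RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[0]));
 */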
8434
8435
8436/**
8437 * Emits a high 128-bit vector register load of a VCpu value.
8438 */
8439DECL_FORCE_INLINE_THROW(uint32_t)
8440iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8441{
8442#ifdef RT_ARCH_AMD64
8443 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
8444 pCodeBuf[off++] = X86_OP_VEX3;
8445 if (iVecReg >= 8)
8446 pCodeBuf[off++] = 0x63;
8447 else
8448 pCodeBuf[off++] = 0xe3;
8449 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8450 pCodeBuf[off++] = 0x38;
8451 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8452 pCodeBuf[off++] = 0x01; /* Immediate */
8453#elif defined(RT_ARCH_ARM64)
8454 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8455#else
8456# error "port me"
8457#endif
8458 return off;
8459}
8460
8461
8462/**
8463 * Emits a high 128-bit vector register load of a VCpu value.
8464 */
8465DECL_INLINE_THROW(uint32_t)
8466iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8467{
8468#ifdef RT_ARCH_AMD64
8469 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8470#elif defined(RT_ARCH_ARM64)
8471 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8472 Assert(!(iVecReg & 0x1));
8473 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8474#else
8475# error "port me"
8476#endif
8477 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8478 return off;
8479}
8480
8481
8482/**
8483 * Emits a vecdst = vecsrc load.
8484 */
8485DECL_FORCE_INLINE(uint32_t)
8486iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8487{
8488#ifdef RT_ARCH_AMD64
8489 /* movdqu vecdst, vecsrc */
8490 pCodeBuf[off++] = 0xf3;
8491
8492 if ((iVecRegDst | iVecRegSrc) >= 8)
8493 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
8494 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
8495 : X86_OP_REX_R;
8496 pCodeBuf[off++] = 0x0f;
8497 pCodeBuf[off++] = 0x6f;
8498 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8499
8500#elif defined(RT_ARCH_ARM64)
8501 /* mov dst, src; alias for: orr dst, src, src */
8502 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
8503
8504#else
8505# error "port me"
8506#endif
8507 return off;
8508}
8509
8510
8511/**
8512 * Emits a vecdst = vecsrc load, 128-bit.
8513 */
8514DECL_INLINE_THROW(uint32_t)
8515iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8516{
8517#ifdef RT_ARCH_AMD64
8518 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8519#elif defined(RT_ARCH_ARM64)
8520 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8521#else
8522# error "port me"
8523#endif
8524 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8525 return off;
8526}
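
/*
 * Illustrative encodings, hand-assembled (verify with a disassembler):
 *      AMD64:  movdqu xmm1, xmm2    -> F3 0F 6F CA
 *      ARM64:  mov v1.16b, v2.16b   -> alias of: orr v1.16b, v2.16b, v2.16b
 */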
8527
8528
8529/**
8530 * Emits a vecdst[128:255] = vecsrc[128:255] load.
8531 */
8532DECL_FORCE_INLINE_THROW(uint32_t)
8533iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8534{
8535#ifdef RT_ARCH_AMD64
8536 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
8537 pCodeBuf[off++] = X86_OP_VEX3;
8538 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8539 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8540 pCodeBuf[off++] = 0x46;
8541 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8542 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
8543
8544#elif defined(RT_ARCH_ARM64)
8545 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8546
8547 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
8548# ifdef IEM_WITH_THROW_CATCH
8549 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8550# else
8551 AssertReleaseFailedStmt(off = UINT32_MAX);
8552# endif
8553#else
8554# error "port me"
8555#endif
8556 return off;
8557}
8558
8559
8560/**
8561 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
8562 */
8563DECL_INLINE_THROW(uint32_t)
8564iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8565{
8566#ifdef RT_ARCH_AMD64
8567 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8568#elif defined(RT_ARCH_ARM64)
8569 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8570 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
8571#else
8572# error "port me"
8573#endif
8574 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8575 return off;
8576}
8577
8578
8579/**
8580 * Emits a vecdst[0:127] = vecsrc[128:255] load.
8581 */
8582DECL_FORCE_INLINE_THROW(uint32_t)
8583iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8584{
8585#ifdef RT_ARCH_AMD64
8586 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
8587 pCodeBuf[off++] = X86_OP_VEX3;
8588 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
8589 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8590 pCodeBuf[off++] = 0x39;
8591 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
8592 pCodeBuf[off++] = 0x1;
8593
8594#elif defined(RT_ARCH_ARM64)
8595 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8596
8597 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
8598# ifdef IEM_WITH_THROW_CATCH
8599 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8600# else
8601 AssertReleaseFailedStmt(off = UINT32_MAX);
8602# endif
8603#else
8604# error "port me"
8605#endif
8606 return off;
8607}
8608
8609
8610/**
8611 * Emits a vecdst[0:127] = vecsrc[128:255] load, high 128-bit.
8612 */
8613DECL_INLINE_THROW(uint32_t)
8614iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8615{
8616#ifdef RT_ARCH_AMD64
8617 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8618#elif defined(RT_ARCH_ARM64)
8619 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8620 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
8621#else
8622# error "port me"
8623#endif
8624 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8625 return off;
8626}
8627
8628
8629/**
8630 * Emits a vecdst = vecsrc load, 256-bit.
8631 */
8632DECL_INLINE_THROW(uint32_t)
8633iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8634{
8635#ifdef RT_ARCH_AMD64
8636 /* vmovdqa ymm, ymm */
8637 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8638 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
8639 {
8640 pbCodeBuf[off++] = X86_OP_VEX3;
8641 pbCodeBuf[off++] = 0x41;
8642 pbCodeBuf[off++] = 0x7d;
8643 pbCodeBuf[off++] = 0x6f;
8644 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8645 }
8646 else
8647 {
8648 pbCodeBuf[off++] = X86_OP_VEX2;
8649 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
8650 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
8651 pbCodeBuf[off++] = iVecRegSrc >= 8
8652 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
8653 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8654 }
8655#elif defined(RT_ARCH_ARM64)
8656 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8657 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
8658 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
8659 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
8660#else
8661# error "port me"
8662#endif
8663 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8664 return off;
8665}
8666
8667
8668/**
8669 * Emits a vecdst[128:255] = vecsrc[0:127] load.
8670 */
8671DECL_FORCE_INLINE(uint32_t)
8672iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8673{
8674#ifdef RT_ARCH_AMD64
8675 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
8676 pCodeBuf[off++] = X86_OP_VEX3;
8677 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8678 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8679 pCodeBuf[off++] = 0x38;
8680 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8681 pCodeBuf[off++] = 0x01; /* Immediate */
8682
8683#elif defined(RT_ARCH_ARM64)
8684 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8685 /* mov dst, src; alias for: orr dst, src, src */
8686 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
8687
8688#else
8689# error "port me"
8690#endif
8691 return off;
8692}
8693
8694
8695/**
8696 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
8697 */
8698DECL_INLINE_THROW(uint32_t)
8699iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8700{
8701#ifdef RT_ARCH_AMD64
8702 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8703#elif defined(RT_ARCH_ARM64)
8704 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8705#else
8706# error "port me"
8707#endif
8708 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8709 return off;
8710}
8711
8712
8713/**
8714 * Emits a gprdst = vecsrc[x] load, 64-bit.
8715 */
8716DECL_FORCE_INLINE(uint32_t)
8717iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8718{
8719#ifdef RT_ARCH_AMD64
8720 if (iQWord >= 2)
8721 {
8722 /*
8723 * vpextrq doesn't work on the upper 128-bits.
8724 * So we use the following sequence:
8725 * vextracti128 vectmp0, vecsrc, 1
8726 * pextrq gpr, vectmp0, #(iQWord - 2)
8727 */
8728 /* vextracti128 */
8729 pCodeBuf[off++] = X86_OP_VEX3;
8730 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
8731 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8732 pCodeBuf[off++] = 0x39;
8733 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8734 pCodeBuf[off++] = 0x1;
8735
8736 /* pextrq */
8737 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8738 pCodeBuf[off++] = X86_OP_REX_W
8739 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8740 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8741 pCodeBuf[off++] = 0x0f;
8742 pCodeBuf[off++] = 0x3a;
8743 pCodeBuf[off++] = 0x16;
8744 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
8745 pCodeBuf[off++] = iQWord - 2;
8746 }
8747 else
8748 {
8749 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
8750 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8751 pCodeBuf[off++] = X86_OP_REX_W
8752 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8753 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8754 pCodeBuf[off++] = 0x0f;
8755 pCodeBuf[off++] = 0x3a;
8756 pCodeBuf[off++] = 0x16;
8757 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8758 pCodeBuf[off++] = iQWord;
8759 }
8760#elif defined(RT_ARCH_ARM64)
8761 /* umov gprdst, vecsrc[iQWord] */
8762 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8763#else
8764# error "port me"
8765#endif
8766 return off;
8767}
8768
8769
8770/**
8771 * Emits a gprdst = vecsrc[x] load, 64-bit.
8772 */
8773DECL_INLINE_THROW(uint32_t)
8774iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8775{
8776 Assert(iQWord <= 3);
8777
8778#ifdef RT_ARCH_AMD64
8779 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iVecRegSrc, iQWord);
8780#elif defined(RT_ARCH_ARM64)
8781 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8782 Assert(!(iVecRegSrc & 0x1));
8783 /* Need to access the "high" 128-bit vector register. */
8784 if (iQWord >= 2)
8785 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
8786 else
8787 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
8788#else
8789# error "port me"
8790#endif
8791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8792 return off;
8793}
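
/*
 * Illustrative hand-assembled encoding for the SSE4.1 path: fetching the
 * high qword of XMM0 into RAX:
 *      pextrq rax, xmm0, 1  -> 66 48 0F 3A 16 C0 01
 */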
8794
8795
8796/**
8797 * Emits a gprdst = vecsrc[x] load, 32-bit.
8798 */
8799DECL_FORCE_INLINE(uint32_t)
8800iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8801{
8802#ifdef RT_ARCH_AMD64
8803 if (iDWord >= 4)
8804 {
8805 /*
8806 * vpextrd doesn't work on the upper 128-bits.
8807 * So we use the following sequence:
8808 * vextracti128 vectmp0, vecsrc, 1
8809 * pextrd gpr, vectmp0, #(iDWord - 4)
8810 */
8811 /* vextracti128 */
8812 pCodeBuf[off++] = X86_OP_VEX3;
8813 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
8814 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8815 pCodeBuf[off++] = 0x39;
8816 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8817 pCodeBuf[off++] = 0x1;
8818
8819 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
8820 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8821 if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
8822 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8823 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8824 pCodeBuf[off++] = 0x0f;
8825 pCodeBuf[off++] = 0x3a;
8826 pCodeBuf[off++] = 0x16;
8827 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
8828 pCodeBuf[off++] = iDWord - 4;
8829 }
8830 else
8831 {
8832 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
8833 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8834 if (iGprDst >= 8 || iVecRegSrc >= 8)
8835 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8836 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8837 pCodeBuf[off++] = 0x0f;
8838 pCodeBuf[off++] = 0x3a;
8839 pCodeBuf[off++] = 0x16;
8840 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8841 pCodeBuf[off++] = iDWord;
8842 }
8843#elif defined(RT_ARCH_ARM64)
8844 Assert(iDWord < 4);
8845
8846 /* umov gprdst, vecsrc[iDWord] */
8847 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
8848#else
8849# error "port me"
8850#endif
8851 return off;
8852}
8853
8854
8855/**
8856 * Emits a gprdst = vecsrc[x] load, 32-bit.
8857 */
8858DECL_INLINE_THROW(uint32_t)
8859iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8860{
8861 Assert(iDWord <= 7);
8862
8863#ifdef RT_ARCH_AMD64
8864 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
8865#elif defined(RT_ARCH_ARM64)
8866 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8867 Assert(!(iVecRegSrc & 0x1));
8868 /* Need to access the "high" 128-bit vector register. */
8869 if (iDWord >= 4)
8870 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
8871 else
8872 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
8873#else
8874# error "port me"
8875#endif
8876 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8877 return off;
8878}
8879
8880
8881/**
8882 * Emits a gprdst = vecsrc[x] load, 16-bit.
8883 */
8884DECL_FORCE_INLINE(uint32_t)
8885iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8886{
8887#ifdef RT_ARCH_AMD64
8888 if (iWord >= 8)
8889 {
8890 /** @todo Currently not used. */
8891 AssertReleaseFailed();
8892 }
8893 else
8894 {
8895 /* pextrw gpr, vecsrc, #iWord */
8896 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8897 if (iGprDst >= 8 || iVecRegSrc >= 8)
8898 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
8899 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
8900 pCodeBuf[off++] = 0x0f;
8901 pCodeBuf[off++] = 0xc5;
8902 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
8903 pCodeBuf[off++] = iWord;
8904 }
8905#elif defined(RT_ARCH_ARM64)
8906 /* umov gprdst, vecsrc[iWord] */
8907 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
8908#else
8909# error "port me"
8910#endif
8911 return off;
8912}
8913
8914
8915/**
8916 * Emits a gprdst = vecsrc[x] load, 16-bit.
8917 */
8918DECL_INLINE_THROW(uint32_t)
8919iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8920{
8921 Assert(iWord <= 15);
8922
8923#ifdef RT_ARCH_AMD64
8924 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
8925#elif defined(RT_ARCH_ARM64)
8926 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8927 Assert(!(iVecRegSrc & 0x1));
8928 /* Need to access the "high" 128-bit vector register. */
8929 if (iWord >= 8)
8930 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
8931 else
8932 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
8933#else
8934# error "port me"
8935#endif
8936 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8937 return off;
8938}
8939
8940
8941/**
8942 * Emits a gprdst = vecsrc[x] load, 8-bit.
8943 */
8944DECL_FORCE_INLINE(uint32_t)
8945iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8946{
8947#ifdef RT_ARCH_AMD64
8948 if (iByte >= 16)
8949 {
8950 /** @todo Currently not used. */
8951 AssertReleaseFailed();
8952 }
8953 else
8954 {
8955 /* pextrb gpr, vecsrc, #iByte */
8956 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8957 if (iGprDst >= 8 || iVecRegSrc >= 8)
8958 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8959 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8960 pCodeBuf[off++] = 0x0f;
8961 pCodeBuf[off++] = 0x3a;
8962 pCodeBuf[off++] = 0x14;
8963 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8964 pCodeBuf[off++] = iByte;
8965 }
8966#elif defined(RT_ARCH_ARM64)
8967 /* umov gprdst, vecsrc[iByte] */
8968 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
8969#else
8970# error "port me"
8971#endif
8972 return off;
8973}
8974
8975
8976/**
8977 * Emits a gprdst = vecsrc[x] load, 8-bit.
8978 */
8979DECL_INLINE_THROW(uint32_t)
8980iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8981{
8982 Assert(iByte <= 31);
8983
8984#ifdef RT_ARCH_AMD64
8985 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
8986#elif defined(RT_ARCH_ARM64)
8987 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8988 Assert(!(iVecRegSrc & 0x1));
8989 /* Need to access the "high" 128-bit vector register. */
8990 if (iByte >= 16)
8991 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
8992 else
8993 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
8994#else
8995# error "port me"
8996#endif
8997 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8998 return off;
8999}
9000
9001
9002/**
9003 * Emits a vecdst[x] = gprsrc store, 64-bit.
9004 */
9005DECL_FORCE_INLINE(uint32_t)
9006iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9007{
9008#ifdef RT_ARCH_AMD64
9009 if (iQWord >= 2)
9010 {
9011 /*
9012 * vpinsrq doesn't work on the upper 128-bits.
9013 * So we use the following sequence:
9014 * vextracti128 vectmp0, vecdst, 1
9015 * pinsrq vectmp0, gpr, #(iQWord - 2)
9016 * vinserti128 vecdst, vectmp0, 1
9017 */
9018 /* vextracti128 */
9019 pCodeBuf[off++] = X86_OP_VEX3;
9020 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9021 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9022 pCodeBuf[off++] = 0x39;
9023 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9024 pCodeBuf[off++] = 0x1;
9025
9026 /* pinsrq */
9027 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9028 pCodeBuf[off++] = X86_OP_REX_W
9029 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9030 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9031 pCodeBuf[off++] = 0x0f;
9032 pCodeBuf[off++] = 0x3a;
9033 pCodeBuf[off++] = 0x22;
9034 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9035 pCodeBuf[off++] = iQWord - 2;
9036
9037 /* vinserti128 */
9038 pCodeBuf[off++] = X86_OP_VEX3;
9039 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9040 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9041 pCodeBuf[off++] = 0x38;
9042 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9043 pCodeBuf[off++] = 0x01; /* Immediate */
9044 }
9045 else
9046 {
9047 /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
9048 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9049 pCodeBuf[off++] = X86_OP_REX_W
9050 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9051 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9052 pCodeBuf[off++] = 0x0f;
9053 pCodeBuf[off++] = 0x3a;
9054 pCodeBuf[off++] = 0x22;
9055 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9056 pCodeBuf[off++] = iQWord;
9057 }
9058#elif defined(RT_ARCH_ARM64)
9059 /* ins vecdst[iQWord], gpr */
9060 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9061#else
9062# error "port me"
9063#endif
9064 return off;
9065}
9066
9067
9068/**
9069 * Emits a vecdst[x] = gprsrc store, 64-bit.
9070 */
9071DECL_INLINE_THROW(uint32_t)
9072iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9073{
9074 Assert(iQWord <= 3);
9075
9076#ifdef RT_ARCH_AMD64
9077 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
9078#elif defined(RT_ARCH_ARM64)
9079 Assert(!(iVecRegDst & 0x1));
9080 if (iQWord >= 2)
9081 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
9082 else
9083 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
9084#else
9085# error "port me"
9086#endif
9087 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9088 return off;
9089}
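
/*
 * Illustrative hand-assembled encoding for the SSE4.1 path: storing RAX
 * into the high qword of XMM0:
 *      pinsrq xmm0, rax, 1  -> 66 48 0F 3A 22 C0 01
 */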
9090
9091
9092/**
9093 * Emits a vecdst[x] = gprsrc store, 32-bit.
9094 */
9095DECL_FORCE_INLINE(uint32_t)
9096iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9097{
9098#ifdef RT_ARCH_AMD64
9099 if (iDWord >= 4)
9100 {
9101 /*
9102 * vpinsrd doesn't work on the upper 128-bits.
9103 * So we use the following sequence:
9104 * vextracti128 vectmp0, vecdst, 1
9105 * pinsrd vectmp0, gpr, #(iDWord - 4)
9106 * vinserti128 vecdst, vectmp0, 1
9107 */
9108 /* vextracti128 */
9109 pCodeBuf[off++] = X86_OP_VEX3;
9110 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9111 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9112 pCodeBuf[off++] = 0x39;
9113 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9114 pCodeBuf[off++] = 0x1;
9115
9116 /* pinsrd */
9117 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9118 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
9119 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9120 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9121 pCodeBuf[off++] = 0x0f;
9122 pCodeBuf[off++] = 0x3a;
9123 pCodeBuf[off++] = 0x22;
9124 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9125 pCodeBuf[off++] = iDWord - 4;
9126
9127 /* vinserti128 */
9128 pCodeBuf[off++] = X86_OP_VEX3;
9129 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9130 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9131 pCodeBuf[off++] = 0x38;
9132 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9133 pCodeBuf[off++] = 0x01; /* Immediate */
9134 }
9135 else
9136 {
9137 /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
9138 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9139 if (iVecRegDst >= 8 || iGprSrc >= 8)
9140 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9141 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9142 pCodeBuf[off++] = 0x0f;
9143 pCodeBuf[off++] = 0x3a;
9144 pCodeBuf[off++] = 0x22;
9145 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9146 pCodeBuf[off++] = iDWord;
9147 }
9148#elif defined(RT_ARCH_ARM64)
9149 /* ins vecdst[iDWord], gpr */
9150 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
9151#else
9152# error "port me"
9153#endif
9154 return off;
9155}
9156
9157
9158/**
9159 * Emits a vecdst[x] = gprsrc store, 32-bit.
9160 */
9161DECL_INLINE_THROW(uint32_t)
9162iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9163{
9164 Assert(iDWord <= 7);
9165
9166#ifdef RT_ARCH_AMD64
9167 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
9168#elif defined(RT_ARCH_ARM64)
9169 Assert(!(iVecRegDst & 0x1));
9170 if (iDWord >= 4)
9171 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
9172 else
9173 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
9174#else
9175# error "port me"
9176#endif
9177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9178 return off;
9179}
9180
9181
9182/**
9183 * Emits a vecdst[x] = gprsrc store, 16-bit.
9184 */
9185DECL_FORCE_INLINE(uint32_t)
9186iemNativeEmitSimdStoreGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9187{
9188#ifdef RT_ARCH_AMD64
9189 /* pinsrw vecdst, gpr, #iWord. */
9190 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9191 if (iVecRegDst >= 8 || iGprSrc >= 8)
9192 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9193 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9194 pCodeBuf[off++] = 0x0f;
9195 pCodeBuf[off++] = 0xc4;
9196 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9197 pCodeBuf[off++] = iWord;
9198#elif defined(RT_ARCH_ARM64)
9199 /* ins vecdst[iWord], gpr */
9200 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iWord, kArmv8InstrUmovInsSz_U16);
9201#else
9202# error "port me"
9203#endif
9204 return off;
9205}
9206
9207
9208/**
9209 * Emits a vecdst[x] = gprsrc store, 16-bit.
9210 */
9211DECL_INLINE_THROW(uint32_t)
9212iemNativeEmitSimdStoreGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9213{
9214 Assert(iWord <= 7);
9215
9216#ifdef RT_ARCH_AMD64
9217 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iGprSrc, iWord);
9218#elif defined(RT_ARCH_ARM64)
9219 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iWord);
9220#else
9221# error "port me"
9222#endif
9223 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9224 return off;
9225}
9226
9227
9228/**
9229 * Emits a vecdst[x] = gprsrc store, 8-bit.
9230 */
9231DECL_FORCE_INLINE(uint32_t)
9232iemNativeEmitSimdStoreGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
9233{
9234#ifdef RT_ARCH_AMD64
9235 /* pinsrb vecdst, gpr, #iByte (ASSUMES SSE4.1). */
9236 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9237 if (iVecRegDst >= 8 || iGprSrc >= 8)
9238 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9239 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9240 pCodeBuf[off++] = 0x0f;
9241 pCodeBuf[off++] = 0x3a;
9242 pCodeBuf[off++] = 0x20;
9243 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9244 pCodeBuf[off++] = iByte;
9245#elif defined(RT_ARCH_ARM64)
9246 /* ins vecdst[iByte], gpr */
9247 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iByte, kArmv8InstrUmovInsSz_U8);
9248#else
9249# error "port me"
9250#endif
9251 return off;
9252}
9253
9254
9255/**
9256 * Emits a vecdst[x] = gprsrc store, 8-bit.
9257 */
9258DECL_INLINE_THROW(uint32_t)
9259iemNativeEmitSimdStoreGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
9260{
9261 Assert(iByte <= 15);
9262
9263#ifdef RT_ARCH_AMD64
9264 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iByte);
9265#elif defined(RT_ARCH_ARM64)
9266 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iByte);
9267#else
9268# error "port me"
9269#endif
9270 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9271 return off;
9272}
9273
9274
9275/**
9276 * Emits a vecdst.au32[iDWord] = 0 store.
9277 */
9278DECL_FORCE_INLINE(uint32_t)
9279iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
9280{
9281 Assert(iDWord <= 7);
9282
9283#ifdef RT_ARCH_AMD64
9284 /*
9285 * xor tmp0, tmp0
9286 * pinsrd xmm, tmp0, iDword
9287 */
9288 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
9289 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
9290 pCodeBuf[off++] = 0x33;
9291 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
9292 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
9293#elif defined(RT_ARCH_ARM64)
9294 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9295 Assert(!(iVecReg & 0x1));
9296 /* ins vecreg[iDWord], wzr */
9297 if (iDWord >= 4)
9298 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
9299 else
9300 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
9301#else
9302# error "port me"
9303#endif
9304 return off;
9305}
9306
9307
9308/**
9309 * Emits a vecdst.au32[iDWord] = 0 store.
9310 */
9311DECL_INLINE_THROW(uint32_t)
9312iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
9313{
9315#ifdef RT_ARCH_AMD64
9316 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 22), off, iVecReg, iDWord); /* xor (3) + worst case pinsrd into the high half (19) */
9317#elif defined(RT_ARCH_ARM64)
9318 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
9319#else
9320# error "port me"
9321#endif
9322 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9323 return off;
9324}
9325
9326
9327/**
9328 * Emits a vecdst[0:127] = 0 store.
9329 */
9330DECL_FORCE_INLINE(uint32_t)
9331iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9332{
9333#ifdef RT_ARCH_AMD64
9334 /* pxor xmm, xmm */
9335 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9336 if (iVecReg >= 8)
9337 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
9338 pCodeBuf[off++] = 0x0f;
9339 pCodeBuf[off++] = 0xef;
9340 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9341#elif defined(RT_ARCH_ARM64)
9342 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9343 Assert(!(iVecReg & 0x1));
9344 /* eor vecreg, vecreg, vecreg */
9345 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
9346#else
9347# error "port me"
9348#endif
9349 return off;
9350}
9351
9352
9353/**
9354 * Emits a vecdst[0:127] = 0 store.
9355 */
9356DECL_INLINE_THROW(uint32_t)
9357iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9358{
9359#ifdef RT_ARCH_AMD64
9360 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
9361#elif defined(RT_ARCH_ARM64)
9362 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
9363#else
9364# error "port me"
9365#endif
9366 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9367 return off;
9368}
9369
9370
9371/**
9372 * Emits a vecdst[128:255] = 0 store.
9373 */
9374DECL_FORCE_INLINE(uint32_t)
9375iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9376{
9377#ifdef RT_ARCH_AMD64
9378 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
9379 if (iVecReg < 8)
9380 {
9381 pCodeBuf[off++] = X86_OP_VEX2;
9382 pCodeBuf[off++] = 0xf9;
9383 }
9384 else
9385 {
9386 pCodeBuf[off++] = X86_OP_VEX3;
9387 pCodeBuf[off++] = 0x41;
9388 pCodeBuf[off++] = 0x79;
9389 }
9390 pCodeBuf[off++] = 0x6f;
9391 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9392#elif defined(RT_ARCH_ARM64)
9393 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9394 Assert(!(iVecReg & 0x1));
9395 /* eor vecreg, vecreg, vecreg */
9396 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9397#else
9398# error "port me"
9399#endif
9400 return off;
9401}
9402
9403
9404/**
9405 * Emits a vecdst[128:255] = 0 store.
9406 */
9407DECL_INLINE_THROW(uint32_t)
9408iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9409{
9410#ifdef RT_ARCH_AMD64
9411 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
9412#elif defined(RT_ARCH_ARM64)
9413 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
9414#else
9415# error "port me"
9416#endif
9417 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9418 return off;
9419}
9420
9421
9422/**
9423 * Emits a vecdst[0:255] = 0 store.
9424 */
9425DECL_FORCE_INLINE(uint32_t)
9426iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9427{
9428#ifdef RT_ARCH_AMD64
9429 /* vpxor ymm, ymm, ymm */
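    /* (The 2-byte VEX prefix only carries the inverted REX.R bit, so
       registers ymm8 and up in the r/m position need the 3-byte form.) */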
9430 if (iVecReg < 8)
9431 {
9432 pCodeBuf[off++] = X86_OP_VEX2;
9433 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9434 }
9435 else
9436 {
9437 pCodeBuf[off++] = X86_OP_VEX3;
9438 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
9439 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9440 }
9441 pCodeBuf[off++] = 0xef;
9442 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9443#elif defined(RT_ARCH_ARM64)
9444 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9445 Assert(!(iVecReg & 0x1));
9446 /* eor vecreg, vecreg, vecreg */
9447 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
9448 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9449#else
9450# error "port me"
9451#endif
9452 return off;
9453}
9454
9455
9456/**
9457 * Emits a vecdst[0:255] = 0 store.
9458 */
9459DECL_INLINE_THROW(uint32_t)
9460iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9461{
9462#ifdef RT_ARCH_AMD64
9463 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
9464#elif defined(RT_ARCH_ARM64)
9465 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
9466#else
9467# error "port me"
9468#endif
9469 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9470 return off;
9471}
9472
9473
9474/**
9475 * Emits a vecdst = gprsrc broadcast, 8-bit.
9476 */
9477DECL_FORCE_INLINE(uint32_t)
9478iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9479{
9480#ifdef RT_ARCH_AMD64
9481 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE4.1) */
9482 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9483 if (iVecRegDst >= 8 || iGprSrc >= 8)
9484 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9485 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9486 pCodeBuf[off++] = 0x0f;
9487 pCodeBuf[off++] = 0x3a;
9488 pCodeBuf[off++] = 0x20;
9489 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9490 pCodeBuf[off++] = 0x00;
9491
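    /* Note: the value is staged in lane 0 first because the register form
       of vpbroadcastb only takes an xmm source; pre-AVX2 VEX broadcasts
       were memory-source only. */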
9492 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
9493 pCodeBuf[off++] = X86_OP_VEX3;
9494 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9495 | 0x02 /* opcode map. */
9496 | ( iVecRegDst >= 8
9497 ? 0
9498 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9499 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9500 pCodeBuf[off++] = 0x78;
9501 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9502#elif defined(RT_ARCH_ARM64)
9503 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9504 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9505
9506 /* dup vecdst, gpr */
9507 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
9508 if (f256Bit)
9509 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
9510#else
9511# error "port me"
9512#endif
9513 return off;
9514}
9515
9516
9517/**
9518 * Emits a vecdst = gprsrc broadcast, 8-bit.
9519 */
9520DECL_INLINE_THROW(uint32_t)
9521iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9522{
9523#ifdef RT_ARCH_AMD64
9524 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9525#elif defined(RT_ARCH_ARM64)
9526 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9527#else
9528# error "port me"
9529#endif
9530 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9531 return off;
9532}
9533
9534
9535/**
9536 * Emits a vecdst = gprsrc broadcast, 16-bit.
9537 */
9538DECL_FORCE_INLINE(uint32_t)
9539iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9540{
9541#ifdef RT_ARCH_AMD64
9542 /* pinsrw vecdst, gpr, #0 */
9543 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9544 if (iVecRegDst >= 8 || iGprSrc >= 8)
9545 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9546 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9547 pCodeBuf[off++] = 0x0f;
9548 pCodeBuf[off++] = 0xc4;
9549 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9550 pCodeBuf[off++] = 0x00;
9551
9552 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
9553 pCodeBuf[off++] = X86_OP_VEX3;
9554 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9555 | 0x02 /* opcode map. */
9556 | ( iVecRegDst >= 8
9557 ? 0
9558 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9559 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9560 pCodeBuf[off++] = 0x79;
9561 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9562#elif defined(RT_ARCH_ARM64)
9563 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9564 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9565
9566 /* dup vecdst, gpr */
9567 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
9568 if (f256Bit)
9569 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
9570#else
9571# error "port me"
9572#endif
9573 return off;
9574}
9575
9576
9577/**
9578 * Emits a vecdst = gprsrc broadcast, 16-bit.
9579 */
9580DECL_INLINE_THROW(uint32_t)
9581iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9582{
9583#ifdef RT_ARCH_AMD64
9584 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9585#elif defined(RT_ARCH_ARM64)
9586 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9587#else
9588# error "port me"
9589#endif
9590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9591 return off;
9592}
9593
9594
9595/**
9596 * Emits a vecdst = gprsrc broadcast, 32-bit.
9597 */
9598DECL_FORCE_INLINE(uint32_t)
9599iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9600{
9601#ifdef RT_ARCH_AMD64
9602 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
9603 * vbroadcast needs a memory operand or another xmm register to work... */
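    /* (A movd + pshufd 0 pair could handle the 128-bit case with plain
       SSE2 instead of the SSE4.1 pinsrd, but the 256-bit case would still
       need AVX2 for the ymm broadcast, so the sequence is kept uniform.) */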
9604
9605 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
9606 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9607 if (iVecRegDst >= 8 || iGprSrc >= 8)
9608 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9609 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9610 pCodeBuf[off++] = 0x0f;
9611 pCodeBuf[off++] = 0x3a;
9612 pCodeBuf[off++] = 0x22;
9613 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9614 pCodeBuf[off++] = 0x00;
9615
9616 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
9617 pCodeBuf[off++] = X86_OP_VEX3;
9618 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9619 | 0x02 /* opcode map. */
9620 | ( iVecRegDst >= 8
9621 ? 0
9622 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9623 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9624 pCodeBuf[off++] = 0x58;
9625 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9626#elif defined(RT_ARCH_ARM64)
9627 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9628 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9629
9630 /* dup vecdst, gpr */
9631 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
9632 if (f256Bit)
9633 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
9634#else
9635# error "port me"
9636#endif
9637 return off;
9638}
9639
9640
9641/**
9642 * Emits a vecdst = gprsrc broadcast, 32-bit.
9643 */
9644DECL_INLINE_THROW(uint32_t)
9645iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9646{
9647#ifdef RT_ARCH_AMD64
9648 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9649#elif defined(RT_ARCH_ARM64)
9650 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9651#else
9652# error "port me"
9653#endif
9654 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9655 return off;
9656}
9657
9658
9659/**
9660 * Emits a vecdst = gprsrc broadcast, 64-bit.
9661 */
9662DECL_FORCE_INLINE(uint32_t)
9663iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9664{
9665#ifdef RT_ARCH_AMD64
9666 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
9667 * vbroadcast needs a memory operand or another xmm register to work... */
9668
9669 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
9670 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9671 pCodeBuf[off++] = X86_OP_REX_W
9672 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9673 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9674 pCodeBuf[off++] = 0x0f;
9675 pCodeBuf[off++] = 0x3a;
9676 pCodeBuf[off++] = 0x22;
9677 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9678 pCodeBuf[off++] = 0x00;
9679
9680 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
9681 pCodeBuf[off++] = X86_OP_VEX3;
9682 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9683 | 0x02 /* opcode map. */
9684 | ( iVecRegDst >= 8
9685 ? 0
9686 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9687 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9688 pCodeBuf[off++] = 0x59;
9689 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9690#elif defined(RT_ARCH_ARM64)
9691 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9692 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9693
9694 /* dup vecdst, gpr */
9695 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
9696 if (f256Bit)
9697 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
9698#else
9699# error "port me"
9700#endif
9701 return off;
9702}
9703
9704
9705/**
9706 * Emits a vecdst = gprsrc broadcast, 64-bit.
9707 */
9708DECL_INLINE_THROW(uint32_t)
9709iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9710{
9711#ifdef RT_ARCH_AMD64
9712 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
9713#elif defined(RT_ARCH_ARM64)
9714 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9715#else
9716# error "port me"
9717#endif
9718 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9719 return off;
9720}
9721
9722
9723/**
9724 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9725 */
9726DECL_FORCE_INLINE(uint32_t)
9727iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9728{
9729#ifdef RT_ARCH_AMD64
9730 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
9731
9732 /* vinserti128 ymm, ymm, xmm, 1 (the immediate selects the upper 128-bit lane). */ /* ASSUMES AVX2 support */
9733 pCodeBuf[off++] = X86_OP_VEX3;
9734 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9735 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9736 pCodeBuf[off++] = 0x38;
9737 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9738 pCodeBuf[off++] = 0x01; /* Immediate */
9739#elif defined(RT_ARCH_ARM64)
9740 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9741 Assert(!(iVecRegDst & 0x1));
9742
9743 /* mov dst, src; alias for: orr dst, src, src */
9744 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9745 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
9746#else
9747# error "port me"
9748#endif
9749 return off;
9750}
9751
9752
9753/**
9754 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9755 */
9756DECL_INLINE_THROW(uint32_t)
9757iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9758{
9759#ifdef RT_ARCH_AMD64
9760 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
9761#elif defined(RT_ARCH_ARM64)
9762 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
9763#else
9764# error "port me"
9765#endif
9766 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9767 return off;
9768}
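
/* Usage sketch for combining these emitters (illustrative; idxVecTmp and
   idxGprVal are hypothetical host registers picked by the caller):
       off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxVecTmp, idxGprVal, false);
       off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxVecTmp);
   broadcasts the GPR across the low 128 bits and then clears the upper
   lane, mirroring the zero-extension of a VEX.128 encoded instruction. */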
9769
9770#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
9771
9772/** @} */
9773
9774#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
9775