source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@105307

Last change on this file since 105307 was 105271, checked in by vboxsync, 7 months ago

VMM/IEM: Replaced IEMNATIVEEXITREASON with IEMNATIVELABELTYPE, since it's always been a super set of it. Some source code width adjustments. bugref:10677

1/* $Id: IEMN8veRecompilerEmit.h 105271 2024-07-11 10:30:56Z vboxsync $ */
2/** @file
3 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34#include "IEMN8veRecompiler.h"
35
36
37/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
38 * @ingroup grp_iem_n8ve_re
39 * @{
40 */
41
42/**
43 * Emit a simple marker instruction to more easily tell where something starts
44 * in the disassembly.
45 */
46DECL_INLINE_THROW(uint32_t)
47iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
48{
49#ifdef RT_ARCH_AMD64
50 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
51 if (uInfo == 0)
52 {
53 /* nop */
54 pbCodeBuf[off++] = 0x90;
55 }
56 else
57 {
58 /* nop [disp32] */
59 pbCodeBuf[off++] = 0x0f;
60 pbCodeBuf[off++] = 0x1f;
61 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
62 pbCodeBuf[off++] = RT_BYTE1(uInfo);
63 pbCodeBuf[off++] = RT_BYTE2(uInfo);
64 pbCodeBuf[off++] = RT_BYTE3(uInfo);
65 pbCodeBuf[off++] = RT_BYTE4(uInfo);
66 }
67#elif defined(RT_ARCH_ARM64)
68 /* nop */
69 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
70 if (uInfo == 0)
71 pu32CodeBuf[off++] = ARMV8_A64_INSTR_NOP;
72 else
73 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(ARMV8_A64_REG_XZR, (uint16_t)uInfo);
74
75 RT_NOREF(uInfo);
76#else
77# error "port me"
78#endif
79 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
80 return off;
81}
82
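/*
 * Worked example: with uInfo = 0x12345678 the AMD64 path above emits the
 * bytes 0F 1F 05 78 56 34 12, a long NOP whose 32-bit displacement field
 * carries the marker value so it is easy to spot in the disassembly.  The
 * ARM64 path emits 'movz xzr, #0x5678', a harmless write of the low 16
 * marker bits to the zero register.
 */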
83
84/**
85 * Emit a breakpoint instruction.
86 */
87DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
88{
89#ifdef RT_ARCH_AMD64
90 pCodeBuf[off++] = 0xcc;
91 RT_NOREF(uInfo); /** @todo use multibyte nop for info? */
92
93#elif defined(RT_ARCH_ARM64)
94 pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));
95
96#else
97# error "port me"
98#endif
99 return off;
100}
101
102
103/**
104 * Emit a breakpoint instruction.
105 */
106DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
107{
108#ifdef RT_ARCH_AMD64
109 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
110#elif defined(RT_ARCH_ARM64)
111 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
112#else
113# error "port me"
114#endif
115 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
116 return off;
117}
118
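/*
 * Usage sketch (hypothetical marker value) for dropping a breakpoint into
 * the generated code while debugging an emitter:
 *
 *      off = iemNativeEmitBrk(pReNative, off, 0x1234);
 *
 * On AMD64 this emits a plain int3 (uInfo is currently ignored there),
 * while on ARM64 it emits 'brk #0x1234'.
 */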
119
120/*********************************************************************************************************************************
121* Loads, Stores and Related Stuff. *
122*********************************************************************************************************************************/
123
124#ifdef RT_ARCH_AMD64
125/**
126 * Common bit of iemNativeEmitLoadGprByGpr and friends.
127 */
128DECL_FORCE_INLINE(uint32_t)
129iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
130{
131 if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
132 {
133 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
134 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
135 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
136 }
137 else if (offDisp == (int8_t)offDisp)
138 {
139 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
140 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
141 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
142 pbCodeBuf[off++] = (uint8_t)offDisp;
143 }
144 else
145 {
146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
147 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
148 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
149 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
150 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
151 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
152 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
153 }
154 return off;
155}
156#endif /* RT_ARCH_AMD64 */
157
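/*
 * Worked ModRM/SIB example for iemNativeEmitGprByGprDisp: a load of RAX
 * from [RSP+8] (iGprReg=0, iGprBase=4, offDisp=8) takes the disp8 branch
 * and emits 44 24 08, i.e. ModRM mod=01/reg=000/rm=100, the mandatory SIB
 * byte 24 selecting RSP with no index, and the 8-bit displacement.  The
 * RBP/R13 special casing is needed because mod=00/rm=101 would select
 * RIP-relative addressing instead of [RBP].
 */
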
158/**
159 * Emits setting a GPR to zero.
160 */
161DECL_INLINE_THROW(uint32_t)
162iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
163{
164#ifdef RT_ARCH_AMD64
165 /* xor gpr32, gpr32 */
166 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
167 if (iGpr >= 8)
168 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
169 pbCodeBuf[off++] = 0x33;
170 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
171
172#elif defined(RT_ARCH_ARM64)
173 /* mov gpr, #0x0 */
174 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
175 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;
176
177#else
178# error "port me"
179#endif
180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
181 return off;
182}
183
184
185/**
186 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
187 * buffer space.
188 *
189 * Max buffer consumption:
190 * - AMD64: 6 instruction bytes.
191 * - ARM64: 2 instruction words (8 bytes).
192 *
193 * @note The top 32 bits will be cleared.
194 */
195DECL_FORCE_INLINE(uint32_t)
196iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
197{
198#ifdef RT_ARCH_AMD64
199 if (uImm32 == 0)
200 {
201 /* xor gpr, gpr */
202 if (iGpr >= 8)
203 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
204 pCodeBuf[off++] = 0x33;
205 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
206 }
207 else
208 {
209 /* mov gpr, imm32 */
210 if (iGpr >= 8)
211 pCodeBuf[off++] = X86_OP_REX_B;
212 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
213 pCodeBuf[off++] = RT_BYTE1(uImm32);
214 pCodeBuf[off++] = RT_BYTE2(uImm32);
215 pCodeBuf[off++] = RT_BYTE3(uImm32);
216 pCodeBuf[off++] = RT_BYTE4(uImm32);
217 }
218
219#elif defined(RT_ARCH_ARM64)
220 if ((uImm32 >> 16) == 0)
221 /* movz gpr, imm16 */
222 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
223 else if ((uImm32 & UINT32_C(0xffff)) == 0)
224 /* movz gpr, imm16, lsl #16 */
225 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
226 else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
227 /* movn gpr, imm16, lsl #16 */
228 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
229 else if ((uImm32 >> 16) == UINT32_C(0xffff))
230 /* movn gpr, imm16 */
231 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
232 else
233 {
234 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
235 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
236 }
237
238#else
239# error "port me"
240#endif
241 return off;
242}
243
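/*
 * Selection examples: uImm32 = 0xffff1234 hits the '(uImm32 >> 16) ==
 * 0xffff' case above and becomes a single 'movn gpr, #0xedcb'
 * (~0x0000edcb == 0xffff1234), while uImm32 = 0x00121234 has two non-zero,
 * non-0xffff half-words and needs the full movz+movk pair.
 */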
244
245/**
246 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
247 * buffer space.
248 *
249 * Max buffer consumption:
250 * - AMD64: 10 instruction bytes.
251 * - ARM64: 4 instruction words (16 bytes).
252 */
253DECL_FORCE_INLINE(uint32_t)
254iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
255{
256#ifdef RT_ARCH_AMD64
257 if (uImm64 == 0)
258 {
259 /* xor gpr, gpr */
260 if (iGpr >= 8)
261 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
262 pCodeBuf[off++] = 0x33;
263 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
264 }
265 else if (uImm64 <= UINT32_MAX)
266 {
267 /* mov gpr, imm32 */
268 if (iGpr >= 8)
269 pCodeBuf[off++] = X86_OP_REX_B;
270 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
271 pCodeBuf[off++] = RT_BYTE1(uImm64);
272 pCodeBuf[off++] = RT_BYTE2(uImm64);
273 pCodeBuf[off++] = RT_BYTE3(uImm64);
274 pCodeBuf[off++] = RT_BYTE4(uImm64);
275 }
276 else if (uImm64 == (uint64_t)(int32_t)uImm64)
277 {
278 /* mov gpr, sx(imm32) */
279 if (iGpr < 8)
280 pCodeBuf[off++] = X86_OP_REX_W;
281 else
282 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
283 pCodeBuf[off++] = 0xc7;
284 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
285 pCodeBuf[off++] = RT_BYTE1(uImm64);
286 pCodeBuf[off++] = RT_BYTE2(uImm64);
287 pCodeBuf[off++] = RT_BYTE3(uImm64);
288 pCodeBuf[off++] = RT_BYTE4(uImm64);
289 }
290 else
291 {
292 /* mov gpr, imm64 */
293 if (iGpr < 8)
294 pCodeBuf[off++] = X86_OP_REX_W;
295 else
296 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
297 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
298 pCodeBuf[off++] = RT_BYTE1(uImm64);
299 pCodeBuf[off++] = RT_BYTE2(uImm64);
300 pCodeBuf[off++] = RT_BYTE3(uImm64);
301 pCodeBuf[off++] = RT_BYTE4(uImm64);
302 pCodeBuf[off++] = RT_BYTE5(uImm64);
303 pCodeBuf[off++] = RT_BYTE6(uImm64);
304 pCodeBuf[off++] = RT_BYTE7(uImm64);
305 pCodeBuf[off++] = RT_BYTE8(uImm64);
306 }
307
308#elif defined(RT_ARCH_ARM64)
309 /*
310 * Quick simplification: Do 32-bit load if top half is zero.
311 */
312 if (uImm64 <= UINT32_MAX)
313 return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);
314
315 /*
316 * We need to start this sequence with a 'movz gpr, imm16, lsl #x' and
317 * supply the remaining bits using 'movk gpr, imm16, lsl #x'.
318 *
319 * The movz instruction is encoded as 0xd2800000 + shift + imm16 + gpr,
320 * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
321 * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
322 * after the first non-zero immediate component so we switch to movk for
323 * the remainder.
324 */
325 unsigned cZeroHalfWords = !( uImm64 & UINT16_MAX)
326 + !((uImm64 >> 16) & UINT16_MAX)
327 + !((uImm64 >> 32) & UINT16_MAX)
328 + !((uImm64 >> 48) & UINT16_MAX);
329 unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
330 : ( (uImm64 & UINT16_MAX) == UINT16_MAX)
331 + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
332 + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
333 + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
334 if (cFfffHalfWords <= cZeroHalfWords)
335 {
336 uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
337
338 /* movz gpr, imm16 */
339 uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
340 if (uImmPart || cZeroHalfWords == 4)
341 {
342 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
343 fMovBase |= RT_BIT_32(29);
344 }
345 /* mov[z/k] gpr, imm16, lsl #16 */
346 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
347 if (uImmPart)
348 {
349 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
350 fMovBase |= RT_BIT_32(29);
351 }
352 /* mov[z/k] gpr, imm16, lsl #32 */
353 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
354 if (uImmPart)
355 {
356 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
357 fMovBase |= RT_BIT_32(29);
358 }
359 /* mov[z/k] gpr, imm16, lsl #48 */
360 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
361 if (uImmPart)
362 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
363 }
364 else
365 {
366 uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
367
368 /* find the first half-word that isn't UINT16_MAX. */
369 uint32_t const iHwNotFfff = (uImm64 & UINT16_MAX) != UINT16_MAX ? 0
370 : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
371 : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
372
373 /* movn gpr, imm16, lsl #iHwNotFfff*16 */
374 uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
375 pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
376 fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
377 /* movk gpr, imm16 */
378 if (iHwNotFfff != 0)
379 {
380 uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
381 if (uImmPart != UINT32_C(0xffff))
382 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
383 }
384 /* movk gpr, imm16, lsl #16 */
385 if (iHwNotFfff != 1)
386 {
387 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
388 if (uImmPart != UINT32_C(0xffff))
389 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
390 }
391 /* movk gpr, imm16, lsl #32 */
392 if (iHwNotFfff != 2)
393 {
394 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
395 if (uImmPart != UINT32_C(0xffff))
396 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
397 }
398 /* movk gpr, imm16, lsl #48 */
399 if (iHwNotFfff != 3)
400 {
401 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
402 if (uImmPart != UINT32_C(0xffff))
403 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
404 }
405 }
406
407#else
408# error "port me"
409#endif
410 return off;
411}
412
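/*
 * Decomposition examples: uImm64 = 0x0000cafe00000000 has three zero
 * half-words, so the movz path emits a single 'movz gpr, #0xcafe, lsl #32';
 * uImm64 = 0xffffffffffff1234 has three 0xffff half-words, so the movn
 * path emits a single 'movn gpr, #0xedcb' and all the movk instructions
 * are skipped.
 */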
413
414/**
415 * Emits loading a constant into a 64-bit GPR.
416 */
417DECL_INLINE_THROW(uint32_t)
418iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
419{
420#ifdef RT_ARCH_AMD64
421 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
422#elif defined(RT_ARCH_ARM64)
423 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
424#else
425# error "port me"
426#endif
427 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
428 return off;
429}
430
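/*
 * Usage sketch (constant chosen at random): load a 64-bit value into the
 * fixed temporary register used elsewhere in this file:
 *
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0,
 *                                      UINT64_C(0x0000cafe00001234));
 */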
431
432/**
433 * Emits loading a constant into a 32-bit GPR.
434 * @note The top 32 bits will be cleared.
435 */
436DECL_INLINE_THROW(uint32_t)
437iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
438{
439#ifdef RT_ARCH_AMD64
440 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
441#elif defined(RT_ARCH_ARM64)
442 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
443#else
444# error "port me"
445#endif
446 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
447 return off;
448}
449
450
451/**
452 * Emits loading a constant into an 8-bit GPR.
453 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
454 * only the ARM64 version does that.
455 */
456DECL_INLINE_THROW(uint32_t)
457iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
458{
459#ifdef RT_ARCH_AMD64
460 /* mov gpr, imm8 */
461 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
462 if (iGpr >= 8)
463 pbCodeBuf[off++] = X86_OP_REX_B;
464 else if (iGpr >= 4)
465 pbCodeBuf[off++] = X86_OP_REX;
466 pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
467 pbCodeBuf[off++] = RT_BYTE1(uImm8);
468
469#elif defined(RT_ARCH_ARM64)
470 /* movz gpr, imm16, lsl #0 */
471 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
472 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;
473
474#else
475# error "port me"
476#endif
477 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
478 return off;
479}
480
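/*
 * Note on the bare REX prefix above: without any REX prefix the byte
 * register encodings 4..7 select AH/CH/DH/BH, while an empty REX (0x40)
 * remaps them to SPL/BPL/SIL/DIL.  E.g. iGpr = 6 with uImm8 = 0x42 emits
 * 40 B6 42, i.e. 'mov sil, 0x42'.
 */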
481
482#ifdef RT_ARCH_AMD64
483/**
484 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
485 */
486DECL_FORCE_INLINE(uint32_t)
487iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
488{
489 if (offVCpu < 128)
490 {
491 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
492 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
493 }
494 else
495 {
496 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
497 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
498 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
499 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
500 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
501 }
502 return off;
503}
504
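/*
 * Encoding note: since the disp8 form is sign-extended, only offsets below
 * 128 can use it; e.g. offVCpu = 0x40 yields a 2-byte tail (ModRM + disp8)
 * while offVCpu = 0x1000 yields 5 bytes (ModRM + disp32).
 */
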
505#elif defined(RT_ARCH_ARM64)
506
507/**
508 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
509 *
510 * @note Loads can use @a iGprReg for large offsets, stores require a temporary
511 * register (@a iGprTmp).
512 * @note DON'T try this with prefetch.
513 */
514DECL_FORCE_INLINE_THROW(uint32_t)
515iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
516 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
517{
518 /*
519 * There are a couple of ldr variants that take an immediate offset, so
520 * try to use those if we can; otherwise we have to use the temporary
521 * register to help with the addressing.
522 */
523 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
524 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
525 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
526 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
527 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
528 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
529 else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
530 {
531 /* The offset is too large, so we must load it into a register and use
532 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
533 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
534 if (iGprTmp == UINT8_MAX)
535 iGprTmp = iGprReg;
536 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
537 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
538 }
539 else
540# ifdef IEM_WITH_THROW_CATCH
541 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
542# else
543 AssertReleaseFailedStmt(off = UINT32_MAX);
544# endif
545
546 return off;
547}
548
549/**
550 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
551 */
552DECL_FORCE_INLINE_THROW(uint32_t)
553iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
554 uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
555{
556 /*
557 * There are a couple of ldr variants that take an immediate offset, so
558 * try to use those if we can; otherwise we have to use the temporary
559 * register to help with the addressing.
560 */
561 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
562 {
563 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
564 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
565 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
566 }
567 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
568 {
569 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
570 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
571 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
572 }
573 else
574 {
575 /* The offset is too large, so we must load it into a register and use
576 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
577 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
578 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
579 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
580 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
581 IEMNATIVE_REG_FIXED_TMP0);
582 }
583 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
584 return off;
585}
586
587#endif /* RT_ARCH_ARM64 */
588
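/*
 * Example of the scaled-offset fast path in the ARM64 helpers above: an
 * 8-byte field at offVCpu = 0x3f8 is aligned and below _4K * 8, so it is
 * reachable as 'ldr Xt, [<PVMCPU>, #0x3f8]' with the scaled 12-bit
 * immediate holding 0x3f8 / 8 = 127; a misaligned or more distant field
 * falls back to the register-indexed form via a temporary register.
 */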
589
590/**
591 * Emits a 64-bit GPR load of a VCpu value.
592 */
593DECL_FORCE_INLINE_THROW(uint32_t)
594iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
595{
596#ifdef RT_ARCH_AMD64
597 /* mov reg64, mem64 */
598 if (iGpr < 8)
599 pCodeBuf[off++] = X86_OP_REX_W;
600 else
601 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
602 pCodeBuf[off++] = 0x8b;
603 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
604
605#elif defined(RT_ARCH_ARM64)
606 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
607
608#else
609# error "port me"
610#endif
611 return off;
612}
613
614
615/**
616 * Emits a 64-bit GPR load of a VCpu value.
617 */
618DECL_INLINE_THROW(uint32_t)
619iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
620{
621#ifdef RT_ARCH_AMD64
622 off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
623 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
624
625#elif defined(RT_ARCH_ARM64)
626 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
627
628#else
629# error "port me"
630#endif
631 return off;
632}
633
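/*
 * Usage sketch (idxReg being a previously allocated temporary): fetch the
 * guest RIP from the CPU context into a host register:
 *
 *      off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxReg,
 *                                            RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
 */
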
634/**
635 * Emits a 32-bit GPR load of a VCpu value.
636 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
637 */
638DECL_INLINE_THROW(uint32_t)
639iemNativeEmitLoadGprFromVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
640{
641#ifdef RT_ARCH_AMD64
642 /* mov reg32, mem32 */
643 if (iGpr >= 8)
644 pCodeBuf[off++] = X86_OP_REX_R;
645 pCodeBuf[off++] = 0x8b;
646 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
647
648#elif defined(RT_ARCH_ARM64)
649 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
650
651#else
652# error "port me"
653#endif
654 return off;
655}
656
657
658/**
659 * Emits a 32-bit GPR load of a VCpu value.
660 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
661 */
662DECL_INLINE_THROW(uint32_t)
663iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
664{
665#ifdef RT_ARCH_AMD64
666 off = iemNativeEmitLoadGprFromVCpuU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
667 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
668
669#elif defined(RT_ARCH_ARM64)
670 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
671
672#else
673# error "port me"
674#endif
675 return off;
676}
677
678
679/**
680 * Emits a 16-bit GPR load of a VCpu value.
681 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
682 */
683DECL_INLINE_THROW(uint32_t)
684iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
685{
686#ifdef RT_ARCH_AMD64
687 /* movzx reg32, mem16 */
688 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
689 if (iGpr >= 8)
690 pbCodeBuf[off++] = X86_OP_REX_R;
691 pbCodeBuf[off++] = 0x0f;
692 pbCodeBuf[off++] = 0xb7;
693 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
694 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
695
696#elif defined(RT_ARCH_ARM64)
697 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
698
699#else
700# error "port me"
701#endif
702 return off;
703}
704
705
706/**
707 * Emits an 8-bit GPR load of a VCpu value.
708 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
709 */
710DECL_INLINE_THROW(uint32_t)
711iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
712{
713#ifdef RT_ARCH_AMD64
714 /* movzx reg32, mem8 */
715 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
716 if (iGpr >= 8)
717 pbCodeBuf[off++] = X86_OP_REX_R;
718 pbCodeBuf[off++] = 0x0f;
719 pbCodeBuf[off++] = 0xb6;
720 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
721 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
722
723#elif defined(RT_ARCH_ARM64)
724 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
725
726#else
727# error "port me"
728#endif
729 return off;
730}
731
732
733/**
734 * Emits a store of a GPR value to a 64-bit VCpu field.
735 */
736DECL_FORCE_INLINE_THROW(uint32_t)
737iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
738 uint8_t iGprTmp = UINT8_MAX)
739{
740#ifdef RT_ARCH_AMD64
741 /* mov mem64, reg64 */
742 if (iGpr < 8)
743 pCodeBuf[off++] = X86_OP_REX_W;
744 else
745 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
746 pCodeBuf[off++] = 0x89;
747 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
748 RT_NOREF(iGprTmp);
749
750#elif defined(RT_ARCH_ARM64)
751 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
752
753#else
754# error "port me"
755#endif
756 return off;
757}
758
759
760/**
761 * Emits a store of a GPR value to a 64-bit VCpu field.
762 */
763DECL_INLINE_THROW(uint32_t)
764iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
765{
766#ifdef RT_ARCH_AMD64
767 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
768#elif defined(RT_ARCH_ARM64)
769 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
770 IEMNATIVE_REG_FIXED_TMP0);
771#else
772# error "port me"
773#endif
774 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
775 return off;
776}
777
778
779/**
780 * Emits a store of a GPR value to a 32-bit VCpu field.
781 */
782DECL_INLINE_THROW(uint32_t)
783iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
784{
785#ifdef RT_ARCH_AMD64
786 /* mov mem32, reg32 */
787 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
788 if (iGpr >= 8)
789 pbCodeBuf[off++] = X86_OP_REX_R;
790 pbCodeBuf[off++] = 0x89;
791 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
792 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
793
794#elif defined(RT_ARCH_ARM64)
795 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
796
797#else
798# error "port me"
799#endif
800 return off;
801}
802
803
804/**
805 * Emits a store of a GPR value to a 16-bit VCpu field.
806 */
807DECL_INLINE_THROW(uint32_t)
808iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
809{
810#ifdef RT_ARCH_AMD64
811 /* mov mem16, reg16 */
812 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
813 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
814 if (iGpr >= 8)
815 pbCodeBuf[off++] = X86_OP_REX_R;
816 pbCodeBuf[off++] = 0x89;
817 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
818 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
819
820#elif defined(RT_ARCH_ARM64)
821 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));
822
823#else
824# error "port me"
825#endif
826 return off;
827}
828
829
830/**
831 * Emits a store of a GPR value to an 8-bit VCpu field.
832 */
833DECL_INLINE_THROW(uint32_t)
834iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
835{
836#ifdef RT_ARCH_AMD64
837 /* mov mem8, reg8 */
838 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
839 if (iGpr >= 8)
840 pbCodeBuf[off++] = X86_OP_REX_R;
841 pbCodeBuf[off++] = 0x88;
842 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
843 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
844
845#elif defined(RT_ARCH_ARM64)
846 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
847
848#else
849# error "port me"
850#endif
851 return off;
852}
853
854
855/**
856 * Emits a store of an immediate value to a 64-bit VCpu field.
857 *
858 * @note Will allocate temporary registers on both ARM64 and AMD64.
859 */
860DECL_FORCE_INLINE_THROW(uint32_t)
861iemNativeEmitStoreImmToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm, uint32_t offVCpu)
862{
863#ifdef RT_ARCH_AMD64
864 /* mov mem64, reg64 (the immediate is loaded into a temporary register) */
865 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
866 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxRegImm, offVCpu);
867 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
868 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
869
870#elif defined(RT_ARCH_ARM64)
871 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
872 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
873 if (idxRegImm != ARMV8_A64_REG_XZR)
874 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
875
876#else
877# error "port me"
878#endif
879 return off;
880}
881
882
883/**
884 * Emits a store of an immediate value to a 32-bit VCpu field.
885 *
886 * @note ARM64: Will allocate temporary registers.
887 */
888DECL_FORCE_INLINE_THROW(uint32_t)
889iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
890{
891#ifdef RT_ARCH_AMD64
892 /* mov mem32, imm32 */
893 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
894 pCodeBuf[off++] = 0xc7;
895 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
896 pCodeBuf[off++] = RT_BYTE1(uImm);
897 pCodeBuf[off++] = RT_BYTE2(uImm);
898 pCodeBuf[off++] = RT_BYTE3(uImm);
899 pCodeBuf[off++] = RT_BYTE4(uImm);
900 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
901
902#elif defined(RT_ARCH_ARM64)
903 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
904 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
905 if (idxRegImm != ARMV8_A64_REG_XZR)
906 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
907
908#else
909# error "port me"
910#endif
911 return off;
912}
913
914
915
916/**
917 * Emits a store of an immediate value to a 16-bit VCpu field.
918 *
919 * @note ARM64: @a idxTmp1 is always required! Whether @a idxTmp2 is needed
920 * depends on whether the offset can be encoded as an immediate or not. The
921 * @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same from CPUMCPU.
922 */
923DECL_FORCE_INLINE_THROW(uint32_t)
924iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
925 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
926{
927#ifdef RT_ARCH_AMD64
928 /* mov mem16, imm16 */
929 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
930 pCodeBuf[off++] = 0xc7;
931 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
932 pCodeBuf[off++] = RT_BYTE1(uImm);
933 pCodeBuf[off++] = RT_BYTE2(uImm);
934 RT_NOREF(idxTmp1, idxTmp2);
935
936#elif defined(RT_ARCH_ARM64)
937 if (idxTmp1 != UINT8_MAX)
938 {
939 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
940 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
941 sizeof(uint16_t), idxTmp2);
942 }
943 else
944# ifdef IEM_WITH_THROW_CATCH
945 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
946# else
947 AssertReleaseFailedStmt(off = UINT32_MAX);
948# endif
949
950#else
951# error "port me"
952#endif
953 return off;
954}
955
956
957/**
958 * Emits a store of an immediate value to a 8-bit VCpu field.
959 */
960DECL_INLINE_THROW(uint32_t)
961iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
962{
963#ifdef RT_ARCH_AMD64
964 /* mov mem8, imm8 */
965 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
966 pbCodeBuf[off++] = 0xc6;
967 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
968 pbCodeBuf[off++] = bImm;
969 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
970
971#elif defined(RT_ARCH_ARM64)
972 /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
973 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
974 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
975 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
976
977#else
978# error "port me"
979#endif
980 return off;
981}
982
983
984/**
985 * Emits a load of the effective address of a VCpu field into a GPR.
986 */
987DECL_INLINE_THROW(uint32_t)
988iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
989{
990#ifdef RT_ARCH_AMD64
991 /* lea gprdst, [rbx + offDisp] */
992 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
993 if (iGprDst < 8)
994 pbCodeBuf[off++] = X86_OP_REX_W;
995 else
996 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
997 pbCodeBuf[off++] = 0x8d;
998 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);
999
1000#elif defined(RT_ARCH_ARM64)
1001 if (offVCpu < (unsigned)_4K)
1002 {
1003 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1004 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
1005 }
1006 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
1007 {
1008 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1009 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
1010 offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
1011 }
1012 else if (offVCpu <= 0xffffffU)
1013 {
1014 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1015 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu >> 12,
1016 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1017 if (offVCpu & 0xfffU)
1018 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, offVCpu & 0xfff);
1019 }
1020 else
1021 {
1022 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
1023 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
1024 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1025 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, iGprDst);
1026 }
1027
1028#else
1029# error "port me"
1030#endif
1031 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1032 return off;
1033}
1034
1035
1036/** This is just a typesafe alternative to RT_UOFFSETOF. */
1037DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
1038{
1039 uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
1040 Assert(off < sizeof(VMCPU));
1041 return off;
1042}
1043
1044
1045/** This is just a typesafe alternative to RT_UOFFSETOF. */
1046DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
1047{
1048 uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
1049 Assert(off < sizeof(VMCPU));
1050 return off;
1051}
1052
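/*
 * Usage sketch (pStatCounter standing in for any STAMCOUNTER member of
 * VMCPU):
 *
 *      uint32_t const offVCpu = iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, pStatCounter);
 *
 * Unlike plain RT_UOFFSETOF this also asserts that the pointer really
 * lies within the VMCPU structure.
 */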
1053
1054/**
1055 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1056 *
1057 * @note The two temp registers are not required for AMD64. ARM64 always
1058 * requires the first, and the 2nd is needed if the offset cannot be
1059 * encoded as an immediate.
1060 */
1061DECL_FORCE_INLINE(uint32_t)
1062iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1063{
1064#ifdef RT_ARCH_AMD64
1065 /* inc qword [pVCpu + off] */
1066 pCodeBuf[off++] = X86_OP_REX_W;
1067 pCodeBuf[off++] = 0xff;
1068 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1069 RT_NOREF(idxTmp1, idxTmp2);
1070
1071#elif defined(RT_ARCH_ARM64)
1072 /* Determine how we're to access pVCpu first. */
1073 uint32_t const cbData = sizeof(STAMCOUNTER);
1074 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
1075 {
1076 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1077 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1078 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1079 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1080 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1081 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1082 }
1083 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
1084 {
1085 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1086 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1087 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1088 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1089 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1090 }
1091 else
1092 {
1093 /* The offset is too large, so we must load it into a register and use
1094 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
1095 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1096 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1097 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1098 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1099 }
1100
1101#else
1102# error "port me"
1103#endif
1104 return off;
1105}
1106
1107
1108/**
1109 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1110 *
1111 * @note The two temp registers are not required for AMD64. ARM64 always
1112 * requires the first, and the 2nd is needed if the offset cannot be
1113 * encoded as an immediate.
1114 */
1115DECL_FORCE_INLINE(uint32_t)
1116iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1117{
1118#ifdef RT_ARCH_AMD64
1119 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
1120#elif defined(RT_ARCH_ARM64)
1121 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1122#else
1123# error "port me"
1124#endif
1125 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1126 return off;
1127}
1128
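/*
 * Usage sketch (offVCpuCounter assumed to come from
 * iemNativeVCpuOffsetFromStamCounterPtr); the temporaries are only used by
 * the ARM64 code path:
 *
 *      uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
 *      uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpuCounter);
 *      iemNativeRegFreeTmp(pReNative, idxTmp2);
 *      iemNativeRegFreeTmp(pReNative, idxTmp1);
 */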
1129
1130/**
1131 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1132 *
1133 * @note The two temp registers are not required for AMD64. ARM64 always
1134 * requires the first, and the 2nd is needed if the offset cannot be
1135 * encoded as an immediate.
1136 */
1137DECL_FORCE_INLINE(uint32_t)
1138iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1139{
1140 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1141#ifdef RT_ARCH_AMD64
1142 /* inc dword [pVCpu + offVCpu] */
1143 pCodeBuf[off++] = 0xff;
1144 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1145 RT_NOREF(idxTmp1, idxTmp2);
1146
1147#elif defined(RT_ARCH_ARM64)
1148 /* Determine how we're to access pVCpu first. */
1149 uint32_t const cbData = sizeof(uint32_t);
1150 if (offVCpu < (unsigned)(_4K * cbData))
1151 {
1152 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1153 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
1154 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1155 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1156 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
1157 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1158 }
1159 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1160 {
1161 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1162 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1163 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1164 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1165 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1166 }
1167 else
1168 {
1169 /* The offset is too large, so we must load it into a register and use
1170 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1171 of the instruction if that'll reduce the constant to 16-bits. */
1172 if (offVCpu / cbData < (unsigned)UINT16_MAX)
1173 {
1174 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
1175 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1176 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1177 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1178 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1179 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1180 }
1181 else
1182 {
1183 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1184 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1185 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1186 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1187 }
1188 }
1189
1190#else
1191# error "port me"
1192#endif
1193 return off;
1194}
1195
1196
1197/**
1198 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1199 *
1200 * @note The two temp registers are not required for AMD64. ARM64 always
1201 * requires the first, and the 2nd is needed if the offset cannot be
1202 * encoded as an immediate.
1203 */
1204DECL_FORCE_INLINE(uint32_t)
1205iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1206{
1207#ifdef RT_ARCH_AMD64
1208 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
1209#elif defined(RT_ARCH_ARM64)
1210 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1211#else
1212# error "port me"
1213#endif
1214 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1215 return off;
1216}
1217
1218
1219/**
1220 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
1221 *
1222 * @note May allocate temporary registers (not AMD64).
1223 */
1224DECL_FORCE_INLINE(uint32_t)
1225iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1226{
1227 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1228#ifdef RT_ARCH_AMD64
1229 /* or dword [pVCpu + offVCpu], imm8/32 */
1230 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1231 if (fMask < 0x80)
1232 {
1233 pCodeBuf[off++] = 0x83;
1234 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1235 pCodeBuf[off++] = (uint8_t)fMask;
1236 }
1237 else
1238 {
1239 pCodeBuf[off++] = 0x81;
1240 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1241 pCodeBuf[off++] = RT_BYTE1(fMask);
1242 pCodeBuf[off++] = RT_BYTE2(fMask);
1243 pCodeBuf[off++] = RT_BYTE3(fMask);
1244 pCodeBuf[off++] = RT_BYTE4(fMask);
1245 }
1246
1247#elif defined(RT_ARCH_ARM64)
1248 /* If the constant is unwieldy we'll need a register to hold it as well. */
1249 uint32_t uImmSizeLen, uImmRotate;
1250 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1251 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1252
1253 /* We need a temp register for holding the member value we're modifying. */
1254 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1255
1256 /* Determine how we're to access pVCpu first. */
1257 uint32_t const cbData = sizeof(uint32_t);
1258 if (offVCpu < (unsigned)(_4K * cbData))
1259 {
1260 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1261 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1262 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1263 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1264 if (idxTmpMask == UINT8_MAX)
1265 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1266 else
1267 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1268 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1269 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1270 }
1271 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1272 {
1273 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1274 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1275 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1276 if (idxTmpMask == UINT8_MAX)
1277 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1278 else
1279 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1280 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1281 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1282 }
1283 else
1284 {
1285 /* The offset is too large, so we must load it into a register and use
1286 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1287 of the instruction if that'll reduce the constant to 16-bits. */
1288 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1289 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1290 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1291 if (fShifted)
1292 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1293 else
1294 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1295
1296 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1297 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1298
1299 if (idxTmpMask == UINT8_MAX)
1300 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1301 else
1302 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1303
1304 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1305 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1306 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1307 }
1308 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1309 if (idxTmpMask != UINT8_MAX)
1310 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1311
1312#else
1313# error "port me"
1314#endif
1315 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1316 return off;
1317}
1318
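/*
 * Encoding note for this helper and the AND variant below: on AMD64 a mask
 * below 0x80 uses the sign-extended imm8 form (opcode 0x83), which is three
 * bytes shorter than the imm32 form; on ARM64 a temporary register is only
 * needed when the mask cannot be encoded as a logical immediate, i.e. when
 * Armv8A64ConvertMask32ToImmRImmS fails.
 */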
1319
1320/**
1321 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1322 *
1323 * @note May allocate temporary registers (not AMD64).
1324 */
1325DECL_FORCE_INLINE(uint32_t)
1326iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1327{
1328 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1329#ifdef RT_ARCH_AMD64
1330 /* and dword [pVCpu + offVCpu], imm8/32 */
1331 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1332 if (fMask < 0x80)
1333 {
1334 pCodeBuf[off++] = 0x83;
1335 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1336 pCodeBuf[off++] = (uint8_t)fMask;
1337 }
1338 else
1339 {
1340 pCodeBuf[off++] = 0x81;
1341 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1342 pCodeBuf[off++] = RT_BYTE1(fMask);
1343 pCodeBuf[off++] = RT_BYTE2(fMask);
1344 pCodeBuf[off++] = RT_BYTE3(fMask);
1345 pCodeBuf[off++] = RT_BYTE4(fMask);
1346 }
1347
1348#elif defined(RT_ARCH_ARM64)
1349 /* If the constant is unwieldy we'll need a register to hold it as well. */
1350 uint32_t uImmSizeLen, uImmRotate;
1351 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1352 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1353
1354 /* We need a temp register for holding the member value we're modifying. */
1355 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1356
1357 /* Determine how we're to access pVCpu first. */
1358 uint32_t const cbData = sizeof(uint32_t);
1359 if (offVCpu < (unsigned)(_4K * cbData))
1360 {
1361 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1362 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1363 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1364 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1365 if (idxTmpMask == UINT8_MAX)
1366 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1367 else
1368 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1369 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1370 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1371 }
1372 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1373 {
1374 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1375 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1376 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1377 if (idxTmpMask == UINT8_MAX)
1378 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1379 else
1380 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1381 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1382 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1383 }
1384 else
1385 {
1386 /* The offset is too large, so we must load it into a register and use
1387 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1388 of the instruction if that'll reduce the constant to 16-bits. */
1389 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1390 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1391 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1392 if (fShifted)
1393 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1394 else
1395 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1396
1397 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1398 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1399
1400 if (idxTmpMask == UINT8_MAX)
1401 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1402 else
1403 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1404
1405 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1406 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1407 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1408 }
1409 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1410 if (idxTmpMask != UINT8_MAX)
1411 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1412
1413#else
1414# error "port me"
1415#endif
1416 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1417 return off;
1418}
1419
1420
1421/**
1422 * Emits a gprdst = gprsrc load.
1423 */
1424DECL_FORCE_INLINE(uint32_t)
1425iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1426{
1427#ifdef RT_ARCH_AMD64
1428 /* mov gprdst, gprsrc */
1429 if ((iGprDst | iGprSrc) >= 8)
1430 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1431 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1432 : X86_OP_REX_W | X86_OP_REX_R;
1433 else
1434 pCodeBuf[off++] = X86_OP_REX_W;
1435 pCodeBuf[off++] = 0x8b;
1436 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1437
1438#elif defined(RT_ARCH_ARM64)
1439 /* mov dst, src; alias for: orr dst, xzr, src */
1440 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1441
1442#else
1443# error "port me"
1444#endif
1445 return off;
1446}
1447
1448
1449/**
1450 * Emits a gprdst = gprsrc load.
1451 */
1452DECL_INLINE_THROW(uint32_t)
1453iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1454{
1455#ifdef RT_ARCH_AMD64
1456 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1457#elif defined(RT_ARCH_ARM64)
1458 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1459#else
1460# error "port me"
1461#endif
1462 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1463 return off;
1464}
1465
1466
1467/**
1468 * Emits a gprdst = gprsrc[31:0] load.
1469 * @note Bits 63 thru 32 are cleared.
1470 */
1471DECL_FORCE_INLINE(uint32_t)
1472iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1473{
1474#ifdef RT_ARCH_AMD64
1475 /* mov gprdst, gprsrc */
1476 if ((iGprDst | iGprSrc) >= 8)
1477 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1478 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1479 : X86_OP_REX_R;
1480 pCodeBuf[off++] = 0x8b;
1481 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1482
1483#elif defined(RT_ARCH_ARM64)
1484 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1485 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1486
1487#else
1488# error "port me"
1489#endif
1490 return off;
1491}
1492
1493
1494/**
1495 * Emits a gprdst = gprsrc[31:0] load.
1496 * @note Bits 63 thru 32 are cleared.
1497 */
1498DECL_INLINE_THROW(uint32_t)
1499iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1500{
1501#ifdef RT_ARCH_AMD64
1502 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1503#elif defined(RT_ARCH_ARM64)
1504 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1505#else
1506# error "port me"
1507#endif
1508 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1509 return off;
1510}
1511
1512
1513/**
1514 * Emits a gprdst = gprsrc[15:0] load.
1515 * @note Bits 63 thru 16 are cleared.
1516 */
1517DECL_INLINE_THROW(uint32_t)
1518iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1519{
1520#ifdef RT_ARCH_AMD64
1521 /* movzx Gv,Ew */
1522 if ((iGprDst | iGprSrc) >= 8)
1523 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1524 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1525 : X86_OP_REX_R;
1526 pCodeBuf[off++] = 0x0f;
1527 pCodeBuf[off++] = 0xb7;
1528 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1529
1530#elif defined(RT_ARCH_ARM64)
1531 /* and gprdst, gprsrc, #0xffff */
1532# if 1
1533 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1534 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1535# else
1536 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1537 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1538# endif
1539
1540#else
1541# error "port me"
1542#endif
1543 return off;
1544}
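#if 0 /* Worked example (editorial sketch): how the (immR=0, immS=0x0f) pair
         asserted above decodes as an ARM64 bitmask immediate.  immS=0x0f
         requests immS + 1 = 16 consecutive one bits, rotated right by
         immR=0 within the 32-bit element: */
    uint32_t const fMask = (UINT32_C(1) << (0x0f + 1)) - 1; /* = 0xffff */
    /* So the instruction is "and w<dst>, w<src>, #0xffff", matching the
       movzx Gv,Ew done on the AMD64 side. */
#endif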
1545
1546
1547/**
1548 * Emits a gprdst = gprsrc[15:0] load.
1549 * @note Bits 63 thru 16 are cleared.
1550 */
1551DECL_INLINE_THROW(uint32_t)
1552iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1553{
1554#ifdef RT_ARCH_AMD64
1555 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1556#elif defined(RT_ARCH_ARM64)
1557 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1558#else
1559# error "port me"
1560#endif
1561 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1562 return off;
1563}
1564
1565
1566/**
1567 * Emits a gprdst = gprsrc[7:0] load.
1568 * @note Bits 63 thru 8 are cleared.
1569 */
1570DECL_FORCE_INLINE(uint32_t)
1571iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1572{
1573#ifdef RT_ARCH_AMD64
1574 /* movzx Gv,Eb */
1575 if (iGprDst >= 8 || iGprSrc >= 8)
1576 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1577 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1578 : X86_OP_REX_R;
1579 else if (iGprSrc >= 4)
1580 pCodeBuf[off++] = X86_OP_REX;
1581 pCodeBuf[off++] = 0x0f;
1582 pCodeBuf[off++] = 0xb6;
1583 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1584
1585#elif defined(RT_ARCH_ARM64)
1586 /* and gprdst, gprsrc, #0xff */
1587 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1588 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1589
1590#else
1591# error "port me"
1592#endif
1593 return off;
1594}
1595
1596
1597/**
1598 * Emits a gprdst = gprsrc[7:0] load.
1599 * @note Bits 63 thru 8 are cleared.
1600 */
1601DECL_INLINE_THROW(uint32_t)
1602iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1603{
1604#ifdef RT_ARCH_AMD64
1605 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1606#elif defined(RT_ARCH_ARM64)
1607 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1608#else
1609# error "port me"
1610#endif
1611 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1612 return off;
1613}
1614
1615
1616/**
1617 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1618 * @note Bits 63 thru 8 are cleared.
1619 */
1620DECL_INLINE_THROW(uint32_t)
1621iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1622{
1623#ifdef RT_ARCH_AMD64
1624 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1625
1626 /* movzx Gv,Ew */
1627 if ((iGprDst | iGprSrc) >= 8)
1628 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1629 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1630 : X86_OP_REX_R;
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xb7;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635 /* shr Ev,8 */
1636 if (iGprDst >= 8)
1637 pbCodeBuf[off++] = X86_OP_REX_B;
1638 pbCodeBuf[off++] = 0xc1;
1639 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1640 pbCodeBuf[off++] = 8;
1641
1642#elif defined(RT_ARCH_ARM64)
1643 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1644 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1645 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1646
1647#else
1648# error "port me"
1649#endif
1650 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1651 return off;
1652}
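#if 0 /* Worked example (editorial sketch) for hypothetical iGprDst=0 and
         iGprSrc=3, i.e. fetching the x86 "bh" value from rbx: */
    /* AMD64: 0f b7 c3 (movzx eax, bx) followed by c1 e8 08 (shr eax, 8).
       ARM64: a single ubfx w0, w3, #8, #8 extracts bits 15:8 directly. */
#endif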
1653
1654
1655/**
1656 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1657 */
1658DECL_INLINE_THROW(uint32_t)
1659iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1660{
1661#ifdef RT_ARCH_AMD64
1662 /* movsxd r64, r/m32 */
1663 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1664 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1665 pbCodeBuf[off++] = 0x63;
1666 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1667
1668#elif defined(RT_ARCH_ARM64)
1669 /* sxtw dst, src */
1670 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1671 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1672
1673#else
1674# error "port me"
1675#endif
1676 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1677 return off;
1678}
1679
1680
1681/**
1682 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1683 */
1684DECL_INLINE_THROW(uint32_t)
1685iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1686{
1687#ifdef RT_ARCH_AMD64
1688 /* movsx r64, r/m16 */
1689 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1690 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1691 pbCodeBuf[off++] = 0x0f;
1692 pbCodeBuf[off++] = 0xbf;
1693 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1694
1695#elif defined(RT_ARCH_ARM64)
1696 /* sxth dst, src */
1697 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1698 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1699
1700#else
1701# error "port me"
1702#endif
1703 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1704 return off;
1705}
1706
1707
1708/**
1709 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1710 */
1711DECL_INLINE_THROW(uint32_t)
1712iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1713{
1714#ifdef RT_ARCH_AMD64
1715    /* movsx r32, r/m16 */
1716 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1717 if (iGprDst >= 8 || iGprSrc >= 8)
1718 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1719 pbCodeBuf[off++] = 0x0f;
1720 pbCodeBuf[off++] = 0xbf;
1721 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1722
1723#elif defined(RT_ARCH_ARM64)
1724 /* sxth dst32, src */
1725 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1726 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1727
1728#else
1729# error "port me"
1730#endif
1731 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1732 return off;
1733}
1734
1735
1736/**
1737 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1738 */
1739DECL_INLINE_THROW(uint32_t)
1740iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1741{
1742#ifdef RT_ARCH_AMD64
1743 /* movsx r64, r/m8 */
1744 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1745 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1746 pbCodeBuf[off++] = 0x0f;
1747 pbCodeBuf[off++] = 0xbe;
1748 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1749
1750#elif defined(RT_ARCH_ARM64)
1751 /* sxtb dst, src */
1752 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1753 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1754
1755#else
1756# error "port me"
1757#endif
1758 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1759 return off;
1760}
1761
1762
1763/**
1764 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1765 * @note Bits 63 thru 32 are cleared.
1766 */
1767DECL_INLINE_THROW(uint32_t)
1768iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1769{
1770#ifdef RT_ARCH_AMD64
1771 /* movsx r32, r/m8 */
1772 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1773 if (iGprDst >= 8 || iGprSrc >= 8)
1774 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1775 else if (iGprSrc >= 4)
1776 pbCodeBuf[off++] = X86_OP_REX;
1777 pbCodeBuf[off++] = 0x0f;
1778 pbCodeBuf[off++] = 0xbe;
1779 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1780
1781#elif defined(RT_ARCH_ARM64)
1782 /* sxtb dst32, src32 */
1783 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1784 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1785
1786#else
1787# error "port me"
1788#endif
1789 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1790 return off;
1791}
1792
1793
1794/**
1795 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1796 * @note Bits 63 thru 16 are cleared.
1797 */
1798DECL_INLINE_THROW(uint32_t)
1799iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1800{
1801#ifdef RT_ARCH_AMD64
1802 /* movsx r16, r/m8 */
1803 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1804 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1805 if (iGprDst >= 8 || iGprSrc >= 8)
1806 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1807 else if (iGprSrc >= 4)
1808 pbCodeBuf[off++] = X86_OP_REX;
1809 pbCodeBuf[off++] = 0x0f;
1810 pbCodeBuf[off++] = 0xbe;
1811 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1812
1813 /* movzx r32, r/m16 */
1814 if (iGprDst >= 8)
1815 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1816 pbCodeBuf[off++] = 0x0f;
1817 pbCodeBuf[off++] = 0xb7;
1818 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1819
1820#elif defined(RT_ARCH_ARM64)
1821 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1822 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1823 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1824 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1825 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1826
1827#else
1828# error "port me"
1829#endif
1830 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1831 return off;
1832}
1833
1834
1835/**
1836 * Emits a gprdst = gprsrc + addend load.
1837 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1838 */
1839#ifdef RT_ARCH_AMD64
1840DECL_INLINE_THROW(uint32_t)
1841iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1842 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1843{
1844 Assert(iAddend != 0);
1845
1846 /* lea gprdst, [gprsrc + iAddend] */
1847 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1848 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1849 pbCodeBuf[off++] = 0x8d;
1850 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1851 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1852 return off;
1853}
1854
1855#elif defined(RT_ARCH_ARM64)
1856DECL_INLINE_THROW(uint32_t)
1857iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1858 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1859{
1860    if ((uint64_t)iAddend < 4096)
1861 {
1862 /* add dst, src, uimm12 */
1863 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1864 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1865 }
1866    else if ((uint64_t)-iAddend < 4096)
1867 {
1868 /* sub dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1871 }
1872 else
1873 {
1874 Assert(iGprSrc != iGprDst);
1875 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1876 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1877 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1878 }
1879 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1880 return off;
1881}
1882#else
1883# error "port me"
1884#endif
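#if 0 /* Worked example (editorial sketch) of the ARM64 addend selection
         above, for hypothetical addend values: */
    /*  iAddend = 32     -> add dst, src, #32     (uimm12 branch)
        iAddend = -32    -> sub dst, src, #32     (negated uimm12 branch)
        iAddend = 70000  -> mov dst, #70000 followed by add dst, src, dst;
                            dst doubles as scratch, hence the assertion that
                            iGprSrc != iGprDst in that branch. */
#endif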
1885
1886/**
1887 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1888 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1889 */
1890#ifdef RT_ARCH_AMD64
1891DECL_INLINE_THROW(uint32_t)
1892iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1893 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1894#else
1895DECL_INLINE_THROW(uint32_t)
1896iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1897 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1898#endif
1899{
1900 if (iAddend != 0)
1901 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1902 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1903}
1904
1905
1906/**
1907 * Emits a gprdst = gprsrc32 + addend load.
1908 * @note Bits 63 thru 32 are cleared.
1909 */
1910DECL_INLINE_THROW(uint32_t)
1911iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1912 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1913{
1914 Assert(iAddend != 0);
1915
1916#ifdef RT_ARCH_AMD64
1917 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1918 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1919 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1920 if ((iGprDst | iGprSrc) >= 8)
1921 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1922 pbCodeBuf[off++] = 0x8d;
1923 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1924
1925#elif defined(RT_ARCH_ARM64)
1926 if ((uint32_t)iAddend < 4096)
1927 {
1928 /* add dst, src, uimm12 */
1929 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1930 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1931 }
1932 else if ((uint32_t)-iAddend < 4096)
1933 {
1934 /* sub dst, src, uimm12 */
1935 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1936 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1937 }
1938 else
1939 {
1940 Assert(iGprSrc != iGprDst);
1941 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1942 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1943 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1944 }
1945
1946#else
1947# error "port me"
1948#endif
1949 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1950 return off;
1951}
1952
1953
1954/**
1955 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1956 */
1957DECL_INLINE_THROW(uint32_t)
1958iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1959 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1960{
1961 if (iAddend != 0)
1962 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1963 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1964}
1965
1966
1967/**
1968 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1969 * destination.
1970 */
1971DECL_FORCE_INLINE(uint32_t)
1972iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1973{
1974#ifdef RT_ARCH_AMD64
1975 /* mov reg16, r/m16 */
1976 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1977 if (idxDst >= 8 || idxSrc >= 8)
1978 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1979 pCodeBuf[off++] = 0x8b;
1980 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1981
1982#elif defined(RT_ARCH_ARM64)
1983 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1984 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1985
1986#else
1987# error "Port me!"
1988#endif
1989 return off;
1990}
1991
1992
1993/**
1994 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1995 * destination.
1996 */
1997DECL_INLINE_THROW(uint32_t)
1998iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1999{
2000#ifdef RT_ARCH_AMD64
2001 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
2002#elif defined(RT_ARCH_ARM64)
2003 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
2004#else
2005# error "Port me!"
2006#endif
2007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2008 return off;
2009}
2010
2011
2012#ifdef RT_ARCH_AMD64
2013/**
2014 * Common bit of iemNativeEmitLoadGprByBp and friends.
2015 */
2016DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
2017 PIEMRECOMPILERSTATE pReNativeAssert)
2018{
2019 if (offDisp < 128 && offDisp >= -128)
2020 {
2021 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
2022 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
2023 }
2024 else
2025 {
2026 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
2027 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2028 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2029 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2030 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2031 }
2032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
2033 return off;
2034}
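#if 0 /* Worked example (editorial sketch): the ModRM + displacement bytes
         this helper appends for a hypothetical "mov rax, [rbp + offDisp]"
         after the caller has emitted 48 8b (REX.W + mov r64, r/m64): */
    /*  offDisp = -8    -> 45 f8           (mod=1, rm=rbp, disp8)
        offDisp = 0x200 -> 85 00 02 00 00  (mod=2, rm=rbp, disp32)
        so the first case assembles to 48 8b 45 f8 in total. */
#endif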
2035#elif defined(RT_ARCH_ARM64)
2036/**
2037 * Common bit of iemNativeEmitLoadGprByBp and friends.
2038 */
2039DECL_FORCE_INLINE_THROW(uint32_t)
2040iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2041 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2042{
2043 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
2044 {
2045 /* str w/ unsigned imm12 (scaled) */
2046 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2047 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
2048 }
2049 else if (offDisp >= -256 && offDisp <= 256)
2050 {
2051 /* stur w/ signed imm9 (unscaled) */
2052 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2053 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
2054 }
2055 else
2056 {
2057 /* Use temporary indexing register. */
2058 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2059 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2060 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2061 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2062 }
2063 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2064 return off;
2065}
2066#endif
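#if 0 /* Worked example (editorial sketch) of the three ARM64 BP addressing
         strategies above, for a 64-bit load (cbData=8): */
    /*  offDisp = 0x28   -> ldr  x<reg>, [x29, #0x28]   (imm12 = 0x28/8 = 5)
        offDisp = -16    -> ldur x<reg>, [x29, #-16]    (signed imm9, unscaled)
        offDisp = 0x9001 -> mov tmp0, #0x9001 then
                            ldr x<reg>, [x29, tmp0, sxtw]
                            (too large for imm12 and misaligned). */
#endif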
2067
2068
2069/**
2070 * Emits a 64-bit GPR load instruction with a BP relative source address.
2071 */
2072DECL_INLINE_THROW(uint32_t)
2073iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2074{
2075#ifdef RT_ARCH_AMD64
2076 /* mov gprdst, qword [rbp + offDisp] */
2077 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2078 if (iGprDst < 8)
2079 pbCodeBuf[off++] = X86_OP_REX_W;
2080 else
2081 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2082 pbCodeBuf[off++] = 0x8b;
2083 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2084
2085#elif defined(RT_ARCH_ARM64)
2086 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2087
2088#else
2089# error "port me"
2090#endif
2091}
2092
2093
2094/**
2095 * Emits a 32-bit GPR load instruction with a BP relative source address.
2096 * @note Bits 63 thru 32 of the GPR will be cleared.
2097 */
2098DECL_INLINE_THROW(uint32_t)
2099iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2100{
2101#ifdef RT_ARCH_AMD64
2102 /* mov gprdst, dword [rbp + offDisp] */
2103 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2104 if (iGprDst >= 8)
2105 pbCodeBuf[off++] = X86_OP_REX_R;
2106 pbCodeBuf[off++] = 0x8b;
2107 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2108
2109#elif defined(RT_ARCH_ARM64)
2110 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2111
2112#else
2113# error "port me"
2114#endif
2115}
2116
2117
2118/**
2119 * Emits a 16-bit GPR load instruction with a BP relative source address.
2120 * @note Bits 63 thru 16 of the GPR will be cleared.
2121 */
2122DECL_INLINE_THROW(uint32_t)
2123iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2124{
2125#ifdef RT_ARCH_AMD64
2126 /* movzx gprdst, word [rbp + offDisp] */
2127 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2128 if (iGprDst >= 8)
2129 pbCodeBuf[off++] = X86_OP_REX_R;
2130 pbCodeBuf[off++] = 0x0f;
2131 pbCodeBuf[off++] = 0xb7;
2132 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2133
2134#elif defined(RT_ARCH_ARM64)
2135    return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2136
2137#else
2138# error "port me"
2139#endif
2140}
2141
2142
2143/**
2144 * Emits an 8-bit GPR load instruction with a BP relative source address.
2145 * @note Bits 63 thru 8 of the GPR will be cleared.
2146 */
2147DECL_INLINE_THROW(uint32_t)
2148iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2149{
2150#ifdef RT_ARCH_AMD64
2151 /* movzx gprdst, byte [rbp + offDisp] */
2152 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2153 if (iGprDst >= 8)
2154 pbCodeBuf[off++] = X86_OP_REX_R;
2155 pbCodeBuf[off++] = 0x0f;
2156 pbCodeBuf[off++] = 0xb6;
2157 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2158
2159#elif defined(RT_ARCH_ARM64)
2160    return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2161
2162#else
2163# error "port me"
2164#endif
2165}
2166
2167
2168#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2169/**
2170 * Emits a 128-bit vector register load instruction with a BP relative source address.
2171 */
2172DECL_FORCE_INLINE_THROW(uint32_t)
2173iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2174{
2175#ifdef RT_ARCH_AMD64
2176 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2177
2178 /* movdqu reg128, mem128 */
2179 pbCodeBuf[off++] = 0xf3;
2180 if (iVecRegDst >= 8)
2181 pbCodeBuf[off++] = X86_OP_REX_R;
2182 pbCodeBuf[off++] = 0x0f;
2183 pbCodeBuf[off++] = 0x6f;
2184 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2185#elif defined(RT_ARCH_ARM64)
2186 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2187#else
2188# error "port me"
2189#endif
2190}
2191
2192
2193/**
2194 * Emits a 256-bit vector register load instruction with a BP relative source address.
2195 */
2196DECL_FORCE_INLINE_THROW(uint32_t)
2197iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2198{
2199#ifdef RT_ARCH_AMD64
2200 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2201
2202 /* vmovdqu reg256, mem256 */
2203 pbCodeBuf[off++] = X86_OP_VEX2;
2204 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2205 pbCodeBuf[off++] = 0x6f;
2206 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2207#elif defined(RT_ARCH_ARM64)
2208 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2209 Assert(!(iVecRegDst & 0x1));
2210 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2211 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2212#else
2213# error "port me"
2214#endif
2215}
2216
2217#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
2218
2219
2220/**
2221 * Emits a load effective address to a GPR with a BP relative source address.
2222 */
2223DECL_INLINE_THROW(uint32_t)
2224iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2225{
2226#ifdef RT_ARCH_AMD64
2227 /* lea gprdst, [rbp + offDisp] */
2228 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2229 if (iGprDst < 8)
2230 pbCodeBuf[off++] = X86_OP_REX_W;
2231 else
2232 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2233 pbCodeBuf[off++] = 0x8d;
2234 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2235
2236#elif defined(RT_ARCH_ARM64)
2237 bool const fSub = offDisp < 0;
2238 uint32_t const offAbsDisp = (uint32_t)RT_ABS(offDisp);
2239 if (offAbsDisp <= 0xffffffU)
2240 {
2241 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2242 if (offAbsDisp <= 0xfffU)
2243 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp);
2244 else
2245 {
2246 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp >> 12,
2247 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2248 if (offAbsDisp & 0xfffU)
2249 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, offAbsDisp & 0xfff);
2250 }
2251 }
2252 else
2253 {
2254 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2255 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offAbsDisp);
2256 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2257 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2258 }
2259
2260#else
2261# error "port me"
2262#endif
2263
2264 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2265 return off;
2266}
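#if 0 /* Worked example (editorial sketch) of the ARM64 two-instruction split
         above, for a hypothetical offDisp = 0x12345: */
    /*  0x12345 is above 0xfff but within 0xffffff, so it is split into
        add dst, x29, #0x12, lsl #12    (the 0x12000 part)
        add dst, dst, #0x345            (the low 12 bits). */
#endif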
2267
2268
2269/**
2270 * Emits a 64-bit GPR store with a BP relative destination address.
2271 *
2272 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2273 */
2274DECL_INLINE_THROW(uint32_t)
2275iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2276{
2277#ifdef RT_ARCH_AMD64
2278 /* mov qword [rbp + offDisp], gprdst */
2279 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2280 if (iGprSrc < 8)
2281 pbCodeBuf[off++] = X86_OP_REX_W;
2282 else
2283 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2284 pbCodeBuf[off++] = 0x89;
2285 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2286
2287#elif defined(RT_ARCH_ARM64)
2288 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2289 {
2290 /* str w/ unsigned imm12 (scaled) */
2291 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2292 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2293 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2294 }
2295 else if (offDisp >= -256 && offDisp <= 256)
2296 {
2297 /* stur w/ signed imm9 (unscaled) */
2298 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2299 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2300 }
2301 else if ((uint32_t)-offDisp < (unsigned)_4K)
2302 {
2303 /* Use temporary indexing register w/ sub uimm12. */
2304 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2305 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2306 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2307 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2308 }
2309 else
2310 {
2311 /* Use temporary indexing register. */
2312 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2313 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2314 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2315 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2316 }
2317 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2318 return off;
2319
2320#else
2321# error "Port me!"
2322#endif
2323}
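#if 0 /* Worked example (editorial sketch) of the third ARM64 branch above,
         for a hypothetical offDisp = -0x400: */
    /*  The offset is neither a valid scaled uimm12 (negative) nor a valid
        imm9 (below -256), but 0x400 fits in a sub immediate, so the address
        is formed with "sub tmp0, x29, #0x400" and the store becomes
        "str x<src>, [tmp0]", avoiding the mov + indexed-store fallback. */
#endif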
2324
2325
2326/**
2327 * Emits a 64-bit immediate store with a BP relative destination address.
2328 *
2329 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2330 */
2331DECL_INLINE_THROW(uint32_t)
2332iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2333{
2334#ifdef RT_ARCH_AMD64
2335 if ((int64_t)uImm64 == (int32_t)uImm64)
2336 {
2337 /* mov qword [rbp + offDisp], imm32 - sign extended */
2338 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2339 pbCodeBuf[off++] = X86_OP_REX_W;
2340 pbCodeBuf[off++] = 0xc7;
2341 if (offDisp < 128 && offDisp >= -128)
2342 {
2343 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2344 pbCodeBuf[off++] = (uint8_t)offDisp;
2345 }
2346 else
2347 {
2348 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2349 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2350 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2351 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2352 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2353 }
2354 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2355 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2356 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2357 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2358 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2359 return off;
2360 }
2361#endif
2362
2363 /* Load tmp0, imm64; Store tmp to bp+disp. */
2364 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2365 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2366}
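#if 0 /* Worked example (editorial sketch) of the sign-extension test above,
         for hypothetical immediates: */
    /*  uImm64 = 0xffffffff80000000: (int32_t)uImm64 is INT32_MIN, which
        sign-extends back to the same 64-bit value, so the imm32 form works.
        uImm64 = 0x0000000080000000: sign-extending 0x80000000 would give
        0xffffffff80000000 != uImm64, so tmp0 is loaded and stored instead. */
#endif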
2367
2368#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2369
2370/**
2371 * Emits a 128-bit vector register store with a BP relative destination address.
2372 *
2373 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2374 */
2375DECL_INLINE_THROW(uint32_t)
2376iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2377{
2378#ifdef RT_ARCH_AMD64
2379 /* movdqu [rbp + offDisp], vecsrc */
2380    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2381 pbCodeBuf[off++] = 0xf3;
2382 if (iVecRegSrc >= 8)
2383 pbCodeBuf[off++] = X86_OP_REX_R;
2384 pbCodeBuf[off++] = 0x0f;
2385 pbCodeBuf[off++] = 0x7f;
2386 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2387
2388#elif defined(RT_ARCH_ARM64)
2389    if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2390 {
2391 /* str w/ unsigned imm12 (scaled) */
2392 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2393 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2394                                                      ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2395 }
2396 else if (offDisp >= -256 && offDisp <= 256)
2397 {
2398 /* stur w/ signed imm9 (unscaled) */
2399 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2400 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2401 }
2402 else if ((uint32_t)-offDisp < (unsigned)_4K)
2403 {
2404 /* Use temporary indexing register w/ sub uimm12. */
2405 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2406 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2407 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2408 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2409 }
2410 else
2411 {
2412 /* Use temporary indexing register. */
2413 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2414 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2415 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2416 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2417 }
2418 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2419 return off;
2420
2421#else
2422# error "Port me!"
2423#endif
2424}
2425
2426
2427/**
2428 * Emits a 256-bit vector register store with a BP relative destination address.
2429 *
2430 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2431 */
2432DECL_INLINE_THROW(uint32_t)
2433iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2434{
2435#ifdef RT_ARCH_AMD64
2436 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2437
2438 /* vmovdqu mem256, reg256 */
2439 pbCodeBuf[off++] = X86_OP_VEX2;
2440 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2441 pbCodeBuf[off++] = 0x7f;
2442 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2443#elif defined(RT_ARCH_ARM64)
2444 Assert(!(iVecRegSrc & 0x1));
2445 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2446 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2447#else
2448# error "Port me!"
2449#endif
2450}
2451
2452#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
2453#if defined(RT_ARCH_ARM64)
2454
2455/**
2456 * Common bit of iemNativeEmitLoadGprByGprU64Ex and friends.
2457 *
2458 * @note   Odd and large @a offDisp values require a temporary, unless it's a
2459 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2460 * caller does not heed this.
2461 *
2462 * @note DON'T try this with prefetch.
2463 */
2464DECL_FORCE_INLINE_THROW(uint32_t)
2465iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2466 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2467{
2468 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2469 {
2470 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2471 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2472 }
2473 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2474 && iGprReg != iGprBase)
2475 || iGprTmp != UINT8_MAX)
2476 {
2477 /* The offset is too large, so we must load it into a register and use
2478 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2479        /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2480 if (iGprTmp == UINT8_MAX)
2481 iGprTmp = iGprReg;
2482 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2483 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2484 }
2485 else
2486# ifdef IEM_WITH_THROW_CATCH
2487 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2488# else
2489 AssertReleaseFailedStmt(off = UINT32_MAX);
2490# endif
2491 return off;
2492}
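#if 0 /* Worked example (editorial sketch) of the scaled uimm12 limit in the
         first branch above, for a 64-bit access (cbData=8): */
    /*  offDisp = 0x7ff8 -> imm12 = 0x7ff8/8 = 0xfff (the maximum), i.e.
        "ldr x<reg>, [x<base>, #0x7ff8]"; this is where the -0x7ff8...0x7ff8
        range quoted in the callers' doc comments comes from.  Anything
        negative, misaligned or larger takes the register-index path, which
        needs iGprTmp (or, for loads, reuses iGprReg as scratch). */
#endif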
2493
2494/**
2495 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2496 */
2497DECL_FORCE_INLINE_THROW(uint32_t)
2498iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2499 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2500{
2501 /*
2502     * There are a couple of ldr variants that take an immediate offset, so
2503     * try to use those if we can; otherwise we have to use a temporary register
2504     * to help with the addressing.
2505 */
2506 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2507 {
2508 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2509 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2510 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2511 }
2512 else
2513 {
2514 /* The offset is too large, so we must load it into a register and use
2515 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2516        /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2517 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2518
2519 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2520 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2521
2522 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2523 }
2524 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2525 return off;
2526}
2527
2528# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2529/**
2530 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2531 *
2532 * @note   Odd and large @a offDisp values require a temporary register
2533 *         (@a iGprTmp), since a vector register cannot stand in as the
2534 *         address scratch.  Will assert / throw if caller does not heed this.
2535 *
2536 * @note DON'T try this with prefetch.
2537 */
2538DECL_FORCE_INLINE_THROW(uint32_t)
2539iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2540 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2541{
2542 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2543 {
2544 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2545 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2546 }
2547    else if (iGprTmp != UINT8_MAX)
2549 {
2550 /* The offset is too large, so we must load it into a register and use
2551 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2552        /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2553 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2554 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2555 }
2556 else
2557# ifdef IEM_WITH_THROW_CATCH
2558 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2559# else
2560 AssertReleaseFailedStmt(off = UINT32_MAX);
2561# endif
2562 return off;
2563}
2564# endif
2565
2566
2567/**
2568 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2569 */
2570DECL_FORCE_INLINE_THROW(uint32_t)
2571iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2572 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2573{
2574     * There are a couple of ldr variants that take an immediate offset, so
2575     * try to use those if we can; otherwise we have to use a temporary register
2576     * to help with the addressing.
2577 * help with the addressing.
2578 */
2579 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2580 {
2581 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2582 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2583 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2584 }
2585 else
2586 {
2587 /* The offset is too large, so we must load it into a register and use
2588 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2589        /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2590 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2591
2592 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2593 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2594
2595 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2596 }
2597 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2598 return off;
2599}
2600#endif /* RT_ARCH_ARM64 */
2601
2602/**
2603 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2604 *
2605 * @note ARM64: Misaligned @a offDisp values and values not in the
2606 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2607 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2608 * does not heed this.
2609 */
2610DECL_FORCE_INLINE_THROW(uint32_t)
2611iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2612 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2613{
2614#ifdef RT_ARCH_AMD64
2615 /* mov reg64, mem64 */
2616 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2617 pCodeBuf[off++] = 0x8b;
2618 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2619 RT_NOREF(iGprTmp);
2620
2621#elif defined(RT_ARCH_ARM64)
2622 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2623 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2624
2625#else
2626# error "port me"
2627#endif
2628 return off;
2629}
2630
2631
2632/**
2633 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2634 */
2635DECL_INLINE_THROW(uint32_t)
2636iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2637{
2638#ifdef RT_ARCH_AMD64
2639 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2640 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2641
2642#elif defined(RT_ARCH_ARM64)
2643 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2644
2645#else
2646# error "port me"
2647#endif
2648 return off;
2649}
2650
2651
2652/**
2653 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2654 *
2655 * @note ARM64: Misaligned @a offDisp values and values not in the
2656 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2657 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2658 * caller does not heed this.
2659 *
2660 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2661 */
2662DECL_FORCE_INLINE_THROW(uint32_t)
2663iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2664 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2665{
2666#ifdef RT_ARCH_AMD64
2667 /* mov reg32, mem32 */
2668 if (iGprDst >= 8 || iGprBase >= 8)
2669 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2670 pCodeBuf[off++] = 0x8b;
2671 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2672 RT_NOREF(iGprTmp);
2673
2674#elif defined(RT_ARCH_ARM64)
2675 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2676 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2677
2678#else
2679# error "port me"
2680#endif
2681 return off;
2682}
2683
2684
2685/**
2686 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2687 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2688 */
2689DECL_INLINE_THROW(uint32_t)
2690iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2691{
2692#ifdef RT_ARCH_AMD64
2693 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2694 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2695
2696#elif defined(RT_ARCH_ARM64)
2697 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2698
2699#else
2700# error "port me"
2701#endif
2702 return off;
2703}
2704
2705
2706/**
2707 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2708 * sign-extending the value to 64 bits.
2709 *
2710 * @note ARM64: Misaligned @a offDisp values and values not in the
2711 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2712 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2713 * caller does not heed this.
2714 */
2715DECL_FORCE_INLINE_THROW(uint32_t)
2716iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2717 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2718{
2719#ifdef RT_ARCH_AMD64
2720 /* movsxd reg64, mem32 */
2721 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2722 pCodeBuf[off++] = 0x63;
2723 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2724 RT_NOREF(iGprTmp);
2725
2726#elif defined(RT_ARCH_ARM64)
2727 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2728 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2729
2730#else
2731# error "port me"
2732#endif
2733 return off;
2734}
2735
2736
2737/**
2738 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2739 *
2740 * @note ARM64: Misaligned @a offDisp values and values not in the
2741 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2742 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2743 * caller does not heed this.
2744 *
2745 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2746 */
2747DECL_FORCE_INLINE_THROW(uint32_t)
2748iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2749 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2750{
2751#ifdef RT_ARCH_AMD64
2752 /* movzx reg32, mem16 */
2753 if (iGprDst >= 8 || iGprBase >= 8)
2754 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2755 pCodeBuf[off++] = 0x0f;
2756 pCodeBuf[off++] = 0xb7;
2757 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2758 RT_NOREF(iGprTmp);
2759
2760#elif defined(RT_ARCH_ARM64)
2761 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2762 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2763
2764#else
2765# error "port me"
2766#endif
2767 return off;
2768}
2769
2770
2771/**
2772 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2773 * sign-extending the value to 64 bits.
2774 *
2775 * @note ARM64: Misaligned @a offDisp values and values not in the
2776 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2777 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2778 * caller does not heed this.
2779 */
2780DECL_FORCE_INLINE_THROW(uint32_t)
2781iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2782 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2783{
2784#ifdef RT_ARCH_AMD64
2785 /* movsx reg64, mem16 */
2786 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2787 pCodeBuf[off++] = 0x0f;
2788 pCodeBuf[off++] = 0xbf;
2789 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2790 RT_NOREF(iGprTmp);
2791
2792#elif defined(RT_ARCH_ARM64)
2793 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2794 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2795
2796#else
2797# error "port me"
2798#endif
2799 return off;
2800}
2801
2802
2803/**
2804 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2805 * sign-extending the value to 32 bits.
2806 *
2807 * @note ARM64: Misaligned @a offDisp values and values not in the
2808 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2809 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2810 * caller does not heed this.
2811 *
2812 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2813 */
2814DECL_FORCE_INLINE_THROW(uint32_t)
2815iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2816 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2817{
2818#ifdef RT_ARCH_AMD64
2819 /* movsx reg32, mem16 */
2820 if (iGprDst >= 8 || iGprBase >= 8)
2821 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2822 pCodeBuf[off++] = 0x0f;
2823 pCodeBuf[off++] = 0xbf;
2824 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2825 RT_NOREF(iGprTmp);
2826
2827#elif defined(RT_ARCH_ARM64)
2828 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2829 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2830
2831#else
2832# error "port me"
2833#endif
2834 return off;
2835}
2836
2837
2838/**
2839 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2840 *
2841 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2842 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2843 * same. Will assert / throw if caller does not heed this.
2844 *
2845 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2846 */
2847DECL_FORCE_INLINE_THROW(uint32_t)
2848iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2849 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2850{
2851#ifdef RT_ARCH_AMD64
2852 /* movzx reg32, mem8 */
2853 if (iGprDst >= 8 || iGprBase >= 8)
2854 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2855 pCodeBuf[off++] = 0x0f;
2856 pCodeBuf[off++] = 0xb6;
2857 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2858 RT_NOREF(iGprTmp);
2859
2860#elif defined(RT_ARCH_ARM64)
2861 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2862 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2863
2864#else
2865# error "port me"
2866#endif
2867 return off;
2868}
2869
2870
2871/**
2872 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2873 * sign-extending the value to 64 bits.
2874 *
2875 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2876 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2877 * same. Will assert / throw if caller does not heed this.
2878 */
2879DECL_FORCE_INLINE_THROW(uint32_t)
2880iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2881 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2882{
2883#ifdef RT_ARCH_AMD64
2884 /* movsx reg64, mem8 */
2885 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2886 pCodeBuf[off++] = 0x0f;
2887 pCodeBuf[off++] = 0xbe;
2888 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2889 RT_NOREF(iGprTmp);
2890
2891#elif defined(RT_ARCH_ARM64)
2892 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2893 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2894
2895#else
2896# error "port me"
2897#endif
2898 return off;
2899}
2900
2901
2902/**
2903 * Emits a 8-bit GPR load via a GPR base address with a displacement,
2904 * sign-extending the value to 32 bits.
2905 *
2906 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2907 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2908 * same. Will assert / throw if caller does not heed this.
2909 *
2910 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2911 */
2912DECL_FORCE_INLINE_THROW(uint32_t)
2913iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2914 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2915{
2916#ifdef RT_ARCH_AMD64
2917 /* movsx reg32, mem8 */
2918 if (iGprDst >= 8 || iGprBase >= 8)
2919 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2920 pCodeBuf[off++] = 0x0f;
2921 pCodeBuf[off++] = 0xbe;
2922 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2923 RT_NOREF(iGprTmp);
2924
2925#elif defined(RT_ARCH_ARM64)
2926 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2927 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2928
2929#else
2930# error "port me"
2931#endif
2932 return off;
2933}
2934
2935
2936/**
2937 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2938 * sign-extending the value to 16 bits.
2939 *
2940 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2941 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2942 * same. Will assert / throw if caller does not heed this.
2943 *
2944 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2945 */
2946DECL_FORCE_INLINE_THROW(uint32_t)
2947iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2948 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2949{
2950#ifdef RT_ARCH_AMD64
2951 /* movsx reg32, mem8 */
2952 if (iGprDst >= 8 || iGprBase >= 8)
2953 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2954 pCodeBuf[off++] = 0x0f;
2955 pCodeBuf[off++] = 0xbe;
2956 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2957# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2958    /* and reg32, 0xffff */
2959 if (iGprDst >= 8)
2960 pCodeBuf[off++] = X86_OP_REX_B;
2961 pCodeBuf[off++] = 0x81;
2962 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2963 pCodeBuf[off++] = 0xff;
2964 pCodeBuf[off++] = 0xff;
2965 pCodeBuf[off++] = 0;
2966 pCodeBuf[off++] = 0;
2967# else
2968 /* movzx reg32, reg16 */
2969 if (iGprDst >= 8)
2970 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2971 pCodeBuf[off++] = 0x0f;
2972 pCodeBuf[off++] = 0xb7;
2973 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2974# endif
2975 RT_NOREF(iGprTmp);
2976
2977#elif defined(RT_ARCH_ARM64)
2978 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2979 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2980 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2981 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
2982
2983#else
2984# error "port me"
2985#endif
2986 return off;
2987}
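#if 0 /* Byte-count note (editorial sketch) for the @todo above, assuming a
         low destination register (eax): */
    /*  and   eax, 0xffff -> 81 e0 ff ff 00 00  (6 bytes, 7 with a REX prefix)
        movzx eax, ax     -> 0f b7 c0           (3 bytes, 4 with a REX prefix)
        so the movzx variant would save 3 bytes per emit. */
#endif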
2988
2989
2990#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2991/**
2992 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2993 *
2994 * @note ARM64: Misaligned @a offDisp values and values not in the
2995 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2996 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2997 * does not heed this.
2998 */
2999DECL_FORCE_INLINE_THROW(uint32_t)
3000iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3001 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3002{
3003#ifdef RT_ARCH_AMD64
3004 /* movdqu reg128, mem128 */
3005 pCodeBuf[off++] = 0xf3;
3006 if (iVecRegDst >= 8 || iGprBase >= 8)
3007 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3008 pCodeBuf[off++] = 0x0f;
3009 pCodeBuf[off++] = 0x6f;
3010 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3011 RT_NOREF(iGprTmp);
3012
3013#elif defined(RT_ARCH_ARM64)
3014 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3015 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3016
3017#else
3018# error "port me"
3019#endif
3020 return off;
3021}
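#if 0 /* Worked example (editorial sketch) for hypothetical iVecRegDst=8
         (xmm8), iGprBase=0 (rax) and offDisp=0: */
    /*  AMD64 emits f3 44 0f 6f 00 (movdqu xmm8, [rax]); the REX byte has to
        sit between the f3 prefix and the 0f escape, which is why it is
        emitted second in the sequence above. */
#endif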
3022
3023
3024/**
3025 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3026 */
3027DECL_INLINE_THROW(uint32_t)
3028iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3029{
3030#ifdef RT_ARCH_AMD64
3031 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3033
3034#elif defined(RT_ARCH_ARM64)
3035 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3036
3037#else
3038# error "port me"
3039#endif
3040 return off;
3041}
3042
3043
3044/**
3045 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3046 *
3047 * @note ARM64: Misaligned @a offDisp values and values not in the
3048 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3049 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3050 * does not heed this.
3051 */
3052DECL_FORCE_INLINE_THROW(uint32_t)
3053iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3054 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3055{
3056#ifdef RT_ARCH_AMD64
3057 /* vmovdqu reg256, mem256 */
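 /* VEX.R/X/B are stored inverted (bit set = register 0-7); the low bits (0x01) select the 0x0f opcode map. */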
3058 pCodeBuf[off++] = X86_OP_VEX3;
3059 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3060 | X86_OP_VEX3_BYTE1_X
3061 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3062 | UINT8_C(0x01);
3063 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3064 pCodeBuf[off++] = 0x6f;
3065 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3066 RT_NOREF(iGprTmp);
3067
3068#elif defined(RT_ARCH_ARM64)
3069 Assert(!(iVecRegDst & 0x1));
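 /* A 256-bit value is modelled as a pair of consecutive 128-bit host vector registers, hence the even-register assertion. */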
3070 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3071 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3072 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3073 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3074#else
3075# error "port me"
3076#endif
3077 return off;
3078}
3079
3080
3081/**
3082 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3083 */
3084DECL_INLINE_THROW(uint32_t)
3085iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3086{
3087#ifdef RT_ARCH_AMD64
3088 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3089 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3090
3091#elif defined(RT_ARCH_ARM64)
3092 Assert(!(iVecRegDst & 0x1));
3093 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3094 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3095 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3096 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3097
3098#else
3099# error "port me"
3100#endif
3101 return off;
3102}
3103#endif
3104
3105
3106/**
3107 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3108 *
3109 * @note ARM64: Misaligned @a offDisp values and values not in the
3110 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3111 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3112 * does not heed this.
3113 */
3114DECL_FORCE_INLINE_THROW(uint32_t)
3115iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3116 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3117{
3118#ifdef RT_ARCH_AMD64
3119 /* mov mem64, reg64 */
3120 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3121 pCodeBuf[off++] = 0x89;
3122 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3123 RT_NOREF(iGprTmp);
3124
3125#elif defined(RT_ARCH_ARM64)
3126 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3127 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3128
3129#else
3130# error "port me"
3131#endif
3132 return off;
3133}
3134
3135
3136/**
3137 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3138 *
3139 * @note ARM64: Misaligned @a offDisp values and values not in the
3140 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3141 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3142 * does not heed this.
3143 */
3144DECL_FORCE_INLINE_THROW(uint32_t)
3145iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3146 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3147{
3148#ifdef RT_ARCH_AMD64
3149 /* mov mem32, reg32 */
3150 if (iGprSrc >= 8 || iGprBase >= 8)
3151 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3152 pCodeBuf[off++] = 0x89;
3153 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3154 RT_NOREF(iGprTmp);
3155
3156#elif defined(RT_ARCH_ARM64)
3157 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3158 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3159
3160#else
3161# error "port me"
3162#endif
3163 return off;
3164}
3165
3166
3167/**
3168 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3169 *
3170 * @note ARM64: Misaligned @a offDisp values and values not in the
3171 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3172 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3173 * does not heed this.
3174 */
3175DECL_FORCE_INLINE_THROW(uint32_t)
3176iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3177 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3178{
3179#ifdef RT_ARCH_AMD64
3180 /* mov mem16, reg16 */
3181 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3182 if (iGprSrc >= 8 || iGprBase >= 8)
3183 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3184 pCodeBuf[off++] = 0x89;
3185 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3186 RT_NOREF(iGprTmp);
3187
3188#elif defined(RT_ARCH_ARM64)
3189 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3190 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3191
3192#else
3193# error "port me"
3194#endif
3195 return off;
3196}
3197
3198
3199/**
3200 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3201 *
3202 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3203 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3204 * same. Will assert / throw if caller does not heed this.
3205 */
3206DECL_FORCE_INLINE_THROW(uint32_t)
3207iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3208 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3209{
3210#ifdef RT_ARCH_AMD64
3211 /* mov mem8, reg8 */
3212 if (iGprSrc >= 8 || iGprBase >= 8)
3213 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3214 else if (iGprSrc >= 4)
3215 pCodeBuf[off++] = X86_OP_REX;
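 /* The bare REX prefix makes encodings 4-7 address SPL/BPL/SIL/DIL instead of AH/CH/DH/BH. */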
3216 pCodeBuf[off++] = 0x88;
3217 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3218 RT_NOREF(iGprTmp);
3219
3220#elif defined(RT_ARCH_ARM64)
3221 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3222 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3223
3224#else
3225# error "port me"
3226#endif
3227 return off;
3228}
3229
3230
3231/**
3232 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3233 *
3234 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0);
3235 * on AMD64 it depends on the immediate value.
3236 *
3237 * @note ARM64: Misaligned @a offDisp values and values not in the
3238 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3239 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3240 * does not heed this.
3241 */
3242DECL_FORCE_INLINE_THROW(uint32_t)
3243iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3244 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3245{
3246#ifdef RT_ARCH_AMD64
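 /* Use the compact 'mov r/m64, imm32' form when the value survives sign-extension from 32 bits. */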
3247 if ((int32_t)uImm == (int64_t)uImm)
3248 {
3249 /* mov mem64, imm32 (sign-extended) */
3250 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3251 pCodeBuf[off++] = 0xc7;
3252 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3253 pCodeBuf[off++] = RT_BYTE1(uImm);
3254 pCodeBuf[off++] = RT_BYTE2(uImm);
3255 pCodeBuf[off++] = RT_BYTE3(uImm);
3256 pCodeBuf[off++] = RT_BYTE4(uImm);
3257 }
3258 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3259 {
3260 /* require temporary register. */
3261 if (iGprImmTmp == UINT8_MAX)
3262 iGprImmTmp = iGprTmp;
3263 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3264 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3265 }
3266 else
3267# ifdef IEM_WITH_THROW_CATCH
3268 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3269# else
3270 AssertReleaseFailedStmt(off = UINT32_MAX);
3271# endif
3272
3273#elif defined(RT_ARCH_ARM64)
3274 if (uImm == 0)
3275 iGprImmTmp = ARMV8_A64_REG_XZR;
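 /* XZR reads as zero, so storing a zero immediate needs no temporary register. */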
3276 else
3277 {
3278 Assert(iGprImmTmp < 31);
3279 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3280 }
3281 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3282
3283#else
3284# error "port me"
3285#endif
3286 return off;
3287}
3288
3289
3290/**
3291 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3292 *
3293 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3294 *
3295 * @note ARM64: Misaligned @a offDisp values and values not in the
3296 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3297 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3298 * does not heed this.
3299 */
3300DECL_FORCE_INLINE_THROW(uint32_t)
3301iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3302 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3303{
3304#ifdef RT_ARCH_AMD64
3305 /* mov mem32, imm32 */
3306 if (iGprBase >= 8)
3307 pCodeBuf[off++] = X86_OP_REX_B;
3308 pCodeBuf[off++] = 0xc7;
3309 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3310 pCodeBuf[off++] = RT_BYTE1(uImm);
3311 pCodeBuf[off++] = RT_BYTE2(uImm);
3312 pCodeBuf[off++] = RT_BYTE3(uImm);
3313 pCodeBuf[off++] = RT_BYTE4(uImm);
3314 RT_NOREF(iGprImmTmp, iGprTmp);
3315
3316#elif defined(RT_ARCH_ARM64)
3318 if (uImm == 0)
3319 iGprImmTmp = ARMV8_A64_REG_XZR;
3320 else
3321 {
3322 Assert(iGprImmTmp < 31);
3323 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3324 }
3325 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3326 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3327
3328#else
3329# error "port me"
3330#endif
3331 return off;
3332}
3333
3334
3335/**
3336 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3337 *
3338 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3339 *
3340 * @note ARM64: Misaligned @a offDisp values and values not in the
3341 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3342 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3343 * does not heed this.
3344 */
3345DECL_FORCE_INLINE_THROW(uint32_t)
3346iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3347 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3348{
3349#ifdef RT_ARCH_AMD64
3350 /* mov mem16, imm16 */
3351 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3352 if (iGprBase >= 8)
3353 pCodeBuf[off++] = X86_OP_REX_B;
3354 pCodeBuf[off++] = 0xc7;
3355 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3356 pCodeBuf[off++] = RT_BYTE1(uImm);
3357 pCodeBuf[off++] = RT_BYTE2(uImm);
3358 RT_NOREF(iGprImmTmp, iGprTmp);
3359
3360#elif defined(RT_ARCH_ARM64)
3361 if (uImm == 0)
3362 iGprImmTmp = ARMV8_A64_REG_XZR;
3363 else
3364 {
3365 Assert(iGprImmTmp < 31);
3366 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3367 }
3368 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3369 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3370
3371#else
3372# error "port me"
3373#endif
3374 return off;
3375}
3376
3377
3378/**
3379 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3380 *
3381 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3382 *
3383 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3384 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3385 * same. Will assert / throw if caller does not heed this.
3386 */
3387DECL_FORCE_INLINE_THROW(uint32_t)
3388iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3389 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3390{
3391#ifdef RT_ARCH_AMD64
3392 /* mov mem8, imm8 */
3394 if (iGprBase >= 8)
3395 pCodeBuf[off++] = X86_OP_REX_B;
3396 pCodeBuf[off++] = 0xc6;
3397 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3398 pCodeBuf[off++] = uImm;
3399 RT_NOREF(iGprImmTmp, iGprTmp);
3400
3401#elif defined(RT_ARCH_ARM64)
3402 if (uImm == 0)
3403 iGprImmTmp = ARMV8_A64_REG_XZR;
3404 else
3405 {
3406 Assert(iGprImmTmp < 31);
3407 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3408 }
3409 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3410 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3411
3412#else
3413# error "port me"
3414#endif
3415 return off;
3416}
3417
3418
3419#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3420/**
3421 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3422 *
3423 * @note ARM64: Misaligned @a offDisp values and values not in the
3424 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3425 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3426 * does not heed this.
3427 */
3428DECL_FORCE_INLINE_THROW(uint32_t)
3429iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3430 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3431{
3432#ifdef RT_ARCH_AMD64
3433 /* movdqu mem128, reg128 */
3434 pCodeBuf[off++] = 0xf3;
3435 if (iVecRegDst >= 8 || iGprBase >= 8)
3436 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3437 pCodeBuf[off++] = 0x0f;
3438 pCodeBuf[off++] = 0x7f;
3439 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3440 RT_NOREF(iGprTmp);
3441
3442#elif defined(RT_ARCH_ARM64)
3443 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3444 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3445
3446#else
3447# error "port me"
3448#endif
3449 return off;
3450}
3451
3452
3453/**
3454 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3455 */
3456DECL_INLINE_THROW(uint32_t)
3457iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3458{
3459#ifdef RT_ARCH_AMD64
3460 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3461 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3462
3463#elif defined(RT_ARCH_ARM64)
3464 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3465
3466#else
3467# error "port me"
3468#endif
3469 return off;
3470}
3471
3472
3473/**
3474 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3475 *
3476 * @note ARM64: Misaligned @a offDisp values and values not in the
3477 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3478 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3479 * does not heed this.
3480 */
3481DECL_FORCE_INLINE_THROW(uint32_t)
3482iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3483 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3484{
3485#ifdef RT_ARCH_AMD64
3486 /* vmovdqu mem256, reg256 */
3487 pCodeBuf[off++] = X86_OP_VEX3;
3488 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3489 | X86_OP_VEX3_BYTE1_X
3490 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3491 | UINT8_C(0x01);
3492 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3493 pCodeBuf[off++] = 0x7f;
3494 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3495 RT_NOREF(iGprTmp);
3496
3497#elif defined(RT_ARCH_ARM64)
3498 Assert(!(iVecRegDst & 0x1));
3499 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3500 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3501 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3502 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3503#else
3504# error "port me"
3505#endif
3506 return off;
3507}
3508
3509
3510/**
3511 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3512 */
3513DECL_INLINE_THROW(uint32_t)
3514iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3515{
3516#ifdef RT_ARCH_AMD64
3517 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3518 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3519
3520#elif defined(RT_ARCH_ARM64)
3521 Assert(!(iVecRegDst & 0x1));
3522 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3523 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3524 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3525 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3526
3527#else
3528# error "port me"
3529#endif
3530 return off;
3531}
3532#endif
3533
3534
3535
3536/*********************************************************************************************************************************
3537* Subtraction and Additions *
3538*********************************************************************************************************************************/
3539
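/*
 * Usage sketch (illustrative only; not part of the original source): every
 * emitter appends host instructions at 'off' and returns the updated offset,
 * so calls chain linearly.  The register indexes below are hypothetical
 * placeholders for values handed out by the register allocator:
 *     off = iemNativeEmitAddGprImm(pReNative, off, idxRegTmp, 42);
 *     off = iemNativeEmitSubTwoGprs(pReNative, off, idxRegDst, idxRegTmp);
 */
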
3540/**
3541 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3542 * @note The AMD64 version sets flags.
3543 */
3544DECL_INLINE_THROW(uint32_t)
3545iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3546{
3547#if defined(RT_ARCH_AMD64)
3548 /* sub Gv,Ev */
3549 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3550 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3551 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3552 pbCodeBuf[off++] = 0x2b;
3553 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3554
3555#elif defined(RT_ARCH_ARM64)
3556 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3557 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3558
3559#else
3560# error "Port me"
3561#endif
3562 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3563 return off;
3564}
3565
3566
3567/**
3568 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3569 * @note The AMD64 version sets flags.
3570 */
3571DECL_FORCE_INLINE(uint32_t)
3572iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3573{
3574#if defined(RT_ARCH_AMD64)
3575 /* sub Gv,Ev */
3576 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3577 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3578 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3579 pCodeBuf[off++] = 0x2b;
3580 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3581
3582#elif defined(RT_ARCH_ARM64)
3583 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3584
3585#else
3586# error "Port me"
3587#endif
3588 return off;
3589}
3590
3591
3592/**
3593 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3594 * @note The AMD64 version sets flags.
3595 */
3596DECL_INLINE_THROW(uint32_t)
3597iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3598{
3599#if defined(RT_ARCH_AMD64)
3600 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3601#elif defined(RT_ARCH_ARM64)
3602 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3603#else
3604# error "Port me"
3605#endif
3606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3607 return off;
3608}
3609
3610
3611/**
3612 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3613 *
3614 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3615 *
3616 * @note Larger constants will require a temporary register. Failing to specify
3617 * one when needed will trigger fatal assertion / throw.
3618 */
3619DECL_FORCE_INLINE_THROW(uint32_t)
3620iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3621 uint8_t iGprTmp = UINT8_MAX)
3622{
3623#ifdef RT_ARCH_AMD64
3624 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3625 if (iSubtrahend == 1)
3626 {
3627 /* dec r/m64 */
3628 pCodeBuf[off++] = 0xff;
3629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3630 }
3631 else if (iSubtrahend == -1)
3632 {
3633 /* inc r/m64 */
3634 pCodeBuf[off++] = 0xff;
3635 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3636 }
3637 else if ((int8_t)iSubtrahend == iSubtrahend)
3638 {
3639 /* sub r/m64, imm8 */
3640 pCodeBuf[off++] = 0x83;
3641 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3642 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3643 }
3644 else if ((int32_t)iSubtrahend == iSubtrahend)
3645 {
3646 /* sub r/m64, imm32 */
3647 pCodeBuf[off++] = 0x81;
3648 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3649 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3650 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3651 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3652 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3653 }
3654 else if (iGprTmp != UINT8_MAX)
3655 {
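 /* Note: off - 1 backs up over the REX.W prefix emitted at the top; the immediate load emits its own prefixes. */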
3656 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3657 /* sub r/m64, r64 */
3658 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3659 pCodeBuf[off++] = 0x29;
3660 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3661 }
3662 else
3663# ifdef IEM_WITH_THROW_CATCH
3664 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3665# else
3666 AssertReleaseFailedStmt(off = UINT32_MAX);
3667# endif
3668
3669#elif defined(RT_ARCH_ARM64)
3670 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3671 if (uAbsSubtrahend < 4096)
3672 {
3673 if (iSubtrahend >= 0)
3674 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3675 else
3676 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3677 }
3678 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3679 {
3680 if (iSubtrahend >= 0)
3681 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3682 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3683 else
3684 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3685 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3686 }
3687 else if (iGprTmp != UINT8_MAX)
3688 {
3689 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3690 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3691 }
3692 else
3693# ifdef IEM_WITH_THROW_CATCH
3694 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3695# else
3696 AssertReleaseFailedStmt(off = UINT32_MAX);
3697# endif
3698
3699#else
3700# error "Port me"
3701#endif
3702 return off;
3703}
3704
3705
3706/**
3707 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3708 *
3709 * @note Larger constants will require a temporary register. Failing to specify
3710 * one when needed will trigger fatal assertion / throw.
3711 */
3712DECL_INLINE_THROW(uint32_t)
3713iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3714 uint8_t iGprTmp = UINT8_MAX)
3715
3716{
3717#ifdef RT_ARCH_AMD64
3718 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3719#elif defined(RT_ARCH_ARM64)
3720 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3721#else
3722# error "Port me"
3723#endif
3724 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3725 return off;
3726}
3727
3728
3729/**
3730 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3731 *
3732 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3733 *
3734 * @note ARM64: Larger constants will require a temporary register. Failing to
3735 * specify one when needed will trigger fatal assertion / throw.
3736 */
3737DECL_FORCE_INLINE_THROW(uint32_t)
3738iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3739 uint8_t iGprTmp = UINT8_MAX)
3740{
3741#ifdef RT_ARCH_AMD64
3742 if (iGprDst >= 8)
3743 pCodeBuf[off++] = X86_OP_REX_B;
3744 if (iSubtrahend == 1)
3745 {
3746 /* dec r/m32 */
3747 pCodeBuf[off++] = 0xff;
3748 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3749 }
3750 else if (iSubtrahend == -1)
3751 {
3752 /* inc r/m32 */
3753 pCodeBuf[off++] = 0xff;
3754 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3755 }
3756 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3757 {
3758 /* sub r/m32, imm8 */
3759 pCodeBuf[off++] = 0x83;
3760 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3761 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3762 }
3763 else
3764 {
3765 /* sub r/m32, imm32 */
3766 pCodeBuf[off++] = 0x81;
3767 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3768 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3769 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3770 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3771 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3772 }
3773 RT_NOREF(iGprTmp);
3774
3775#elif defined(RT_ARCH_ARM64)
3776 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3777 if (uAbsSubtrahend < 4096)
3778 {
3779 if (iSubtrahend >= 0)
3780 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3781 else
3782 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3783 }
3784 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3785 {
3786 if (iSubtrahend >= 0)
3787 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3788 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3789 else
3790 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3791 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3792 }
3793 else if (iGprTmp != UINT8_MAX)
3794 {
3795 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3796 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3797 }
3798 else
3799# ifdef IEM_WITH_THROW_CATCH
3800 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3801# else
3802 AssertReleaseFailedStmt(off = UINT32_MAX);
3803# endif
3804
3805#else
3806# error "Port me"
3807#endif
3808 return off;
3809}
3810
3811
3812/**
3813 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3814 *
3815 * @note ARM64: Larger constants will require a temporary register. Failing to
3816 * specify one when needed will trigger fatal assertion / throw.
3817 */
3818DECL_INLINE_THROW(uint32_t)
3819iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3820 uint8_t iGprTmp = UINT8_MAX)
3821
3822{
3823#ifdef RT_ARCH_AMD64
3824 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3825#elif defined(RT_ARCH_ARM64)
3826 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3827#else
3828# error "Port me"
3829#endif
3830 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3831 return off;
3832}
3833
3834
3835/**
3836 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3837 *
3838 * This will optimize using DEC/INC/whatever, and the ARM64 version does not
3839 * set flags, so it is not suitable as a basis for conditional jumps.
3840 *
3841 * @note AMD64: Will only update the lower 16 bits of the register.
3842 * @note ARM64: Will update the entire register.
3843 * @note ARM64: Larger constants will require a temporary register. Failing to
3844 * specify one when needed will trigger fatal assertion / throw.
3845 */
3846DECL_FORCE_INLINE_THROW(uint32_t)
3847iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3848 uint8_t iGprTmp = UINT8_MAX)
3849{
3850#ifdef RT_ARCH_AMD64
3851 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3852 if (iGprDst >= 8)
3853 pCodeBuf[off++] = X86_OP_REX_B;
3854 if (iSubtrahend == 1)
3855 {
3856 /* dec r/m16 */
3857 pCodeBuf[off++] = 0xff;
3858 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3859 }
3860 else if (iSubtrahend == -1)
3861 {
3862 /* inc r/m16 */
3863 pCodeBuf[off++] = 0xff;
3864 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3865 }
3866 else if ((int8_t)iSubtrahend == iSubtrahend)
3867 {
3868 /* sub r/m16, imm8 */
3869 pCodeBuf[off++] = 0x83;
3870 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3871 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3872 }
3873 else
3874 {
3875 /* sub r/m16, imm16 */
3876 pCodeBuf[off++] = 0x81;
3877 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3878 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3879 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3880 }
3881 RT_NOREF(iGprTmp);
3882
3883#elif defined(RT_ARCH_ARM64)
3884 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3885 if (uAbsSubtrahend < 4096)
3886 {
3887 if (iSubtrahend >= 0)
3888 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3889 else
3890 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3891 }
3892 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3893 {
3894 if (iSubtrahend >= 0)
3895 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3896 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3897 else
3898 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3899 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3900 }
3901 else if (iGprTmp != UINT8_MAX)
3902 {
3903 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3904 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3905 }
3906 else
3907# ifdef IEM_WITH_THROW_CATCH
3908 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3909# else
3910 AssertReleaseFailedStmt(off = UINT32_MAX);
3911# endif
3912 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
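 /* The AND with imms=15/immr=0 (mask 0xffff) clears bits 31:16; the 32-bit write also zeroes bits 63:32. */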
3913
3914#else
3915# error "Port me"
3916#endif
3917 return off;
3918}
3919
3920
3921/**
3922 * Emits adding a 64-bit GPR to another, storing the result in the first.
3923 * @note The AMD64 version sets flags.
3924 */
3925DECL_FORCE_INLINE(uint32_t)
3926iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3927{
3928#if defined(RT_ARCH_AMD64)
3929 /* add Gv,Ev */
3930 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3931 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3932 pCodeBuf[off++] = 0x03;
3933 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3934
3935#elif defined(RT_ARCH_ARM64)
3936 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3937
3938#else
3939# error "Port me"
3940#endif
3941 return off;
3942}
3943
3944
3945/**
3946 * Emits adding a 64-bit GPR to another, storing the result in the first.
3947 * @note The AMD64 version sets flags.
3948 */
3949DECL_INLINE_THROW(uint32_t)
3950iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3951{
3952#if defined(RT_ARCH_AMD64)
3953 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3954#elif defined(RT_ARCH_ARM64)
3955 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3956#else
3957# error "Port me"
3958#endif
3959 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3960 return off;
3961}
3962
3963
3964/**
3965 * Emits adding a 32-bit GPR to another, storing the result in the first.
3966 * @note The AMD64 version sets flags.
3967 */
3968DECL_FORCE_INLINE(uint32_t)
3969iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3970{
3971#if defined(RT_ARCH_AMD64)
3972 /* add Gv,Ev */
3973 if (iGprDst >= 8 || iGprAddend >= 8)
3974 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3975 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3976 pCodeBuf[off++] = 0x03;
3977 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3978
3979#elif defined(RT_ARCH_ARM64)
3980 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3981
3982#else
3983# error "Port me"
3984#endif
3985 return off;
3986}
3987
3988
3989/**
3990 * Emits adding a 32-bit GPR to another, storing the result in the first.
3991 * @note The AMD64 version sets flags.
3992 */
3993DECL_INLINE_THROW(uint32_t)
3994iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3995{
3996#if defined(RT_ARCH_AMD64)
3997 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3998#elif defined(RT_ARCH_ARM64)
3999 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4000#else
4001# error "Port me"
4002#endif
4003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4004 return off;
4005}
4006
4007
4008/**
4009 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4010 */
4011DECL_INLINE_THROW(uint32_t)
4012iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4013{
4014#if defined(RT_ARCH_AMD64)
4015 /* add or inc */
4016 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4017 if (iImm8 != 1)
4018 {
4019 pCodeBuf[off++] = 0x83;
4020 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4021 pCodeBuf[off++] = (uint8_t)iImm8;
4022 }
4023 else
4024 {
4025 pCodeBuf[off++] = 0xff;
4026 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4027 }
4028
4029#elif defined(RT_ARCH_ARM64)
4030 if (iImm8 >= 0)
4031 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
4032 else
4033 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
4034
4035#else
4036# error "Port me"
4037#endif
4038 return off;
4039}
4040
4041
4042/**
4043 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4044 */
4045DECL_INLINE_THROW(uint32_t)
4046iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4047{
4048#if defined(RT_ARCH_AMD64)
4049 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4050#elif defined(RT_ARCH_ARM64)
4051 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4052#else
4053# error "Port me"
4054#endif
4055 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4056 return off;
4057}
4058
4059
4060/**
4061 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4062 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4063 */
4064DECL_FORCE_INLINE(uint32_t)
4065iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4066{
4067#if defined(RT_ARCH_AMD64)
4068 /* add or inc */
4069 if (iGprDst >= 8)
4070 pCodeBuf[off++] = X86_OP_REX_B;
4071 if (iImm8 != 1)
4072 {
4073 pCodeBuf[off++] = 0x83;
4074 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4075 pCodeBuf[off++] = (uint8_t)iImm8;
4076 }
4077 else
4078 {
4079 pCodeBuf[off++] = 0xff;
4080 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4081 }
4082
4083#elif defined(RT_ARCH_ARM64)
4084 if (iImm8 >= 0)
4085 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4086 else
4087 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4088
4089#else
4090# error "Port me"
4091#endif
4092 return off;
4093}
4094
4095
4096/**
4097 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4098 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4099 */
4100DECL_INLINE_THROW(uint32_t)
4101iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4102{
4103#if defined(RT_ARCH_AMD64)
4104 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4105#elif defined(RT_ARCH_ARM64)
4106 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4107#else
4108# error "Port me"
4109#endif
4110 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4111 return off;
4112}
4113
4114
4115/**
4116 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4117 *
4118 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4119 */
4120DECL_FORCE_INLINE_THROW(uint32_t)
4121iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4122{
4123#if defined(RT_ARCH_AMD64)
4124 if ((int8_t)iAddend == iAddend)
4125 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4126
4127 if ((int32_t)iAddend == iAddend)
4128 {
4129 /* add grp, imm32 */
4130 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4131 pCodeBuf[off++] = 0x81;
4132 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4133 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4134 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4135 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4136 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4137 }
4138 else if (iGprTmp != UINT8_MAX)
4139 {
4140 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4141
4142 /* add dst, tmpreg */
4143 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4144 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4145 pCodeBuf[off++] = 0x03;
4146 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4147 }
4148 else
4149# ifdef IEM_WITH_THROW_CATCH
4150 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4151# else
4152 AssertReleaseFailedStmt(off = UINT32_MAX);
4153# endif
4154
4155#elif defined(RT_ARCH_ARM64)
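 /* A64 ADD/SUB (immediate) encodes a 12-bit value, optionally shifted left by 12, so a
    24-bit absolute addend is split across two instructions; negative addends are emitted
    as subtractions of the absolute value. */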
4156 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4157 if (uAbsAddend <= 0xffffffU)
4158 {
4159 bool const fSub = iAddend < 0;
4160 if (uAbsAddend > 0xfffU)
4161 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4162 false /*fSetFlags*/, true /*fShift12*/);
4163 if (uAbsAddend & 0xfffU)
4164 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4165 }
4166 else if (iGprTmp != UINT8_MAX)
4167 {
4168 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4169 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4170 }
4171 else
4172# ifdef IEM_WITH_THROW_CATCH
4173 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4174# else
4175 AssertReleaseFailedStmt(off = UINT32_MAX);
4176# endif
4177
4178#else
4179# error "Port me"
4180#endif
4181 return off;
4182}
4183
4184
4185/**
4186 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4187 */
4188DECL_INLINE_THROW(uint32_t)
4189iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4190{
4191#if defined(RT_ARCH_AMD64)
4192 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4193 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4194
4195 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4196 {
4197 /* add grp, imm32 */
4198 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4199 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4200 pbCodeBuf[off++] = 0x81;
4201 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4202 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4203 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4204 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4205 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4206 }
4207 else
4208 {
4209 /* Best to use a temporary register to deal with this in the simplest way: */
4210 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4211
4212 /* add dst, tmpreg */
4213 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4214 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4215 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4216 pbCodeBuf[off++] = 0x03;
4217 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4218
4219 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4220 }
4221
4222#elif defined(RT_ARCH_ARM64)
4223 bool const fSub = iAddend < 0;
4224 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4225 if (uAbsAddend <= 0xffffffU)
4226 {
4227 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4228 if (uAbsAddend > 0xfffU)
4229 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4230 false /*fSetFlags*/, true /*fShift12*/);
4231 if (uAbsAddend & 0xfffU)
4232 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4233 }
4234 else
4235 {
4236 /* Use temporary register for the immediate. */
4237 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4238
4239 /* add gprdst, gprdst, tmpreg */
4240 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4241 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg);
4242
4243 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4244 }
4245
4246#else
4247# error "Port me"
4248#endif
4249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4250 return off;
4251}
4252
4253
4254/**
4255 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4256 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4257 * @note For ARM64 the iAddend value must be in the range -0xffffff...0xffffff;
4258 * negative values are emitted as a subtraction. Constants outside that
4259 * range will trigger the fatal assertion / throw below.
4260 */
4261DECL_FORCE_INLINE_THROW(uint32_t)
4262iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4263{
4264#if defined(RT_ARCH_AMD64)
4265 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4266 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4267
4268 /* add grp, imm32 */
4269 if (iGprDst >= 8)
4270 pCodeBuf[off++] = X86_OP_REX_B;
4271 pCodeBuf[off++] = 0x81;
4272 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4273 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4274 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4275 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4276 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4277
4278#elif defined(RT_ARCH_ARM64)
4279 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4280 if (uAbsAddend <= 0xffffffU)
4281 {
4282 bool const fSub = iAddend < 0;
4283 if (uAbsAddend > 0xfffU)
4284 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4285 false /*fSetFlags*/, true /*fShift12*/);
4286 if (uAbsAddend & 0xfffU)
4287 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4288 }
4289 else
4290# ifdef IEM_WITH_THROW_CATCH
4291 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4292# else
4293 AssertReleaseFailedStmt(off = UINT32_MAX);
4294# endif
4295
4296#else
4297# error "Port me"
4298#endif
4299 return off;
4300}
4301
4302
4303/**
4304 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4305 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4306 */
4307DECL_INLINE_THROW(uint32_t)
4308iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4309{
4310#if defined(RT_ARCH_AMD64)
4311 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4312
4313#elif defined(RT_ARCH_ARM64)
4314 bool const fSub = iAddend < 0;
4315 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4316 if (uAbsAddend <= 0xffffffU)
4317 {
4318 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4319 if (uAbsAddend > 0xfffU)
4320 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4321 false /*fSetFlags*/, true /*fShift12*/);
4322 if (uAbsAddend & 0xfffU)
4323 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4324 }
4325 else
4326 {
4327 /* Use temporary register for the immediate. */
4328 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4329
4330 /* add gprdst, gprdst, tmpreg */
4331 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4332 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4333
4334 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4335 }
4336
4337#else
4338# error "Port me"
4339#endif
4340 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4341 return off;
4342}
4343
4344
4345/**
4346 * Emits a 16-bit GPR add with a signed immediate addend.
4347 *
4348 * This will optimize using INC/DEC/whatever, and the ARM64 version does not
4349 * set flags, so it is not suitable as a basis for conditional jumps.
4350 *
4351 * @note AMD64: Will only update the lower 16 bits of the register.
4352 * @note ARM64: Will update the entire register.
4353 * @sa iemNativeEmitSubGpr16ImmEx
4354 */
4355DECL_FORCE_INLINE(uint32_t)
4356iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend)
4357{
4358#ifdef RT_ARCH_AMD64
4359 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4360 if (iGprDst >= 8)
4361 pCodeBuf[off++] = X86_OP_REX_B;
4362 if (iAddend == 1)
4363 {
4364 /* inc r/m16 */
4365 pCodeBuf[off++] = 0xff;
4366 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4367 }
4368 else if (iAddend == -1)
4369 {
4370 /* dec r/m16 */
4371 pCodeBuf[off++] = 0xff;
4372 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4373 }
4374 else if ((int8_t)iAddend == iAddend)
4375 {
4376 /* add r/m16, imm8 */
4377 pCodeBuf[off++] = 0x83;
4378 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4379 pCodeBuf[off++] = (uint8_t)iAddend;
4380 }
4381 else
4382 {
4383 /* add r/m16, imm16 */
4384 pCodeBuf[off++] = 0x81;
4385 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4386 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4387 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4388 }
4389
4390#elif defined(RT_ARCH_ARM64)
4391 bool const fSub = iAddend < 0;
4392 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4393 if (uAbsAddend > 0xfffU)
4394 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4395 false /*fSetFlags*/, true /*fShift12*/);
4396 if (uAbsAddend & 0xfffU)
4397 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4398 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4399
4400#else
4401# error "Port me"
4402#endif
4403 return off;
4404}
4405
4406
4407
4408/**
4409 * Adds two 64-bit GPRs together, storing the result in a third register.
4410 */
4411DECL_FORCE_INLINE(uint32_t)
4412iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4413{
4414#ifdef RT_ARCH_AMD64
4415 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4416 {
4417 /** @todo consider LEA */
4418 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4419 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4420 }
4421 else
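 /* The destination aliases one of the addends, so simply add the other operand into it. */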
4422 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4423
4424#elif defined(RT_ARCH_ARM64)
4425 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4426
4427#else
4428# error "Port me!"
4429#endif
4430 return off;
4431}
4432
4433
4434
4435/**
4436 * Adds two 32-bit GPRs together, storing the result in a third register.
4437 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4438 */
4439DECL_FORCE_INLINE(uint32_t)
4440iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4441{
4442#ifdef RT_ARCH_AMD64
4443 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4444 {
4445 /** @todo consider LEA */
4446 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4447 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4448 }
4449 else
4450 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4451
4452#elif defined(RT_ARCH_ARM64)
4453 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4454
4455#else
4456# error "Port me!"
4457#endif
4458 return off;
4459}
4460
4461
4462/**
4463 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4464 * third register.
4465 *
4466 * @note The ARM64 version does not work for non-trivial constants if the
4467 * two registers are the same. Will assert / throw exception.
4468 */
4469DECL_FORCE_INLINE_THROW(uint32_t)
4470iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4471{
4472#ifdef RT_ARCH_AMD64
4473 /** @todo consider LEA */
4474 if ((int8_t)iImmAddend == iImmAddend)
4475 {
4476 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4477 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4478 }
4479 else
4480 {
4481 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4482 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4483 }
4484
4485#elif defined(RT_ARCH_ARM64)
4486 bool const fSub = iImmAddend < 0;
4487 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4488 if (uAbsImmAddend <= 0xfffU)
4489 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend);
4490 else if (uAbsImmAddend <= 0xffffffU)
4491 {
4492 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4493 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4494 if (uAbsImmAddend & 0xfffU)
4495 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & UINT32_C(0xfff));
4496 }
4497 else if (iGprDst != iGprAddend)
4498 {
4499 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4500 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4501 }
4502 else
4503# ifdef IEM_WITH_THROW_CATCH
4504 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4505# else
4506 AssertReleaseFailedStmt(off = UINT32_MAX);
4507# endif
4508
4509#else
4510# error "Port me!"
4511#endif
4512 return off;
4513}
4514
4515
4516/**
4517 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4518 * third register.
4519 *
4520 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4521 *
4522 * @note The ARM64 version does not work for non-trivial constants if the
4523 * two registers are the same. Will assert / throw exception.
4524 */
4525DECL_FORCE_INLINE_THROW(uint32_t)
4526iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4527{
4528#ifdef RT_ARCH_AMD64
4529 /** @todo consider LEA */
4530 if ((int8_t)iImmAddend == iImmAddend)
4531 {
4532 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4533 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4534 }
4535 else
4536 {
4537 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4538 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4539 }
4540
4541#elif defined(RT_ARCH_ARM64)
4542 bool const fSub = iImmAddend < 0;
4543 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4544 if (uAbsImmAddend <= 0xfffU)
4545 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4546 else if (uAbsImmAddend <= 0xffffffU)
4547 {
4548 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4549 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4550 if (uAbsImmAddend & 0xfffU)
4551 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & 0xfff, false /*f64Bit*/);
4552 }
4553 else if (iGprDst != iGprAddend)
4554 {
4555 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4556 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4557 }
4558 else
4559# ifdef IEM_WITH_THROW_CATCH
4560 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4561# else
4562 AssertReleaseFailedStmt(off = UINT32_MAX);
4563# endif
4564
4565#else
4566# error "Port me!"
4567#endif
4568 return off;
4569}
4570
4571
4572/*********************************************************************************************************************************
4573* Unary Operations *
4574*********************************************************************************************************************************/
4575
4576/**
4577 * Emits code for two's complement negation of a 64-bit GPR.
4578 */
4579DECL_FORCE_INLINE_THROW(uint32_t)
4580iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4581{
4582#if defined(RT_ARCH_AMD64)
4583 /* neg Ev */
4584 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4585 pCodeBuf[off++] = 0xf7;
4586 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4587
4588#elif defined(RT_ARCH_ARM64)
4589 /* sub dst, xzr, dst */
4590 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4591
4592#else
4593# error "Port me"
4594#endif
4595 return off;
4596}
4597
4598
4599/**
4600 * Emits code for two's complement negation of a 64-bit GPR.
4601 */
4602DECL_INLINE_THROW(uint32_t)
4603iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4604{
4605#if defined(RT_ARCH_AMD64)
4606 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4607#elif defined(RT_ARCH_ARM64)
4608 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4609#else
4610# error "Port me"
4611#endif
4612 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4613 return off;
4614}
4615
4616
4617/**
 * Emits code for two's complement negation of a 32-bit GPR.
 * @note Bits 32 thru 63 are set to zero.
4620 */
4621DECL_FORCE_INLINE_THROW(uint32_t)
4622iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4623{
4624#if defined(RT_ARCH_AMD64)
4625 /* neg Ev */
4626 if (iGprDst >= 8)
4627 pCodeBuf[off++] = X86_OP_REX_B;
4628 pCodeBuf[off++] = 0xf7;
4629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4630
4631#elif defined(RT_ARCH_ARM64)
4632 /* sub dst, xzr, dst */
4633 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4634
4635#else
4636# error "Port me"
4637#endif
4638 return off;
4639}
4640
4641
4642/**
 * Emits code for two's complement negation of a 32-bit GPR.
 * @note Bits 32 thru 63 are set to zero.
4645 */
4646DECL_INLINE_THROW(uint32_t)
4647iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4648{
4649#if defined(RT_ARCH_AMD64)
4650 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4651#elif defined(RT_ARCH_ARM64)
4652 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4653#else
4654# error "Port me"
4655#endif
4656 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4657 return off;
4658}
4659
4660
4661
4662/*********************************************************************************************************************************
4663* Bit Operations *
4664*********************************************************************************************************************************/
4665
4666/**
4667 * Emits code for clearing bits 16 thru 63 in the GPR.
4668 */
4669DECL_INLINE_THROW(uint32_t)
4670iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4671{
4672#if defined(RT_ARCH_AMD64)
4673 /* movzx Gv,Ew */
4674 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4675 if (iGprDst >= 8)
4676 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4677 pbCodeBuf[off++] = 0x0f;
4678 pbCodeBuf[off++] = 0xb7;
4679 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4680
4681#elif defined(RT_ARCH_ARM64)
4682 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4683# if 1
4684 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4685# else
4686 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4687 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4688# endif
4689#else
4690# error "Port me"
4691#endif
4692 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4693 return off;
4694}
4695
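/* Usage sketch (illustrative): truncating a register to its low word, e.g.
 * after composing a 16-bit result in a wider host register; one movzx on
 * AMD64, one uxth on ARM64.  The idxRegResult name is an assumption:
 *
 *     off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegResult);
 */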
4696
4697/**
4698 * Emits code for AND'ing two 64-bit GPRs.
4699 *
4700 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4701 * and ARM64 hosts.
4702 */
4703DECL_FORCE_INLINE(uint32_t)
4704iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4705{
4706#if defined(RT_ARCH_AMD64)
4707 /* and Gv, Ev */
4708 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4709 pCodeBuf[off++] = 0x23;
4710 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4711 RT_NOREF(fSetFlags);
4712
4713#elif defined(RT_ARCH_ARM64)
4714 if (!fSetFlags)
4715 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4716 else
4717 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4718
4719#else
4720# error "Port me"
4721#endif
4722 return off;
4723}
4724
4725
4726/**
4727 * Emits code for AND'ing two 64-bit GPRs.
4728 *
4729 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4730 * and ARM64 hosts.
4731 */
4732DECL_INLINE_THROW(uint32_t)
4733iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4734{
4735#if defined(RT_ARCH_AMD64)
4736 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4737#elif defined(RT_ARCH_ARM64)
4738 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4739#else
4740# error "Port me"
4741#endif
4742 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4743 return off;
4744}
4745
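/* Usage sketch (illustrative): AND'ing with fSetFlags=true so a later
 * conditional branch can test the result for zero without an extra cmp/test;
 * ARM64 then emits ANDS (updating NZCV) while the AMD64 and always sets ZF,
 * which is what makes the JZ/JNZ note above hold.  Register names are
 * assumptions:
 *
 *     off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegDst, idxRegMask, true);
 *     ... follow up with a conditional branch emitter keying on the Z flag ...
 */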
4746
4747/**
4748 * Emits code for AND'ing two 32-bit GPRs.
4749 */
4750DECL_FORCE_INLINE(uint32_t)
4751iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4752{
4753#if defined(RT_ARCH_AMD64)
4754 /* and Gv, Ev */
4755 if (iGprDst >= 8 || iGprSrc >= 8)
4756 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4757 pCodeBuf[off++] = 0x23;
4758 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4759 RT_NOREF(fSetFlags);
4760
4761#elif defined(RT_ARCH_ARM64)
4762 if (!fSetFlags)
4763 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4764 else
4765 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4766
4767#else
4768# error "Port me"
4769#endif
4770 return off;
4771}
4772
4773
4774/**
4775 * Emits code for AND'ing two 32-bit GPRs.
4776 */
4777DECL_INLINE_THROW(uint32_t)
4778iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4779{
4780#if defined(RT_ARCH_AMD64)
4781 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4782#elif defined(RT_ARCH_ARM64)
4783 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4784#else
4785# error "Port me"
4786#endif
4787 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4788 return off;
4789}
4790
4791
4792/**
 * Emits code for AND'ing a 64-bit GPR with a constant.
4794 *
4795 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4796 * and ARM64 hosts.
4797 */
4798DECL_INLINE_THROW(uint32_t)
4799iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4800{
4801#if defined(RT_ARCH_AMD64)
4802 if ((int64_t)uImm == (int8_t)uImm)
4803 {
4804 /* and Ev, imm8 */
4805 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4806 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4807 pbCodeBuf[off++] = 0x83;
4808 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4809 pbCodeBuf[off++] = (uint8_t)uImm;
4810 }
4811 else if ((int64_t)uImm == (int32_t)uImm)
4812 {
4813 /* and Ev, imm32 */
4814 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4815 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4816 pbCodeBuf[off++] = 0x81;
4817 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4818 pbCodeBuf[off++] = RT_BYTE1(uImm);
4819 pbCodeBuf[off++] = RT_BYTE2(uImm);
4820 pbCodeBuf[off++] = RT_BYTE3(uImm);
4821 pbCodeBuf[off++] = RT_BYTE4(uImm);
4822 }
4823 else
4824 {
4825 /* Use temporary register for the 64-bit immediate. */
4826 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4827 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4828 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4829 }
4830 RT_NOREF(fSetFlags);
4831
4832#elif defined(RT_ARCH_ARM64)
4833 uint32_t uImmR = 0;
4834 uint32_t uImmNandS = 0;
4835 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4836 {
4837 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4838 if (!fSetFlags)
4839 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4840 else
4841 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4842 }
4843 else
4844 {
4845 /* Use temporary register for the 64-bit immediate. */
4846 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4847 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4848 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4849 }
4850
4851#else
4852# error "Port me"
4853#endif
4854 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4855 return off;
4856}
4857
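/* Worked example (illustrative): UINT64_C(0x000000ffffffff00) is one
 * contiguous run of set bits, so Armv8A64ConvertMask64ToImmRImmS succeeds and
 * ARM64 gets by with a single AND; something like 0x123456789abcdef0 is not a
 * valid bitmask immediate and takes the temporary register path instead:
 *
 *     off = iemNativeEmitAndGprByImm(pReNative, off, idxRegDst, UINT64_C(0x000000ffffffff00));
 */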
4858
4859/**
 * Emits code for AND'ing a 32-bit GPR with a constant.
4861 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4862 * @note For ARM64 this only supports @a uImm values that can be expressed using
4863 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4864 * make sure this is possible!
4865 */
4866DECL_FORCE_INLINE_THROW(uint32_t)
4867iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4868{
4869#if defined(RT_ARCH_AMD64)
4870 /* and Ev, imm */
4871 if (iGprDst >= 8)
4872 pCodeBuf[off++] = X86_OP_REX_B;
4873 if ((int32_t)uImm == (int8_t)uImm)
4874 {
4875 pCodeBuf[off++] = 0x83;
4876 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4877 pCodeBuf[off++] = (uint8_t)uImm;
4878 }
4879 else
4880 {
4881 pCodeBuf[off++] = 0x81;
4882 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4883 pCodeBuf[off++] = RT_BYTE1(uImm);
4884 pCodeBuf[off++] = RT_BYTE2(uImm);
4885 pCodeBuf[off++] = RT_BYTE3(uImm);
4886 pCodeBuf[off++] = RT_BYTE4(uImm);
4887 }
4888 RT_NOREF(fSetFlags);
4889
4890#elif defined(RT_ARCH_ARM64)
4891 uint32_t uImmR = 0;
4892 uint32_t uImmNandS = 0;
4893 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4894 {
4895 if (!fSetFlags)
4896 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4897 else
4898 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4899 }
4900 else
4901# ifdef IEM_WITH_THROW_CATCH
4902 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4903# else
4904 AssertReleaseFailedStmt(off = UINT32_MAX);
4905# endif
4906
4907#else
4908# error "Port me"
4909#endif
4910 return off;
4911}
4912
4913
4914/**
 * Emits code for AND'ing a 32-bit GPR with a constant.
4916 *
4917 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4918 */
4919DECL_INLINE_THROW(uint32_t)
4920iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4921{
4922#if defined(RT_ARCH_AMD64)
4923 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4924
4925#elif defined(RT_ARCH_ARM64)
4926 uint32_t uImmR = 0;
4927 uint32_t uImmNandS = 0;
4928 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4929 {
4930 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4931 if (!fSetFlags)
4932 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4933 else
4934 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4935 }
4936 else
4937 {
4938 /* Use temporary register for the 64-bit immediate. */
4939 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4940 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4941 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4942 }
4943
4944#else
4945# error "Port me"
4946#endif
4947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4948 return off;
4949}
4950
4951
4952/**
 * Emits code for AND'ing a 64-bit GPR with a constant, storing the result
 * in @a iGprDst.
 *
 * @note For ARM64, a complicated immediate without an AND/ANDS compatible
 *       encoding will assert / throw an exception if @a iGprDst and
 *       @a iGprSrc are the same.
4958 */
4959DECL_FORCE_INLINE_THROW(uint32_t)
4960iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4961 bool fSetFlags = false)
4962{
4963#if defined(RT_ARCH_AMD64)
4964 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4965 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4966 RT_NOREF(fSetFlags);
4967
4968#elif defined(RT_ARCH_ARM64)
4969 uint32_t uImmR = 0;
4970 uint32_t uImmNandS = 0;
4971 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4972 {
4973 if (!fSetFlags)
4974 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4975 else
4976 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4977 }
4978 else if (iGprDst != iGprSrc)
4979 {
4980 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4981 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4982 }
4983 else
4984# ifdef IEM_WITH_THROW_CATCH
4985 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4986# else
4987 AssertReleaseFailedStmt(off = UINT32_MAX);
4988# endif
4989
4990#else
4991# error "Port me"
4992#endif
4993 return off;
4994}
4995
4996/**
 * Emits code for AND'ing a 32-bit GPR with a constant, storing the result
 * in @a iGprDst.
 *
 * @note For ARM64, a complicated immediate without an AND/ANDS compatible
 *       encoding will assert / throw an exception if @a iGprDst and
 *       @a iGprSrc are the same.
5002 *
5003 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5004 */
5005DECL_FORCE_INLINE_THROW(uint32_t)
5006iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
5007 bool fSetFlags = false)
5008{
5009#if defined(RT_ARCH_AMD64)
5010 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5011 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5012 RT_NOREF(fSetFlags);
5013
5014#elif defined(RT_ARCH_ARM64)
5015 uint32_t uImmR = 0;
5016 uint32_t uImmNandS = 0;
5017 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5018 {
5019 if (!fSetFlags)
5020 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5021 else
5022 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5023 }
5024 else if (iGprDst != iGprSrc)
5025 {
5026 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5027 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5028 }
5029 else
5030# ifdef IEM_WITH_THROW_CATCH
5031 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5032# else
5033 AssertReleaseFailedStmt(off = UINT32_MAX);
5034# endif
5035
5036#else
5037# error "Port me"
5038#endif
5039 return off;
5040}
5041
5042
5043/**
5044 * Emits code for OR'ing two 64-bit GPRs.
5045 */
5046DECL_FORCE_INLINE(uint32_t)
5047iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5048{
5049#if defined(RT_ARCH_AMD64)
5050 /* or Gv, Ev */
5051 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5052 pCodeBuf[off++] = 0x0b;
5053 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5054
5055#elif defined(RT_ARCH_ARM64)
5056 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5057
5058#else
5059# error "Port me"
5060#endif
5061 return off;
5062}
5063
5064
5065/**
5066 * Emits code for OR'ing two 64-bit GPRs.
5067 */
5068DECL_INLINE_THROW(uint32_t)
5069iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5070{
5071#if defined(RT_ARCH_AMD64)
5072 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5073#elif defined(RT_ARCH_ARM64)
5074 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5075#else
5076# error "Port me"
5077#endif
5078 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5079 return off;
5080}
5081
5082
5083/**
5084 * Emits code for OR'ing two 32-bit GPRs.
5085 * @note Bits 63:32 of the destination GPR will be cleared.
5086 */
5087DECL_FORCE_INLINE(uint32_t)
5088iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5089{
5090#if defined(RT_ARCH_AMD64)
5091 /* or Gv, Ev */
5092 if (iGprDst >= 8 || iGprSrc >= 8)
5093 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5094 pCodeBuf[off++] = 0x0b;
5095 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5096
5097#elif defined(RT_ARCH_ARM64)
5098 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5099
5100#else
5101# error "Port me"
5102#endif
5103 return off;
5104}
5105
5106
5107/**
5108 * Emits code for OR'ing two 32-bit GPRs.
5109 * @note Bits 63:32 of the destination GPR will be cleared.
5110 */
5111DECL_INLINE_THROW(uint32_t)
5112iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5113{
5114#if defined(RT_ARCH_AMD64)
5115 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5116#elif defined(RT_ARCH_ARM64)
5117 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5118#else
5119# error "Port me"
5120#endif
5121 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5122 return off;
5123}
5124
5125
5126/**
 * Emits code for OR'ing a 64-bit GPR with a constant.
5128 */
5129DECL_INLINE_THROW(uint32_t)
5130iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5131{
5132#if defined(RT_ARCH_AMD64)
5133 if ((int64_t)uImm == (int8_t)uImm)
5134 {
5135 /* or Ev, imm8 */
5136 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5137 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5138 pbCodeBuf[off++] = 0x83;
5139 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5140 pbCodeBuf[off++] = (uint8_t)uImm;
5141 }
5142 else if ((int64_t)uImm == (int32_t)uImm)
5143 {
5144 /* or Ev, imm32 */
5145 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5146 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5147 pbCodeBuf[off++] = 0x81;
5148 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5149 pbCodeBuf[off++] = RT_BYTE1(uImm);
5150 pbCodeBuf[off++] = RT_BYTE2(uImm);
5151 pbCodeBuf[off++] = RT_BYTE3(uImm);
5152 pbCodeBuf[off++] = RT_BYTE4(uImm);
5153 }
5154 else
5155 {
5156 /* Use temporary register for the 64-bit immediate. */
5157 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5158 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5159 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5160 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5161 }
5162
5163#elif defined(RT_ARCH_ARM64)
5164 uint32_t uImmR = 0;
5165 uint32_t uImmNandS = 0;
5166 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5167 {
5168 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5169 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5170 }
5171 else
5172 {
5173 /* Use temporary register for the 64-bit immediate. */
5174 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5175 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5176 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5177 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5178 }
5179
5180#else
5181# error "Port me"
5182#endif
5183 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5184 return off;
5185}
5186
5187
5188/**
 * Emits code for OR'ing a 32-bit GPR with a constant.
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
 * @note For ARM64 this only supports @a uImm values that can be expressed using
 *       the two 6-bit immediates of the ORR instruction. The caller must make
 *       sure this is possible!
5194 */
5195DECL_FORCE_INLINE_THROW(uint32_t)
5196iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5197{
5198#if defined(RT_ARCH_AMD64)
5199 /* or Ev, imm */
5200 if (iGprDst >= 8)
5201 pCodeBuf[off++] = X86_OP_REX_B;
5202 if ((int32_t)uImm == (int8_t)uImm)
5203 {
5204 pCodeBuf[off++] = 0x83;
5205 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5206 pCodeBuf[off++] = (uint8_t)uImm;
5207 }
5208 else
5209 {
5210 pCodeBuf[off++] = 0x81;
5211 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5212 pCodeBuf[off++] = RT_BYTE1(uImm);
5213 pCodeBuf[off++] = RT_BYTE2(uImm);
5214 pCodeBuf[off++] = RT_BYTE3(uImm);
5215 pCodeBuf[off++] = RT_BYTE4(uImm);
5216 }
5217
5218#elif defined(RT_ARCH_ARM64)
5219 uint32_t uImmR = 0;
5220 uint32_t uImmNandS = 0;
5221 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5222 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5223 else
5224# ifdef IEM_WITH_THROW_CATCH
5225 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5226# else
5227 AssertReleaseFailedStmt(off = UINT32_MAX);
5228# endif
5229
5230#else
5231# error "Port me"
5232#endif
5233 return off;
5234}
5235
5236
5237/**
 * Emits code for OR'ing a 32-bit GPR with a constant.
5239 *
5240 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5241 */
5242DECL_INLINE_THROW(uint32_t)
5243iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5244{
5245#if defined(RT_ARCH_AMD64)
5246 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5247
5248#elif defined(RT_ARCH_ARM64)
5249 uint32_t uImmR = 0;
5250 uint32_t uImmNandS = 0;
5251 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5252 {
5253 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5254 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5255 }
5256 else
5257 {
5258 /* Use temporary register for the 64-bit immediate. */
5259 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5260 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5261 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5262 }
5263
5264#else
5265# error "Port me"
5266#endif
5267 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5268 return off;
5269}
5270
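/* Worked example (illustrative): 0x00000ff0 is a shifted contiguous run of
 * ones and thus encodable as an ARM64 ORR bitmask immediate, while 0x1234 has
 * scattered bits and forces the temporary register fallback above:
 *
 *     off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxRegDst, UINT32_C(0x00000ff0));
 */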
5271
5272
5273/**
5274 * ORs two 64-bit GPRs together, storing the result in a third register.
5275 */
5276DECL_FORCE_INLINE(uint32_t)
5277iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5278{
5279#ifdef RT_ARCH_AMD64
5280 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5281 {
5282 /** @todo consider LEA */
5283 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5284 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5285 }
5286 else
5287 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5288
5289#elif defined(RT_ARCH_ARM64)
5290 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5291
5292#else
5293# error "Port me!"
5294#endif
5295 return off;
5296}
5297
5298
5299
5300/**
 * ORs two 32-bit GPRs together, storing the result in a third register.
5302 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5303 */
5304DECL_FORCE_INLINE(uint32_t)
5305iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5306{
5307#ifdef RT_ARCH_AMD64
5308 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5309 {
5310 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5311 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5312 }
5313 else
5314 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5315
5316#elif defined(RT_ARCH_ARM64)
5317 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5318
5319#else
5320# error "Port me!"
5321#endif
5322 return off;
5323}
5324
5325
5326/**
5327 * Emits code for XOR'ing two 64-bit GPRs.
5328 */
5329DECL_INLINE_THROW(uint32_t)
5330iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5331{
5332#if defined(RT_ARCH_AMD64)
    /* xor Gv, Ev */
5334 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5335 pCodeBuf[off++] = 0x33;
5336 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5337
5338#elif defined(RT_ARCH_ARM64)
5339 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5340
5341#else
5342# error "Port me"
5343#endif
5344 return off;
5345}
5346
5347
5348/**
5349 * Emits code for XOR'ing two 64-bit GPRs.
5350 */
5351DECL_INLINE_THROW(uint32_t)
5352iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5353{
5354#if defined(RT_ARCH_AMD64)
5355 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5356#elif defined(RT_ARCH_ARM64)
5357 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5358#else
5359# error "Port me"
5360#endif
5361 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5362 return off;
5363}
5364
5365
5366/**
5367 * Emits code for XOR'ing two 32-bit GPRs.
5368 */
5369DECL_INLINE_THROW(uint32_t)
5370iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5371{
5372#if defined(RT_ARCH_AMD64)
    /* xor Gv, Ev */
5374 if (iGprDst >= 8 || iGprSrc >= 8)
5375 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5376 pCodeBuf[off++] = 0x33;
5377 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5378
5379#elif defined(RT_ARCH_ARM64)
5380 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5381
5382#else
5383# error "Port me"
5384#endif
5385 return off;
5386}
5387
5388
5389/**
5390 * Emits code for XOR'ing two 32-bit GPRs.
5391 */
5392DECL_INLINE_THROW(uint32_t)
5393iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5394{
5395#if defined(RT_ARCH_AMD64)
5396 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5397#elif defined(RT_ARCH_ARM64)
5398 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5399#else
5400# error "Port me"
5401#endif
5402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5403 return off;
5404}
5405
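/* Usage sketch (illustrative): XOR'ing a register with itself is the usual
 * zeroing idiom; both the AMD64 xor and the ARM64 eor yield zero, and the
 * 32-bit variant clears bits 63:32 as well.  The idxReg name is an assumption:
 *
 *     off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxReg, idxReg);
 */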
5406
5407/**
 * Emits code for XOR'ing a 32-bit GPR with a constant.
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
 * @note For ARM64 this only supports @a uImm values that can be expressed using
 *       the two 6-bit immediates of the EOR instruction. The caller must make
 *       sure this is possible!
5413 */
5414DECL_FORCE_INLINE_THROW(uint32_t)
5415iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5416{
5417#if defined(RT_ARCH_AMD64)
    /* xor Ev, imm */
5419 if (iGprDst >= 8)
5420 pCodeBuf[off++] = X86_OP_REX_B;
5421 if ((int32_t)uImm == (int8_t)uImm)
5422 {
5423 pCodeBuf[off++] = 0x83;
5424 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5425 pCodeBuf[off++] = (uint8_t)uImm;
5426 }
5427 else
5428 {
5429 pCodeBuf[off++] = 0x81;
5430 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5431 pCodeBuf[off++] = RT_BYTE1(uImm);
5432 pCodeBuf[off++] = RT_BYTE2(uImm);
5433 pCodeBuf[off++] = RT_BYTE3(uImm);
5434 pCodeBuf[off++] = RT_BYTE4(uImm);
5435 }
5436
5437#elif defined(RT_ARCH_ARM64)
5438 uint32_t uImmR = 0;
5439 uint32_t uImmNandS = 0;
5440 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5441 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5442 else
5443# ifdef IEM_WITH_THROW_CATCH
5444 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5445# else
5446 AssertReleaseFailedStmt(off = UINT32_MAX);
5447# endif
5448
5449#else
5450# error "Port me"
5451#endif
5452 return off;
5453}
5454
5455
5456/**
 * Emits code for XOR'ing a 32-bit GPR with a constant.
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5458 */
5459DECL_INLINE_THROW(uint32_t)
5460iemNativeEmitXorGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5461{
5462#if defined(RT_ARCH_AMD64)
5463 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5464#elif defined(RT_ARCH_ARM64)
5465 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, uImm);
5466#else
5467# error "Port me"
5468#endif
5469 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5470 return off;
5471}
5472
5473
5474/*********************************************************************************************************************************
5475* Shifting *
5476*********************************************************************************************************************************/
5477
5478/**
5479 * Emits code for shifting a GPR a fixed number of bits to the left.
5480 */
5481DECL_FORCE_INLINE(uint32_t)
5482iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5483{
5484 Assert(cShift > 0 && cShift < 64);
5485
5486#if defined(RT_ARCH_AMD64)
5487 /* shl dst, cShift */
5488 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5489 if (cShift != 1)
5490 {
5491 pCodeBuf[off++] = 0xc1;
5492 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5493 pCodeBuf[off++] = cShift;
5494 }
5495 else
5496 {
5497 pCodeBuf[off++] = 0xd1;
5498 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5499 }
5500
5501#elif defined(RT_ARCH_ARM64)
5502 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5503
5504#else
5505# error "Port me"
5506#endif
5507 return off;
5508}
5509
5510
5511/**
5512 * Emits code for shifting a GPR a fixed number of bits to the left.
5513 */
5514DECL_INLINE_THROW(uint32_t)
5515iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5516{
5517#if defined(RT_ARCH_AMD64)
5518 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5519#elif defined(RT_ARCH_ARM64)
5520 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5521#else
5522# error "Port me"
5523#endif
5524 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5525 return off;
5526}
5527
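/* Usage sketch (illustrative): scaling an index by 8 for a 64-bit table
 * lookup with a fixed left shift; shl on AMD64, lsl on ARM64.  The idxRegIdx
 * name is an assumption:
 *
 *     off = iemNativeEmitShiftGprLeft(pReNative, off, idxRegIdx, 3);
 */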
5528
5529/**
5530 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5531 */
5532DECL_FORCE_INLINE(uint32_t)
5533iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5534{
5535 Assert(cShift > 0 && cShift < 32);
5536
5537#if defined(RT_ARCH_AMD64)
5538 /* shl dst, cShift */
5539 if (iGprDst >= 8)
5540 pCodeBuf[off++] = X86_OP_REX_B;
5541 if (cShift != 1)
5542 {
5543 pCodeBuf[off++] = 0xc1;
5544 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5545 pCodeBuf[off++] = cShift;
5546 }
5547 else
5548 {
5549 pCodeBuf[off++] = 0xd1;
5550 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5551 }
5552
5553#elif defined(RT_ARCH_ARM64)
5554 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5555
5556#else
5557# error "Port me"
5558#endif
5559 return off;
5560}
5561
5562
5563/**
5564 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5565 */
5566DECL_INLINE_THROW(uint32_t)
5567iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5568{
5569#if defined(RT_ARCH_AMD64)
5570 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5571#elif defined(RT_ARCH_ARM64)
5572 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5573#else
5574# error "Port me"
5575#endif
5576 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5577 return off;
5578}
5579
5580
5581/**
5582 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5583 */
5584DECL_FORCE_INLINE(uint32_t)
5585iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5586{
5587 Assert(cShift > 0 && cShift < 64);
5588
5589#if defined(RT_ARCH_AMD64)
5590 /* shr dst, cShift */
5591 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5592 if (cShift != 1)
5593 {
5594 pCodeBuf[off++] = 0xc1;
5595 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5596 pCodeBuf[off++] = cShift;
5597 }
5598 else
5599 {
5600 pCodeBuf[off++] = 0xd1;
5601 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5602 }
5603
5604#elif defined(RT_ARCH_ARM64)
5605 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5606
5607#else
5608# error "Port me"
5609#endif
5610 return off;
5611}
5612
5613
5614/**
5615 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5616 */
5617DECL_INLINE_THROW(uint32_t)
5618iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5619{
5620#if defined(RT_ARCH_AMD64)
5621 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5622#elif defined(RT_ARCH_ARM64)
5623 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5624#else
5625# error "Port me"
5626#endif
5627 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5628 return off;
5629}
5630
5631
5632/**
5633 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5634 * right.
5635 */
5636DECL_FORCE_INLINE(uint32_t)
5637iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5638{
5639 Assert(cShift > 0 && cShift < 32);
5640
5641#if defined(RT_ARCH_AMD64)
5642 /* shr dst, cShift */
5643 if (iGprDst >= 8)
5644 pCodeBuf[off++] = X86_OP_REX_B;
5645 if (cShift != 1)
5646 {
5647 pCodeBuf[off++] = 0xc1;
5648 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5649 pCodeBuf[off++] = cShift;
5650 }
5651 else
5652 {
5653 pCodeBuf[off++] = 0xd1;
5654 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5655 }
5656
5657#elif defined(RT_ARCH_ARM64)
5658 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5659
5660#else
5661# error "Port me"
5662#endif
5663 return off;
5664}
5665
5666
5667/**
5668 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5669 * right.
5670 */
5671DECL_INLINE_THROW(uint32_t)
5672iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5673{
5674#if defined(RT_ARCH_AMD64)
5675 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5676#elif defined(RT_ARCH_ARM64)
5677 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5678#else
5679# error "Port me"
5680#endif
5681 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5682 return off;
5683}
5684
5685
5686/**
5687 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5688 * right and assigning it to a different GPR.
5689 */
5690DECL_INLINE_THROW(uint32_t)
5691iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5692{
5693 Assert(cShift > 0); Assert(cShift < 32);
5694#if defined(RT_ARCH_AMD64)
5695 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5696 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5697
5698#elif defined(RT_ARCH_ARM64)
5699 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5700
5701#else
5702# error "Port me"
5703#endif
5704 return off;
5705}
5706
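/* Usage sketch (illustrative): extracting the high word of a 32-bit value,
 * dst = src >> 16, while leaving the source intact; a single lsr on ARM64,
 * mov + shr on AMD64.  Register names and the buffer size (7) are assumptions:
 *
 *     off = iemNativeEmitGpr32EqGprShiftRightImmEx(iemNativeInstrBufEnsure(pReNative, off, 7),
 *                                                  off, idxRegDst, idxRegSrc, 16);
 *     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 */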
5707
5708/**
5709 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5710 */
5711DECL_FORCE_INLINE(uint32_t)
5712iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5713{
5714 Assert(cShift > 0 && cShift < 64);
5715
5716#if defined(RT_ARCH_AMD64)
5717 /* sar dst, cShift */
5718 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5719 if (cShift != 1)
5720 {
5721 pCodeBuf[off++] = 0xc1;
5722 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5723 pCodeBuf[off++] = cShift;
5724 }
5725 else
5726 {
5727 pCodeBuf[off++] = 0xd1;
5728 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5729 }
5730
5731#elif defined(RT_ARCH_ARM64)
5732 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);
5733
5734#else
5735# error "Port me"
5736#endif
5737 return off;
5738}
5739
5740
5741/**
5742 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5743 */
5744DECL_INLINE_THROW(uint32_t)
5745iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5746{
5747#if defined(RT_ARCH_AMD64)
5748 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5749#elif defined(RT_ARCH_ARM64)
5750 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5751#else
5752# error "Port me"
5753#endif
5754 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5755 return off;
5756}
5757
5758
5759/**
5760 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5761 */
5762DECL_FORCE_INLINE(uint32_t)
5763iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5764{
    Assert(cShift > 0 && cShift < 32);
5766
5767#if defined(RT_ARCH_AMD64)
5768 /* sar dst, cShift */
5769 if (iGprDst >= 8)
5770 pCodeBuf[off++] = X86_OP_REX_B;
5771 if (cShift != 1)
5772 {
5773 pCodeBuf[off++] = 0xc1;
5774 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5775 pCodeBuf[off++] = cShift;
5776 }
5777 else
5778 {
5779 pCodeBuf[off++] = 0xd1;
5780 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5781 }
5782
5783#elif defined(RT_ARCH_ARM64)
5784 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5785
5786#else
5787# error "Port me"
5788#endif
5789 return off;
5790}
5791
5792
5793/**
 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the
 * right.
5795 */
5796DECL_INLINE_THROW(uint32_t)
5797iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5798{
5799#if defined(RT_ARCH_AMD64)
5800 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5801#elif defined(RT_ARCH_ARM64)
5802 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5803#else
5804# error "Port me"
5805#endif
5806 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5807 return off;
5808}
5809
5810
5811/**
5812 * Emits code for rotating a GPR a fixed number of bits to the left.
5813 */
5814DECL_FORCE_INLINE(uint32_t)
5815iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5816{
5817 Assert(cShift > 0 && cShift < 64);
5818
5819#if defined(RT_ARCH_AMD64)
5820 /* rol dst, cShift */
5821 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5822 if (cShift != 1)
5823 {
5824 pCodeBuf[off++] = 0xc1;
5825 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5826 pCodeBuf[off++] = cShift;
5827 }
5828 else
5829 {
5830 pCodeBuf[off++] = 0xd1;
5831 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5832 }
5833
5834#elif defined(RT_ARCH_ARM64)
5835 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5836
5837#else
5838# error "Port me"
5839#endif
5840 return off;
5841}
5842
5843
5844#if defined(RT_ARCH_AMD64)
5845/**
5846 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5847 */
5848DECL_FORCE_INLINE(uint32_t)
5849iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5850{
5851 Assert(cShift > 0 && cShift < 32);
5852
5853 /* rcl dst, cShift */
5854 if (iGprDst >= 8)
5855 pCodeBuf[off++] = X86_OP_REX_B;
5856 if (cShift != 1)
5857 {
5858 pCodeBuf[off++] = 0xc1;
5859 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5860 pCodeBuf[off++] = cShift;
5861 }
5862 else
5863 {
5864 pCodeBuf[off++] = 0xd1;
5865 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5866 }
5867
5868 return off;
5869}
5870#endif /* RT_ARCH_AMD64 */
5871
5872
5873
5874/**
5875 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5876 * @note Bits 63:32 of the destination GPR will be cleared.
5877 */
5878DECL_FORCE_INLINE(uint32_t)
5879iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5880{
5881#if defined(RT_ARCH_AMD64)
5882 /*
5883 * There is no bswap r16 on x86 (the encoding exists but does not work).
 * So just use a rol (gcc -O2 does the same).
5885 *
5886 * rol r16, 0x8
5887 */
5888 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5889 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5890 if (iGpr >= 8)
5891 pbCodeBuf[off++] = X86_OP_REX_B;
5892 pbCodeBuf[off++] = 0xc1;
5893 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5894 pbCodeBuf[off++] = 0x08;
5895#elif defined(RT_ARCH_ARM64)
5896 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5897
5898 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5899#else
5900# error "Port me"
5901#endif
5902
5903 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5904 return off;
5905}
5906
5907
5908/**
5909 * Emits code for reversing the byte order in a 32-bit GPR.
5910 * @note Bits 63:32 of the destination GPR will be cleared.
5911 */
5912DECL_FORCE_INLINE(uint32_t)
5913iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5914{
5915#if defined(RT_ARCH_AMD64)
5916 /* bswap r32 */
5917 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5918
5919 if (iGpr >= 8)
5920 pbCodeBuf[off++] = X86_OP_REX_B;
5921 pbCodeBuf[off++] = 0x0f;
5922 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5923#elif defined(RT_ARCH_ARM64)
5924 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5925
5926 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5927#else
5928# error "Port me"
5929#endif
5930
5931 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5932 return off;
5933}
5934
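/* Worked example (illustrative): running iemNativeEmitBswapGpr32 over a
 * register holding 0x12345678 leaves it holding 0x78563412, and the 64-bit
 * variant below maps 0x0123456789abcdef to 0xefcdab8967452301; both are a
 * single instruction on either host.
 */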
5935
5936/**
5937 * Emits code for reversing the byte order in a 64-bit GPR.
5938 */
5939DECL_FORCE_INLINE(uint32_t)
5940iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5941{
5942#if defined(RT_ARCH_AMD64)
5943 /* bswap r64 */
5944 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5945
5946 if (iGpr >= 8)
5947 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5948 else
5949 pbCodeBuf[off++] = X86_OP_REX_W;
5950 pbCodeBuf[off++] = 0x0f;
5951 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5952#elif defined(RT_ARCH_ARM64)
5953 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5954
5955 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5956#else
5957# error "Port me"
5958#endif
5959
5960 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5961 return off;
5962}
5963
5964
5965/*********************************************************************************************************************************
5966* Compare and Testing *
5967*********************************************************************************************************************************/
5968
5969
5970#ifdef RT_ARCH_ARM64
5971/**
5972 * Emits an ARM64 compare instruction.
5973 */
5974DECL_INLINE_THROW(uint32_t)
5975iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5976 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5977{
5978 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5979 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5980 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5981 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5982 return off;
5983}
5984#endif
5985
5986
5987/**
 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
 * with conditional instructions.
5990 */
5991DECL_FORCE_INLINE(uint32_t)
5992iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5993{
5994#ifdef RT_ARCH_AMD64
5995 /* cmp Gv, Ev */
5996 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5997 pCodeBuf[off++] = 0x3b;
5998 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5999
6000#elif defined(RT_ARCH_ARM64)
6001 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
6002
6003#else
6004# error "Port me!"
6005#endif
6006 return off;
6007}
6008
6009
6010/**
 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
 * with conditional instructions.
6013 */
6014DECL_INLINE_THROW(uint32_t)
6015iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6016{
6017#ifdef RT_ARCH_AMD64
6018 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6019#elif defined(RT_ARCH_ARM64)
6020 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6021#else
6022# error "Port me!"
6023#endif
6024 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6025 return off;
6026}
6027
6028
6029/**
 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
 * with conditional instructions.
6032 */
6033DECL_FORCE_INLINE(uint32_t)
6034iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6035{
6036#ifdef RT_ARCH_AMD64
6037 /* cmp Gv, Ev */
6038 if (iGprLeft >= 8 || iGprRight >= 8)
6039 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6040 pCodeBuf[off++] = 0x3b;
6041 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6042
6043#elif defined(RT_ARCH_ARM64)
6044 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
6045
6046#else
6047# error "Port me!"
6048#endif
6049 return off;
6050}
6051
6052
6053/**
 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
 * with conditional instructions.
6056 */
6057DECL_INLINE_THROW(uint32_t)
6058iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6059{
6060#ifdef RT_ARCH_AMD64
6061 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6062#elif defined(RT_ARCH_ARM64)
6063 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6064#else
6065# error "Port me!"
6066#endif
6067 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6068 return off;
6069}
6070
6071
6072/**
 * Emits a compare of a 64-bit GPR with a constant value, setting status
 * flags/whatever for use with conditional instructions.
6075 */
6076DECL_INLINE_THROW(uint32_t)
6077iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
6078{
6079#ifdef RT_ARCH_AMD64
    if ((int64_t)uImm == (int8_t)uImm)
6081 {
6082 /* cmp Ev, Ib */
6083 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6084 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6085 pbCodeBuf[off++] = 0x83;
6086 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6087 pbCodeBuf[off++] = (uint8_t)uImm;
6088 }
6089 else if ((int64_t)uImm == (int32_t)uImm)
6090 {
6091 /* cmp Ev, imm */
6092 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6093 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6094 pbCodeBuf[off++] = 0x81;
6095 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6097 pbCodeBuf[off++] = RT_BYTE1(uImm);
6098 pbCodeBuf[off++] = RT_BYTE2(uImm);
6099 pbCodeBuf[off++] = RT_BYTE3(uImm);
6100 pbCodeBuf[off++] = RT_BYTE4(uImm);
6101 }
6102 else
6103 {
6104 /* Use temporary register for the immediate. */
6105 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6106 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6107 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6108 }
6109
6110#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are cleverer things we can do here... */
6112 if (uImm < _4K)
6113 {
6114 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6115 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6116 true /*64Bit*/, true /*fSetFlags*/);
6117 }
6118 else if ((uImm & ~(uint64_t)0xfff000) == 0)
6119 {
6120 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6121 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6122 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6123 }
6124 else
6125 {
6126 /* Use temporary register for the immediate. */
6127 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6128 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6129 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6130 }
6131
6132#else
6133# error "Port me!"
6134#endif
6135
6136 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6137 return off;
6138}
6139
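/* Worked example (illustrative): on ARM64 a compare against 0x5000 fits the
 * shifted form above (cmp reg, #5, lsl #12), so no temporary is needed, while
 * 0x5001 has bits both below and above bit 12 and takes the mov-to-temporary
 * fallback.  The idxRegLeft name is an assumption:
 *
 *     off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegLeft, UINT64_C(0x5000));
 */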
6140
6141/**
 * Emits a compare of a 32-bit GPR with a constant value, setting status
 * flags/whatever for use with conditional instructions.
6144 *
6145 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6147 * bits all zero). Will release assert or throw exception if the caller
6148 * violates this restriction.
6149 */
6150DECL_FORCE_INLINE_THROW(uint32_t)
6151iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6152{
6153#ifdef RT_ARCH_AMD64
6154 if (iGprLeft >= 8)
6155 pCodeBuf[off++] = X86_OP_REX_B;
6156 if (uImm <= UINT32_C(0x7f))
6157 {
6158 /* cmp Ev, Ib */
6159 pCodeBuf[off++] = 0x83;
6160 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6161 pCodeBuf[off++] = (uint8_t)uImm;
6162 }
6163 else
6164 {
6165 /* cmp Ev, imm */
6166 pCodeBuf[off++] = 0x81;
6167 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6168 pCodeBuf[off++] = RT_BYTE1(uImm);
6169 pCodeBuf[off++] = RT_BYTE2(uImm);
6170 pCodeBuf[off++] = RT_BYTE3(uImm);
6171 pCodeBuf[off++] = RT_BYTE4(uImm);
6172 }
6173
6174#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are cleverer things we can do here... */
6176 if (uImm < _4K)
6177 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6178 false /*64Bit*/, true /*fSetFlags*/);
6179 else if ((uImm & ~(uint32_t)0xfff000) == 0)
        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
                                                      false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6182 else
6183# ifdef IEM_WITH_THROW_CATCH
6184 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6185# else
6186 AssertReleaseFailedStmt(off = UINT32_MAX);
6187# endif
6188
6189#else
6190# error "Port me!"
6191#endif
6192 return off;
6193}
6194
6195
6196/**
 * Emits a compare of a 32-bit GPR with a constant value, setting status
 * flags/whatever for use with conditional instructions.
6199 */
6200DECL_INLINE_THROW(uint32_t)
6201iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6202{
6203#ifdef RT_ARCH_AMD64
6204 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6205
6206#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are cleverer things we can do here... */
6208 if (uImm < _4K)
6209 {
6210 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6211 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6212 false /*64Bit*/, true /*fSetFlags*/);
6213 }
6214 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6215 {
6216 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
                                                         false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6219 }
6220 else
6221 {
6222 /* Use temporary register for the immediate. */
6223 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6224 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6225 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6226 }
6227
6228#else
6229# error "Port me!"
6230#endif
6231
6232 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6233 return off;
6234}
6235
6236
6237/**
 * Emits a compare of a 16-bit GPR with a constant value, setting status
 * flags/whatever for use with conditional instructions.
 *
 * @note ARM64: A helper register is required (@a idxTmpReg) for isolating the
 *       16-bit value from @a iGprLeft.
6243 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6245 * bits all zero). Will release assert or throw exception if the caller
6246 * violates this restriction.
6247 */
6248DECL_FORCE_INLINE_THROW(uint32_t)
6249iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6250 uint8_t idxTmpReg = UINT8_MAX)
6251{
6252#ifdef RT_ARCH_AMD64
6253 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6254 if (iGprLeft >= 8)
6255 pCodeBuf[off++] = X86_OP_REX_B;
6256 if (uImm <= UINT32_C(0x7f))
6257 {
6258 /* cmp Ev, Ib */
6259 pCodeBuf[off++] = 0x83;
6260 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6261 pCodeBuf[off++] = (uint8_t)uImm;
6262 }
6263 else
6264 {
6265 /* cmp Ev, imm */
6266 pCodeBuf[off++] = 0x81;
6267 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6268 pCodeBuf[off++] = RT_BYTE1(uImm);
6269 pCodeBuf[off++] = RT_BYTE2(uImm);
6270 }
6271 RT_NOREF(idxTmpReg);
6272
6273#elif defined(RT_ARCH_ARM64)
6274# ifdef IEM_WITH_THROW_CATCH
6275 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6276# else
6277 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6278# endif
6279 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6280 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6281 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6282
6283#else
6284# error "Port me!"
6285#endif
6286 return off;
6287}
6288
6289
6290/**
 * Emits a compare of a 16-bit GPR with a constant value, setting status
 * flags/whatever for use with conditional instructions.
6293 *
6294 * @note ARM64: Helper register is required (idxTmpReg).
6295 */
6296DECL_INLINE_THROW(uint32_t)
6297iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6298 uint8_t idxTmpReg = UINT8_MAX)
6299{
6300#ifdef RT_ARCH_AMD64
6301 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6302#elif defined(RT_ARCH_ARM64)
6303 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6304#else
6305# error "Port me!"
6306#endif
6307 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6308 return off;
6309}
6310
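/* Usage sketch (illustrative): comparing the low 16 bits of a register; on
 * ARM64 the value is first isolated with AND wTmp, wLeft, #0xffff, which is
 * why idxTmpReg is mandatory there and ignored on AMD64.  Register names are
 * assumptions:
 *
 *     off = iemNativeEmitCmpGpr16WithImm(pReNative, off, idxRegLeft, 0x123, idxRegTmp);
 */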
6311
6312
6313/*********************************************************************************************************************************
6314* Branching *
6315*********************************************************************************************************************************/
6316
6317/**
6318 * Emits a JMP rel32 / B imm26 to the given label.
6319 */
6320DECL_FORCE_INLINE_THROW(uint32_t)
6321iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6322{
6323 Assert(idxLabel < pReNative->cLabels);
6324
6325#ifdef RT_ARCH_AMD64
6326 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6327 {
6328 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6329 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6330 {
6331 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6332 pCodeBuf[off++] = (uint8_t)offRel;
6333 }
6334 else
6335 {
6336 offRel -= 3;
6337 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6338 pCodeBuf[off++] = RT_BYTE1(offRel);
6339 pCodeBuf[off++] = RT_BYTE2(offRel);
6340 pCodeBuf[off++] = RT_BYTE3(offRel);
6341 pCodeBuf[off++] = RT_BYTE4(offRel);
6342 }
6343 }
6344 else
6345 {
6346 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6347 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6348 pCodeBuf[off++] = 0xfe;
6349 pCodeBuf[off++] = 0xff;
6350 pCodeBuf[off++] = 0xff;
6351 pCodeBuf[off++] = 0xff;
6352 }
6353 pCodeBuf[off++] = 0xcc; /* int3 poison */
6354
6355#elif defined(RT_ARCH_ARM64)
6356 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6357 {
6358 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6359 off++;
6360 }
6361 else
6362 {
6363 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6364 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6365 }
6366
6367#else
6368# error "Port me!"
6369#endif
6370 return off;
6371}
6372
6373
6374/**
6375 * Emits a JMP rel32 / B imm26 to the given label.
6376 */
6377DECL_INLINE_THROW(uint32_t)
6378iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6379{
6380#ifdef RT_ARCH_AMD64
6381 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6382#elif defined(RT_ARCH_ARM64)
6383 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6384#else
6385# error "Port me!"
6386#endif
6387 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6388 return off;
6389}
6390
6391
6392/**
6393 * Emits a JMP rel32 / B imm26 to a new undefined label.
6394 */
6395DECL_INLINE_THROW(uint32_t)
6396iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6397{
6398 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6399 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6400}
6401
6402/** Condition type. */
6403#ifdef RT_ARCH_AMD64
6404typedef enum IEMNATIVEINSTRCOND : uint8_t
6405{
6406 kIemNativeInstrCond_o = 0,
6407 kIemNativeInstrCond_no,
6408 kIemNativeInstrCond_c,
6409 kIemNativeInstrCond_nc,
6410 kIemNativeInstrCond_e,
6411 kIemNativeInstrCond_z = kIemNativeInstrCond_e,
6412 kIemNativeInstrCond_ne,
6413 kIemNativeInstrCond_nz = kIemNativeInstrCond_ne,
6414 kIemNativeInstrCond_be,
6415 kIemNativeInstrCond_nbe,
6416 kIemNativeInstrCond_s,
6417 kIemNativeInstrCond_ns,
6418 kIemNativeInstrCond_p,
6419 kIemNativeInstrCond_np,
6420 kIemNativeInstrCond_l,
6421 kIemNativeInstrCond_nl,
6422 kIemNativeInstrCond_le,
6423 kIemNativeInstrCond_nle
6424} IEMNATIVEINSTRCOND;
6425#elif defined(RT_ARCH_ARM64)
6426typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6427# define kIemNativeInstrCond_o todo_conditional_codes
6428# define kIemNativeInstrCond_no todo_conditional_codes
6429# define kIemNativeInstrCond_c todo_conditional_codes
6430# define kIemNativeInstrCond_nc todo_conditional_codes
6431# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6432# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6433# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6434# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6435# define kIemNativeInstrCond_s todo_conditional_codes
6436# define kIemNativeInstrCond_ns todo_conditional_codes
6437# define kIemNativeInstrCond_p todo_conditional_codes
6438# define kIemNativeInstrCond_np todo_conditional_codes
6439# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6440# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6441# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6442# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6443#else
6444# error "Port me!"
6445#endif
6446
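/*
 * The mapping above makes the x86-style condition names usable on both
 * platforms for the conditions that have direct ARM64 equivalents.  A minimal
 * sketch of a portable conditional branch (register and label names made up):
 * @code
 *      off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxGprLeft, idxGprRight);
 *      off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e); // B.EQ on ARM64
 * @endcode
 */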
6447
6448/**
6449 * Emits a Jcc rel32 / B.cc imm19 to the given label, adding a fixup record if the label is still undefined.
6450 */
6451DECL_FORCE_INLINE_THROW(uint32_t)
6452iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6453 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6454{
6455 Assert(idxLabel < pReNative->cLabels);
6456
6457 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6458#ifdef RT_ARCH_AMD64
6459 if (offLabel >= off)
6460 {
6461 /* jcc rel32 */
6462 pCodeBuf[off++] = 0x0f;
6463 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6464 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6465 pCodeBuf[off++] = 0x00;
6466 pCodeBuf[off++] = 0x00;
6467 pCodeBuf[off++] = 0x00;
6468 pCodeBuf[off++] = 0x00;
6469 }
6470 else
6471 {
6472 int32_t offDisp = offLabel - (off + 2);
6473 if ((int8_t)offDisp == offDisp)
6474 {
6475 /* jcc rel8 */
6476 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6477 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6478 }
6479 else
6480 {
6481 /* jcc rel32 */
6482 offDisp -= 4;
6483 pCodeBuf[off++] = 0x0f;
6484 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6485 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6486 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6487 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6488 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6489 }
6490 }
6491
6492#elif defined(RT_ARCH_ARM64)
6493 if (offLabel >= off)
6494 {
6495 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6496 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6497 }
6498 else
6499 {
6500 Assert(off - offLabel <= 0x3ffffU);
6501 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6502 off++;
6503 }
6504
6505#else
6506# error "Port me!"
6507#endif
6508 return off;
6509}
6510
6511
6512/**
6513 * Emits a Jcc rel32 / B.cc imm19 to the given label, adding a fixup record if the label is still undefined.
6514 */
6515DECL_INLINE_THROW(uint32_t)
6516iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6517{
6518#ifdef RT_ARCH_AMD64
6519 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6520#elif defined(RT_ARCH_ARM64)
6521 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6522#else
6523# error "Port me!"
6524#endif
6525 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6526 return off;
6527}
6528
6529
6530/**
6531 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6532 */
6533DECL_INLINE_THROW(uint32_t)
6534iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6535 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6536{
6537 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6538 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6539}
6540
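/*
 * Typical forward-branch sketch.  This assumes iemNativeLabelDefine() from
 * IEMN8veRecompiler.h is the routine that resolves the label; the label type
 * and condition below are placeholders.
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, 0); // offWhere, uData
 *      off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
 *      // ... emit the code between the jump and the label ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode
 */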
6541
6542/**
6543 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6544 */
6545DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6546{
6547#ifdef RT_ARCH_AMD64
6548 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6549#elif defined(RT_ARCH_ARM64)
6550 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6551#else
6552# error "Port me!"
6553#endif
6554}
6555
6556/**
6557 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6558 */
6559DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6560 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6561{
6562#ifdef RT_ARCH_AMD64
6563 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6564#elif defined(RT_ARCH_ARM64)
6565 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6566#else
6567# error "Port me!"
6568#endif
6569}
6570
6571
6572/**
6573 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6574 */
6575DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6576{
6577#ifdef RT_ARCH_AMD64
6578 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6579#elif defined(RT_ARCH_ARM64)
6580 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6581#else
6582# error "Port me!"
6583#endif
6584}
6585
6586/**
6587 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6588 */
6589DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6590 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6591{
6592#ifdef RT_ARCH_AMD64
6593 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6594#elif defined(RT_ARCH_ARM64)
6595 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6596#else
6597# error "Port me!"
6598#endif
6599}
6600
6601
6602/**
6603 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6604 */
6605DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6606{
6607#ifdef RT_ARCH_AMD64
6608 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6609#elif defined(RT_ARCH_ARM64)
6610 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6611#else
6612# error "Port me!"
6613#endif
6614}
6615
6616/**
6617 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6618 */
6619DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6620 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6621{
6622#ifdef RT_ARCH_AMD64
6623 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6624#elif defined(RT_ARCH_ARM64)
6625 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6626#else
6627# error "Port me!"
6628#endif
6629}
6630
6631
6632/**
6633 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6634 */
6635DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6636{
6637#ifdef RT_ARCH_AMD64
6638 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6639#elif defined(RT_ARCH_ARM64)
6640 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6641#else
6642# error "Port me!"
6643#endif
6644}
6645
6646/**
6647 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6648 */
6649DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6650 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6651{
6652#ifdef RT_ARCH_AMD64
6653 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6654#elif defined(RT_ARCH_ARM64)
6655 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6656#else
6657# error "Port me!"
6658#endif
6659}
6660
6661
6662/**
6663 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6664 */
6665DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6666{
6667#ifdef RT_ARCH_AMD64
6668 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6669#elif defined(RT_ARCH_ARM64)
6670 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6671#else
6672# error "Port me!"
6673#endif
6674}
6675
6676/**
6677 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6678 */
6679DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6680 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6681{
6682#ifdef RT_ARCH_AMD64
6683 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6684#elif defined(RT_ARCH_ARM64)
6685 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6686#else
6687# error "Port me!"
6688#endif
6689}
6690
6691
6692/**
6693 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6694 *
6695 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6696 *
6697 * Only use hardcoded jumps forward when emitting for exactly one
6698 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6699 * the right target address on all platforms!
6700 *
6701 * Please also note that on x86 it is necessary to pass off + 256 or
6702 * higher for @a offTarget if one believes the intervening code is more
6703 * than 127 bytes long.
6704 */
6705DECL_FORCE_INLINE(uint32_t)
6706iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6707{
6708#ifdef RT_ARCH_AMD64
6709 /* jcc rel8 / rel32 */
6710 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6711 if (offDisp < 128 && offDisp >= -128)
6712 {
6713 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6714 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6715 }
6716 else
6717 {
6718 offDisp -= 4;
6719 pCodeBuf[off++] = 0x0f;
6720 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6721 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6722 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6723 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6724 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6725 }
6726
6727#elif defined(RT_ARCH_ARM64)
6728 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6729 off++;
6730#else
6731# error "Port me!"
6732#endif
6733 return off;
6734}
6735
6736
6737/**
6738 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6739 *
6740 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6741 *
6742 * Only use hardcoded jumps forward when emitting for exactly one
6743 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6744 * the right target address on all platforms!
6745 *
6746 * Please also note that on x86 it is necessary to pass off + 256 or
6747 * higher for @a offTarget if one believes the intervening code is more than 127
6748 * bytes long.
6749 */
6750DECL_INLINE_THROW(uint32_t)
6751iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6752{
6753#ifdef RT_ARCH_AMD64
6754 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6755#elif defined(RT_ARCH_ARM64)
6756 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6757#else
6758# error "Port me!"
6759#endif
6760 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6761 return off;
6762}
6763
6764
6765/**
6766 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6767 *
6768 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6769 */
6770DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6771{
6772#ifdef RT_ARCH_AMD64
6773 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6774#elif defined(RT_ARCH_ARM64)
6775 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6776#else
6777# error "Port me!"
6778#endif
6779}
6780
6781
6782/**
6783 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6784 *
6785 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6786 */
6787DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6788{
6789#ifdef RT_ARCH_AMD64
6790 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6791#elif defined(RT_ARCH_ARM64)
6792 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6793#else
6794# error "Port me!"
6795#endif
6796}
6797
6798
6799/**
6800 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6801 *
6802 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6803 */
6804DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6805{
6806#ifdef RT_ARCH_AMD64
6807 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6808#elif defined(RT_ARCH_ARM64)
6809 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6810#else
6811# error "Port me!"
6812#endif
6813}
6814
6815
6816/**
6817 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6818 *
6819 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6820 */
6821DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6822{
6823#ifdef RT_ARCH_AMD64
6824 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6825#elif defined(RT_ARCH_ARM64)
6826 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6827#else
6828# error "Port me!"
6829#endif
6830}
6831
6832
6833/**
6834 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6835 *
6836 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6837 */
6838DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6839{
6840#ifdef RT_ARCH_AMD64
6841 /* jmp rel8 or rel32 */
6842 int32_t offDisp = offTarget - (off + 2);
6843 if (offDisp < 128 && offDisp >= -128)
6844 {
6845 pCodeBuf[off++] = 0xeb;
6846 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6847 }
6848 else
6849 {
6850 offDisp -= 3;
6851 pCodeBuf[off++] = 0xe9;
6852 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6853 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6854 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6855 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6856 }
6857
6858#elif defined(RT_ARCH_ARM64)
6859 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6860 off++;
6861
6862#else
6863# error "Port me!"
6864#endif
6865 return off;
6866}
6867
6868
6869/**
6870 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6871 *
6872 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6873 */
6874DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6875{
6876#ifdef RT_ARCH_AMD64
6877 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6878#elif defined(RT_ARCH_ARM64)
6879 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6880#else
6881# error "Port me!"
6882#endif
6883 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6884 return off;
6885}
6886
6887
6888/**
6889 * Fixes up a conditional jump to a fixed label.
6890 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6891 * iemNativeEmitJzToFixed, ...
6892 */
6893DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6894{
6895#ifdef RT_ARCH_AMD64
6896 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6897 uint8_t const bOpcode = pbCodeBuf[offFixup];
6898 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6899 {
6900 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6901 AssertStmt((int8_t)pbCodeBuf[offFixup + 1] == (int32_t)(offTarget - (offFixup + 2)),
6902 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6903 }
6904 else
6905 {
6906 if (bOpcode != 0x0f)
6907 Assert(bOpcode == 0xe9);
6908 else
6909 {
6910 offFixup += 1;
6911            Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) < (uint8_t)0x10);
6912 }
6913 uint32_t const offRel32 = offTarget - (offFixup + 5);
6914 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6915 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6916 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6917 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6918 }
6919
6920#elif defined(RT_ARCH_ARM64)
6921 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6922 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6923 {
6924 /* B.COND + BC.COND */
6925 int32_t const offDisp = offTarget - offFixup;
6926 Assert(offDisp >= -262144 && offDisp < 262144);
6927 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6928 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6929 }
6930 else
6931 {
6932 /* B imm26 */
6933 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6934 int32_t const offDisp = offTarget - offFixup;
6935 Assert(offDisp >= -33554432 && offDisp < 33554432);
6936 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6937 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6938 }
6939
6940#else
6941# error "Port me!"
6942#endif
6943}
6944
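/*
 * Sketch of the fixed-jump pattern the notes above describe: emit the
 * conditional jump with a provisional target that forces the rel32 form on
 * x86, then patch it once the real target offset is known (names
 * illustrative):
 * @code
 *      uint32_t const offFixup = off;
 *      off = iemNativeEmitJccToFixed(pReNative, off, off + 256, kIemNativeInstrCond_e);
 *      // ... emit the instructions being jumped across ...
 *      iemNativeFixupFixedJump(pReNative, offFixup, off);
 * @endcode
 */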
6945
6946#ifdef RT_ARCH_AMD64
6947/**
6948 * For doing bt (bit test) on a register, leaving the tested bit in CF.
6949 */
6950DECL_INLINE_THROW(uint32_t)
6951iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6952{
6953 Assert(iBitNo < 64);
6954 /* bt Ev, imm8 */
6955 if (iBitNo >= 32)
6956 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6957 else if (iGprSrc >= 8)
6958 pCodeBuf[off++] = X86_OP_REX_B;
6959 pCodeBuf[off++] = 0x0f;
6960 pCodeBuf[off++] = 0xba;
6961 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6962 pCodeBuf[off++] = iBitNo;
6963 return off;
6964}
6965#endif /* RT_ARCH_AMD64 */
6966
6967
6968/**
6969 * Internal helper, don't call directly.
6970 */
6971DECL_INLINE_THROW(uint32_t)
6972iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
6973 uint32_t offTarget, uint32_t *poffFixup, bool fJmpIfSet)
6974{
6975 Assert(iBitNo < 64);
6976#ifdef RT_ARCH_AMD64
6977 if (iBitNo < 8)
6978 {
6979 /* test Eb, imm8 */
6980 if (iGprSrc >= 4)
6981 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6982 pCodeBuf[off++] = 0xf6;
6983 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6984 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
6985 if (poffFixup)
6986 *poffFixup = off;
6987 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6988 }
6989 else
6990 {
6991 /* bt Ev, imm8 */
6992 if (iBitNo >= 32)
6993 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6994 else if (iGprSrc >= 8)
6995 pCodeBuf[off++] = X86_OP_REX_B;
6996 pCodeBuf[off++] = 0x0f;
6997 pCodeBuf[off++] = 0xba;
6998 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6999 pCodeBuf[off++] = iBitNo;
7000 if (poffFixup)
7001 *poffFixup = off;
7002 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7003 }
7004
7005#elif defined(RT_ARCH_ARM64)
7006    /* Just use the TBZ/TBNZ instruction here. */
7007    if (poffFixup)
7008        *poffFixup = off;
7009    pCodeBuf[off] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, (int32_t)(offTarget - off), iGprSrc, iBitNo); off++;
7010
7011#else
7012# error "Port me!"
7013#endif
7014 return off;
7015}
7016
7017
7018/**
7019 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _set_
7020 * in @a iGprSrc.
7021 */
7022DECL_INLINE_THROW(uint32_t)
7023iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7024 uint32_t offTarget, uint32_t *poffFixup)
7025{
7026 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, true /*fJmpIfSet*/);
7027}
7028
7029
7030/**
7031 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _not_
7032 * _set_ in @a iGprSrc.
7033 */
7034DECL_INLINE_THROW(uint32_t)
7035iemNativeEmitTestBitInGprAndJmpToFixedIfNotSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7036 uint32_t offTarget, uint32_t *poffFixup)
7037{
7038 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, false /*fJmpIfSet*/);
7039}
7040
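/*
 * Backward-jump sketch (illustrative): when the target has already been
 * emitted the displacement is known, so no fixup record is needed and NULL
 * can be passed for poffFixup.  X86_EFL_ZF_BIT stands in for whatever bit
 * number the caller cares about.
 * @code
 *      uint32_t const offLoopStart = off;
 *      // ... emit the loop body ...
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
 *      off = iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(pCodeBuf, off, idxGprEfl, X86_EFL_ZF_BIT,
 *                                                          offLoopStart, NULL);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 * @endcode
 */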
7041
7042
7043/**
7044 * Internal helper, don't call directly.
7045 */
7046DECL_INLINE_THROW(uint32_t)
7047iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7048 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7049{
7050 Assert(iBitNo < 64);
7051#ifdef RT_ARCH_AMD64
7052 if (iBitNo < 8)
7053 {
7054 /* test Eb, imm8 */
7055 if (iGprSrc >= 4)
7056 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7057 pCodeBuf[off++] = 0xf6;
7058 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7059 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7060 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7061 fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7062 }
7063 else
7064 {
7065 /* bt Ev, imm8 */
7066 if (iBitNo >= 32)
7067 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7068 else if (iGprSrc >= 8)
7069 pCodeBuf[off++] = X86_OP_REX_B;
7070 pCodeBuf[off++] = 0x0f;
7071 pCodeBuf[off++] = 0xba;
7072 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7073 pCodeBuf[off++] = iBitNo;
7074 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7075 fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7076 }
7077
7078#elif defined(RT_ARCH_ARM64)
7079    /* Use the TBZ/TBNZ instruction here. */
7080 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
7081 {
7082 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
7083 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
7084 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
7085 //if (offLabel == UINT32_MAX)
7086 {
7087 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
7088 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
7089 }
7090 //else
7091 //{
7092 // RT_BREAKPOINT();
7093 // Assert(off - offLabel <= 0x1fffU);
7094 // pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
7095 //
7096 //}
7097 }
7098 else
7099 {
7100 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
7101 pCodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
7102 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7103 pCodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
7104 }
7105
7106#else
7107# error "Port me!"
7108#endif
7109 return off;
7110}
7111
7112
7113/**
7114 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7115 * @a iGprSrc.
7116 */
7117DECL_INLINE_THROW(uint32_t)
7118iemNativeEmitTestBitInGprAndJmpToLabelIfSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7119 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7120{
7121 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7122}
7123
7124
7125/**
7126 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7127 * _set_ in @a iGprSrc.
7128 */
7129DECL_INLINE_THROW(uint32_t)
7130iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7131 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7132{
7133 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7134}
7135
7136
7137/**
7138 * Internal helper, don't call directly.
7139 */
7140DECL_INLINE_THROW(uint32_t)
7141iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7142 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7143{
7144#ifdef RT_ARCH_AMD64
7145 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 5+6), off,
7146 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7147#elif defined(RT_ARCH_ARM64)
7148 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 2), off,
7149 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7150#else
7151# error "Port me!"
7152#endif
7153 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7154 return off;
7155}
7156
7157
7158/**
7159 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7160 * @a iGprSrc.
7161 */
7162DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7163 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7164{
7165 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7166}
7167
7168
7169/**
7170 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7171 * _set_ in @a iGprSrc.
7172 */
7173DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7174 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7175{
7176 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7177}
7178
7179
7180/**
7181 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7182 * flags accordingly.
7183 */
7184DECL_INLINE_THROW(uint32_t)
7185iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7186{
7187 Assert(fBits != 0);
7188#ifdef RT_ARCH_AMD64
7189
7190 if (fBits >= UINT32_MAX)
7191 {
7192 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7193
7194 /* test Ev,Gv */
7195 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7196 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7197 pbCodeBuf[off++] = 0x85;
7198        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7199
7200 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7201 }
7202 else if (fBits <= UINT32_MAX)
7203 {
7204 /* test Eb, imm8 or test Ev, imm32 */
7205 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7206 if (fBits <= UINT8_MAX)
7207 {
7208 if (iGprSrc >= 4)
7209 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7210 pbCodeBuf[off++] = 0xf6;
7211 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7212 pbCodeBuf[off++] = (uint8_t)fBits;
7213 }
7214 else
7215 {
7216 if (iGprSrc >= 8)
7217 pbCodeBuf[off++] = X86_OP_REX_B;
7218 pbCodeBuf[off++] = 0xf7;
7219 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7220 pbCodeBuf[off++] = RT_BYTE1(fBits);
7221 pbCodeBuf[off++] = RT_BYTE2(fBits);
7222 pbCodeBuf[off++] = RT_BYTE3(fBits);
7223 pbCodeBuf[off++] = RT_BYTE4(fBits);
7224 }
7225 }
7226 /** @todo implement me. */
7227 else
7228 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7229
7230#elif defined(RT_ARCH_ARM64)
7231 uint32_t uImmR = 0;
7232 uint32_t uImmNandS = 0;
7233 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7234 {
7235 /* ands xzr, iGprSrc, #fBits */
7236 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7237 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7238 }
7239 else
7240 {
7241 /* ands xzr, iGprSrc, iTmpReg */
7242 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7243 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7244 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7245 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7246 }
7247
7248#else
7249# error "Port me!"
7250#endif
7251 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7252 return off;
7253}
7254
7255
7256/**
7257 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7258 * @a iGprSrc, setting CPU flags accordingly.
7259 *
7260 * @note For ARM64 this only supports @a fBits values that can be expressed
7261 * using the two 6-bit immediates of the ANDS instruction. The caller
7262 * must make sure this is possible!
7263 */
7264DECL_FORCE_INLINE_THROW(uint32_t)
7265iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
7266{
7267 Assert(fBits != 0);
7268
7269#ifdef RT_ARCH_AMD64
7270 if (fBits <= UINT8_MAX)
7271 {
7272 /* test Eb, imm8 */
7273 if (iGprSrc >= 4)
7274 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7275 pCodeBuf[off++] = 0xf6;
7276 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7277 pCodeBuf[off++] = (uint8_t)fBits;
7278 }
7279 else
7280 {
7281 /* test Ev, imm32 */
7282 if (iGprSrc >= 8)
7283 pCodeBuf[off++] = X86_OP_REX_B;
7284 pCodeBuf[off++] = 0xf7;
7285 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7286 pCodeBuf[off++] = RT_BYTE1(fBits);
7287 pCodeBuf[off++] = RT_BYTE2(fBits);
7288 pCodeBuf[off++] = RT_BYTE3(fBits);
7289 pCodeBuf[off++] = RT_BYTE4(fBits);
7290 }
7291
7292#elif defined(RT_ARCH_ARM64)
7293 /* ands xzr, src, #fBits */
7294 uint32_t uImmR = 0;
7295 uint32_t uImmNandS = 0;
7296 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7297 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7298 else
7299# ifdef IEM_WITH_THROW_CATCH
7300 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7301# else
7302 AssertReleaseFailedStmt(off = UINT32_MAX);
7303# endif
7304
7305#else
7306# error "Port me!"
7307#endif
7308 return off;
7309}
7310
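/*
 * Sketch of the ARM64 restriction mentioned above (mask values illustrative):
 * contiguous bit runs like 0x00ff0000 are encodable as ANDS immediates,
 * whereas scattered patterns such as 0x00000005 are not; callers can check
 * with Armv8A64ConvertMask32ToImmRImmS() up front.
 * @code
 *      uint32_t uImmR = 0, uImmNandS = 0;
 *      if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
 *          off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxGprSrc, fBits);
 * @endcode
 */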
7311
7312
7313/**
7314 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7315 * @a iGprSrc, setting CPU flags accordingly.
7316 *
7317 * @note For ARM64 this only supports @a fBits values that can be expressed
7318 * using the two 6-bit immediates of the ANDS instruction. The caller
7319 * must make sure this is possible!
7320 */
7321DECL_FORCE_INLINE_THROW(uint32_t)
7322iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7323{
7324 Assert(fBits != 0);
7325
7326#ifdef RT_ARCH_AMD64
7327 /* test Eb, imm8 */
7328 if (iGprSrc >= 4)
7329 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7330 pCodeBuf[off++] = 0xf6;
7331 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7332 pCodeBuf[off++] = fBits;
7333
7334#elif defined(RT_ARCH_ARM64)
7335 /* ands xzr, src, #fBits */
7336 uint32_t uImmR = 0;
7337 uint32_t uImmNandS = 0;
7338 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7339 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7340 else
7341# ifdef IEM_WITH_THROW_CATCH
7342 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7343# else
7344 AssertReleaseFailedStmt(off = UINT32_MAX);
7345# endif
7346
7347#else
7348# error "Port me!"
7349#endif
7350 return off;
7351}
7352
7353
7354/**
7355 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7356 * @a iGprSrc, setting CPU flags accordingly.
7357 */
7358DECL_INLINE_THROW(uint32_t)
7359iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7360{
7361 Assert(fBits != 0);
7362
7363#ifdef RT_ARCH_AMD64
7364 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7365
7366#elif defined(RT_ARCH_ARM64)
7367 /* ands xzr, src, [tmp|#imm] */
7368 uint32_t uImmR = 0;
7369 uint32_t uImmNandS = 0;
7370 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7371 {
7372 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7373 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7374 }
7375 else
7376 {
7377        /* Use a temporary register for the immediate. */
7378 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7379 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7380 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7381 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7382 }
7383
7384#else
7385# error "Port me!"
7386#endif
7387 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7388 return off;
7389}
7390
7391
7392/**
7393 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
7394 * are set in @a iGprSrc.
7395 */
7396DECL_INLINE_THROW(uint32_t)
7397iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7398 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7399{
7400 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7401
7402 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7403 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7404
7405 return off;
7406}
7407
7408
7409/**
7410 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
7411 * are set in @a iGprSrc.
7412 */
7413DECL_INLINE_THROW(uint32_t)
7414iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7415 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7416{
7417 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7418
7419 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7420 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7421
7422 return off;
7423}
7424
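/*
 * Sketch (mask illustrative): branch when any of several flag bits are set.
 * Note the assertion that @a fBits isn't a power of two - single bits should
 * go through the bit-test emitters above instead.
 * @code
 *      off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxGprEfl,
 *                                                               X86_EFL_IF | X86_EFL_TF, idxLabel);
 * @endcode
 */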
7425
7426/**
7427 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7428 *
7429 * The operand size is given by @a f64Bit.
7430 */
7431DECL_FORCE_INLINE_THROW(uint32_t)
7432iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7433 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7434{
7435 Assert(idxLabel < pReNative->cLabels);
7436
7437#ifdef RT_ARCH_AMD64
7438 /* test reg32,reg32 / test reg64,reg64 */
7439 if (f64Bit)
7440 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7441 else if (iGprSrc >= 8)
7442 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7443 pCodeBuf[off++] = 0x85;
7444 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7445
7446    /* jnz/jz idxLabel */
7447 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7448 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7449
7450#elif defined(RT_ARCH_ARM64)
7451 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7452 {
7453 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7454 iGprSrc, f64Bit);
7455 off++;
7456 }
7457 else
7458 {
7459 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7460 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7461 }
7462
7463#else
7464# error "Port me!"
7465#endif
7466 return off;
7467}
7468
7469
7470/**
7471 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7472 *
7473 * The operand size is given by @a f64Bit.
7474 */
7475DECL_FORCE_INLINE_THROW(uint32_t)
7476iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7477 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7478{
7479#ifdef RT_ARCH_AMD64
7480 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7481 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7482#elif defined(RT_ARCH_ARM64)
7483 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7484 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7485#else
7486# error "Port me!"
7487#endif
7488 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7489 return off;
7490}
7491
7492
7493/**
7494 * Emits code that jumps to @a offTarget if @a iGprSrc is not zero.
7495 *
7496 * The operand size is given by @a f64Bit.
7497 */
7498DECL_FORCE_INLINE_THROW(uint32_t)
7499iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7500 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7501{
7502#ifdef RT_ARCH_AMD64
7503 /* test reg32,reg32 / test reg64,reg64 */
7504 if (f64Bit)
7505 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7506 else if (iGprSrc >= 8)
7507 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7508 pCodeBuf[off++] = 0x85;
7509 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7510
7511    /* jnz/jz offTarget */
7512 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget,
7513 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7514
7515#elif defined(RT_ARCH_ARM64)
7516 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(offTarget - off), iGprSrc, f64Bit);
7517 off++;
7518
7519#else
7520# error "Port me!"
7521#endif
7522 return off;
7523}
7524
7525
7526/**
7527 * Emits code that jumps to @a offTarget if @a iGprSrc is not zero.
7528 *
7529 * The operand size is given by @a f64Bit.
7530 */
7531DECL_FORCE_INLINE_THROW(uint32_t)
7532iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7533 bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7534{
7535#ifdef RT_ARCH_AMD64
7536 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7537 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7538#elif defined(RT_ARCH_ARM64)
7539 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1),
7540 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7541#else
7542# error "Port me!"
7543#endif
7544 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7545 return off;
7546}
7547
7548
7549/* if (Gpr1 == 0) Jmp idxLabel; */
7550
7551/**
7552 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7553 *
7554 * The operand size is given by @a f64Bit.
7555 */
7556DECL_FORCE_INLINE_THROW(uint32_t)
7557iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7558 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7559{
7560 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7561 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7562}
7563
7564
7565/**
7566 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7567 *
7568 * The operand size is given by @a f64Bit.
7569 */
7570DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7571 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7572{
7573 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7574}
7575
7576
7577/**
7578 * Emits code that jumps to a new label if @a iGprSrc is zero.
7579 *
7580 * The operand size is given by @a f64Bit.
7581 */
7582DECL_INLINE_THROW(uint32_t)
7583iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7584 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7585{
7586 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7587 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7588}
7589
7590
7591/**
7592 * Emits code that jumps to @a offTarget if @a iGprSrc is zero.
7593 *
7594 * The operand size is given by @a f64Bit.
7595 */
7596DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7597 uint8_t iGprSrc, bool f64Bit, uint32_t offTarget)
7598{
7599 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, offTarget);
7600}
7601
7602
7603/* if (Gpr1 != 0) Jmp idxLabel; */
7604
7605/**
7606 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7607 *
7608 * The operand size is given by @a f64Bit.
7609 */
7610DECL_FORCE_INLINE_THROW(uint32_t)
7611iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7612 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7613{
7614 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7615 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7616}
7617
7618
7619/**
7620 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7621 *
7622 * The operand size is given by @a f64Bit.
7623 */
7624DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7625 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7626{
7627 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7628}
7629
7630
7631/**
7632 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7633 *
7634 * The operand size is given by @a f64Bit.
7635 */
7636DECL_INLINE_THROW(uint32_t)
7637iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7638 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7639{
7640 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7641 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7642}
7643
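/*
 * Sketch (names illustrative): branch to a fresh label while a counter
 * register is non-zero; on ARM64 this compiles down to a single CBNZ.
 * The label type is a placeholder for whatever the caller uses.
 * @code
 *      off = iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(pReNative, off, idxGprCounter,
 *                                                            true, enmLabelType); // f64Bit=true
 * @endcode
 */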
7644
7645/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7646
7647/**
7648 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7649 * differ.
7650 */
7651DECL_INLINE_THROW(uint32_t)
7652iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7653 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7654{
7655 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7656 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7657 return off;
7658}
7659
7660
7661/**
7662 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
7663 */
7664DECL_INLINE_THROW(uint32_t)
7665iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7666 uint8_t iGprLeft, uint8_t iGprRight,
7667 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7668{
7669 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7670 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7671}
7672
7673
7674/* if (Gpr != Imm) Jmp idxLabel; */
7675
7676/**
7677 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7678 */
7679DECL_INLINE_THROW(uint32_t)
7680iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7681 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7682{
7683 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7684 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7685 return off;
7686}
7687
7688
7689/**
7690 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7691 */
7692DECL_INLINE_THROW(uint32_t)
7693iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7694 uint8_t iGprSrc, uint64_t uImm,
7695 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7696{
7697 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7698 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7699}
7700
7701
7702/**
7703 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7704 * @a uImm.
7705 */
7706DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7707 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7708{
7709 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7710 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7711 return off;
7712}
7713
7714
7715/**
7716 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7717 * @a uImm.
7718 */
7719DECL_INLINE_THROW(uint32_t)
7720iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7721 uint8_t iGprSrc, uint32_t uImm,
7722 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7723{
7724 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7725 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7726}
7727
7728
7729/**
7730 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7731 * @a uImm.
7732 */
7733DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7734 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7735{
7736 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7737 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7738 return off;
7739}
7740
7741
7742/**
7743 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7744 * @a uImm.
7745 */
7746DECL_INLINE_THROW(uint32_t)
7747iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7748 uint8_t iGprSrc, uint16_t uImm,
7749 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7750{
7751 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7752 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7753}
7754
7755
7756/* if (Gpr == Imm) Jmp idxLabel; */
7757
7758/**
7759 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7760 */
7761DECL_INLINE_THROW(uint32_t)
7762iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7763 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7764{
7765 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7766 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7767 return off;
7768}
7769
7770
7771/**
7772 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7773 */
7774DECL_INLINE_THROW(uint32_t)
7775iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7776 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7777{
7778 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7779 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7780}
7781
7782
7783/**
7784 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7785 */
7786DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7787 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7788{
7789 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7790 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7791 return off;
7792}
7793
7794
7795/**
7796 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7797 */
7798DECL_INLINE_THROW(uint32_t)
7799iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7800 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7801{
7802 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7803 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7804}
7805
7806
7807/**
7808 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7809 *
7810 * @note ARM64: Helper register is required (idxTmpReg).
7811 */
7812DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7813 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7814 uint8_t idxTmpReg = UINT8_MAX)
7815{
7816 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7817 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7818 return off;
7819}
7820
7821
7822/**
7823 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7824 *
7825 * @note ARM64: Helper register is required (idxTmpReg).
7826 */
7827DECL_INLINE_THROW(uint32_t)
7828iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7829 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7830 uint8_t idxTmpReg = UINT8_MAX)
7831{
7832 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7833 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7834}
7835
7836
7837
7838/*********************************************************************************************************************************
7839* Indirect Jumps. *
7840*********************************************************************************************************************************/
7841
7842/**
7843 * Emits an indirect jump to a 64-bit address in a GPR.
7844 */
7845DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpViaGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc)
7846{
7847#ifdef RT_ARCH_AMD64
7848 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7849 if (iGprSrc >= 8)
7850 pCodeBuf[off++] = X86_OP_REX_B;
7851 pCodeBuf[off++] = 0xff;
7852 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7853
7854#elif defined(RT_ARCH_ARM64)
7855 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7856 pCodeBuf[off++] = Armv8A64MkInstrBr(iGprSrc);
7857
7858#else
7859# error "port me"
7860#endif
7861 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7862 return off;
7863}
7864
7865
7866/**
7867 * Emits an indirect jump to an immediate 64-bit address (uses the temporary GPR).
7868 */
7869DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7870{
7871 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7872 return iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP0);
7873}
7874
7875
7876/*********************************************************************************************************************************
7877* Calls. *
7878*********************************************************************************************************************************/
7879
7880/**
7881 * Emits a call to a 64-bit address.
7882 */
7883DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7884{
7885#ifdef RT_ARCH_AMD64
7886 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7887
7888 /* call rax */
7889 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7890 pbCodeBuf[off++] = 0xff;
7891 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7892
7893#elif defined(RT_ARCH_ARM64)
7894 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7895
7896 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7897 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7898
7899#else
7900# error "port me"
7901#endif
7902 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7903 return off;
7904}
7905
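/*
 * Typical helper-call sketch: load the first argument register and call a C
 * helper.  IEMNATIVE_CALL_ARG0_GREG and IEMNATIVE_REG_FIXED_PVMCPU are
 * assumed to be the defines from IEMN8veRecompiler.h; the helper name is
 * made up.  Note that on AMD64 this clobbers xAX.
 * @code
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExample);
 * @endcode
 */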
7906
7907/**
7908 * Emits code to load a stack variable into an argument GPR.
7909 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7910 */
7911DECL_FORCE_INLINE_THROW(uint32_t)
7912iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7913 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7914 bool fSpilledVarsInVolatileRegs = false)
7915{
7916 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7917 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7918 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7919
7920 uint8_t const idxRegVar = pVar->idxReg;
7921 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7922 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7923 || !fSpilledVarsInVolatileRegs ))
7924 {
7925 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7926 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7927 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7928 if (!offAddend)
7929 {
7930 if (idxRegArg != idxRegVar)
7931 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7932 }
7933 else
7934 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7935 }
7936 else
7937 {
7938 uint8_t const idxStackSlot = pVar->idxStackSlot;
7939 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7940 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7941 if (offAddend)
7942 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7943 }
7944 return off;
7945}
7946
7947
7948/**
7949 * Emits code to load a stack or immediate variable value into an argument GPR,
7950 * optionally with an addend.
7951 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7952 */
7953DECL_FORCE_INLINE_THROW(uint32_t)
7954iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7955 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7956 bool fSpilledVarsInVolatileRegs = false)
7957{
7958 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7959 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7960 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7961 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7962 else
7963 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7964 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7965 return off;
7966}
7967
7968
7969/**
7970 * Emits code to load the variable address into an argument GPR.
7971 *
7972 * This only works for uninitialized and stack variables.
7973 */
7974DECL_FORCE_INLINE_THROW(uint32_t)
7975iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7976 bool fFlushShadows)
7977{
7978 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7979 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7980 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7981 || pVar->enmKind == kIemNativeVarKind_Stack,
7982 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7983 AssertStmt(!pVar->fSimdReg,
7984 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7985
7986 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7987 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7988
7989 uint8_t const idxRegVar = pVar->idxReg;
7990 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7991 {
7992 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7993 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7994 Assert(pVar->idxReg == UINT8_MAX);
7995 }
7996 Assert( pVar->idxStackSlot != UINT8_MAX
7997 && pVar->idxReg == UINT8_MAX);
7998
7999 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8000}
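
/*
 * Usage sketch, illustration only (hence not compiled): marshalling two
 * variables into call argument registers ahead of a helper call. The
 * variable indexes idxVarValue/idxVarByRef are placeholders;
 * IEMNATIVE_CALL_ARG0_GREG/ARG1_GREG are the regular argument register
 * defines from IEMN8veRecompiler.h.
 */
#if 0
    /* Value of the first variable (immediate or stack) into argument 0. */
    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxVarValue);
    /* Address of the second (stack) variable into argument 1, flushing shadows. */
    off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarByRef,
                                              true /*fFlushShadows*/);
#endif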
8001
8002
8003/*********************************************************************************************************************************
8004* TB exiting helpers. *
8005*********************************************************************************************************************************/
8006
8007/**
8008 * Emits a Jcc rel32 / B.cc imm19 to the TB exit with the given reason (ASSUMED requiring fixup).
8009 */
8010DECL_FORCE_INLINE_THROW(uint32_t)
8011iemNativeEmitJccTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8012 IEMNATIVELABELTYPE enmExitReason, IEMNATIVEINSTRCOND enmCond)
8013{
8014 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8015#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8016 /* jcc rel32 */
8017 pCodeBuf[off++] = 0x0f;
8018 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
8019 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8020 pCodeBuf[off++] = 0x00;
8021 pCodeBuf[off++] = 0x00;
8022 pCodeBuf[off++] = 0x00;
8023 pCodeBuf[off++] = 0x00;
8024
8025#else
8026 /* ARM64 doesn't have the necessary jump range, so we jump via local label
8027 just like when we keep everything local. */
8028 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8029 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel, enmCond);
8030#endif
8031 return off;
8032}
8033
8034
8035/**
8036 * Emits a Jcc rel32 / B.cc imm19 to the TB exit routine with the given reason.
8037 */
8038DECL_INLINE_THROW(uint32_t)
8039iemNativeEmitJccTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason, IEMNATIVEINSTRCOND enmCond)
8040{
8041 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8042#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8043# ifdef RT_ARCH_AMD64
8044 off = iemNativeEmitJccTbExitEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, enmExitReason, enmCond);
8045# elif defined(RT_ARCH_ARM64)
8046 off = iemNativeEmitJccTbExitEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 2), off, enmExitReason, enmCond);
8047# else
8048# error "Port me!"
8049# endif
8050 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8051 return off;
8052#else
8053 return iemNativeEmitJccToNewLabel(pReNative, off, enmExitReason, 0 /*uData*/, enmCond);
8054#endif
8055}
8056
8057
8058/**
8059 * Emits a JNZ/JNE rel32 / B.NE imm19 to the TB exit routine with the given reason.
8060 */
8061DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8062{
8063#ifdef RT_ARCH_AMD64
8064 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_ne);
8065#elif defined(RT_ARCH_ARM64)
8066 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Ne);
8067#else
8068# error "Port me!"
8069#endif
8070}
8071
8072
8073/**
8074 * Emits a JZ/JE rel32 / B.EQ imm19 to the TB exit routine with the given reason.
8075 */
8076DECL_INLINE_THROW(uint32_t) iemNativeEmitJzTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8077{
8078#ifdef RT_ARCH_AMD64
8079 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_e);
8080#elif defined(RT_ARCH_ARM64)
8081 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Eq);
8082#else
8083# error "Port me!"
8084#endif
8085}
8086
8087
8088/**
8089 * Emits a JA/JNBE rel32 / B.HI imm19 to the TB exit.
8090 */
8091DECL_INLINE_THROW(uint32_t) iemNativeEmitJaTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8092{
8093#ifdef RT_ARCH_AMD64
8094 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_nbe);
8095#elif defined(RT_ARCH_ARM64)
8096 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Hi);
8097#else
8098# error "Port me!"
8099#endif
8100}
8101
8102
8103/**
8104 * Emits a JL/JNGE rel32 / B.LT imm19 to the TB exit with the given reason.
8105 */
8106DECL_INLINE_THROW(uint32_t) iemNativeEmitJlTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8107{
8108#ifdef RT_ARCH_AMD64
8109 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_l);
8110#elif defined(RT_ARCH_ARM64)
8111 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Lt);
8112#else
8113# error "Port me!"
8114#endif
8115}
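
/*
 * Usage sketch, illustration only (hence not compiled): compare and
 * conditionally leave the TB. kIemNativeLabelType_RaiseGp0 is used as a
 * plausible exit reason; idxRegValue and uLimit are placeholders.
 */
#if 0
    off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegValue, uLimit);
    /* Take the exit when idxRegValue > uLimit (unsigned compare). */
    off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
#endif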
8116
8117
8118DECL_INLINE_THROW(uint32_t)
8119iemNativeEmitTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8120{
8121 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8122#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8123# ifdef RT_ARCH_AMD64
8124 /* jmp rel32 */
8125 pCodeBuf[off++] = 0xe9;
8126 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8127 pCodeBuf[off++] = 0xfe;
8128 pCodeBuf[off++] = 0xff;
8129 pCodeBuf[off++] = 0xff;
8130 pCodeBuf[off++] = 0xff;
8131
8132# elif defined(RT_ARCH_ARM64)
8133 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8134 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8135
8136# else
8137# error "Port me!"
8138# endif
8139 return off;
8140
8141#else
8142 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8143 return iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabel);
8144#endif
8145}
8146
8147
8148DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8149{
8150 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8151#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8152# ifdef RT_ARCH_AMD64
8153 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8154
8155 /* jmp rel32 */
8156 pCodeBuf[off++] = 0xe9;
8157 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8158 pCodeBuf[off++] = 0xfe;
8159 pCodeBuf[off++] = 0xff;
8160 pCodeBuf[off++] = 0xff;
8161 pCodeBuf[off++] = 0xff;
8162
8163# elif defined(RT_ARCH_ARM64)
8164 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8165 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8166 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8167
8168# else
8169# error "Port me!"
8170# endif
8171 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8172 return off;
8173
8174#else
8175 return iemNativeEmitJmpToNewLabel(pReNative, off, enmExitReason);
8176#endif
8177}
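
/*
 * Usage sketch, illustration only (hence not compiled): an unconditional TB
 * exit, e.g. after detecting that the TB has become obsolete. The label
 * value is a plausible example.
 */
#if 0
    off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ObsoleteTb);
#endif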
8178
8179
8180/**
8181 * Emits a jump to the TB exit with @a enmExitReason on the condition _any_ of the bits in @a fBits
8182 * are set in @a iGprSrc.
8183 */
8184DECL_INLINE_THROW(uint32_t)
8185iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8186 uint8_t iGprSrc, uint64_t fBits, IEMNATIVELABELTYPE enmExitReason)
8187{
8188 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8189
8190 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8191 return iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8192}
8193
8194
8195/**
8196 * Emits a jump to the TB exit with @a enmExitReason on the condition _none_ of the bits in @a fBits
8197 * are set in @a iGprSrc.
8198 */
8199DECL_INLINE_THROW(uint32_t)
8200iemNativeEmitTestAnyBitsInGprAndTbExitIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8201 uint8_t iGprSrc, uint64_t fBits, IEMNATIVELABELTYPE enmExitReason)
8202{
8203 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8204
8205 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8206 return iemNativeEmitJzTbExit(pReNative, off, enmExitReason);
8207}
8208
8209
8210/**
8211 * Emits code that exits the TB with the given reason if @a iGprLeft and @a iGprRight
8212 * differ.
8213 */
8214DECL_INLINE_THROW(uint32_t)
8215iemNativeEmitTestIfGprNotEqualGprAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8216 uint8_t iGprLeft, uint8_t iGprRight, IEMNATIVELABELTYPE enmExitReason)
8217{
8218 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
8219 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8220 return off;
8221}
8222
8223
8224/**
8225 * Emits code that exits the current TB with the given reason if 32-bit @a iGprSrc differs from
8226 * @a uImm.
8227 */
8228DECL_INLINE_THROW(uint32_t)
8229iemNativeEmitTestIfGpr32NotEqualImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8230 uint8_t iGprSrc, uint32_t uImm, IEMNATIVELABELTYPE enmExitReason)
8231{
8232 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8233 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8234 return off;
8235}
8236
8237
8238/**
8239 * Emits code that exits the current TB if @a iGprSrc differs from @a uImm.
8240 */
8241DECL_INLINE_THROW(uint32_t)
8242iemNativeEmitTestIfGprNotEqualImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8243 uint8_t iGprSrc, uint64_t uImm, IEMNATIVELABELTYPE enmExitReason)
8244{
8245 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8246 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8247 return off;
8248}
8249
8250
8251/**
8252 * Emits code that exits the current TB with the given reason if 32-bit @a iGprSrc equals @a uImm.
8253 */
8254DECL_INLINE_THROW(uint32_t)
8255iemNativeEmitTestIfGpr32EqualsImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8256 uint8_t iGprSrc, uint32_t uImm, IEMNATIVELABELTYPE enmExitReason)
8257{
8258 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8259 off = iemNativeEmitJzTbExit(pReNative, off, enmExitReason);
8260 return off;
8261}
8262
8263
8264/**
8265 * Emits code to exit the current TB with the reason @a enmExitReason on the condition that bit @a iBitNo _is_ _set_ in
8266 * @a iGprSrc.
8267 *
8268 * @note On ARM64 the range is only +/-8191 instructions.
8269 */
8270DECL_INLINE_THROW(uint32_t)
8271iemNativeEmitTestBitInGprAndTbExitIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8272 uint8_t iGprSrc, uint8_t iBitNo, IEMNATIVELABELTYPE enmExitReason)
8273{
8274 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8275#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8276 Assert(iBitNo < 64);
8277 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8278 if (iBitNo < 8)
8279 {
8280 /* test Eb, imm8 */
8281 if (iGprSrc >= 4)
8282 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
8283 pbCodeBuf[off++] = 0xf6;
8284 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
8285 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
8286 off = iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_ne);
8287 }
8288 else
8289 {
8290 /* bt Ev, imm8 */
8291 if (iBitNo >= 32)
8292 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8293 else if (iGprSrc >= 8)
8294 pbCodeBuf[off++] = X86_OP_REX_B;
8295 pbCodeBuf[off++] = 0x0f;
8296 pbCodeBuf[off++] = 0xba;
8297 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8298 pbCodeBuf[off++] = iBitNo;
8299 off = iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_c);
8300 }
8301 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8302 return off;
8303
8304#else
8305 /* ARM64 doesn't have the necessary jump range, so we jump via local label
8306 just like when we keep everything local. */
8307 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8308 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
8309#endif
8310}
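
/*
 * Usage sketch, illustration only (hence not compiled): leave the TB when a
 * flag bit is set. Bit number, register and exit reason are plausible
 * examples only.
 */
#if 0
    /* Exit via the ReturnBreak path if bit 9 is set in idxRegFlags. */
    off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxRegFlags, 9 /*iBitNo*/,
                                                  kIemNativeLabelType_ReturnBreak);
#endif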
8311
8312
8313/**
8314 * Emits code that exits the current TB with @a enmExitReason if @a iGprSrc is not zero.
8315 *
8316 * The operand size is given by @a f64Bit.
8317 */
8318DECL_FORCE_INLINE_THROW(uint32_t)
8319iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8320 uint8_t iGprSrc, bool f64Bit, IEMNATIVELABELTYPE enmExitReason)
8321{
8322 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8323#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8324 /* test reg32,reg32 / test reg64,reg64 */
8325 if (f64Bit)
8326 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8327 else if (iGprSrc >= 8)
8328 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8329 pCodeBuf[off++] = 0x85;
8330 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8331
8332 /* jnz idxLabel */
8333 return iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, enmExitReason, kIemNativeInstrCond_ne);
8334
8335#else
8336 /* ARM64 doesn't have the necessary jump range, so we jump via local label
8337 just like when we keep everything local. */
8338 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8339 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8340 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8341#endif
8342}
8343
8344
8345/**
8346 * Emits code to exit the current TB with the given reason @a enmExitReason if @a iGprSrc is not zero.
8347 *
8348 * The operand size is given by @a f64Bit.
8349 */
8350DECL_INLINE_THROW(uint32_t)
8351iemNativeEmitTestIfGprIsNotZeroAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8352 uint8_t iGprSrc, bool f64Bit, IEMNATIVELABELTYPE enmExitReason)
8353{
8354#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8355 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
8356 off, iGprSrc, f64Bit, enmExitReason);
8357 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8358 return off;
8359#else
8360 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8361 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8362#endif
8363}
8364
8365
8366#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8367/*********************************************************************************************************************************
8368* SIMD helpers. *
8369*********************************************************************************************************************************/
8370
8371
8372/**
8373 * Emits code to load the variable address into an argument GPR.
8374 *
8375 * This is a special variant intended for SIMD variables only and is only
8376 * called by the TLB miss path in the memory fetch/store code: the value is
8377 * passed by reference there, so both the register and the stack copy are
8378 * needed, depending on which path is taken (TLB hit vs. miss).
8379 */
8380DECL_FORCE_INLINE_THROW(uint32_t)
8381iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8382 bool fSyncRegWithStack = true)
8383{
8384 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8385 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8386 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8387 || pVar->enmKind == kIemNativeVarKind_Stack,
8388 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8389 AssertStmt(pVar->fSimdReg,
8390 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8391 Assert( pVar->idxStackSlot != UINT8_MAX
8392 && pVar->idxReg != UINT8_MAX);
8393
8394 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8395 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8396
8397 uint8_t const idxRegVar = pVar->idxReg;
8398 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8399 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
8400
8401 if (fSyncRegWithStack)
8402 {
8403 if (pVar->cbVar == sizeof(RTUINT128U))
8404 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
8405 else
8406 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
8407 }
8408
8409 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8410}
8411
8412
8413/**
8414 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
8415 *
8416 * This is a special helper only called by the TLB miss path in the memory
8417 * fetch/store code: the value is passed by reference there and ends up on
8418 * the stack, so the assigned host register must be reloaded from the stack
8419 * copy once the helper is done.
8420 */
8421DECL_FORCE_INLINE_THROW(uint32_t)
8422iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
8423{
8424 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8425 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8426 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8427 || pVar->enmKind == kIemNativeVarKind_Stack,
8428 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8429 AssertStmt(pVar->fSimdReg,
8430 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8431 Assert( pVar->idxStackSlot != UINT8_MAX
8432 && pVar->idxReg != UINT8_MAX);
8433
8434 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8435 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8436
8437 uint8_t const idxRegVar = pVar->idxReg;
8438 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8439 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
8440
8441 if (pVar->cbVar == sizeof(RTUINT128U))
8442 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
8443 else
8444 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
8445
8446 return off;
8447}
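
/*
 * Usage sketch, illustration only (hence not compiled): the TLB miss path
 * passes a SIMD variable by reference, so the register copy is flushed to
 * the stack and its address handed to the helper, after which the register
 * is reloaded from the (possibly updated) stack copy. idxVarDst is assumed
 * to be a SIMD stack variable with a host register assigned.
 */
#if 0
    off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarDst);
    /* ... emit the helper call here ... */
    off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarDst);
#endif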
8448
8449
8450/**
8451 * Emits a gprdst = ~gprsrc (bitwise NOT) operation.
8452 */
8453DECL_FORCE_INLINE_THROW(uint32_t)
8454iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
8455{
8456#ifdef RT_ARCH_AMD64
8457 if (iGprDst != iGprSrc)
8458 {
8459 /* mov gprdst, gprsrc. */
8460 if (f64Bit)
8461 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
8462 else
8463 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
8464 }
8465
8466 /* not gprdst */
8467 if (f64Bit || iGprDst >= 8)
8468 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
8469 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
8470 pCodeBuf[off++] = 0xf7;
8471 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
8472#elif defined(RT_ARCH_ARM64)
8473 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
8474#else
8475# error "port me"
8476#endif
8477 return off;
8478}
8479
8480
8481/**
8482 * Emits a gprdst = ~gprsrc (bitwise NOT) operation.
8483 */
8484DECL_INLINE_THROW(uint32_t)
8485iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
8486{
8487#ifdef RT_ARCH_AMD64
8488 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
8489#elif defined(RT_ARCH_ARM64)
8490 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
8491#else
8492# error "port me"
8493#endif
8494 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8495 return off;
8496}
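
/*
 * Usage sketch, illustration only (hence not compiled): computing a bitwise
 * complement into the fixed temporary, e.g. to turn a set-mask into a
 * clear-mask. The source register is a placeholder.
 */
#if 0
    /* tmp0 = ~idxRegMask; a single ORN on ARM64, MOV+NOT on AMD64. */
    off = iemNativeEmitInvBitsGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMask);
#endif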
8497
8498
8499/**
8500 * Emits a 128-bit vector register store to a VCpu value.
8501 */
8502DECL_FORCE_INLINE_THROW(uint32_t)
8503iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8504{
8505#ifdef RT_ARCH_AMD64
8506 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
8507 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8508 if (iVecReg >= 8)
8509 pCodeBuf[off++] = X86_OP_REX_R;
8510 pCodeBuf[off++] = 0x0f;
8511 pCodeBuf[off++] = 0x7f;
8512 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8513#elif defined(RT_ARCH_ARM64)
8514 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
8515
8516#else
8517# error "port me"
8518#endif
8519 return off;
8520}
8521
8522
8523/**
8524 * Emits a 128-bit vector register store to a VCpu value.
8525 */
8526DECL_INLINE_THROW(uint32_t)
8527iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8528{
8529#ifdef RT_ARCH_AMD64
8530 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
8531#elif defined(RT_ARCH_ARM64)
8532 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
8533#else
8534# error "port me"
8535#endif
8536 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8537 return off;
8538}
8539
8540
8541/**
8542 * Emits a high 128-bit vector register store to a VCpu value.
8543 */
8544DECL_FORCE_INLINE_THROW(uint32_t)
8545iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8546{
8547#ifdef RT_ARCH_AMD64
8548 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
8549 pCodeBuf[off++] = X86_OP_VEX3;
8550 if (iVecReg >= 8)
8551 pCodeBuf[off++] = 0x63;
8552 else
8553 pCodeBuf[off++] = 0xe3;
8554 pCodeBuf[off++] = 0x7d;
8555 pCodeBuf[off++] = 0x39;
8556 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8557 pCodeBuf[off++] = 0x01; /* Immediate */
8558#elif defined(RT_ARCH_ARM64)
8559 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
8560#else
8561# error "port me"
8562#endif
8563 return off;
8564}
8565
8566
8567/**
8568 * Emits a high 128-bit vector register store to a VCpu value.
8569 */
8570DECL_INLINE_THROW(uint32_t)
8571iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8572{
8573#ifdef RT_ARCH_AMD64
8574 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8575#elif defined(RT_ARCH_ARM64)
8576 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8577 Assert(!(iVecReg & 0x1));
8578 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8579#else
8580# error "port me"
8581#endif
8582 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8583 return off;
8584}
8585
8586
8587/**
8588 * Emits a 128-bit vector register load of a VCpu value.
8589 */
8590DECL_FORCE_INLINE_THROW(uint32_t)
8591iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8592{
8593#ifdef RT_ARCH_AMD64
8594 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
8595 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8596 if (iVecReg >= 8)
8597 pCodeBuf[off++] = X86_OP_REX_R;
8598 pCodeBuf[off++] = 0x0f;
8599 pCodeBuf[off++] = 0x6f;
8600 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8601#elif defined(RT_ARCH_ARM64)
8602 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8603
8604#else
8605# error "port me"
8606#endif
8607 return off;
8608}
8609
8610
8611/**
8612 * Emits a 128-bit vector register load of a VCpu value.
8613 */
8614DECL_INLINE_THROW(uint32_t)
8615iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8616{
8617#ifdef RT_ARCH_AMD64
8618 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
8619#elif defined(RT_ARCH_ARM64)
8620 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
8621#else
8622# error "port me"
8623#endif
8624 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8625 return off;
8626}
8627
8628
8629/**
8630 * Emits a high 128-bit vector register load of a VCpu value.
8631 */
8632DECL_FORCE_INLINE_THROW(uint32_t)
8633iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8634{
8635#ifdef RT_ARCH_AMD64
8636 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
8637 pCodeBuf[off++] = X86_OP_VEX3;
8638 if (iVecReg >= 8)
8639 pCodeBuf[off++] = 0x63;
8640 else
8641 pCodeBuf[off++] = 0xe3;
8642 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8643 pCodeBuf[off++] = 0x38;
8644 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8645 pCodeBuf[off++] = 0x01; /* Immediate */
8646#elif defined(RT_ARCH_ARM64)
8647 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8648#else
8649# error "port me"
8650#endif
8651 return off;
8652}
8653
8654
8655/**
8656 * Emits a high 128-bit vector register load of a VCpu value.
8657 */
8658DECL_INLINE_THROW(uint32_t)
8659iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8660{
8661#ifdef RT_ARCH_AMD64
8662 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8663#elif defined(RT_ARCH_ARM64)
8664 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8665 Assert(!(iVecReg & 0x1));
8666 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8667#else
8668# error "port me"
8669#endif
8670 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8671 return off;
8672}
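
/*
 * Usage sketch, illustration only (hence not compiled): loading a full
 * 256-bit guest YMM register as a low/high pair. The CPUMCTX member paths
 * follow the usual XState layout but should be treated as assumptions here,
 * and idxSimdReg/iYReg are placeholders.
 */
#if 0
    off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxSimdReg,
                                                     RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.aXMM[iYReg]));
    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxSimdReg,
                                                      RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[iYReg]));
#endif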
8673
8674
8675/**
8676 * Emits a vecdst = vecsrc load.
8677 */
8678DECL_FORCE_INLINE(uint32_t)
8679iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8680{
8681#ifdef RT_ARCH_AMD64
8682 /* movdqu vecdst, vecsrc */
8683 pCodeBuf[off++] = 0xf3;
8684
8685 if ((iVecRegDst | iVecRegSrc) >= 8)
8686 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
8687 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
8688 : X86_OP_REX_R;
8689 pCodeBuf[off++] = 0x0f;
8690 pCodeBuf[off++] = 0x6f;
8691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8692
8693#elif defined(RT_ARCH_ARM64)
8694 /* mov dst, src; alias for: orr dst, src, src */
8695 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
8696
8697#else
8698# error "port me"
8699#endif
8700 return off;
8701}
8702
8703
8704/**
8705 * Emits a vecdst = vecsrc load, 128-bit.
8706 */
8707DECL_INLINE_THROW(uint32_t)
8708iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8709{
8710#ifdef RT_ARCH_AMD64
8711 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8712#elif defined(RT_ARCH_ARM64)
8713 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8714#else
8715# error "port me"
8716#endif
8717 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8718 return off;
8719}
8720
8721
8722/**
8723 * Emits a vecdst[128:255] = vecsrc[128:255] load.
8724 */
8725DECL_FORCE_INLINE_THROW(uint32_t)
8726iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8727{
8728#ifdef RT_ARCH_AMD64
8729 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
8730 pCodeBuf[off++] = X86_OP_VEX3;
8731 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8732 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8733 pCodeBuf[off++] = 0x46;
8734 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8735 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
8736
8737#elif defined(RT_ARCH_ARM64)
8738 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8739
8740 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
8741# ifdef IEM_WITH_THROW_CATCH
8742 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8743# else
8744 AssertReleaseFailedStmt(off = UINT32_MAX);
8745# endif
8746#else
8747# error "port me"
8748#endif
8749 return off;
8750}
8751
8752
8753/**
8754 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
8755 */
8756DECL_INLINE_THROW(uint32_t)
8757iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8758{
8759#ifdef RT_ARCH_AMD64
8760 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8761#elif defined(RT_ARCH_ARM64)
8762 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8763 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
8764#else
8765# error "port me"
8766#endif
8767 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8768 return off;
8769}
8770
8771
8772/**
8773 * Emits a vecdst[0:127] = vecsrc[128:255] load.
8774 */
8775DECL_FORCE_INLINE_THROW(uint32_t)
8776iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8777{
8778#ifdef RT_ARCH_AMD64
8779 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
8780 pCodeBuf[off++] = X86_OP_VEX3;
8781 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
8782 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8783 pCodeBuf[off++] = 0x39;
8784 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
8785 pCodeBuf[off++] = 0x1;
8786
8787#elif defined(RT_ARCH_ARM64)
8788 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8789
8790 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
8791# ifdef IEM_WITH_THROW_CATCH
8792 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8793# else
8794 AssertReleaseFailedStmt(off = UINT32_MAX);
8795# endif
8796#else
8797# error "port me"
8798#endif
8799 return off;
8800}
8801
8802
8803/**
8804 * Emits a vecdst[0:127] = vecsrc[128:255] load, high 128-bit.
8805 */
8806DECL_INLINE_THROW(uint32_t)
8807iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8808{
8809#ifdef RT_ARCH_AMD64
8810 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8811#elif defined(RT_ARCH_ARM64)
8812 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8813 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
8814#else
8815# error "port me"
8816#endif
8817 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8818 return off;
8819}
8820
8821
8822/**
8823 * Emits a vecdst = vecsrc load, 256-bit.
8824 */
8825DECL_INLINE_THROW(uint32_t)
8826iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8827{
8828#ifdef RT_ARCH_AMD64
8829 /* vmovdqa ymm, ymm */
8830 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8831 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
8832 {
8833 pbCodeBuf[off++] = X86_OP_VEX3;
8834 pbCodeBuf[off++] = 0x41;
8835 pbCodeBuf[off++] = 0x7d;
8836 pbCodeBuf[off++] = 0x6f;
8837 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8838 }
8839 else
8840 {
8841 pbCodeBuf[off++] = X86_OP_VEX2;
8842 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
8843 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
8844 pbCodeBuf[off++] = iVecRegSrc >= 8
8845 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
8846 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8847 }
8848#elif defined(RT_ARCH_ARM64)
8849 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8850 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
8851 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
8852 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
8853#else
8854# error "port me"
8855#endif
8856 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8857 return off;
8858}
8859
8860
8861/**
8862 * Emits a vecdst[128:255] = vecsrc[0:127] load.
8863 */
8864DECL_FORCE_INLINE(uint32_t)
8865iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8866{
8867#ifdef RT_ARCH_AMD64
8868 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
8869 pCodeBuf[off++] = X86_OP_VEX3;
8870 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8871 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8872 pCodeBuf[off++] = 0x38;
8873 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8874 pCodeBuf[off++] = 0x01; /* Immediate */
8875
8876#elif defined(RT_ARCH_ARM64)
8877 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8878 /* mov dst, src; alias for: orr dst, src, src */
8879 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
8880
8881#else
8882# error "port me"
8883#endif
8884 return off;
8885}
8886
8887
8888/**
8889 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
8890 */
8891DECL_INLINE_THROW(uint32_t)
8892iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8893{
8894#ifdef RT_ARCH_AMD64
8895 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8896#elif defined(RT_ARCH_ARM64)
8897 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8898#else
8899# error "port me"
8900#endif
8901 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8902 return off;
8903}
8904
8905
8906/**
8907 * Emits a gprdst = vecsrc[x] load, 64-bit.
8908 */
8909DECL_FORCE_INLINE(uint32_t)
8910iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8911{
8912#ifdef RT_ARCH_AMD64
8913 if (iQWord >= 2)
8914 {
8915 /*
8916 * vpextrq doesn't work on the upper 128-bits.
8917 * So we use the following sequence:
8918 * vextracti128 vectmp0, vecsrc, 1
8919 * pextrq gpr, vectmp0, #(iQWord - 2)
8920 */
8921 /* vextracti128 */
8922 pCodeBuf[off++] = X86_OP_VEX3;
8923 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
8924 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8925 pCodeBuf[off++] = 0x39;
8926 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8927 pCodeBuf[off++] = 0x1;
8928
8929 /* pextrq */
8930 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8931 pCodeBuf[off++] = X86_OP_REX_W
8932 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8933 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8934 pCodeBuf[off++] = 0x0f;
8935 pCodeBuf[off++] = 0x3a;
8936 pCodeBuf[off++] = 0x16;
8937 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
8938 pCodeBuf[off++] = iQWord - 2;
8939 }
8940 else
8941 {
8942 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
8943 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8944 pCodeBuf[off++] = X86_OP_REX_W
8945 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8946 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8947 pCodeBuf[off++] = 0x0f;
8948 pCodeBuf[off++] = 0x3a;
8949 pCodeBuf[off++] = 0x16;
8950 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8951 pCodeBuf[off++] = iQWord;
8952 }
8953#elif defined(RT_ARCH_ARM64)
8954 /* umov gprdst, vecsrc[iQWord] */
8955 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8956#else
8957# error "port me"
8958#endif
8959 return off;
8960}
8961
8962
8963/**
8964 * Emits a gprdst = vecsrc[x] load, 64-bit.
8965 */
8966DECL_INLINE_THROW(uint32_t)
8967iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8968{
8969 Assert(iQWord <= 3);
8970
8971#ifdef RT_ARCH_AMD64
8972 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iVecRegSrc, iQWord);
8973#elif defined(RT_ARCH_ARM64)
8974 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8975 Assert(!(iVecRegSrc & 0x1));
8976 /* Need to access the "high" 128-bit vector register. */
8977 if (iQWord >= 2)
8978 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
8979 else
8980 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
8981#else
8982# error "port me"
8983#endif
8984 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8985 return off;
8986}
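
/*
 * Usage sketch, illustration only (hence not compiled): pulling both qwords
 * of the low 128 bits of a vector register into GPRs; register numbers are
 * placeholders.
 */
#if 0
    off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegLo, idxSimdReg, 0 /*iQWord*/);
    off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegHi, idxSimdReg, 1 /*iQWord*/);
#endif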
8987
8988
8989/**
8990 * Emits a gprdst = vecsrc[x] load, 32-bit.
8991 */
8992DECL_FORCE_INLINE(uint32_t)
8993iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8994{
8995#ifdef RT_ARCH_AMD64
8996 if (iDWord >= 4)
8997 {
8998 /*
8999 * vpextrd doesn't work on the upper 128-bits.
9000 * So we use the following sequence:
9001 * vextracti128 vectmp0, vecsrc, 1
9002 * pextrd gpr, vectmp0, #(iDWord - 4)
9003 */
9004 /* vextracti128 */
9005 pCodeBuf[off++] = X86_OP_VEX3;
9006 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
9007 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9008 pCodeBuf[off++] = 0x39;
9009 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9010 pCodeBuf[off++] = 0x1;
9011
9012 /* pextrd gpr, vectmp0, #(iDWord - 4) (ASSUMES SSE4.1). */
9013 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9014 if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
9015 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9016 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9017 pCodeBuf[off++] = 0x0f;
9018 pCodeBuf[off++] = 0x3a;
9019 pCodeBuf[off++] = 0x16;
9020 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
9021 pCodeBuf[off++] = iDWord - 4;
9022 }
9023 else
9024 {
9025 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
9026 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9027 if (iGprDst >= 8 || iVecRegSrc >= 8)
9028 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9029 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9030 pCodeBuf[off++] = 0x0f;
9031 pCodeBuf[off++] = 0x3a;
9032 pCodeBuf[off++] = 0x16;
9033 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9034 pCodeBuf[off++] = iDWord;
9035 }
9036#elif defined(RT_ARCH_ARM64)
9037 Assert(iDWord < 4);
9038
9039 /* umov gprdst, vecsrc[iDWord] */
9040 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
9041#else
9042# error "port me"
9043#endif
9044 return off;
9045}
9046
9047
9048/**
9049 * Emits a gprdst = vecsrc[x] load, 32-bit.
9050 */
9051DECL_INLINE_THROW(uint32_t)
9052iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
9053{
9054 Assert(iDWord <= 7);
9055
9056#ifdef RT_ARCH_AMD64
9057 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
9058#elif defined(RT_ARCH_ARM64)
9059 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9060 Assert(!(iVecRegSrc & 0x1));
9061 /* Need to access the "high" 128-bit vector register. */
9062 if (iDWord >= 4)
9063 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
9064 else
9065 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
9066#else
9067# error "port me"
9068#endif
9069 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9070 return off;
9071}
9072
9073
9074/**
9075 * Emits a gprdst = vecsrc[x] load, 16-bit.
9076 */
9077DECL_FORCE_INLINE(uint32_t)
9078iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9079{
9080#ifdef RT_ARCH_AMD64
9081 if (iWord >= 8)
9082 {
9083 /** @todo Currently not used. */
9084 AssertReleaseFailed();
9085 }
9086 else
9087 {
9088 /* pextrw gpr, vecsrc, #iWord */
9089 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9090 if (iGprDst >= 8 || iVecRegSrc >= 8)
9091 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
9092 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
9093 pCodeBuf[off++] = 0x0f;
9094 pCodeBuf[off++] = 0xc5;
9095 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
9096 pCodeBuf[off++] = iWord;
9097 }
9098#elif defined(RT_ARCH_ARM64)
9099 /* umov gprdst, vecsrc[iWord] */
9100 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
9101#else
9102# error "port me"
9103#endif
9104 return off;
9105}
9106
9107
9108/**
9109 * Emits a gprdst = vecsrc[x] load, 16-bit.
9110 */
9111DECL_INLINE_THROW(uint32_t)
9112iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9113{
9114 Assert(iWord <= 15);
9115
9116#ifdef RT_ARCH_AMD64
9117 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
9118#elif defined(RT_ARCH_ARM64)
9119 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9120 Assert(!(iVecRegSrc & 0x1));
9121 /* Need to access the "high" 128-bit vector register. */
9122 if (iWord >= 8)
9123 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
9124 else
9125 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
9126#else
9127# error "port me"
9128#endif
9129 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9130 return off;
9131}
9132
9133
9134/**
9135 * Emits a gprdst = vecsrc[x] load, 8-bit.
9136 */
9137DECL_FORCE_INLINE(uint32_t)
9138iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9139{
9140#ifdef RT_ARCH_AMD64
9141 if (iByte >= 16)
9142 {
9143 /** @todo Currently not used. */
9144 AssertReleaseFailed();
9145 }
9146 else
9147 {
9148 /* pextrb gpr, vecsrc, #iByte */
9149 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9150 if (iGprDst >= 8 || iVecRegSrc >= 8)
9151 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9152 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9153 pCodeBuf[off++] = 0x0f;
9154 pCodeBuf[off++] = 0x3a;
9155 pCodeBuf[off++] = 0x14;
9156 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9157 pCodeBuf[off++] = iByte;
9158 }
9159#elif defined(RT_ARCH_ARM64)
9160 /* umov gprdst, vecsrc[iByte] */
9161 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
9162#else
9163# error "port me"
9164#endif
9165 return off;
9166}
9167
9168
9169/**
9170 * Emits a gprdst = vecsrc[x] load, 8-bit.
9171 */
9172DECL_INLINE_THROW(uint32_t)
9173iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9174{
9175 Assert(iByte <= 31);
9176
9177#ifdef RT_ARCH_AMD64
9178 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
9179#elif defined(RT_ARCH_ARM64)
9180 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9181 Assert(!(iVecRegSrc & 0x1));
9182 /* Need to access the "high" 128-bit vector register. */
9183 if (iByte >= 16)
9184 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
9185 else
9186 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
9187#else
9188# error "port me"
9189#endif
9190 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9191 return off;
9192}
9193
9194
9195/**
9196 * Emits a vecdst[x] = gprsrc store, 64-bit.
9197 */
9198DECL_FORCE_INLINE(uint32_t)
9199iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9200{
9201#ifdef RT_ARCH_AMD64
9202 if (iQWord >= 2)
9203 {
9204 /*
9205 * vpinsrq doesn't work on the upper 128-bits.
9206 * So we use the following sequence:
9207 * vextracti128 vectmp0, vecdst, 1
9208 * pinsrq vectmp0, gpr, #(iQWord - 2)
9209 * vinserti128 vecdst, vectmp0, 1
9210 */
9211 /* vextracti128 */
9212 pCodeBuf[off++] = X86_OP_VEX3;
9213 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9214 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9215 pCodeBuf[off++] = 0x39;
9216 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9217 pCodeBuf[off++] = 0x1;
9218
9219 /* pinsrq */
9220 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9221 pCodeBuf[off++] = X86_OP_REX_W
9222 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9223 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9224 pCodeBuf[off++] = 0x0f;
9225 pCodeBuf[off++] = 0x3a;
9226 pCodeBuf[off++] = 0x22;
9227 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9228 pCodeBuf[off++] = iQWord - 2;
9229
9230 /* vinserti128 */
9231 pCodeBuf[off++] = X86_OP_VEX3;
9232 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9233 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9234 pCodeBuf[off++] = 0x38;
9235 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9236 pCodeBuf[off++] = 0x01; /* Immediate */
9237 }
9238 else
9239 {
9240 /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */
9241 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9242 pCodeBuf[off++] = X86_OP_REX_W
9243 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9244 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9245 pCodeBuf[off++] = 0x0f;
9246 pCodeBuf[off++] = 0x3a;
9247 pCodeBuf[off++] = 0x22;
9248 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9249 pCodeBuf[off++] = iQWord;
9250 }
9251#elif defined(RT_ARCH_ARM64)
9252 /* ins vecsrc[iQWord], gpr */
9253 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9254#else
9255# error "port me"
9256#endif
9257 return off;
9258}
9259
9260
9261/**
9262 * Emits a vecdst[x] = gprsrc store, 64-bit.
9263 */
9264DECL_INLINE_THROW(uint32_t)
9265iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9266{
9267 Assert(iQWord <= 3);
9268
9269#ifdef RT_ARCH_AMD64
9270 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
9271#elif defined(RT_ARCH_ARM64)
9272 Assert(!(iVecRegDst & 0x1));
9273 if (iQWord >= 2)
9274 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
9275 else
9276 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
9277#else
9278# error "port me"
9279#endif
9280 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9281 return off;
9282}
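
/*
 * Usage sketch, illustration only (hence not compiled): composing a 128-bit
 * value from two GPRs by writing the individual qwords; register numbers
 * are placeholders.
 */
#if 0
    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdReg, idxRegLo, 0 /*iQWord*/);
    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdReg, idxRegHi, 1 /*iQWord*/);
#endif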
9283
9284
9285/**
9286 * Emits a vecdst[x] = gprsrc store, 32-bit.
9287 */
9288DECL_FORCE_INLINE(uint32_t)
9289iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9290{
9291#ifdef RT_ARCH_AMD64
9292 if (iDWord >= 4)
9293 {
9294 /*
9295 * vpinsrd doesn't work on the upper 128-bits.
9296 * So we use the following sequence:
9297 * vextracti128 vectmp0, vecdst, 1
9298 * pinsrd vectmp0, gpr, #(iDWord - 4)
9299 * vinserti128 vecdst, vectmp0, 1
9300 */
9301 /* vextracti128 */
9302 pCodeBuf[off++] = X86_OP_VEX3;
9303 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9304 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9305 pCodeBuf[off++] = 0x39;
9306 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9307 pCodeBuf[off++] = 0x1;
9308
9309 /* pinsrd */
9310 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9311 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
9312 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9313 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9314 pCodeBuf[off++] = 0x0f;
9315 pCodeBuf[off++] = 0x3a;
9316 pCodeBuf[off++] = 0x22;
9317 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9318 pCodeBuf[off++] = iDWord - 4;
9319
9320 /* vinserti128 */
9321 pCodeBuf[off++] = X86_OP_VEX3;
9322 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9323 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9324 pCodeBuf[off++] = 0x38;
9325 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9326 pCodeBuf[off++] = 0x01; /* Immediate */
9327 }
9328 else
9329 {
9330 /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */
9331 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9332 if (iVecRegDst >= 8 || iGprSrc >= 8)
9333 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9334 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9335 pCodeBuf[off++] = 0x0f;
9336 pCodeBuf[off++] = 0x3a;
9337 pCodeBuf[off++] = 0x22;
9338 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9339 pCodeBuf[off++] = iDWord;
9340 }
9341#elif defined(RT_ARCH_ARM64)
9342 /* ins vecsrc[iDWord], gpr */
9343 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
9344#else
9345# error "port me"
9346#endif
9347 return off;
9348}
9349
9350
9351/**
9352 * Emits a vecdst[x] = gprsrc store, 32-bit.
9353 */
9354DECL_INLINE_THROW(uint32_t)
9355iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9356{
9357 Assert(iDWord <= 7);
9358
9359#ifdef RT_ARCH_AMD64
9360 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
9361#elif defined(RT_ARCH_ARM64)
9362 Assert(!(iVecRegDst & 0x1));
9363 if (iDWord >= 4)
9364 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
9365 else
9366 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
9367#else
9368# error "port me"
9369#endif
9370 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9371 return off;
9372}
9373
9374
9375/**
9376 * Emits a vecdst[x] = gprsrc store, 16-bit.
9377 */
9378DECL_FORCE_INLINE(uint32_t)
9379iemNativeEmitSimdStoreGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9380{
9381#ifdef RT_ARCH_AMD64
9382 /* pinsrw vecsrc, gpr, #iWord. */
9383 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9384 if (iVecRegDst >= 8 || iGprSrc >= 8)
9385 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9386 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9387 pCodeBuf[off++] = 0x0f;
9388 pCodeBuf[off++] = 0xc4;
9389 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9390 pCodeBuf[off++] = iWord;
9391#elif defined(RT_ARCH_ARM64)
9392 /* ins vecsrc[iWord], gpr */
9393 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iWord, kArmv8InstrUmovInsSz_U16);
9394#else
9395# error "port me"
9396#endif
9397 return off;
9398}
9399
9400
9401/**
9402 * Emits a vecdst[x] = gprsrc store, 16-bit.
9403 */
9404DECL_INLINE_THROW(uint32_t)
9405iemNativeEmitSimdStoreGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9406{
9407 Assert(iWord <= 7); /* No upper-lane handling here: pinsrw only takes imm8[2:0] and the arm64 INS only addresses a single 128-bit register. */
9408
9409#ifdef RT_ARCH_AMD64
9410 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iGprSrc, iWord);
9411#elif defined(RT_ARCH_ARM64)
9412 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iWord);
9413#else
9414# error "port me"
9415#endif
9416 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9417 return off;
9418}
9419
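/* As a concrete encoding example (purely illustrative), the AMD64 path for
 * iVecRegDst=1, iGprSrc=2, iWord=3 produces:
 *
 *      66 0f c4 ca 03          pinsrw xmm1, edx, 3
 *
 * No REX prefix is emitted while both register indices are below 8.
 */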
9420
9421/**
9422 * Emits a vecdst[x] = gprsrc store, 8-bit.
9423 */
9424DECL_FORCE_INLINE(uint32_t)
9425iemNativeEmitSimdStoreGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
9426{
9427#ifdef RT_ARCH_AMD64
9428 /* pinsrb vecdst, gpr, #iByte (ASSUMES SSE4.1). */
9429 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9430 if (iVecRegDst >= 8 || iGprSrc >= 8)
9431 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9432 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9433 pCodeBuf[off++] = 0x0f;
9434 pCodeBuf[off++] = 0x3a;
9435 pCodeBuf[off++] = 0x20;
9436 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9437 pCodeBuf[off++] = iByte;
9438#elif defined(RT_ARCH_ARM64)
9439 /* ins vecdst[iByte], gpr */
9440 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iByte, kArmv8InstrUmovInsSz_U8);
9441#else
9442# error "port me"
9443#endif
9444 return off;
9445}
9446
9447
9448/**
9449 * Emits a vecdst[x] = gprsrc store, 8-bit.
9450 */
9451DECL_INLINE_THROW(uint32_t)
9452iemNativeEmitSimdStoreGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
9453{
9454 Assert(iByte <= 15);
9455
9456#ifdef RT_ARCH_AMD64
9457 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iByte);
9458#elif defined(RT_ARCH_ARM64)
9459 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iByte);
9460#else
9461# error "port me"
9462#endif
9463 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9464 return off;
9465}
9466
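/* Only the byte/dword/qword inserts (pinsrb/pinsrd/pinsrq, opcode map
 * 0f 3a) require SSE4.1; the pinsrw used by the 16-bit variant has been
 * around since SSE2, which is why it carries no such remark. Usage mirrors
 * the other element stores (hypothetical indices):
 *
 *      off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off,
 *                                                idxSimdReg, idxGprSrc, 0);
 */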
9467
9468/**
9469 * Emits a vecdst.au32[iDWord] = 0 store.
9470 */
9471DECL_FORCE_INLINE(uint32_t)
9472iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
9473{
9474 Assert(iDWord <= 7);
9475
9476#ifdef RT_ARCH_AMD64
9477 /*
9478 * xor tmp0, tmp0
9479 * pinsrd xmm, tmp0, iDword
9480 */
9481 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
9482 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
9483 pCodeBuf[off++] = 0x33;
9484 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
9485 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
9486#elif defined(RT_ARCH_ARM64)
9487 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9488 Assert(!(iVecReg & 0x1));
9489 /* ins vecreg[iDWord], wzr */
9490 if (iDWord >= 4)
9491 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
9492 else
9493 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
9494#else
9495# error "port me"
9496#endif
9497 return off;
9498}
9499
9500
9501/**
9502 * Emits a vecdst.au32[iDWord] = 0 store.
9503 */
9504DECL_INLINE_THROW(uint32_t)
9505iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
9506{
9507 Assert(iDWord <= 7);
9508#ifdef RT_ARCH_AMD64
9509 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 22 /*worst: xor + upper-lane store*/), off, iVecReg, iDWord);
9510#elif defined(RT_ARCH_ARM64)
9511 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
9512#else
9513# error "port me"
9514#endif
9515 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9516 return off;
9517}
9518
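/* The AMD64 path expands to two instructions. Assuming, purely for
 * illustration, that IEMNATIVE_REG_FIXED_TMP0 were r11, zeroing dword 1 of
 * xmm2 would encode as:
 *
 *      45 33 db                xor    r11d, r11d
 *      66 41 0f 3a 22 d3 01    pinsrd xmm2, r11d, 1
 */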
9519
9520/**
9521 * Emits a vecdst[0:127] = 0 store.
9522 */
9523DECL_FORCE_INLINE(uint32_t)
9524iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9525{
9526#ifdef RT_ARCH_AMD64
9527 /* pxor xmm, xmm */
9528 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9529 if (iVecReg >= 8)
9530 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
9531 pCodeBuf[off++] = 0x0f;
9532 pCodeBuf[off++] = 0xef;
9533 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9534#elif defined(RT_ARCH_ARM64)
9535 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9536 Assert(!(iVecReg & 0x1));
9537 /* eor vecreg, vecreg, vecreg */
9538 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
9539#else
9540# error "port me"
9541#endif
9542 return off;
9543}
9544
9545
9546/**
9547 * Emits a vecdst[0:127] = 0 store.
9548 */
9549DECL_INLINE_THROW(uint32_t)
9550iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9551{
9552#ifdef RT_ARCH_AMD64
9553 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
9554#elif defined(RT_ARCH_ARM64)
9555 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
9556#else
9557# error "port me"
9558#endif
9559 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9560 return off;
9561}
9562
9563
9564/**
9565 * Emits a vecdst[128:255] = 0 store.
9566 */
9567DECL_FORCE_INLINE(uint32_t)
9568iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9569{
9570#ifdef RT_ARCH_AMD64
9571 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
9572 if (iVecReg < 8)
9573 {
9574 pCodeBuf[off++] = X86_OP_VEX2;
9575 pCodeBuf[off++] = 0xf9;
9576 }
9577 else
9578 {
9579 pCodeBuf[off++] = X86_OP_VEX3;
9580 pCodeBuf[off++] = 0x41;
9581 pCodeBuf[off++] = 0x79;
9582 }
9583 pCodeBuf[off++] = 0x6f;
9584 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9585#elif defined(RT_ARCH_ARM64)
9586 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9587 Assert(!(iVecReg & 0x1));
9588 /* eor vecreg, vecreg, vecreg */
9589 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9590#else
9591# error "port me"
9592#endif
9593 return off;
9594}
9595
9596
9597/**
9598 * Emits a vecdst[128:255] = 0 store.
9599 */
9600DECL_INLINE_THROW(uint32_t)
9601iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9602{
9603#ifdef RT_ARCH_AMD64
9604 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
9605#elif defined(RT_ARCH_ARM64)
9606 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
9607#else
9608# error "port me"
9609#endif
9610 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9611 return off;
9612}
9613
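/* The self-move works because any VEX.128 encoded instruction zeroes bits
 * [128:255] of the destination register. For iVecReg=0 the emitted bytes
 * are just:
 *
 *      c5 f9 6f c0             vmovdqa xmm0, xmm0
 */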
9614
9615/**
9616 * Emits a vecdst[0:255] = 0 store.
9617 */
9618DECL_FORCE_INLINE(uint32_t)
9619iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9620{
9621#ifdef RT_ARCH_AMD64
9622 /* vpxor ymm, ymm, ymm */
9623 if (iVecReg < 8)
9624 {
9625 pCodeBuf[off++] = X86_OP_VEX2;
9626 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9627 }
9628 else
9629 {
9630 pCodeBuf[off++] = X86_OP_VEX3;
9631 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
9632 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9633 }
9634 pCodeBuf[off++] = 0xef;
9635 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9636#elif defined(RT_ARCH_ARM64)
9637 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9638 Assert(!(iVecReg & 0x1));
9639 /* eor vecreg, vecreg, vecreg */
9640 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
9641 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9642#else
9643# error "port me"
9644#endif
9645 return off;
9646}
9647
9648
9649/**
9650 * Emits a vecdst[0:255] = 0 store.
9651 */
9652DECL_INLINE_THROW(uint32_t)
9653iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9654{
9655#ifdef RT_ARCH_AMD64
9656 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
9657#elif defined(RT_ARCH_ARM64)
9658 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
9659#else
9660# error "port me"
9661#endif
9662 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9663 return off;
9664}
9665
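/* For iVecReg=0 this emits the canonical idiom for clearing a full YMM
 * register:
 *
 *      c5 fd ef c0             vpxor ymm0, ymm0, ymm0
 *
 * while the ARM64 side clears each 128-bit half of the register pair with
 * its own EOR.
 */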
9666
9667/**
9668 * Emits a vecdst = gprsrc broadcast, 8-bit.
9669 */
9670DECL_FORCE_INLINE(uint32_t)
9671iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9672{
9673#ifdef RT_ARCH_AMD64
9674 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE 4.1) */
9675 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9676 if (iVecRegDst >= 8 || iGprSrc >= 8)
9677 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9678 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9679 pCodeBuf[off++] = 0x0f;
9680 pCodeBuf[off++] = 0x3a;
9681 pCodeBuf[off++] = 0x20;
9682 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9683 pCodeBuf[off++] = 0x00;
9684
9685 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
9686 pCodeBuf[off++] = X86_OP_VEX3;
9687 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9688 | 0x02 /* opcode map. */
9689 | ( iVecRegDst >= 8
9690 ? 0
9691 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9692 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9693 pCodeBuf[off++] = 0x78;
9694 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9695#elif defined(RT_ARCH_ARM64)
9696 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9697 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9698
9699 /* dup vecdst, gpr */
9700 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
9701 if (f256Bit)
9702 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
9703#else
9704# error "port me"
9705#endif
9706 return off;
9707}
9708
9709
9710/**
9711 * Emits a vecdst = gprsrc broadcast, 8-bit.
9712 */
9713DECL_INLINE_THROW(uint32_t)
9714iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9715{
9716#ifdef RT_ARCH_AMD64
9717 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9718#elif defined(RT_ARCH_ARM64)
9719 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9720#else
9721# error "port me"
9722#endif
9723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9724 return off;
9725}
9726
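/* All the GPR broadcasts here follow the same two-step AMD64 pattern:
 * insert the GPR into element 0 with pinsr*, then replicate element 0 with
 * the register form of vpbroadcast* (hence the AVX2 assumption). Usage
 * sketch with hypothetical indices, broadcasting a byte across all 256
 * bits:
 *
 *      off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off,
 *                                                    idxSimdReg, idxGprSrc,
 *                                                    true);
 */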
9727
9728/**
9729 * Emits a vecdst = gprsrc broadcast, 16-bit.
9730 */
9731DECL_FORCE_INLINE(uint32_t)
9732iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9733{
9734#ifdef RT_ARCH_AMD64
9735 /* pinsrw vecdst, gpr, #0 */
9736 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9737 if (iVecRegDst >= 8 || iGprSrc >= 8)
9738 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9739 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9740 pCodeBuf[off++] = 0x0f;
9741 pCodeBuf[off++] = 0xc4;
9742 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9743 pCodeBuf[off++] = 0x00;
9744
9745 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
9746 pCodeBuf[off++] = X86_OP_VEX3;
9747 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9748 | 0x02 /* opcode map. */
9749 | ( iVecRegDst >= 8
9750 ? 0
9751 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9752 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9753 pCodeBuf[off++] = 0x79;
9754 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9755#elif defined(RT_ARCH_ARM64)
9756 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9757 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9758
9759 /* dup vecdst, gpr */
9760 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
9761 if (f256Bit)
9762 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
9763#else
9764# error "port me"
9765#endif
9766 return off;
9767}
9768
9769
9770/**
9771 * Emits a vecdst = gprsrc broadcast, 16-bit.
9772 */
9773DECL_INLINE_THROW(uint32_t)
9774iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9775{
9776#ifdef RT_ARCH_AMD64
9777 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9778#elif defined(RT_ARCH_ARM64)
9779 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9780#else
9781# error "port me"
9782#endif
9783 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9784 return off;
9785}
9786
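/* Note the inverted sense of the VEX.R/VEX.B selects in the second step:
 * they are stored complemented, i.e. set for register indices 0..7 and
 * clear for 8..15. For iVecRegDst=9, f256Bit=true the second step thus
 * encodes as:
 *
 *      c4 42 7d 79 c9          vpbroadcastw ymm9, xmm9
 */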
9787
9788/**
9789 * Emits a vecdst = gprsrc broadcast, 32-bit.
9790 */
9791DECL_FORCE_INLINE(uint32_t)
9792iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9793{
9794#ifdef RT_ARCH_AMD64
9795 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
9796 * vbroadcast needs a memory operand or another xmm register to work... */
9797
9798 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
9799 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9800 if (iVecRegDst >= 8 || iGprSrc >= 8)
9801 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9802 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9803 pCodeBuf[off++] = 0x0f;
9804 pCodeBuf[off++] = 0x3a;
9805 pCodeBuf[off++] = 0x22;
9806 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9807 pCodeBuf[off++] = 0x00;
9808
9809 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
9810 pCodeBuf[off++] = X86_OP_VEX3;
9811 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9812 | 0x02 /* opcode map. */
9813 | ( iVecRegDst >= 8
9814 ? 0
9815 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9816 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9817 pCodeBuf[off++] = 0x58;
9818 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9819#elif defined(RT_ARCH_ARM64)
9820 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9821 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9822
9823 /* dup vecdst, gpr */
9824 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
9825 if (f256Bit)
9826 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
9827#else
9828# error "port me"
9829#endif
9830 return off;
9831}
9832
9833
9834/**
9835 * Emits a vecdst = gprsrc broadcast, 32-bit.
9836 */
9837DECL_INLINE_THROW(uint32_t)
9838iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9839{
9840#ifdef RT_ARCH_AMD64
9841 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9842#elif defined(RT_ARCH_ARM64)
9843 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9844#else
9845# error "port me"
9846#endif
9847 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9848 return off;
9849}
9850
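/* Worked example for the second step with iVecRegDst=1 and f256Bit=true:
 *
 *      c4 e2 7d 58 c9          vpbroadcastd ymm1, xmm1
 *
 * The three-byte VEX form is unavoidable here, as the two-byte form can
 * only select the 0f opcode map, not 0f 38.
 */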
9851
9852/**
9853 * Emits a vecdst = gprsrc broadcast, 64-bit.
9854 */
9855DECL_FORCE_INLINE(uint32_t)
9856iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9857{
9858#ifdef RT_ARCH_AMD64
9859 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
9860 * vbroadcast needs a memory operand or another xmm register to work... */
9861
9862 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
9863 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9864 pCodeBuf[off++] = X86_OP_REX_W
9865 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9866 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9867 pCodeBuf[off++] = 0x0f;
9868 pCodeBuf[off++] = 0x3a;
9869 pCodeBuf[off++] = 0x22;
9870 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9871 pCodeBuf[off++] = 0x00;
9872
9873 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
9874 pCodeBuf[off++] = X86_OP_VEX3;
9875 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9876 | 0x02 /* opcode map. */
9877 | ( iVecRegDst >= 8
9878 ? 0
9879 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9880 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9881 pCodeBuf[off++] = 0x59;
9882 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9883#elif defined(RT_ARCH_ARM64)
9884 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9885 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9886
9887 /* dup vecdst, gpr */
9888 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
9889 if (f256Bit)
9890 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
9891#else
9892# error "port me"
9893#endif
9894 return off;
9895}
9896
9897
9898/**
9899 * Emits a vecdst = gprsrc broadcast, 64-bit.
9900 */
9901DECL_INLINE_THROW(uint32_t)
9902iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9903{
9904#ifdef RT_ARCH_AMD64
9905 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
9906#elif defined(RT_ARCH_ARM64)
9907 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9908#else
9909# error "port me"
9910#endif
9911 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9912 return off;
9913}
9914
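/* The ARM64 side needs just one DUP (vector, general) per 128-bit register,
 * e.g. dup v0.2d, x1 for this 64-bit case (registers illustrative); the
 * 256-bit variant simply repeats it on the odd register of the pair.
 */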
9915
9916/**
9917 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9918 */
9919DECL_FORCE_INLINE(uint32_t)
9920iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9921{
9922#ifdef RT_ARCH_AMD64
9923 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
9924
9925 /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
9926 pCodeBuf[off++] = X86_OP_VEX3;
9927 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9928 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9929 pCodeBuf[off++] = 0x38;
9930 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9931 pCodeBuf[off++] = 0x01; /* Immediate */
9932#elif defined(RT_ARCH_ARM64)
9933 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9934 Assert(!(iVecRegDst & 0x1));
9935
9936 /* mov dst, src; alias for: orr dst, src, src */
9937 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9938 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
9939#else
9940# error "port me"
9941#endif
9942 return off;
9943}
9944
9945
9946/**
9947 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9948 */
9949DECL_INLINE_THROW(uint32_t)
9950iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9951{
9952#ifdef RT_ARCH_AMD64
9953 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
9954#elif defined(RT_ARCH_ARM64)
9955 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
9956#else
9957# error "port me"
9958#endif
9959 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9960 return off;
9961}
9962
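/* Usage sketch (hypothetical indices), giving vbroadcasti128-like semantics
 * with a register source instead of the memory operand the real instruction
 * insists on:
 *
 *      off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off,
 *                                                         idxSimdRegDst,
 *                                                         idxSimdRegSrc);
 */
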
9963#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
9964
9965/** @} */
9966
9967#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
9968