VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@106286

Last change on this file since 106286 was 106202, checked in by vboxsync, 2 months ago

VMM/IEM: A couple of debug build fixes for arm. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 371.8 KB
/* $Id: IEMN8veRecompilerEmit.h 106202 2024-10-01 23:52:57Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline    Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    if (uInfo == 0)
        pu32CodeBuf[off++] = ARMV8_A64_INSTR_NOP;
    else
        pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(ARMV8_A64_REG_XZR, (uint16_t)uInfo);

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

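/*
 * Note: All emitters in this file append code at buffer offset @a off and
 * return the updated offset, so calls chain naturally.  A minimal usage
 * sketch (illustrative only; the 0xc0de marker value is an arbitrary
 * assumption):
 *
 * @code
 *      off = iemNativeEmitMarker(pReNative, off, UINT32_C(0xc0de));
 *      off = iemNativeEmitGprZero(pReNative, off, 0);
 * @endcode
 */
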
/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

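/*
 * Note: Most emitters come in pairs following the pattern above: the 'Ex'
 * flavour writes into a caller-supplied buffer that must already have room
 * (see the "Max buffer consumption" notes further down), while the plain
 * flavour calls iemNativeInstrBufEnsure() itself.  Sketch of the wrapper
 * pattern, mirroring iemNativeEmitBrk():
 *
 * @code
 *      off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 * @endcode
 */
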

/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */

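/*
 * Note: Worked ModRM example for the helper above (values picked purely for
 * illustration): iGprReg=0 (RAX), iGprBase=3 (RBX), offDisp=0x12 fits in a
 * signed byte, so the disp8 path emits X86_MODRM_MAKE(X86_MOD_MEM1, 0, 3),
 * i.e. 0x43, followed by 0x12 - the memory operand [rbx+12h].  The xBP
 * exclusion in the first branch exists because mod=0 with rm=5 encodes
 * RIP-relative addressing rather than [rbp].
 */
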
/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *     - AMD64: 6 instruction bytes.
 *     - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}

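/*
 * Note: Selection examples for the ARM64 path above (assumed values, for
 * illustration only): 0x00001234 becomes a single movz; 0x56780000 a single
 * movz with lsl #16; 0xffff1234 a single movn of the inverted value; anything
 * else, e.g. 0x12345678, needs the two-instruction movz+movk sequence.
 */
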
/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *     - AMD64: 6 instruction bytes.
 *     - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
template<uint32_t const a_uImm32>
DECL_FORCE_INLINE(uint32_t) iemNativeEmitLoadGpr32ImmExT(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    if (a_uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(a_uImm32);
        pCodeBuf[off++] = RT_BYTE2(a_uImm32);
        pCodeBuf[off++] = RT_BYTE3(a_uImm32);
        pCodeBuf[off++] = RT_BYTE4(a_uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if RT_CONSTEXPR_IF((a_uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32, 0, false /*f64Bit*/);
    else if RT_CONSTEXPR_IF((a_uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32 >> 16, 1, false /*f64Bit*/);
    else if RT_CONSTEXPR_IF((a_uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~a_uImm32 >> 16, 1, false /*f64Bit*/);
    else if RT_CONSTEXPR_IF((a_uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~a_uImm32, 0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, a_uImm32 >> 16, 1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *     - AMD64: 10 instruction bytes.
 *     - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * Quick simplification: Do 32-bit load if top half is zero.
     */
    if (uImm64 <= UINT32_MAX)
        return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);

    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29).  So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = (uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

#else
# error "port me"
#endif
    return off;
}

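/*
 * Note: Worked example for the ARM64 path above (the constant is an
 * arbitrary assumption): uImm64 = 0x0000cafe00001234 has two zero
 * half-words, so the movz/movk branch is taken and emits
 * "movz x0, #0x1234" followed by "movk x0, #0xcafe, lsl #32" - two
 * instructions instead of four, since all-zero half-words are skipped.
 */
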
/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

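/*
 * Note: IEMNATIVE_REG_FIXED_PVMCPU is the register permanently holding the
 * pVCpu pointer, so the helper above simply picks the shortest displacement
 * encoding.  E.g. (illustrative values) offVCpu=0x40 emits a one-byte disp8,
 * while offVCpu=0x1000 falls back to the four-byte disp32 form.
 */
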
/**
 * Special variant of iemNativeEmitGprByVCpuDisp for accessing the VM structure.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuSignedDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu)
{
    if (offVCpu < 128 && offVCpu >= -128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use a temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}

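/*
 * Note: The first branch above relies on the unsigned scaled-offset form of
 * ldr/str, which encodes offsets 0..4095 in units of the access size.  A
 * sketch (assumed values): an 8-byte load at offVCpu=0x88 gets
 * imm12 = 0x88/8 = 0x11, i.e. "ldr x0, [x28, #0x88]" if x28 were the fixed
 * pVCpu register - the actual register assignment lives in
 * IEMN8veRecompiler.h.
 */
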
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use a temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Special variant of iemNativeEmitGprByVCpuLdStEx for accessing the VM
 * structure.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprBySignedVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu,
                                   ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    Assert((uint32_t)RT_ABS(offVCpu) < RT_BIT_32(28)); /* we should be way out of range for problematic sign extending issues. */
    Assert(!((uint32_t)RT_ABS(offVCpu) & (cbData - 1)));

    /*
     * For negative offsets we need to put the displacement in a register
     * as the two variants with signed immediates will either post- or
     * pre-increment the base address register.
     */
    if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        uint8_t const idxIndexReg = !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) ? iGprReg : IEMNATIVE_REG_FIXED_TMP0;
        off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxIndexReg, offVCpu / (int32_t)cbData);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, idxIndexReg,
                                                    kArmv8A64InstrLdStExtend_Sxtw, cbData > 1 /*fShifted*/);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}

/**
 * Special variant of iemNativeEmitGprByVCpuLdSt for accessing the VM structure.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprBySignedVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                                 int32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    off = iemNativeEmitGprBySignedVCpuLdStEx(iemNativeInstrBufEnsure(pReNative, off, 2 + 1), off, iGprReg,
                                             offVCpu, enmOperation, cbData, IEMNATIVE_REG_FIXED_TMP0);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}

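/*
 * Note: Typical call pattern (field choice is illustrative only):
 *
 * @code
 *      off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxReg,
 *                                            RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
 * @endcode
 *
 * RT_UOFFSETOF is the usual way offVCpu values are produced, and it is what
 * makes the IEMNATIVE_REG_FIXED_PCPUMCTX fast path in the ARM64 helpers
 * above kick in for guest context members.
 */
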
/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    if (iGpr >= 8)
        pCodeBuf[off++] = X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    if (iGpr >= 8)
        pCodeBuf[off++] = X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU16Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


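/*
 * Note: Unlike loads, ARM64 stores with an out-of-range offVCpu cannot reuse
 * iGpr as scratch (it holds the value being stored), which is why the
 * wrapper above passes IEMNATIVE_REG_FIXED_TMP0 as iGprTmp and why the 'Ex'
 * flavour longjmps with VERR_IEM_IPE_9 when no temporary is supplied.
 */
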
/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 *
 * @note Limited range on ARM64.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    if (iGpr >= 8)
        pCodeBuf[off++] = X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 64-bit VCpu field.
 *
 * @note Will allocate temporary registers on both ARM64 and AMD64.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* Load the immediate into a temp register, then mov mem64, reg64. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxRegImm, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: idxTmp1 is always required!  Whether idxTmp2 is needed depends
 *       on whether the offset can be encoded as an immediate or not.  The
 *       @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same
 *       from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu,
                              uint8_t idxRegTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    RT_NOREF(idxRegTmp);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    if (idxRegTmp != UINT8_MAX)
    {
        Assert(idxRegTmp != IEMNATIVE_REG_FIXED_TMP0);
        off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegTmp, bImm);
        off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegTmp, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    }
    else
    {
        uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
        off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load effective address to a GPR of a VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else if (offVCpu <= 0xffffffU)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu >> 12,
                                                   true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
        if (offVCpu & 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, offVCpu & 0xfff);
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


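/*
 * Note: These helpers let call sites pass &pVCpu->member and get the same
 * offset RT_UOFFSETOF would yield, with the pointer type checked for free.
 * Sketch (StatSomething is a made-up member name, for illustration only):
 *
 * @code
 *      uint32_t const offVCpu = iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomething);
 * @endcode
 */
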
/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


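/*
 * Note: Combined with the offset helper further up, bumping a counter from
 * recompiled code looks like this sketch (the statistics member is a
 * hypothetical stand-in):
 *
 * @code
 *      off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
 *                                              iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomething));
 * @endcode
 */
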
/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to 16-bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


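/*
 * Note: Armv8A64ConvertMask32ToImmRImmS() above decides whether fMask can be
 * encoded as an ARM64 logical immediate (a rotated run of contiguous ones,
 * possibly as a repeating pattern).  E.g. (illustrative values) 0x0000fff0
 * encodes directly into the orr/and instruction, whereas an irregular mask
 * like 0x12345678 does not and costs an extra temp register load.
 */
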
1508/**
1509 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1510 *
1511 * @note May allocate temporary registers (not AMD64).
1512 */
1513DECL_FORCE_INLINE(uint32_t)
1514iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1515{
1516 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1517#ifdef RT_ARCH_AMD64
1518 /* and dword [pVCpu + offVCpu], imm8/32 */
1519 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1520 if (fMask < 0x80)
1521 {
1522 pCodeBuf[off++] = 0x83;
1523 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1524 pCodeBuf[off++] = (uint8_t)fMask;
1525 }
1526 else
1527 {
1528 pCodeBuf[off++] = 0x81;
1529 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1530 pCodeBuf[off++] = RT_BYTE1(fMask);
1531 pCodeBuf[off++] = RT_BYTE2(fMask);
1532 pCodeBuf[off++] = RT_BYTE3(fMask);
1533 pCodeBuf[off++] = RT_BYTE4(fMask);
1534 }
1535
1536#elif defined(RT_ARCH_ARM64)
1537 /* If the constant is unwieldy we'll need a register to hold it as well. */
1538 uint32_t uImmSizeLen, uImmRotate;
1539 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1540 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1541
1542 /* We need a temp register for holding the member value we're modifying. */
1543 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1544
1545 /* Determine how we're to access pVCpu first. */
1546 uint32_t const cbData = sizeof(uint32_t);
1547 if (offVCpu < (unsigned)(_4K * cbData))
1548 {
1549 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1550 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1551 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1552 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1553 if (idxTmpMask == UINT8_MAX)
1554 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1555 else
1556 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1557 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1558 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1559 }
1560 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1561 {
1562 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1563 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1564 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1565 if (idxTmpMask == UINT8_MAX)
1566 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1567 else
1568 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1569 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1570 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1571 }
1572 else
1573 {
1574 /* The offset is too large, so we must load it into a register and use
1575 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1576 of the instruction if that'll reduce the constant to 16 bits. */
1577 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1578 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1579 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1580 if (fShifted)
1581 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1582 else
1583 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1584
1585 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1586 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1587
1588 if (idxTmpMask == UINT8_MAX)
1589 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1590 else
1591 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1592
1593 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1594 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1595 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1596 }
1597 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1598 if (idxTmpMask != UINT8_MAX)
1599 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1600
1601#else
1602# error "port me"
1603#endif
1604 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1605 return off;
1606}
1607
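/* Usage sketch (illustrative only; the VMCPU member picked here is an
   assumption for the example - any correctly aligned uint32_t member works):
        off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~RT_BIT_32(0),
                                             RT_UOFFSETOF(VMCPU, iem.s.fExec));
   On ARM64 the mask 0xfffffffe is a valid logical immediate, so no temporary
   mask register needs to be allocated for this particular value. */
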
1608
1609/**
1610 * Emits a gprdst = gprsrc load.
1611 */
1612DECL_FORCE_INLINE(uint32_t)
1613iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1614{
1615#ifdef RT_ARCH_AMD64
1616 /* mov gprdst, gprsrc */
1617 if ((iGprDst | iGprSrc) >= 8)
1618 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1619 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1620 : X86_OP_REX_W | X86_OP_REX_R;
1621 else
1622 pCodeBuf[off++] = X86_OP_REX_W;
1623 pCodeBuf[off++] = 0x8b;
1624 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1625
1626#elif defined(RT_ARCH_ARM64)
1627 /* mov dst, src; alias for: orr dst, xzr, src */
1628 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1629
1630#else
1631# error "port me"
1632#endif
1633 return off;
1634}
1635
1636
1637/**
1638 * Emits a gprdst = gprsrc load.
1639 */
1640DECL_INLINE_THROW(uint32_t)
1641iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1642{
1643#ifdef RT_ARCH_AMD64
1644 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1645#elif defined(RT_ARCH_ARM64)
1646 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1647#else
1648# error "port me"
1649#endif
1650 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1651 return off;
1652}
1653
1654
1655/**
1656 * Emits a gprdst = gprsrc[31:0] load.
1657 * @note Bits 63 thru 32 are cleared.
1658 */
1659DECL_FORCE_INLINE(uint32_t)
1660iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1661{
1662#ifdef RT_ARCH_AMD64
1663 /* mov gprdst, gprsrc */
1664 if ((iGprDst | iGprSrc) >= 8)
1665 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1666 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1667 : X86_OP_REX_R;
1668 pCodeBuf[off++] = 0x8b;
1669 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1670
1671#elif defined(RT_ARCH_ARM64)
1672 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1673 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1674
1675#else
1676# error "port me"
1677#endif
1678 return off;
1679}
1680
1681
1682/**
1683 * Emits a gprdst = gprsrc[31:0] load.
1684 * @note Bits 63 thru 32 are cleared.
1685 */
1686DECL_INLINE_THROW(uint32_t)
1687iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1688{
1689#ifdef RT_ARCH_AMD64
1690 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1691#elif defined(RT_ARCH_ARM64)
1692 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1693#else
1694# error "port me"
1695#endif
1696 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1697 return off;
1698}
1699
1700
1701/**
1702 * Emits a gprdst = gprsrc[15:0] load.
1703 * @note Bits 63 thru 16 are cleared.
1704 */
1705DECL_INLINE_THROW(uint32_t)
1706iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1707{
1708#ifdef RT_ARCH_AMD64
1709 /* movzx Gv,Ew */
1710 if ((iGprDst | iGprSrc) >= 8)
1711 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1712 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1713 : X86_OP_REX_R;
1714 pCodeBuf[off++] = 0x0f;
1715 pCodeBuf[off++] = 0xb7;
1716 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1717
1718#elif defined(RT_ARCH_ARM64)
1719 /* and gprdst, gprsrc, #0xffff */
1720# if 1
1721 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1722 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1723# else
1724 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1725 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1726# endif
1727
1728#else
1729# error "port me"
1730#endif
1731 return off;
1732}
1733
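/* Informal sketch of the bitmask immediate rule relied upon above: with a
   32-bit element (bit 5 of imms clear) the low imms bits give the run length
   minus one and immr rotates the run right, so imms=0x0f encodes 16
   consecutive ones (0xffff) and imms=0x07 encodes 8 ones (0xff), both with
   rotate 0:
        Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
        Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
   The 0xff form is what iemNativeEmitLoadGprFromGpr8Ex below relies on. */
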
1734
1735/**
1736 * Emits a gprdst = gprsrc[15:0] load.
1737 * @note Bits 63 thru 16 are cleared.
1738 */
1739DECL_INLINE_THROW(uint32_t)
1740iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1741{
1742#ifdef RT_ARCH_AMD64
1743 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1744#elif defined(RT_ARCH_ARM64)
1745 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1746#else
1747# error "port me"
1748#endif
1749 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1750 return off;
1751}
1752
1753
1754/**
1755 * Emits a gprdst = gprsrc[7:0] load.
1756 * @note Bits 63 thru 8 are cleared.
1757 */
1758DECL_FORCE_INLINE(uint32_t)
1759iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1760{
1761#ifdef RT_ARCH_AMD64
1762 /* movzx Gv,Eb */
1763 if (iGprDst >= 8 || iGprSrc >= 8)
1764 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1765 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1766 : X86_OP_REX_R;
1767 else if (iGprSrc >= 4)
1768 pCodeBuf[off++] = X86_OP_REX;
1769 pCodeBuf[off++] = 0x0f;
1770 pCodeBuf[off++] = 0xb6;
1771 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1772
1773#elif defined(RT_ARCH_ARM64)
1774 /* and gprdst, gprsrc, #0xff */
1775 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1776 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1777
1778#else
1779# error "port me"
1780#endif
1781 return off;
1782}
1783
1784
1785/**
1786 * Emits a gprdst = gprsrc[7:0] load.
1787 * @note Bits 63 thru 8 are cleared.
1788 */
1789DECL_INLINE_THROW(uint32_t)
1790iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1791{
1792#ifdef RT_ARCH_AMD64
1793 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1794#elif defined(RT_ARCH_ARM64)
1795 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1796#else
1797# error "port me"
1798#endif
1799 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1800 return off;
1801}
1802
1803
1804/**
1805 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1806 * @note Bits 63 thru 8 are cleared.
1807 */
1808DECL_INLINE_THROW(uint32_t)
1809iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1810{
1811#ifdef RT_ARCH_AMD64
1812 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1813
1814 /* movzx Gv,Ew */
1815 if ((iGprDst | iGprSrc) >= 8)
1816 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1817 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1818 : X86_OP_REX_R;
1819 pbCodeBuf[off++] = 0x0f;
1820 pbCodeBuf[off++] = 0xb7;
1821 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1822
1823 /* shr Ev,8 */
1824 if (iGprDst >= 8)
1825 pbCodeBuf[off++] = X86_OP_REX_B;
1826 pbCodeBuf[off++] = 0xc1;
1827 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1828 pbCodeBuf[off++] = 8;
1829
1830#elif defined(RT_ARCH_ARM64)
1831 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1832 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1833 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1834
1835#else
1836# error "port me"
1837#endif
1838 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1839 return off;
1840}
1841
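/* Usage sketch (illustrative only, hypothetical register indices): fetching
   the x86 AH value when guest RAX is shadowed by host register idxRegRax:
        off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxRegTmp, idxRegRax);
   idxRegTmp then holds bits 15:8 of RAX zero-extended to 64 bits, matching
   the AH/CH/DH/BH semantics of legacy high-byte operands. */
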
1842
1843/**
1844 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1845 */
1846DECL_INLINE_THROW(uint32_t)
1847iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1848{
1849#ifdef RT_ARCH_AMD64
1850 /* movsxd r64, r/m32 */
1851 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1852 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1853 pbCodeBuf[off++] = 0x63;
1854 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1855
1856#elif defined(RT_ARCH_ARM64)
1857 /* sxtw dst, src */
1858 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1859 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1860
1861#else
1862# error "port me"
1863#endif
1864 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1865 return off;
1866}
1867
1868
1869/**
1870 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1871 */
1872DECL_INLINE_THROW(uint32_t)
1873iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1874{
1875#ifdef RT_ARCH_AMD64
1876 /* movsx r64, r/m16 */
1877 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1878 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1879 pbCodeBuf[off++] = 0x0f;
1880 pbCodeBuf[off++] = 0xbf;
1881 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1882
1883#elif defined(RT_ARCH_ARM64)
1884 /* sxth dst, src */
1885 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1886 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1887
1888#else
1889# error "port me"
1890#endif
1891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1892 return off;
1893}
1894
1895
1896/**
1897 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1898 */
1899DECL_INLINE_THROW(uint32_t)
1900iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1901{
1902#ifdef RT_ARCH_AMD64
1903 /* movsx r32, r/m16 */
1904 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1905 if (iGprDst >= 8 || iGprSrc >= 8)
1906 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1907 pbCodeBuf[off++] = 0x0f;
1908 pbCodeBuf[off++] = 0xbf;
1909 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1910
1911#elif defined(RT_ARCH_ARM64)
1912 /* sxth dst32, src */
1913 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1914 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1915
1916#else
1917# error "port me"
1918#endif
1919 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1920 return off;
1921}
1922
1923
1924/**
1925 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1926 */
1927DECL_INLINE_THROW(uint32_t)
1928iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1929{
1930#ifdef RT_ARCH_AMD64
1931 /* movsx r64, r/m8 */
1932 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1933 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1934 pbCodeBuf[off++] = 0x0f;
1935 pbCodeBuf[off++] = 0xbe;
1936 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1937
1938#elif defined(RT_ARCH_ARM64)
1939 /* sxtb dst, src */
1940 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1941 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1942
1943#else
1944# error "port me"
1945#endif
1946 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1947 return off;
1948}
1949
1950
1951/**
1952 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1953 * @note Bits 63 thru 32 are cleared.
1954 */
1955DECL_INLINE_THROW(uint32_t)
1956iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1957{
1958#ifdef RT_ARCH_AMD64
1959 /* movsx r32, r/m8 */
1960 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1961 if (iGprDst >= 8 || iGprSrc >= 8)
1962 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1963 else if (iGprSrc >= 4)
1964 pbCodeBuf[off++] = X86_OP_REX;
1965 pbCodeBuf[off++] = 0x0f;
1966 pbCodeBuf[off++] = 0xbe;
1967 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1968
1969#elif defined(RT_ARCH_ARM64)
1970 /* sxtb dst32, src32 */
1971 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1972 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1973
1974#else
1975# error "port me"
1976#endif
1977 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1978 return off;
1979}
1980
1981
1982/**
1983 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1984 * @note Bits 63 thru 16 are cleared.
1985 */
1986DECL_INLINE_THROW(uint32_t)
1987iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1988{
1989#ifdef RT_ARCH_AMD64
1990 /* movsx r16, r/m8 */
1991 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1992 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1993 if (iGprDst >= 8 || iGprSrc >= 8)
1994 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1995 else if (iGprSrc >= 4)
1996 pbCodeBuf[off++] = X86_OP_REX;
1997 pbCodeBuf[off++] = 0x0f;
1998 pbCodeBuf[off++] = 0xbe;
1999 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
2000
2001 /* movzx r32, r/m16 */
2002 if (iGprDst >= 8)
2003 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
2004 pbCodeBuf[off++] = 0x0f;
2005 pbCodeBuf[off++] = 0xb7;
2006 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2007
2008#elif defined(RT_ARCH_ARM64)
2009 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
2010 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2011 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
2012 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2013 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2014
2015#else
2016# error "port me"
2017#endif
2018 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2019 return off;
2020}
2021
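/* Usage sketch (illustrative only, hypothetical register indices): an 8-bit
   to 16-bit sign extension as needed for CBW-style widening:
        off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxRegDst, idxRegSrc);
   Both targets need two instructions: movsx + movzx on AMD64 (the movzx
   re-clears bits 63:16), sxtb + and-with-0xffff on ARM64. */
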
2022
2023/**
2024 * Emits a gprdst = gprsrc + addend load.
2025 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
2026 */
2027#ifdef RT_ARCH_AMD64
2028DECL_INLINE_THROW(uint32_t)
2029iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2030 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2031{
2032 Assert(iAddend != 0);
2033
2034 /* lea gprdst, [gprsrc + iAddend] */
2035 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2036 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
2037 pbCodeBuf[off++] = 0x8d;
2038 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
2039 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2040 return off;
2041}
2042
2043#elif defined(RT_ARCH_ARM64)
2044DECL_INLINE_THROW(uint32_t)
2045iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2046 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
2047{
2048 if ((uint64_t)iAddend < 4096) /* cast must not truncate the 64-bit addend */
2049 {
2050 /* add dst, src, uimm12 */
2051 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2052 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
2053 }
2054 else if ((uint64_t)-iAddend < 4096)
2055 {
2056 /* sub dst, src, uimm12 */
2057 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2058 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
2059 }
2060 else
2061 {
2062 Assert(iGprSrc != iGprDst);
2063 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
2064 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2065 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
2066 }
2067 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2068 return off;
2069}
2070#else
2071# error "port me"
2072#endif
2073
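/* Usage sketch (illustrative only, hypothetical register indices): on ARM64
   the addend picks the shape - +1 fits the add-immediate form, -1 the
   sub-immediate form, and anything outside the +/-4095 range goes through
   the load-immediate-into-destination + register-add path (which requires
   idxRegDst != idxRegSrc):
        off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegDst, idxRegSrc, 1);
        off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegDst, idxRegSrc, -1); */
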
2074/**
2075 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
2076 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
2077 */
2078#ifdef RT_ARCH_AMD64
2079DECL_INLINE_THROW(uint32_t)
2080iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2081 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2082#else
2083DECL_INLINE_THROW(uint32_t)
2084iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2085 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
2086#endif
2087{
2088 if (iAddend != 0)
2089 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2090 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
2091}
2092
2093
2094/**
2095 * Emits a gprdst = gprsrc32 + addend load.
2096 * @note Bits 63 thru 32 are cleared.
2097 */
2098DECL_INLINE_THROW(uint32_t)
2099iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2100 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2101{
2102 Assert(iAddend != 0);
2103
2104#ifdef RT_ARCH_AMD64
2105 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
2106 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2107 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
2108 if ((iGprDst | iGprSrc) >= 8)
2109 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
2110 pbCodeBuf[off++] = 0x8d;
2111 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
2112
2113#elif defined(RT_ARCH_ARM64)
2114 if ((uint32_t)iAddend < 4096)
2115 {
2116 /* add dst, src, uimm12 */
2117 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2118 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
2119 }
2120 else if ((uint32_t)-iAddend < 4096)
2121 {
2122 /* sub dst, src, uimm12 */
2123 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2124 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
2125 }
2126 else
2127 {
2128 Assert(iGprSrc != iGprDst);
2129 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
2130 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2131 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
2132 }
2133
2134#else
2135# error "port me"
2136#endif
2137 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2138 return off;
2139}
2140
2141
2142/**
2143 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
2144 */
2145DECL_INLINE_THROW(uint32_t)
2146iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2147 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2148{
2149 if (iAddend != 0)
2150 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2151 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
2152}
2153
2154
2155/**
2156 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2157 * destination.
2158 */
2159DECL_FORCE_INLINE(uint32_t)
2160iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2161{
2162#ifdef RT_ARCH_AMD64
2163 /* mov reg16, r/m16 */
2164 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2165 if (idxDst >= 8 || idxSrc >= 8)
2166 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
2167 pCodeBuf[off++] = 0x8b;
2168 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
2169
2170#elif defined(RT_ARCH_ARM64)
2171 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
2172 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
2173
2174#else
2175# error "Port me!"
2176#endif
2177 return off;
2178}
2179
2180
2181/**
2182 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2183 * destination.
2184 */
2185DECL_INLINE_THROW(uint32_t)
2186iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2187{
2188#ifdef RT_ARCH_AMD64
2189 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
2190#elif defined(RT_ARCH_ARM64)
2191 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
2192#else
2193# error "Port me!"
2194#endif
2195 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2196 return off;
2197}
2198
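/* Usage sketch (illustrative only, hypothetical register indices): writing a
   16-bit guest register such as AX without disturbing bits 63:16 of the
   shadowing host register:
        off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRax, idxRegValue);
   On ARM64 this is a single bfi; on AMD64 a 16-bit mov leaves the upper bits
   of the destination untouched by design. */
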
2199
2200#ifdef RT_ARCH_AMD64
2201/**
2202 * Common bit of iemNativeEmitLoadGprByBp and friends.
2203 */
2204DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
2205 PIEMRECOMPILERSTATE pReNativeAssert)
2206{
2207 if (offDisp < 128 && offDisp >= -128)
2208 {
2209 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
2210 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
2211 }
2212 else
2213 {
2214 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
2215 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2216 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2217 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2218 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2219 }
2220 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
2221 return off;
2222}
2223#elif defined(RT_ARCH_ARM64)
2224/**
2225 * Common bit of iemNativeEmitLoadGprByBp and friends.
2226 */
2227DECL_FORCE_INLINE_THROW(uint32_t)
2228iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2229 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2230{
2231 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
2232 {
2233 /* str w/ unsigned imm12 (scaled) */
2234 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2235 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
2236 }
2237 else if (offDisp >= -256 && offDisp <= 256)
2238 {
2239 /* stur w/ signed imm9 (unscaled) */
2240 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2241 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
2242 }
2243 else
2244 {
2245 /* Use temporary indexing register. */
2246 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2247 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2248 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2249 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2250 }
2251 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2252 return off;
2253}
2254#endif
2255
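/* Worked example of the addressing mode selection above (a sketch assuming
   the ARM64 path and an 8-byte access): offDisp = 0x40 is 8-aligned and below
   4096*8, so it becomes an ldr/str with imm12 = 0x40/8 = 8; offDisp = -24
   misses the unsigned form but fits the signed imm9 of ldur/stur; and
   something like offDisp = 0x12345 falls back to materializing the offset in
   IEMNATIVE_REG_FIXED_TMP0 and using the register-indexed form. */
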
2256
2257/**
2258 * Emits a 64-bit GPR load instruction with a BP relative source address.
2259 */
2260DECL_INLINE_THROW(uint32_t)
2261iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2262{
2263#ifdef RT_ARCH_AMD64
2264 /* mov gprdst, qword [rbp + offDisp] */
2265 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2266 if (iGprDst < 8)
2267 pbCodeBuf[off++] = X86_OP_REX_W;
2268 else
2269 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2270 pbCodeBuf[off++] = 0x8b;
2271 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2272
2273#elif defined(RT_ARCH_ARM64)
2274 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2275
2276#else
2277# error "port me"
2278#endif
2279}
2280
2281
2282/**
2283 * Emits a 32-bit GPR load instruction with a BP relative source address.
2284 * @note Bits 63 thru 32 of the GPR will be cleared.
2285 */
2286DECL_INLINE_THROW(uint32_t)
2287iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2288{
2289#ifdef RT_ARCH_AMD64
2290 /* mov gprdst, dword [rbp + offDisp] */
2291 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2292 if (iGprDst >= 8)
2293 pbCodeBuf[off++] = X86_OP_REX_R;
2294 pbCodeBuf[off++] = 0x8b;
2295 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2296
2297#elif defined(RT_ARCH_ARM64)
2298 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2299
2300#else
2301# error "port me"
2302#endif
2303}
2304
2305
2306/**
2307 * Emits a 16-bit GPR load instruction with a BP relative source address.
2308 * @note Bits 63 thru 16 of the GPR will be cleared.
2309 */
2310DECL_INLINE_THROW(uint32_t)
2311iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2312{
2313#ifdef RT_ARCH_AMD64
2314 /* movzx gprdst, word [rbp + offDisp] */
2315 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2316 if (iGprDst >= 8)
2317 pbCodeBuf[off++] = X86_OP_REX_R;
2318 pbCodeBuf[off++] = 0x0f;
2319 pbCodeBuf[off++] = 0xb7;
2320 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2321
2322#elif defined(RT_ARCH_ARM64)
2323 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2324
2325#else
2326# error "port me"
2327#endif
2328}
2329
2330
2331/**
2332 * Emits an 8-bit GPR load instruction with a BP relative source address.
2333 * @note Bits 63 thru 8 of the GPR will be cleared.
2334 */
2335DECL_INLINE_THROW(uint32_t)
2336iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2337{
2338#ifdef RT_ARCH_AMD64
2339 /* movzx gprdst, byte [rbp + offDisp] */
2340 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2341 if (iGprDst >= 8)
2342 pbCodeBuf[off++] = X86_OP_REX_R;
2343 pbCodeBuf[off++] = 0x0f;
2344 pbCodeBuf[off++] = 0xb6;
2345 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2346
2347#elif defined(RT_ARCH_ARM64)
2348 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2349
2350#else
2351# error "port me"
2352#endif
2353}
2354
2355
2356#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2357/**
2358 * Emits a 128-bit vector register load instruction with a BP relative source address.
2359 */
2360DECL_FORCE_INLINE_THROW(uint32_t)
2361iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2362{
2363#ifdef RT_ARCH_AMD64
2364 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2365
2366 /* movdqu reg128, mem128 */
2367 pbCodeBuf[off++] = 0xf3;
2368 if (iVecRegDst >= 8)
2369 pbCodeBuf[off++] = X86_OP_REX_R;
2370 pbCodeBuf[off++] = 0x0f;
2371 pbCodeBuf[off++] = 0x6f;
2372 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2373#elif defined(RT_ARCH_ARM64)
2374 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2375#else
2376# error "port me"
2377#endif
2378}
2379
2380
2381/**
2382 * Emits a 256-bit vector register load instruction with a BP relative source address.
2383 */
2384DECL_FORCE_INLINE_THROW(uint32_t)
2385iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2386{
2387#ifdef RT_ARCH_AMD64
2388 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2389
2390 /* vmovdqu reg256, mem256 */
2391 pbCodeBuf[off++] = X86_OP_VEX2;
2392 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2393 pbCodeBuf[off++] = 0x6f;
2394 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2395#elif defined(RT_ARCH_ARM64)
2396 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2397 Assert(!(iVecRegDst & 0x1));
2398 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2399 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2400#else
2401# error "port me"
2402#endif
2403}
2404
2405#endif
2406
2407
2408/**
2409 * Emits a load effective address to a GPR with a BP relative source address.
2410 */
2411DECL_INLINE_THROW(uint32_t)
2412iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2413{
2414#ifdef RT_ARCH_AMD64
2415 /* lea gprdst, [rbp + offDisp] */
2416 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2417 if (iGprDst < 8)
2418 pbCodeBuf[off++] = X86_OP_REX_W;
2419 else
2420 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2421 pbCodeBuf[off++] = 0x8d;
2422 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2423
2424#elif defined(RT_ARCH_ARM64)
2425 bool const fSub = offDisp < 0;
2426 uint32_t const offAbsDisp = (uint32_t)RT_ABS(offDisp);
2427 if (offAbsDisp <= 0xffffffU)
2428 {
2429 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2430 if (offAbsDisp <= 0xfffU)
2431 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp);
2432 else
2433 {
2434 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp >> 12,
2435 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2436 if (offAbsDisp & 0xfffU)
2437 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, offAbsDisp & 0xfff);
2438 }
2439 }
2440 else
2441 {
2442 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2443 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offAbsDisp);
2444 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2445 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2446 }
2447
2448#else
2449# error "port me"
2450#endif
2451
2452 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2453 return off;
2454}
2455
2456
2457/**
2458 * Emits a 64-bit GPR store with a BP relative destination address.
2459 *
2460 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2461 */
2462DECL_INLINE_THROW(uint32_t)
2463iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2464{
2465#ifdef RT_ARCH_AMD64
2466 /* mov qword [rbp + offDisp], gprdst */
2467 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2468 if (iGprSrc < 8)
2469 pbCodeBuf[off++] = X86_OP_REX_W;
2470 else
2471 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2472 pbCodeBuf[off++] = 0x89;
2473 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2474
2475#elif defined(RT_ARCH_ARM64)
2476 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2477 {
2478 /* str w/ unsigned imm12 (scaled) */
2479 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2480 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2481 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2482 }
2483 else if (offDisp >= -256 && offDisp <= 256)
2484 {
2485 /* stur w/ signed imm9 (unscaled) */
2486 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2487 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2488 }
2489 else if ((uint32_t)-offDisp < (unsigned)_4K)
2490 {
2491 /* Use temporary indexing register w/ sub uimm12. */
2492 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2493 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2494 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2495 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2496 }
2497 else
2498 {
2499 /* Use temporary indexing register. */
2500 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2501 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2502 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2503 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2504 }
2505 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2506 return off;
2507
2508#else
2509# error "Port me!"
2510#endif
2511}
2512
2513
2514/**
2515 * Emits a 64-bit immediate store with a BP relative destination address.
2516 *
2517 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2518 */
2519DECL_INLINE_THROW(uint32_t)
2520iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2521{
2522#ifdef RT_ARCH_AMD64
2523 if ((int64_t)uImm64 == (int32_t)uImm64)
2524 {
2525 /* mov qword [rbp + offDisp], imm32 - sign extended */
2526 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2527 pbCodeBuf[off++] = X86_OP_REX_W;
2528 pbCodeBuf[off++] = 0xc7;
2529 if (offDisp < 128 && offDisp >= -128)
2530 {
2531 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2532 pbCodeBuf[off++] = (uint8_t)offDisp;
2533 }
2534 else
2535 {
2536 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2537 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2538 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2539 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2540 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2541 }
2542 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2543 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2544 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2545 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2546 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2547 return off;
2548 }
2549#endif
2550
2551 /* Load tmp0, imm64; Store tmp to bp+disp. */
2552 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2553 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2554}
2555
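/* Usage sketch (illustrative only; offStackSlot is a hypothetical BP-relative
   offset of a stack slot):
        off = iemNativeEmitStoreImm64ByBp(pReNative, off, offStackSlot,
                                          UINT64_C(0xdeadbeefcafe));
   Values fitting a sign-extended imm32 are stored directly on AMD64; all
   other cases (and all of ARM64) go through IEMNATIVE_REG_FIXED_TMP0, so the
   caller must not rely on that register surviving the call. */
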
2556#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2557
2558/**
2559 * Emits a 128-bit vector register store with a BP relative destination address.
2560 *
2561 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2562 */
2563DECL_INLINE_THROW(uint32_t)
2564iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2565{
2566#ifdef RT_ARCH_AMD64
2567 /* movdqu [rbp + offDisp], vecsrc */
2568 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2569 pbCodeBuf[off++] = 0xf3;
2570 if (iVecRegSrc >= 8)
2571 pbCodeBuf[off++] = X86_OP_REX_R;
2572 pbCodeBuf[off++] = 0x0f;
2573 pbCodeBuf[off++] = 0x7f;
2574 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2575
2576#elif defined(RT_ARCH_ARM64)
2577 if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2578 {
2579 /* str w/ unsigned imm12 (scaled) */
2580 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2581 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2582 ARMV8_A64_REG_BP, (uint32_t)offDisp / 16); /* 128-bit ldr/str scales imm12 by 16 */
2583 }
2584 else if (offDisp >= -256 && offDisp <= 256)
2585 {
2586 /* stur w/ signed imm9 (unscaled) */
2587 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2588 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2589 }
2590 else if ((uint32_t)-offDisp < (unsigned)_4K)
2591 {
2592 /* Use temporary indexing register w/ sub uimm12. */
2593 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2594 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2595 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2596 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2597 }
2598 else
2599 {
2600 /* Use temporary indexing register. */
2601 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2602 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2603 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2604 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2605 }
2606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2607 return off;
2608
2609#else
2610# error "Port me!"
2611#endif
2612}
2613
2614
2615/**
2616 * Emits a 256-bit vector register store with a BP relative destination address.
2617 *
2618 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2619 */
2620DECL_INLINE_THROW(uint32_t)
2621iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2622{
2623#ifdef RT_ARCH_AMD64
2624 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2625
2626 /* vmovdqu mem256, reg256 */
2627 pbCodeBuf[off++] = X86_OP_VEX2;
2628 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2629 pbCodeBuf[off++] = 0x7f;
2630 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2631#elif defined(RT_ARCH_ARM64)
2632 Assert(!(iVecRegSrc & 0x1));
2633 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2634 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2635#else
2636# error "Port me!"
2637#endif
2638}
2639
2640#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
2641#if defined(RT_ARCH_ARM64)
2642
2643/**
2644 * Common bit of iemNativeEmitLoadGprByGprU64Ex and friends.
2645 *
2646 * @note Odd and large @a offDisp values require a temporary, unless it's a
2647 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2648 * caller does not heed this.
2649 *
2650 * @note DON'T try this with prefetch.
2651 */
2652DECL_FORCE_INLINE_THROW(uint32_t)
2653iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2654 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2655{
2656 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2657 {
2658 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2659 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2660 }
2661 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2662 && iGprReg != iGprBase)
2663 || iGprTmp != UINT8_MAX)
2664 {
2665 /* The offset is too large, so we must load it into a register and use
2666 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2667 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2668 if (iGprTmp == UINT8_MAX)
2669 iGprTmp = iGprReg;
2670 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2671 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2672 }
2673 else
2674# ifdef IEM_WITH_THROW_CATCH
2675 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2676# else
2677 AssertReleaseFailedStmt(off = UINT32_MAX);
2678# endif
2679 return off;
2680}
2681
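/* Usage sketch (illustrative only, hypothetical register indices): a load
   with an unencodable displacement still works without a temporary provided
   the destination differs from the base, as the destination then doubles as
   the index register:
        off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, idxRegDst, idxRegBase, 0x12345,
                                          kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
   A store (or a load with idxRegDst == idxRegBase) with such a displacement
   must pass a real iGprTmp or the function asserts / throws. */
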
2682/**
2683 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2684 */
2685DECL_FORCE_INLINE_THROW(uint32_t)
2686iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2687 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2688{
2689 /*
2690 * There are a couple of ldr variants that take an immediate offset, so
2691 * try to use those if we can; otherwise we have to use a temporary register
2692 * to help with the addressing.
2693 */
2694 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2695 {
2696 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2697 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2698 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2699 }
2700 else
2701 {
2702 /* The offset is too large, so we must load it into a register and use
2703 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2704 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2705 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2706
2707 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2708 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2709
2710 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2711 }
2712 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2713 return off;
2714}
2715
2716# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2717/**
2718 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2719 *
2720 * @note Odd and large @a offDisp values require a temporary register
2721 * (@a iGprTmp); a vector register cannot double as the address
2722 * temporary. Will assert / throw if the caller does not heed this.
2723 *
2724 * @note DON'T try this with prefetch.
2725 */
2726DECL_FORCE_INLINE_THROW(uint32_t)
2727iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2728 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2729{
2730 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2731 {
2732 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2733 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2734 }
2735 else if (iGprTmp != UINT8_MAX) /* a vector register cannot double as the
2736 address temporary, so one must be supplied */
2737 {
2738 /* The offset is too large, so we must load it into a register and use
2739 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2740 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2741 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2742 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2743 }
2744 else
2745# ifdef IEM_WITH_THROW_CATCH
2746 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2747# else
2748 AssertReleaseFailedStmt(off = UINT32_MAX);
2749# endif
2750 return off;
2751}
2752# endif
2753
2754
2755/**
2756 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2757 */
2758DECL_FORCE_INLINE_THROW(uint32_t)
2759iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2760 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2761{
2762 /*
2763 * There are a couple of ldr variants that take an immediate offset, so
2764 * try to use those if we can; otherwise we have to use a temporary register
2765 * to help with the addressing.
2766 */
2767 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2768 {
2769 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2770 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2771 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2772 }
2773 else
2774 {
2775 /* The offset is too large, so we must load it into a register and use
2776 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2777 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2778 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2779
2780 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2781 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2782
2783 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2784 }
2785 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2786 return off;
2787}
2788#endif /* RT_ARCH_ARM64 */
2789
2790/**
2791 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2792 *
2793 * @note ARM64: Misaligned @a offDisp values and values not in the
2794 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2795 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2796 * does not heed this.
2797 */
2798DECL_FORCE_INLINE_THROW(uint32_t)
2799iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2800 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2801{
2802#ifdef RT_ARCH_AMD64
2803 /* mov reg64, mem64 */
2804 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2805 pCodeBuf[off++] = 0x8b;
2806 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2807 RT_NOREF(iGprTmp);
2808
2809#elif defined(RT_ARCH_ARM64)
2810 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2811 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2812
2813#else
2814# error "port me"
2815#endif
2816 return off;
2817}
2818
2819
2820/**
2821 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2822 */
2823DECL_INLINE_THROW(uint32_t)
2824iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2825{
2826#ifdef RT_ARCH_AMD64
2827 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2828 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2829
2830#elif defined(RT_ARCH_ARM64)
2831 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2832
2833#else
2834# error "port me"
2835#endif
2836 return off;
2837}
2838
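/* Usage sketch (illustrative only, hypothetical register indices): loading a
   64-bit field at offset 0x18 through a pointer held in idxRegBase:
        off = iemNativeEmitLoadGprByGprU64(pReNative, off, idxRegDst, idxRegBase, 0x18);
   On AMD64 this is a single mov with a disp8; on ARM64 the offset is
   8-aligned and in range, so it becomes a single ldr with imm12 = 3. */
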
2839
2840/**
2841 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2842 *
2843 * @note ARM64: Misaligned @a offDisp values and values not in the
2844 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2845 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2846 * caller does not heed this.
2847 *
2848 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2849 */
2850DECL_FORCE_INLINE_THROW(uint32_t)
2851iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2852 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2853{
2854#ifdef RT_ARCH_AMD64
2855 /* mov reg32, mem32 */
2856 if (iGprDst >= 8 || iGprBase >= 8)
2857 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2858 pCodeBuf[off++] = 0x8b;
2859 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2860 RT_NOREF(iGprTmp);
2861
2862#elif defined(RT_ARCH_ARM64)
2863 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2864 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2865
2866#else
2867# error "port me"
2868#endif
2869 return off;
2870}
2871
2872
2873/**
2874 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2875 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2876 */
2877DECL_INLINE_THROW(uint32_t)
2878iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2879{
2880#ifdef RT_ARCH_AMD64
2881 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2882 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2883
2884#elif defined(RT_ARCH_ARM64)
2885 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2886
2887#else
2888# error "port me"
2889#endif
2890 return off;
2891}
2892
2893
2894/**
2895 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2896 * sign-extending the value to 64 bits.
2897 *
2898 * @note ARM64: Misaligned @a offDisp values and values not in the
2899 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2900 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2901 * caller does not heed this.
2902 */
2903DECL_FORCE_INLINE_THROW(uint32_t)
2904iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2905 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2906{
2907#ifdef RT_ARCH_AMD64
2908 /* movsxd reg64, mem32 */
2909 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2910 pCodeBuf[off++] = 0x63;
2911 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2912 RT_NOREF(iGprTmp);
2913
2914#elif defined(RT_ARCH_ARM64)
2915 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2916 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2917
2918#else
2919# error "port me"
2920#endif
2921 return off;
2922}
2923
2924
2925/**
2926 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2927 *
2928 * @note ARM64: Misaligned @a offDisp values and values not in the
2929 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2930 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2931 * caller does not heed this.
2932 *
2933 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2934 */
2935DECL_FORCE_INLINE_THROW(uint32_t)
2936iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2937 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2938{
2939#ifdef RT_ARCH_AMD64
2940 /* movzx reg32, mem16 */
2941 if (iGprDst >= 8 || iGprBase >= 8)
2942 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2943 pCodeBuf[off++] = 0x0f;
2944 pCodeBuf[off++] = 0xb7;
2945 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2946 RT_NOREF(iGprTmp);
2947
2948#elif defined(RT_ARCH_ARM64)
2949 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2950 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2951
2952#else
2953# error "port me"
2954#endif
2955 return off;
2956}
2957
2958
2959/**
2960 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2961 * sign-extending the value to 64 bits.
2962 *
2963 * @note ARM64: Misaligned @a offDisp values and values not in the
2964 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2965 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2966 * caller does not heed this.
2967 */
2968DECL_FORCE_INLINE_THROW(uint32_t)
2969iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2970 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2971{
2972#ifdef RT_ARCH_AMD64
2973 /* movsx reg64, mem16 */
2974 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2975 pCodeBuf[off++] = 0x0f;
2976 pCodeBuf[off++] = 0xbf;
2977 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2978 RT_NOREF(iGprTmp);
2979
2980#elif defined(RT_ARCH_ARM64)
2981 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2982 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2983
2984#else
2985# error "port me"
2986#endif
2987 return off;
2988}
2989
2990
2991/**
2992 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2993 * sign-extending the value to 32 bits.
2994 *
2995 * @note ARM64: Misaligned @a offDisp values and values not in the
2996 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2997 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2998 * caller does not heed this.
2999 *
3000 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
3001 */
3002DECL_FORCE_INLINE_THROW(uint32_t)
3003iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3004 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3005{
3006#ifdef RT_ARCH_AMD64
3007 /* movsx reg32, mem16 */
3008 if (iGprDst >= 8 || iGprBase >= 8)
3009 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3010 pCodeBuf[off++] = 0x0f;
3011 pCodeBuf[off++] = 0xbf;
3012 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3013 RT_NOREF(iGprTmp);
3014
3015#elif defined(RT_ARCH_ARM64)
3016 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3017 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
3018
3019#else
3020# error "port me"
3021#endif
3022 return off;
3023}
3024
3025
3026/**
3027 * Emits an 8-bit GPR load via a GPR base address with a displacement.
3028 *
3029 * @note ARM64: @a offDisp values not in the 0...0xfff range will require a
3030 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3031 * same. Will assert / throw if caller does not heed this.
3032 *
3033 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
3034 */
3035DECL_FORCE_INLINE_THROW(uint32_t)
3036iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3037 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3038{
3039#ifdef RT_ARCH_AMD64
3040 /* movzx reg32, mem8 */
3041 if (iGprDst >= 8 || iGprBase >= 8)
3042 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3043 pCodeBuf[off++] = 0x0f;
3044 pCodeBuf[off++] = 0xb6;
3045 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3046 RT_NOREF(iGprTmp);
3047
3048#elif defined(RT_ARCH_ARM64)
3049 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3050 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
3051
3052#else
3053# error "port me"
3054#endif
3055 return off;
3056}
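

/**
 * Usage sketch (illustrative only, everything below is hypothetical): how the
 * *Ex loaders above are typically driven - ensure buffer space for the worst
 * case, emit, then assert the estimate held.  On ARM64 an out-of-range
 * @a offDisp with iGprDst == iGprBase would additionally require a valid
 * iGprTmp, per the notes above.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprByGprU8Sketch(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    /* GPR 0 <- zero-extended byte at [GPR 1 + 0x20]; 0x20 is in range on both targets. */
    off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, 0 /*iGprDst*/, 1 /*iGprBase*/, 0x20 /*offDisp*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}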
3057
3058
3059/**
3060 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3061 * sign-extending the value to 64 bits.
3062 *
3063 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3064 * temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3065 * same. Will assert / throw if caller does not heed this.
3066 */
3067DECL_FORCE_INLINE_THROW(uint32_t)
3068iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3069 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3070{
3071#ifdef RT_ARCH_AMD64
3072 /* movsx reg64, mem8 */
3073 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3074 pCodeBuf[off++] = 0x0f;
3075 pCodeBuf[off++] = 0xbe;
3076 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3077 RT_NOREF(iGprTmp);
3078
3079#elif defined(RT_ARCH_ARM64)
3080 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3081 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
3082
3083#else
3084# error "port me"
3085#endif
3086 return off;
3087}
3088
3089
3090/**
3091 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3092 * sign-extending the value to 32 bits.
3093 *
3094 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3095 * temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3096 * same. Will assert / throw if caller does not heed this.
3097 *
3098 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
3099 */
3100DECL_FORCE_INLINE_THROW(uint32_t)
3101iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3102 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3103{
3104#ifdef RT_ARCH_AMD64
3105 /* movsx reg32, mem8 */
3106 if (iGprDst >= 8 || iGprBase >= 8)
3107 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3108 pCodeBuf[off++] = 0x0f;
3109 pCodeBuf[off++] = 0xbe;
3110 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3111 RT_NOREF(iGprTmp);
3112
3113#elif defined(RT_ARCH_ARM64)
3114 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3115 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
3116
3117#else
3118# error "port me"
3119#endif
3120 return off;
3121}
3122
3123
3124/**
3125 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3126 * sign-extending the value to 16 bits.
3127 *
3128 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3129 * temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3130 * same. Will assert / throw if caller does not heed this.
3131 *
3132 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
3133 */
3134DECL_FORCE_INLINE_THROW(uint32_t)
3135iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3136 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3137{
3138#ifdef RT_ARCH_AMD64
3139 /* movsx reg32, mem8 */
3140 if (iGprDst >= 8 || iGprBase >= 8)
3141 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3142 pCodeBuf[off++] = 0x0f;
3143 pCodeBuf[off++] = 0xbe;
3144 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3145# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
3146 /* and reg32, 0xffff */
3147 if (iGprDst >= 8)
3148 pCodeBuf[off++] = X86_OP_REX_B;
3149 pCodeBuf[off++] = 0x81;
3150 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
3151 pCodeBuf[off++] = 0xff;
3152 pCodeBuf[off++] = 0xff;
3153 pCodeBuf[off++] = 0;
3154 pCodeBuf[off++] = 0;
3155# else
3156 /* movzx reg32, reg16 */
3157 if (iGprDst >= 8)
3158 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
3159 pCodeBuf[off++] = 0x0f;
3160 pCodeBuf[off++] = 0xb7;
3161 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
3162# endif
3163 RT_NOREF(iGprTmp);
3164
3165#elif defined(RT_ARCH_ARM64)
3166 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3167 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
3168 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
3169 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3170
3171#else
3172# error "port me"
3173#endif
3174 return off;
3175}
3176
3177
3178#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3179/**
3180 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3181 *
3182 * @note ARM64: Misaligned @a offDisp values and values not in the
3183 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3184 * @a iVecRegDst and @a iGprBase are the same. Will assert / throw if caller
3185 * does not heed this.
3186 */
3187DECL_FORCE_INLINE_THROW(uint32_t)
3188iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3189 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3190{
3191#ifdef RT_ARCH_AMD64
3192 /* movdqu reg128, mem128 */
3193 pCodeBuf[off++] = 0xf3;
3194 if (iVecRegDst >= 8 || iGprBase >= 8)
3195 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3196 pCodeBuf[off++] = 0x0f;
3197 pCodeBuf[off++] = 0x6f;
3198 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3199 RT_NOREF(iGprTmp);
3200
3201#elif defined(RT_ARCH_ARM64)
3202 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3203 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3204
3205#else
3206# error "port me"
3207#endif
3208 return off;
3209}
3210
3211
3212/**
3213 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3214 */
3215DECL_INLINE_THROW(uint32_t)
3216iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3217{
3218#ifdef RT_ARCH_AMD64
3219 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3220 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3221
3222#elif defined(RT_ARCH_ARM64)
3223 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3224
3225#else
3226# error "port me"
3227#endif
3228 return off;
3229}
3230
3231
3232/**
3233 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3234 *
3235 * @note ARM64: Misaligned @a offDisp values and values not in the
3236 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3237 * @a iVecRegDst and @a iGprBase are the same. Will assert / throw if caller
3238 * does not heed this.
3239 */
3240DECL_FORCE_INLINE_THROW(uint32_t)
3241iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3242 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3243{
3244#ifdef RT_ARCH_AMD64
3245 /* vmovdqu reg256, mem256 */
3246 pCodeBuf[off++] = X86_OP_VEX3;
3247 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3248 | X86_OP_VEX3_BYTE1_X
3249 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3250 | UINT8_C(0x01);
3251 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3252 pCodeBuf[off++] = 0x6f;
3253 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3254 RT_NOREF(iGprTmp);
3255
3256#elif defined(RT_ARCH_ARM64)
3257 Assert(!(iVecRegDst & 0x1));
3258 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3259 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3260 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3261 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3262#else
3263# error "port me"
3264#endif
3265 return off;
3266}
3267
3268
3269/**
3270 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3271 */
3272DECL_INLINE_THROW(uint32_t)
3273iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3274{
3275#ifdef RT_ARCH_AMD64
3276 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3277 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3278
3279#elif defined(RT_ARCH_ARM64)
3280 Assert(!(iVecRegDst & 0x1));
3281 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3282 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3283 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3284 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3285
3286#else
3287# error "port me"
3288#endif
3289 return off;
3290}
3291#endif
3292
3293
3294/**
3295 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3296 *
3297 * @note ARM64: Misaligned @a offDisp values and values not in the
3298 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3299 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3300 * does not heed this.
3301 */
3302DECL_FORCE_INLINE_THROW(uint32_t)
3303iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3304 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3305{
3306#ifdef RT_ARCH_AMD64
3307 /* mov mem64, reg64 */
3308 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3309 pCodeBuf[off++] = 0x89;
3310 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3311 RT_NOREF(iGprTmp);
3312
3313#elif defined(RT_ARCH_ARM64)
3314 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3315 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3316
3317#else
3318# error "port me"
3319#endif
3320 return off;
3321}
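

/**
 * Usage sketch (illustrative only, register indexes hypothetical) for the
 * displacement note above: 0x12340 exceeds the ARM64 scaled range, so a
 * temporary register must be supplied for the address arithmetic there.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGpr64ByGprLargeDispSketch(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    /* [GPR 3 + 0x12340] <- GPR 2, with GPR 4 as scratch for the out-of-range offset. */
    off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, 2 /*iGprSrc*/, 3 /*iGprBase*/, 0x12340 /*offDisp*/, 4 /*iGprTmp*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}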
3322
3323
3324/**
3325 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3326 *
3327 * @note ARM64: Misaligned @a offDisp values and values not in the
3328 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3329 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3330 * does not heed this.
3331 */
3332DECL_FORCE_INLINE_THROW(uint32_t)
3333iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3334 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3335{
3336#ifdef RT_ARCH_AMD64
3337 /* mov mem32, reg32 */
3338 if (iGprSrc >= 8 || iGprBase >= 8)
3339 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3340 pCodeBuf[off++] = 0x89;
3341 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3342 RT_NOREF(iGprTmp);
3343
3344#elif defined(RT_ARCH_ARM64)
3345 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3346 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3347
3348#else
3349# error "port me"
3350#endif
3351 return off;
3352}
3353
3354
3355/**
3356 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3357 *
3358 * @note ARM64: Misaligned @a offDisp values and values not in the
3359 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3360 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3361 * does not heed this.
3362 */
3363DECL_FORCE_INLINE_THROW(uint32_t)
3364iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3365 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3366{
3367#ifdef RT_ARCH_AMD64
3368 /* mov mem16, reg16 */
3369 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3370 if (iGprSrc >= 8 || iGprBase >= 8)
3371 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3372 pCodeBuf[off++] = 0x89;
3373 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3374 RT_NOREF(iGprTmp);
3375
3376#elif defined(RT_ARCH_ARM64)
3377 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3378 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3379
3380#else
3381# error "port me"
3382#endif
3383 return off;
3384}
3385
3386
3387/**
3388 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3389 *
3390 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3391 * temporary register (@a iGprTmp) if @a iGprSrc and @a iGprBase are the
3392 * same. Will assert / throw if caller does not heed this.
3393 */
3394DECL_FORCE_INLINE_THROW(uint32_t)
3395iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3396 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3397{
3398#ifdef RT_ARCH_AMD64
3399 /* mov mem8, reg8 */
3400 if (iGprSrc >= 8 || iGprBase >= 8)
3401 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3402 else if (iGprSrc >= 4)
3403 pCodeBuf[off++] = X86_OP_REX; /* plain REX so we get spl/bpl/sil/dil rather than ah/ch/dh/bh */
3404 pCodeBuf[off++] = 0x88;
3405 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3406 RT_NOREF(iGprTmp);
3407
3408#elif defined(RT_ARCH_ARM64)
3409 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3410 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3411
3412#else
3413# error "port me"
3414#endif
3415 return off;
3416}
3417
3418
3419/**
3420 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3421 *
3422 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0), on
3423 * AMD64 it depends on the immediate value.
3424 *
3425 * @note ARM64: Misaligned @a offDisp values and values not in the
3426 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3427 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3428 * does not heed this.
3429 */
3430DECL_FORCE_INLINE_THROW(uint32_t)
3431iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3432 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3433{
3434#ifdef RT_ARCH_AMD64
3435 if ((int32_t)uImm == (int64_t)uImm)
3436 {
3437 /* mov mem64, imm32 (sign-extended) */
3438 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3439 pCodeBuf[off++] = 0xc7;
3440 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3441 pCodeBuf[off++] = RT_BYTE1(uImm);
3442 pCodeBuf[off++] = RT_BYTE2(uImm);
3443 pCodeBuf[off++] = RT_BYTE3(uImm);
3444 pCodeBuf[off++] = RT_BYTE4(uImm);
3445 }
3446 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3447 {
3448 /* require temporary register. */
3449 if (iGprImmTmp == UINT8_MAX)
3450 iGprImmTmp = iGprTmp;
3451 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3452 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3453 }
3454 else
3455# ifdef IEM_WITH_THROW_CATCH
3456 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3457# else
3458 AssertReleaseFailedStmt(off = UINT32_MAX);
3459# endif
3460
3461#elif defined(RT_ARCH_ARM64)
3462 if (uImm == 0)
3463 iGprImmTmp = ARMV8_A64_REG_XZR;
3464 else
3465 {
3466 Assert(iGprImmTmp < 31);
3467 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3468 }
3469 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3470
3471#else
3472# error "port me"
3473#endif
3474 return off;
3475}
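

/**
 * Usage sketch (illustrative only, all values hypothetical) for
 * iemNativeEmitStoreImm64ByGprEx: a constant that does not fit a
 * sign-extended imm32 needs @a iGprImmTmp on both targets.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImm64ByGprSketch(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 24);
    /* [GPR 3] <- 0x123456789a, materialized in GPR 5 first since it has no imm32 form. */
    off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x123456789a), 3 /*iGprBase*/, 5 /*iGprImmTmp*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}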
3476
3477
3478/**
3479 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3480 *
3481 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3482 *
3483 * @note ARM64: Misaligned @a offDisp values and values not in the
3484 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3485 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3486 * does not heed this.
3487 */
3488DECL_FORCE_INLINE_THROW(uint32_t)
3489iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3490 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3491{
3492#ifdef RT_ARCH_AMD64
3493 /* mov mem32, imm32 */
3494 if (iGprBase >= 8)
3495 pCodeBuf[off++] = X86_OP_REX_B;
3496 pCodeBuf[off++] = 0xc7;
3497 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3498 pCodeBuf[off++] = RT_BYTE1(uImm);
3499 pCodeBuf[off++] = RT_BYTE2(uImm);
3500 pCodeBuf[off++] = RT_BYTE3(uImm);
3501 pCodeBuf[off++] = RT_BYTE4(uImm);
3502 RT_NOREF(iGprImmTmp, iGprTmp);
3503
3504#elif defined(RT_ARCH_ARM64)
3506 if (uImm == 0)
3507 iGprImmTmp = ARMV8_A64_REG_XZR;
3508 else
3509 {
3510 Assert(iGprImmTmp < 31);
3511 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3512 }
3513 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3514 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3515
3516#else
3517# error "port me"
3518#endif
3519 return off;
3520}
3521
3522
3523/**
3524 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3525 *
3526 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3527 *
3528 * @note ARM64: Misaligned @a offDisp values and values not in the
3529 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3530 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3531 * does not heed this.
3532 */
3533DECL_FORCE_INLINE_THROW(uint32_t)
3534iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3535 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3536{
3537#ifdef RT_ARCH_AMD64
3538 /* mov mem16, imm16 */
3539 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3540 if (iGprBase >= 8)
3541 pCodeBuf[off++] = X86_OP_REX_B;
3542 pCodeBuf[off++] = 0xc7;
3543 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3544 pCodeBuf[off++] = RT_BYTE1(uImm);
3545 pCodeBuf[off++] = RT_BYTE2(uImm);
3546 RT_NOREF(iGprImmTmp, iGprTmp);
3547
3548#elif defined(RT_ARCH_ARM64)
3549 if (uImm == 0)
3550 iGprImmTmp = ARMV8_A64_REG_XZR;
3551 else
3552 {
3553 Assert(iGprImmTmp < 31);
3554 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3555 }
3556 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3557 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3558
3559#else
3560# error "port me"
3561#endif
3562 return off;
3563}
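

/**
 * Usage sketch (illustrative only, base register hypothetical): storing a
 * zero immediate via the helper above needs no immediate temporary on ARM64,
 * since XZR is substituted for @a iGprImmTmp.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreZeroU16Sketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprBase)
{
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
    /* [iGprBase] <- (uint16_t)0; neither iGprImmTmp nor iGprTmp is needed here. */
    off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, 0 /*uImm*/, iGprBase);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}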
3564
3565
3566/**
3567 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3568 *
3569 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3570 *
3571 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3572 * temporary register (@a iGprTmp) if @a iGprImmTmp and @a iGprBase are the
3573 * same. Will assert / throw if caller does not heed this.
3574 */
3575DECL_FORCE_INLINE_THROW(uint32_t)
3576iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3577 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3578{
3579#ifdef RT_ARCH_AMD64
3580 /* mov mem8, imm8 - no operand size prefix for byte-sized operations */
3582 if (iGprBase >= 8)
3583 pCodeBuf[off++] = X86_OP_REX_B;
3584 pCodeBuf[off++] = 0xc6;
3585 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3586 pCodeBuf[off++] = uImm;
3587 RT_NOREF(iGprImmTmp, iGprTmp);
3588
3589#elif defined(RT_ARCH_ARM64)
3590 if (uImm == 0)
3591 iGprImmTmp = ARMV8_A64_REG_XZR;
3592 else
3593 {
3594 Assert(iGprImmTmp < 31);
3595 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3596 }
3597 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3598 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3599
3600#else
3601# error "port me"
3602#endif
3603 return off;
3604}
3605
3606
3607#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3608/**
3609 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3610 *
3611 * @note ARM64: Misaligned @a offDisp values and values not in the
3612 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3613 * @a iVecRegDst and @a iGprBase are the same. Will assert / throw if caller
3614 * does not heed this.
3615 */
3616DECL_FORCE_INLINE_THROW(uint32_t)
3617iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3618 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3619{
3620#ifdef RT_ARCH_AMD64
3621 /* movdqu mem128, reg128 */
3622 pCodeBuf[off++] = 0xf3;
3623 if (iVecRegDst >= 8 || iGprBase >= 8)
3624 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3625 pCodeBuf[off++] = 0x0f;
3626 pCodeBuf[off++] = 0x7f;
3627 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3628 RT_NOREF(iGprTmp);
3629
3630#elif defined(RT_ARCH_ARM64)
3631 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3632 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3633
3634#else
3635# error "port me"
3636#endif
3637 return off;
3638}
3639
3640
3641/**
3642 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3643 */
3644DECL_INLINE_THROW(uint32_t)
3645iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3646{
3647#ifdef RT_ARCH_AMD64
3648 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3649 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3650
3651#elif defined(RT_ARCH_ARM64)
3652 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3653
3654#else
3655# error "port me"
3656#endif
3657 return off;
3658}
3659
3660
3661/**
3662 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3663 *
3664 * @note ARM64: Misaligned @a offDisp values and values not in the
3665 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3666 * @a iVecRegDst and @a iGprBase are the same. Will assert / throw if caller
3667 * does not heed this.
3668 */
3669DECL_FORCE_INLINE_THROW(uint32_t)
3670iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3671 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3672{
3673#ifdef RT_ARCH_AMD64
3674 /* vmovdqu mem256, reg256 */
3675 pCodeBuf[off++] = X86_OP_VEX3;
3676 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3677 | X86_OP_VEX3_BYTE1_X
3678 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3679 | UINT8_C(0x01);
3680 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3681 pCodeBuf[off++] = 0x7f;
3682 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3683 RT_NOREF(iGprTmp);
3684
3685#elif defined(RT_ARCH_ARM64)
3686 Assert(!(iVecRegDst & 0x1));
3687 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3688 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3689 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3690 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3691#else
3692# error "port me"
3693#endif
3694 return off;
3695}
3696
3697
3698/**
3699 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3700 */
3701DECL_INLINE_THROW(uint32_t)
3702iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3703{
3704#ifdef RT_ARCH_AMD64
3705 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3706 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3707
3708#elif defined(RT_ARCH_ARM64)
3709 Assert(!(iVecRegDst & 0x1));
3710 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3711 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3712 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3713 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3714
3715#else
3716# error "port me"
3717#endif
3718 return off;
3719}
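

/**
 * Usage sketch (illustrative only): copying 32 bytes via the 256-bit helpers
 * above.  On ARM64 these split into two 128-bit accesses, so the vector
 * register index must be even there (the pair iVecReg, iVecReg + 1 holds the
 * value); register 2 below is hypothetical.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCopyU256Sketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrcBase, uint8_t iGprDstBase)
{
    /* Load the 256-bit value from [iGprSrcBase]... */
    off = iemNativeEmitLoadVecRegByGprU256(pReNative, off, 2 /*iVecReg, even*/, iGprSrcBase, 0 /*offDisp*/);
    /* ...and store it to [iGprDstBase]. */
    off = iemNativeEmitStoreVecRegByGprU256(pReNative, off, 2 /*iVecReg, even*/, iGprDstBase, 0 /*offDisp*/);
    return off;
}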
3720#endif
3721
3722
3723
3724/*********************************************************************************************************************************
3725* Subtraction and Additions *
3726*********************************************************************************************************************************/
3727
3728/**
3729 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3730 * @note The AMD64 version sets flags.
3731 */
3732DECL_INLINE_THROW(uint32_t)
3733iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3734{
3735#if defined(RT_ARCH_AMD64)
3736 /* sub Gv,Ev */
3737 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3738 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3739 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3740 pbCodeBuf[off++] = 0x2b;
3741 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3742
3743#elif defined(RT_ARCH_ARM64)
3744 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3745 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3746
3747#else
3748# error "Port me"
3749#endif
3750 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3751 return off;
3752}
3753
3754
3755/**
3756 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3757 * @note The AMD64 version sets flags.
3758 */
3759DECL_FORCE_INLINE(uint32_t)
3760iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3761{
3762#if defined(RT_ARCH_AMD64)
3763 /* sub Gv,Ev */
3764 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3765 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3766 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3767 pCodeBuf[off++] = 0x2b;
3768 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3769
3770#elif defined(RT_ARCH_ARM64)
3771 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3772
3773#else
3774# error "Port me"
3775#endif
3776 return off;
3777}
3778
3779
3780/**
3781 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3782 * @note The AMD64 version sets flags.
3783 */
3784DECL_INLINE_THROW(uint32_t)
3785iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3786{
3787#if defined(RT_ARCH_AMD64)
3788 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3789#elif defined(RT_ARCH_ARM64)
3790 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3791#else
3792# error "Port me"
3793#endif
3794 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3795 return off;
3796}
3797
3798
3799/**
3800 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3801 *
3802 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3803 *
3804 * @note Larger constants will require a temporary register. Failing to specify
3805 * one when needed will trigger fatal assertion / throw.
3806 */
3807DECL_FORCE_INLINE_THROW(uint32_t)
3808iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3809 uint8_t iGprTmp = UINT8_MAX)
3810{
3811#ifdef RT_ARCH_AMD64
3812 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3813 if (iSubtrahend == 1)
3814 {
3815 /* dec r/m64 */
3816 pCodeBuf[off++] = 0xff;
3817 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3818 }
3819 else if (iSubtrahend == -1)
3820 {
3821 /* inc r/m64 */
3822 pCodeBuf[off++] = 0xff;
3823 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3824 }
3825 else if ((int8_t)iSubtrahend == iSubtrahend)
3826 {
3827 /* sub r/m64, imm8 */
3828 pCodeBuf[off++] = 0x83;
3829 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3830 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3831 }
3832 else if ((int32_t)iSubtrahend == iSubtrahend)
3833 {
3834 /* sub r/m64, imm32 */
3835 pCodeBuf[off++] = 0x81;
3836 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3837 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3838 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3839 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3840 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3841 }
3842 else if (iGprTmp != UINT8_MAX)
3843 {
3844 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1 /* overwrite the REX prefix emitted above */, iGprTmp, (uint64_t)iSubtrahend);
3845 /* sub r/m64, r64 */
3846 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3847 pCodeBuf[off++] = 0x29;
3848 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3849 }
3850 else
3851# ifdef IEM_WITH_THROW_CATCH
3852 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3853# else
3854 AssertReleaseFailedStmt(off = UINT32_MAX);
3855# endif
3856
3857#elif defined(RT_ARCH_ARM64)
3858 uint64_t const uAbsSubtrahend = (uint64_t)RT_ABS(iSubtrahend); /* must not truncate, or large values would hit the small-immediate paths */
3859 if (uAbsSubtrahend < 4096)
3860 {
3861 if (iSubtrahend >= 0)
3862 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3863 else
3864 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3865 }
3866 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3867 {
3868 if (iSubtrahend >= 0)
3869 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3870 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3871 else
3872 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3873 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3874 }
3875 else if (iGprTmp != UINT8_MAX)
3876 {
3877 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3878 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3879 }
3880 else
3881# ifdef IEM_WITH_THROW_CATCH
3882 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3883# else
3884 AssertReleaseFailedStmt(off = UINT32_MAX);
3885# endif
3886
3887#else
3888# error "Port me"
3889#endif
3890 return off;
3891}
3892
3893
3894/**
3895 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3896 *
3897 * @note Larger constants will require a temporary register. Failing to specify
3898 * one when needed will trigger fatal assertion / throw.
3899 */
3900DECL_INLINE_THROW(uint32_t)
3901iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3902 uint8_t iGprTmp = UINT8_MAX)
3903
3904{
3905#ifdef RT_ARCH_AMD64
3906 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3907#elif defined(RT_ARCH_ARM64)
3908 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3909#else
3910# error "Port me"
3911#endif
3912 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3913 return off;
3914}
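

/**
 * Usage sketch (illustrative only) of the immediate tiers in
 * iemNativeEmitSubGprImm: small subtrahends encode directly, while a full
 * 64-bit one needs the (caller supplied, hypothetical) temporary register.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSubGprImmSketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprTmp)
{
    /* Encodes as dec (AMD64) / sub #1 (ARM64); no temporary needed. */
    off = iemNativeEmitSubGprImm(pReNative, off, iGprDst, 1);
    /* Too wide for any immediate form, so iGprTmp is mandatory here. */
    off = iemNativeEmitSubGprImm(pReNative, off, iGprDst, INT64_C(0x123456789), iGprTmp);
    return off;
}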
3915
3916
3917/**
3918 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3919 *
3920 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3921 *
3922 * @note ARM64: Larger constants will require a temporary register. Failing to
3923 * specify one when needed will trigger fatal assertion / throw.
3924 */
3925DECL_FORCE_INLINE_THROW(uint32_t)
3926iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3927 uint8_t iGprTmp = UINT8_MAX)
3928{
3929#ifdef RT_ARCH_AMD64
3930 if (iGprDst >= 8)
3931 pCodeBuf[off++] = X86_OP_REX_B;
3932 if (iSubtrahend == 1)
3933 {
3934 /* dec r/m32 */
3935 pCodeBuf[off++] = 0xff;
3936 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3937 }
3938 else if (iSubtrahend == -1)
3939 {
3940 /* inc r/m32 */
3941 pCodeBuf[off++] = 0xff;
3942 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3943 }
3944 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3945 {
3946 /* sub r/m32, imm8 */
3947 pCodeBuf[off++] = 0x83;
3948 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3949 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3950 }
3951 else
3952 {
3953 /* sub r/m32, imm32 */
3954 pCodeBuf[off++] = 0x81;
3955 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3956 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3957 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3958 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3959 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3960 }
3961 RT_NOREF(iGprTmp);
3962
3963#elif defined(RT_ARCH_ARM64)
3964 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3965 if (uAbsSubtrahend < 4096)
3966 {
3967 if (iSubtrahend >= 0)
3968 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3969 else
3970 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3971 }
3972 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3973 {
3974 if (iSubtrahend >= 0)
3975 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3976 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3977 else
3978 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3979 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3980 }
3981 else if (iGprTmp != UINT8_MAX)
3982 {
3983 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3984 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3985 }
3986 else
3987# ifdef IEM_WITH_THROW_CATCH
3988 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3989# else
3990 AssertReleaseFailedStmt(off = UINT32_MAX);
3991# endif
3992
3993#else
3994# error "Port me"
3995#endif
3996 return off;
3997}
3998
3999
4000/**
4001 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
4002 *
4003 * @note ARM64: Larger constants will require a temporary register. Failing to
4004 * specify one when needed will trigger fatal assertion / throw.
4005 */
4006DECL_INLINE_THROW(uint32_t)
4007iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
4008 uint8_t iGprTmp = UINT8_MAX)
4009
4010{
4011#ifdef RT_ARCH_AMD64
4012 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
4013#elif defined(RT_ARCH_ARM64)
4014 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
4015#else
4016# error "Port me"
4017#endif
4018 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4019 return off;
4020}
4021
4022
4023/**
4024 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
4025 *
4026 * This will optimize using DEC/INC/whatever, and the ARM64 version will not
4027 * set flags, so it is not suitable as a basis for conditional jumps.
4028 *
4029 * @note AMD64: Will only update the lower 16 bits of the register.
4030 * @note ARM64: Will update the entire register.
4031 * @note ARM64: Larger constants will require a temporary register. Failing to
4032 * specify one when needed will trigger fatal assertion / throw.
4033 */
4034DECL_FORCE_INLINE_THROW(uint32_t)
4035iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
4036 uint8_t iGprTmp = UINT8_MAX)
4037{
4038#ifdef RT_ARCH_AMD64
4039 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4040 if (iGprDst >= 8)
4041 pCodeBuf[off++] = X86_OP_REX_B;
4042 if (iSubtrahend == 1)
4043 {
4044 /* dec r/m16 */
4045 pCodeBuf[off++] = 0xff;
4046 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4047 }
4048 else if (iSubtrahend == -1)
4049 {
4050 /* inc r/m16 */
4051 pCodeBuf[off++] = 0xff;
4052 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4053 }
4054 else if ((int8_t)iSubtrahend == iSubtrahend)
4055 {
4056 /* sub r/m16, imm8 */
4057 pCodeBuf[off++] = 0x83;
4058 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4059 pCodeBuf[off++] = (uint8_t)iSubtrahend;
4060 }
4061 else
4062 {
4063 /* sub r/m16, imm16 */
4064 pCodeBuf[off++] = 0x81;
4065 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4066 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
4067 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
4068 }
4069 RT_NOREF(iGprTmp);
4070
4071#elif defined(RT_ARCH_ARM64)
4072 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
4073 if (uAbsSubtrahend < 4096)
4074 {
4075 if (iSubtrahend >= 0)
4076 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4077 else
4078 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4079 }
4080 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
4081 {
4082 if (iSubtrahend >= 0)
4083 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4084 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4085 else
4086 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4087 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4088 }
4089 else if (iGprTmp != UINT8_MAX)
4090 {
4091 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
4092 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4093 }
4094 else
4095# ifdef IEM_WITH_THROW_CATCH
4096 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4097# else
4098 AssertReleaseFailedStmt(off = UINT32_MAX);
4099# endif
4100 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4101
4102#else
4103# error "Port me"
4104#endif
4105 return off;
4106}
4107
4108
4109/**
4110 * Emits adding a 64-bit GPR to another, storing the result in the first.
4111 * @note The AMD64 version sets flags.
4112 */
4113DECL_FORCE_INLINE(uint32_t)
4114iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4115{
4116#if defined(RT_ARCH_AMD64)
4117 /* add Gv,Ev */
4118 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4119 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
4120 pCodeBuf[off++] = 0x03;
4121 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4122
4123#elif defined(RT_ARCH_ARM64)
4124 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
4125
4126#else
4127# error "Port me"
4128#endif
4129 return off;
4130}
4131
4132
4133/**
4134 * Emits adding a 64-bit GPR to another, storing the result in the first.
4135 * @note The AMD64 version sets flags.
4136 */
4137DECL_INLINE_THROW(uint32_t)
4138iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4139{
4140#if defined(RT_ARCH_AMD64)
4141 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4142#elif defined(RT_ARCH_ARM64)
4143 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4144#else
4145# error "Port me"
4146#endif
4147 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4148 return off;
4149}
4150
4151
4152/**
4153 * Emits adding a 32-bit GPR to another, storing the result in the first.
4154 * @note The AMD64 version sets flags.
4155 */
4156DECL_FORCE_INLINE(uint32_t)
4157iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4158{
4159#if defined(RT_ARCH_AMD64)
4160 /* add Gv,Ev */
4161 if (iGprDst >= 8 || iGprAddend >= 8)
4162 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
4163 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
4164 pCodeBuf[off++] = 0x03;
4165 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4166
4167#elif defined(RT_ARCH_ARM64)
4168 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
4169
4170#else
4171# error "Port me"
4172#endif
4173 return off;
4174}
4175
4176
4177/**
4178 * Emits adding a 32-bit GPR to another, storing the result in the first.
4179 * @note The AMD64 version sets flags.
4180 */
4181DECL_INLINE_THROW(uint32_t)
4182iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4183{
4184#if defined(RT_ARCH_AMD64)
4185 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4186#elif defined(RT_ARCH_ARM64)
4187 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4188#else
4189# error "Port me"
4190#endif
4191 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4192 return off;
4193}
4194
4195
4196/**
4197 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4198 */
4199DECL_INLINE_THROW(uint32_t)
4200iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4201{
4202#if defined(RT_ARCH_AMD64)
4203 /* add or inc */
4204 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4205 if (iImm8 != 1)
4206 {
4207 pCodeBuf[off++] = 0x83;
4208 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4209 pCodeBuf[off++] = (uint8_t)iImm8;
4210 }
4211 else
4212 {
4213 pCodeBuf[off++] = 0xff;
4214 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4215 }
4216
4217#elif defined(RT_ARCH_ARM64)
4218 if (iImm8 >= 0)
4219 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
4220 else
4221 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
4222
4223#else
4224# error "Port me"
4225#endif
4226 return off;
4227}
4228
4229
4230/**
4231 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4232 */
4233DECL_INLINE_THROW(uint32_t)
4234iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4235{
4236#if defined(RT_ARCH_AMD64)
4237 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4238#elif defined(RT_ARCH_ARM64)
4239 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4240#else
4241# error "Port me"
4242#endif
4243 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4244 return off;
4245}
4246
4247
4248/**
4249 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4250 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4251 */
4252DECL_FORCE_INLINE(uint32_t)
4253iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4254{
4255#if defined(RT_ARCH_AMD64)
4256 /* add or inc */
4257 if (iGprDst >= 8)
4258 pCodeBuf[off++] = X86_OP_REX_B;
4259 if (iImm8 != 1)
4260 {
4261 pCodeBuf[off++] = 0x83;
4262 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4263 pCodeBuf[off++] = (uint8_t)iImm8;
4264 }
4265 else
4266 {
4267 pCodeBuf[off++] = 0xff;
4268 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4269 }
4270
4271#elif defined(RT_ARCH_ARM64)
4272 if (iImm8 >= 0)
4273 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4274 else
4275 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4276
4277#else
4278# error "Port me"
4279#endif
4280 return off;
4281}
4282
4283
4284/**
4285 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4286 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4287 */
4288DECL_INLINE_THROW(uint32_t)
4289iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4290{
4291#if defined(RT_ARCH_AMD64)
4292 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4293#elif defined(RT_ARCH_ARM64)
4294 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4295#else
4296# error "Port me"
4297#endif
4298 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4299 return off;
4300}
4301
4302
4303/**
4304 * Emits a 64-bit GPR additions with a 64-bit signed addend.
4305 *
4306 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4307 */
4308DECL_FORCE_INLINE_THROW(uint32_t)
4309iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4310{
4311#if defined(RT_ARCH_AMD64)
4312 if ((int8_t)iAddend == iAddend)
4313 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4314
4315 if ((int32_t)iAddend == iAddend)
4316 {
4317 /* add grp, imm32 */
4318 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4319 pCodeBuf[off++] = 0x81;
4320 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4321 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4322 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4323 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4324 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4325 }
4326 else if (iGprTmp != UINT8_MAX)
4327 {
4328 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4329
4330 /* add dst, tmpreg */
4331 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4332 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4333 pCodeBuf[off++] = 0x03;
4334 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4335 }
4336 else
4337# ifdef IEM_WITH_THROW_CATCH
4338 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4339# else
4340 AssertReleaseFailedStmt(off = UINT32_MAX);
4341# endif
4342
4343#elif defined(RT_ARCH_ARM64)
4344 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4345 if (uAbsAddend <= 0xffffffU)
4346 {
4347 bool const fSub = iAddend < 0;
4348 if (uAbsAddend > 0xfffU)
4349 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4350 false /*fSetFlags*/, true /*fShift12*/);
4351 if (uAbsAddend & 0xfffU)
4352 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4353 }
4354 else if (iGprTmp != UINT8_MAX)
4355 {
4356 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4357 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4358 }
4359 else
4360# ifdef IEM_WITH_THROW_CATCH
4361 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4362# else
4363 AssertReleaseFailedStmt(off = UINT32_MAX);
4364# endif
4365
4366#else
4367# error "Port me"
4368#endif
4369 return off;
4370}
4371
4372
4373/**
4374 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4375 */
4376DECL_INLINE_THROW(uint32_t)
4377iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4378{
4379#if defined(RT_ARCH_AMD64)
4380 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4381 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4382
4383 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4384 {
4385 /* add grp, imm32 */
4386 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4387 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4388 pbCodeBuf[off++] = 0x81;
4389 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4390 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4391 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4392 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4393 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4394 }
4395 else
4396 {
4397 /* Best to use a temporary register to deal with this in the simplest way: */
4398 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4399
4400 /* add dst, tmpreg */
4401 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4402 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4403 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4404 pbCodeBuf[off++] = 0x03;
4405 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4406
4407 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4408 }
4409
4410#elif defined(RT_ARCH_ARM64)
4411 bool const fSub = iAddend < 0;
4412 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4413 if (uAbsAddend <= 0xffffffU)
4414 {
4415 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4416 if (uAbsAddend > 0xfffU)
4417 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4418 false /*fSetFlags*/, true /*fShift12*/);
4419 if (uAbsAddend & 0xfffU)
4420 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4421 }
4422 else
4423 {
4424 /* Use temporary register for the immediate. */
4425 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4426
4427 /* add gprdst, gprdst, tmpreg */
4428 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4429 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg);
4430
4431 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4432 }
4433
4434#else
4435# error "Port me"
4436#endif
4437 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4438 return off;
4439}
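

/**
 * Worked sketch (illustrative only) of the ARM64 24-bit addend handling in
 * iemNativeEmitAddGprImm: 0x123456 is split into ADD #0x123, LSL #12
 * followed by ADD #0x456, so no temporary register is consumed.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitAddGprImm24Sketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
{
    /* AMD64 uses a plain imm32 add; ARM64 the two-instruction split described above. */
    return iemNativeEmitAddGprImm(pReNative, off, iGprDst, 0x123456);
}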
4440
4441
4442/**
4443 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4444 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4445 * @note ARM64: @a iAddend values outside the -0xffffff...0xffffff range
4446 * require a temporary register (@a iGprTmp). Will assert / throw if the
4447 * caller does not heed this.
4448 */
4449DECL_FORCE_INLINE_THROW(uint32_t)
4450iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4451{
4452#if defined(RT_ARCH_AMD64)
4453 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4454 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4455
4456 /* add grp, imm32 */
4457 if (iGprDst >= 8)
4458 pCodeBuf[off++] = X86_OP_REX_B;
4459 pCodeBuf[off++] = 0x81;
4460 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4461 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4462 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4463 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4464 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4465 RT_NOREF(iGprTmp);
4466
4467#elif defined(RT_ARCH_ARM64)
4468 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4469 if (uAbsAddend <= 0xffffffU)
4470 {
4471 bool const fSub = iAddend < 0;
4472 if (uAbsAddend > 0xfffU)
4473 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4474 false /*fSetFlags*/, true /*fShift12*/);
4475 if (uAbsAddend & 0xfffU)
4476 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4477 }
4478 else if (iGprTmp != UINT8_MAX)
4479 {
4480 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, iAddend);
4481 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4482 }
4483 else
4484# ifdef IEM_WITH_THROW_CATCH
4485 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4486# else
4487 AssertReleaseFailedStmt(off = UINT32_MAX);
4488# endif
4489
4490#else
4491# error "Port me"
4492#endif
4493 return off;
4494}
4495
4496
4497/**
4498 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4499 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4500 */
4501DECL_INLINE_THROW(uint32_t)
4502iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4503{
4504#if defined(RT_ARCH_AMD64)
4505 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4506
4507#elif defined(RT_ARCH_ARM64)
4508 bool const fSub = iAddend < 0;
4509 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4510 if (uAbsAddend <= 0xffffffU)
4511 {
4512 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4513 if (uAbsAddend > 0xfffU)
4514 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4515 false /*fSetFlags*/, true /*fShift12*/);
4516 if (uAbsAddend & 0xfffU)
4517 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4518 }
4519 else
4520 {
4521 /* Use temporary register for the immediate. */
4522 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4523
4524 /* add gprdst, gprdst, tmpreg */
4525 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4526 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4527
4528 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4529 }
4530
4531#else
4532# error "Port me"
4533#endif
4534 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4535 return off;
4536}
4537
4538
4539/**
4540 * Emits a 16-bit GPR add with a signed immediate addend.
4541 *
4542 * This will optimize using INC/DEC/whatever, and the ARM64 version will not
4543 * set flags, so it is not suitable as a basis for conditional jumps.
4544 *
4545 * @note AMD64: Will only update the lower 16 bits of the register.
4546 * @note ARM64: Will update the entire register.
4547 * @sa iemNativeEmitSubGpr16ImmEx
4548 */
4549DECL_FORCE_INLINE(uint32_t)
4550iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend)
4551{
4552#ifdef RT_ARCH_AMD64
4553 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4554 if (iGprDst >= 8)
4555 pCodeBuf[off++] = X86_OP_REX_B;
4556 if (iAddend == 1)
4557 {
4558 /* inc r/m16 */
4559 pCodeBuf[off++] = 0xff;
4560 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4561 }
4562 else if (iAddend == -1)
4563 {
4564 /* dec r/m16 */
4565 pCodeBuf[off++] = 0xff;
4566 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4567 }
4568 else if ((int8_t)iAddend == iAddend)
4569 {
4570 /* add r/m16, imm8 */
4571 pCodeBuf[off++] = 0x83;
4572 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4573 pCodeBuf[off++] = (uint8_t)iAddend;
4574 }
4575 else
4576 {
4577 /* add r/m16, imm16 */
4578 pCodeBuf[off++] = 0x81;
4579 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4580 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4581 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4582 }
4583
4584#elif defined(RT_ARCH_ARM64)
4585 bool const fSub = iAddend < 0;
4586 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4587 if (uAbsAddend > 0xfffU)
4588 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4589 false /*fSetFlags*/, true /*fShift12*/);
4590 if (uAbsAddend & 0xfffU)
4591 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4592 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4593
4594#else
4595# error "Port me"
4596#endif
4597 return off;
4598}
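

/**
 * Worked sketch (illustrative only, addend hypothetical) of the 16-bit
 * semantics above: on ARM64 the add is done on the 32-bit register and the
 * trailing AND with 0xffff (the immS=15/immR=0 bitmask encoding) discards
 * any carry into bit 16, so e.g. 0xfff0 + 0x20 yields 0x0010 just like a
 * real 16-bit add.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitAddGpr16ImmSketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
{
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, iGprDst, 0x20 /*iAddend*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}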
4599
4600
4601
4602/**
4603 * Adds two 64-bit GPRs together, storing the result in a third register.
4604 */
4605DECL_FORCE_INLINE(uint32_t)
4606iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4607{
4608#ifdef RT_ARCH_AMD64
4609 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4610 {
4611 /** @todo consider LEA */
4612 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4613 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4614 }
4615 else
4616 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4617
4618#elif defined(RT_ARCH_ARM64)
4619 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4620
4621#else
4622# error "Port me!"
4623#endif
4624 return off;
4625}
4626
4627
4628
4629/**
4630 * Adds two 32-bit GPRs together, storing the result in a third register.
4631 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4632 */
4633DECL_FORCE_INLINE(uint32_t)
4634iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4635{
4636#ifdef RT_ARCH_AMD64
4637 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4638 {
4639 /** @todo consider LEA */
4640 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4641 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4642 }
4643 else
4644 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4645
4646#elif defined(RT_ARCH_ARM64)
4647 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4648
4649#else
4650# error "Port me!"
4651#endif
4652 return off;
4653}
4654
4655
4656/**
4657 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4658 * third register.
4659 *
4660 * @note The ARM64 version does not work for non-trivial constants if the
4661 * two registers are the same. Will assert / throw exception.
4662 */
4663DECL_FORCE_INLINE_THROW(uint32_t)
4664iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4665{
4666#ifdef RT_ARCH_AMD64
4667 /** @todo consider LEA */
4668 if ((int8_t)iImmAddend == iImmAddend)
4669 {
4670 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4671 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4672 }
4673 else
4674 {
4675 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4676 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4677 }
4678
4679#elif defined(RT_ARCH_ARM64)
4680 bool const fSub = iImmAddend < 0;
4681 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4682 if (uAbsImmAddend <= 0xfffU)
4683 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend);
4684 else if (uAbsImmAddend <= 0xffffffU)
4685 {
4686 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4687 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4688 if (uAbsImmAddend & 0xfffU)
4689 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & UINT32_C(0xfff));
4690 }
4691 else if (iGprDst != iGprAddend)
4692 {
4693 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4694 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4695 }
4696 else
4697# ifdef IEM_WITH_THROW_CATCH
4698 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4699# else
4700 AssertReleaseFailedStmt(off = UINT32_MAX);
4701# endif
4702
4703#else
4704# error "Port me!"
4705#endif
4706 return off;
4707}
4708
4709
4710/**
4711 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4712 * third register.
4713 *
4714 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4715 *
4716 * @note The ARM64 version does not work for non-trivial constants if the
4717 * two registers are the same. Will assert / throw exception.
4718 */
4719DECL_FORCE_INLINE_THROW(uint32_t)
4720iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4721{
4722#ifdef RT_ARCH_AMD64
4723 /** @todo consider LEA */
4724 if ((int8_t)iImmAddend == iImmAddend)
4725 {
4726 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4727 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4728 }
4729 else
4730 {
4731 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4732 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4733 }
4734
4735#elif defined(RT_ARCH_ARM64)
4736 bool const fSub = iImmAddend < 0;
4737 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4738 if (uAbsImmAddend <= 0xfffU)
4739 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4740 else if (uAbsImmAddend <= 0xffffffU)
4741 {
4742 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4743 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4744 if (uAbsImmAddend & 0xfffU)
4745 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & 0xfff, false /*f64Bit*/);
4746 }
4747 else if (iGprDst != iGprAddend)
4748 {
4749 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4750 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4751 }
4752 else
4753# ifdef IEM_WITH_THROW_CATCH
4754 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4755# else
4756 AssertReleaseFailedStmt(off = UINT32_MAX);
4757# endif
4758
4759#else
4760# error "Port me!"
4761#endif
4762 return off;
4763}
4764
4765
4766/*********************************************************************************************************************************
4767* Unary Operations *
4768*********************************************************************************************************************************/
4769
4770/**
4771 * Emits code for two's complement negation of a 64-bit GPR.
4772 */
4773DECL_FORCE_INLINE_THROW(uint32_t)
4774iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4775{
4776#if defined(RT_ARCH_AMD64)
4777 /* neg Ev */
4778 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4779 pCodeBuf[off++] = 0xf7;
4780 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4781
4782#elif defined(RT_ARCH_ARM64)
4783 /* sub dst, xzr, dst */
4784 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4785
4786#else
4787# error "Port me"
4788#endif
4789 return off;
4790}
4791
4792
4793/**
4794 * Emits code for two's complement negation of a 64-bit GPR.
4795 */
4796DECL_INLINE_THROW(uint32_t)
4797iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4798{
4799#if defined(RT_ARCH_AMD64)
4800 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4801#elif defined(RT_ARCH_ARM64)
4802 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4803#else
4804# error "Port me"
4805#endif
4806 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4807 return off;
4808}
4809
4810
4811/**
4812 * Emits code for two's complement negation of a 32-bit GPR.
4813 * @note Bits 32 thru 63 are set to zero.
4814 */
4815DECL_FORCE_INLINE_THROW(uint32_t)
4816iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4817{
4818#if defined(RT_ARCH_AMD64)
4819 /* neg Ev */
4820 if (iGprDst >= 8)
4821 pCodeBuf[off++] = X86_OP_REX_B;
4822 pCodeBuf[off++] = 0xf7;
4823 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4824
4825#elif defined(RT_ARCH_ARM64)
4826 /* sub dst, xzr, dst */
4827 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4828
4829#else
4830# error "Port me"
4831#endif
4832 return off;
4833}
4834
4835
4836/**
4837 * Emits code for two's complement negation of a 32-bit GPR.
4838 * @note Bits 32 thru 63 are set to zero.
4839 */
4840DECL_INLINE_THROW(uint32_t)
4841iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4842{
4843#if defined(RT_ARCH_AMD64)
4844 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4845#elif defined(RT_ARCH_ARM64)
4846 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4847#else
4848# error "Port me"
4849#endif
4850 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4851 return off;
4852}
4853
4854
4855
4856/*********************************************************************************************************************************
4857* Bit Operations *
4858*********************************************************************************************************************************/
4859
4860/**
4861 * Emits code for clearing bits 16 thru 63 in the GPR.
4862 */
4863DECL_INLINE_THROW(uint32_t)
4864iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4865{
4866#if defined(RT_ARCH_AMD64)
4867 /* movzx Gv,Ew */
4868 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4869 if (iGprDst >= 8)
4870 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4871 pbCodeBuf[off++] = 0x0f;
4872 pbCodeBuf[off++] = 0xb7;
4873 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4874
4875#elif defined(RT_ARCH_ARM64)
4876 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4877# if 1
4878 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4879# else
4880 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4881 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4882# endif
4883#else
4884# error "Port me"
4885#endif
4886 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4887 return off;
4888}
4889
4890
4891/**
4892 * Emits code for AND'ing two 64-bit GPRs.
4893 *
4894 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4895 * and ARM64 hosts.
4896 */
4897DECL_FORCE_INLINE(uint32_t)
4898iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4899{
4900#if defined(RT_ARCH_AMD64)
4901 /* and Gv, Ev */
4902 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4903 pCodeBuf[off++] = 0x23;
4904 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4905 RT_NOREF(fSetFlags);
4906
4907#elif defined(RT_ARCH_ARM64)
4908 if (!fSetFlags)
4909 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4910 else
4911 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4912
4913#else
4914# error "Port me"
4915#endif
4916 return off;
4917}
4918
4919
4920/**
4921 * Emits code for AND'ing two 64-bit GPRs.
4922 *
4923 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4924 * and ARM64 hosts.
4925 */
4926DECL_INLINE_THROW(uint32_t)
4927iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4928{
4929#if defined(RT_ARCH_AMD64)
4930 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4931#elif defined(RT_ARCH_ARM64)
4932 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4933#else
4934# error "Port me"
4935#endif
4936 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4937 return off;
4938}
4939
4940
4941/**
4942 * Emits code for AND'ing two 32-bit GPRs.
4943 */
4944DECL_FORCE_INLINE(uint32_t)
4945iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4946{
4947#if defined(RT_ARCH_AMD64)
4948 /* and Gv, Ev */
4949 if (iGprDst >= 8 || iGprSrc >= 8)
4950 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4951 pCodeBuf[off++] = 0x23;
4952 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4953 RT_NOREF(fSetFlags);
4954
4955#elif defined(RT_ARCH_ARM64)
4956 if (!fSetFlags)
4957 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4958 else
4959 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4960
4961#else
4962# error "Port me"
4963#endif
4964 return off;
4965}
4966
4967
4968/**
4969 * Emits code for AND'ing two 32-bit GPRs.
4970 */
4971DECL_INLINE_THROW(uint32_t)
4972iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4973{
4974#if defined(RT_ARCH_AMD64)
4975 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4976#elif defined(RT_ARCH_ARM64)
4977 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4978#else
4979# error "Port me"
4980#endif
4981 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4982 return off;
4983}
4984
4985
4986/**
4987 * Emits code for AND'ing a 64-bit GPR with a constant.
4988 *
4989 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4990 * and ARM64 hosts.
4991 */
4992DECL_INLINE_THROW(uint32_t)
4993iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4994{
4995#if defined(RT_ARCH_AMD64)
4996 if ((int64_t)uImm == (int8_t)uImm)
4997 {
4998 /* and Ev, imm8 */
4999 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5000 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5001 pbCodeBuf[off++] = 0x83;
5002 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5003 pbCodeBuf[off++] = (uint8_t)uImm;
5004 }
5005 else if ((int64_t)uImm == (int32_t)uImm)
5006 {
5007 /* and Ev, imm32 */
5008 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5009 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5010 pbCodeBuf[off++] = 0x81;
5011 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5012 pbCodeBuf[off++] = RT_BYTE1(uImm);
5013 pbCodeBuf[off++] = RT_BYTE2(uImm);
5014 pbCodeBuf[off++] = RT_BYTE3(uImm);
5015 pbCodeBuf[off++] = RT_BYTE4(uImm);
5016 }
5017 else
5018 {
5019 /* Use temporary register for the 64-bit immediate. */
5020 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5021 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
5022 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5023 }
5024 RT_NOREF(fSetFlags);
5025
5026#elif defined(RT_ARCH_ARM64)
5027 uint32_t uImmR = 0;
5028 uint32_t uImmNandS = 0;
5029 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5030 {
5031 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5032 if (!fSetFlags)
5033 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
5034 else
5035 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
5036 }
5037 else
5038 {
5039 /* Use temporary register for the 64-bit immediate. */
5040 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5041 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
5042 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5043 }
5044
5045#else
5046# error "Port me"
5047#endif
5048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5049 return off;
5050}
5051
5052
5053/**
5054 * Emits code for AND'ing a 32-bit GPR with a constant.
5055 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5056 * @note For ARM64 this only supports @a uImm values that can be expressed using
5057 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
5058 * make sure this is possible!
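 *       (Examples of encodable 32-bit masks: 0x000000ff, 0xffff0000, 0x0f0f0f0f;
 *       a value like 0x12345678 has no such encoding.)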
5059 */
5060DECL_FORCE_INLINE_THROW(uint32_t)
5061iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
5062{
5063#if defined(RT_ARCH_AMD64)
5064 /* and Ev, imm */
5065 if (iGprDst >= 8)
5066 pCodeBuf[off++] = X86_OP_REX_B;
5067 if ((int32_t)uImm == (int8_t)uImm)
5068 {
5069 pCodeBuf[off++] = 0x83;
5070 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5071 pCodeBuf[off++] = (uint8_t)uImm;
5072 }
5073 else
5074 {
5075 pCodeBuf[off++] = 0x81;
5076 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5077 pCodeBuf[off++] = RT_BYTE1(uImm);
5078 pCodeBuf[off++] = RT_BYTE2(uImm);
5079 pCodeBuf[off++] = RT_BYTE3(uImm);
5080 pCodeBuf[off++] = RT_BYTE4(uImm);
5081 }
5082 RT_NOREF(fSetFlags);
5083
5084#elif defined(RT_ARCH_ARM64)
5085 uint32_t uImmR = 0;
5086 uint32_t uImmNandS = 0;
5087 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5088 {
5089 if (!fSetFlags)
5090 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5091 else
5092 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5093 }
5094 else
5095# ifdef IEM_WITH_THROW_CATCH
5096 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5097# else
5098 AssertReleaseFailedStmt(off = UINT32_MAX);
5099# endif
5100
5101#else
5102# error "Port me"
5103#endif
5104 return off;
5105}
5106
5107
5108/**
5109 * Emits code for AND'ing a 32-bit GPR with a constant.
5110 *
5111 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5112 */
5113DECL_INLINE_THROW(uint32_t)
5114iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
5115{
5116#if defined(RT_ARCH_AMD64)
5117 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
5118
5119#elif defined(RT_ARCH_ARM64)
5120 uint32_t uImmR = 0;
5121 uint32_t uImmNandS = 0;
5122 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5123 {
5124 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5125 if (!fSetFlags)
5126 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5127 else
5128 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5129 }
5130 else
5131 {
5132 /* Use temporary register for the immediate. */
5133 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5134 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
5135 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5136 }
5137
5138#else
5139# error "Port me"
5140#endif
5141 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5142 return off;
5143}
5144
5145
5146/**
5147 * Emits code for AND'ing a 64-bit GPR with a constant.
5148 *
5149 * @note For ARM64, any complicated immediate without an AND/ANDS compatible
5150 *       encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
5151 *       are the same.
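 *       (Examples of encodable 64-bit masks: 0xffffffffffff0000 and
 *       0x00ff00ff00ff00ff; something like 0x123456789abcdef0 has no encoding.)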
5152 */
5153DECL_FORCE_INLINE_THROW(uint32_t)
5154iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
5155 bool fSetFlags = false)
5156{
5157#if defined(RT_ARCH_AMD64)
5158 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
5159 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
5160 RT_NOREF(fSetFlags);
5161
5162#elif defined(RT_ARCH_ARM64)
5163 uint32_t uImmR = 0;
5164 uint32_t uImmNandS = 0;
5165 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5166 {
5167 if (!fSetFlags)
5168 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
5169 else
5170 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
5171 }
5172 else if (iGprDst != iGprSrc)
5173 {
5174 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
5175 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5176 }
5177 else
5178# ifdef IEM_WITH_THROW_CATCH
5179 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5180# else
5181 AssertReleaseFailedStmt(off = UINT32_MAX);
5182# endif
5183
5184#else
5185# error "Port me"
5186#endif
5187 return off;
5188}
5189
5190/**
5191 * Emits code for AND'ing a 32-bit GPR with a constant.
5192 *
5193 * @note For ARM64, any complicated immediate without an AND/ANDS compatible
5194 *       encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
5195 *       are the same.
5196 *
5197 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5198 */
5199DECL_FORCE_INLINE_THROW(uint32_t)
5200iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
5201 bool fSetFlags = false)
5202{
5203#if defined(RT_ARCH_AMD64)
5204 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5205 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5206 RT_NOREF(fSetFlags);
5207
5208#elif defined(RT_ARCH_ARM64)
5209 uint32_t uImmR = 0;
5210 uint32_t uImmNandS = 0;
5211 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5212 {
5213 if (!fSetFlags)
5214 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5215 else
5216 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5217 }
5218 else if (iGprDst != iGprSrc)
5219 {
5220 /* If a value of 64K or above has no more than 16 significant bits (i.e. it
5221    is a 16-bit value shifted left), we can use MOVZ plus a shifted-register
5222    AND to save an instruction. We prefer the builtin ctz here to our own,
5223    since the compiler can evaluate uImm at compile time when it is a constant
5224    (which is often the case). This is useful for the TLB lookup code. */
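 /* Example: uImm=0x00ff0000 gives cTrailingZeros=16, so we emit
    "movz iGprDst, #0xff" followed by "and iGprDst, iGprSrc, iGprDst, lsl #16". */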
5225 if (uImm > 0xffffU)
5226 {
5227# if defined(__GNUC__)
5228 unsigned cTrailingZeros = __builtin_ctz(uImm);
5229# else
5230 unsigned cTrailingZeros = ASMBitFirstSetU32(uImm) - 1;
5231# endif
5232 if ((uImm >> cTrailingZeros) <= 0xffffU)
5233 {
5234 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprDst, uImm >> cTrailingZeros);
5235 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprSrc,
5236 iGprDst, true /*f64Bit*/, cTrailingZeros, kArmv8A64InstrShift_Lsl);
5237 return off;
5238 }
5239 }
5240 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5241 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5242 }
5243 else
5244# ifdef IEM_WITH_THROW_CATCH
5245 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5246# else
5247 AssertReleaseFailedStmt(off = UINT32_MAX);
5248# endif
5249
5250#else
5251# error "Port me"
5252#endif
5253 return off;
5254}
5255
5256
5257/**
5258 * Emits code for OR'ing two 64-bit GPRs.
5259 */
5260DECL_FORCE_INLINE(uint32_t)
5261iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5262{
5263#if defined(RT_ARCH_AMD64)
5264 /* or Gv, Ev */
5265 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5266 pCodeBuf[off++] = 0x0b;
5267 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5268
5269#elif defined(RT_ARCH_ARM64)
5270 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5271
5272#else
5273# error "Port me"
5274#endif
5275 return off;
5276}
5277
5278
5279/**
5280 * Emits code for OR'ing two 64-bit GPRs.
5281 */
5282DECL_INLINE_THROW(uint32_t)
5283iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5284{
5285#if defined(RT_ARCH_AMD64)
5286 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5287#elif defined(RT_ARCH_ARM64)
5288 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5289#else
5290# error "Port me"
5291#endif
5292 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5293 return off;
5294}
5295
5296
5297/**
5298 * Emits code for OR'ing two 32-bit GPRs.
5299 * @note Bits 63:32 of the destination GPR will be cleared.
5300 */
5301DECL_FORCE_INLINE(uint32_t)
5302iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5303{
5304#if defined(RT_ARCH_AMD64)
5305 /* or Gv, Ev */
5306 if (iGprDst >= 8 || iGprSrc >= 8)
5307 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5308 pCodeBuf[off++] = 0x0b;
5309 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5310
5311#elif defined(RT_ARCH_ARM64)
5312 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5313
5314#else
5315# error "Port me"
5316#endif
5317 return off;
5318}
5319
5320
5321/**
5322 * Emits code for OR'ing two 32-bit GPRs.
5323 * @note Bits 63:32 of the destination GPR will be cleared.
5324 */
5325DECL_INLINE_THROW(uint32_t)
5326iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5327{
5328#if defined(RT_ARCH_AMD64)
5329 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5330#elif defined(RT_ARCH_ARM64)
5331 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5332#else
5333# error "Port me"
5334#endif
5335 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5336 return off;
5337}
5338
5339
5340/**
5341 * Emits code for OR'ing a 64-bit GPR with a constant.
5342 */
5343DECL_INLINE_THROW(uint32_t)
5344iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5345{
5346#if defined(RT_ARCH_AMD64)
5347 if ((int64_t)uImm == (int8_t)uImm)
5348 {
5349 /* or Ev, imm8 */
5350 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5351 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5352 pbCodeBuf[off++] = 0x83;
5353 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5354 pbCodeBuf[off++] = (uint8_t)uImm;
5355 }
5356 else if ((int64_t)uImm == (int32_t)uImm)
5357 {
5358 /* or Ev, imm32 */
5359 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5360 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5361 pbCodeBuf[off++] = 0x81;
5362 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5363 pbCodeBuf[off++] = RT_BYTE1(uImm);
5364 pbCodeBuf[off++] = RT_BYTE2(uImm);
5365 pbCodeBuf[off++] = RT_BYTE3(uImm);
5366 pbCodeBuf[off++] = RT_BYTE4(uImm);
5367 }
5368 else
5369 {
5370 /* Use temporary register for the 64-bit immediate. */
5371 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5372 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5373 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5374 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5375 }
5376
5377#elif defined(RT_ARCH_ARM64)
5378 uint32_t uImmR = 0;
5379 uint32_t uImmNandS = 0;
5380 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5381 {
5382 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5383 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5384 }
5385 else
5386 {
5387 /* Use temporary register for the 64-bit immediate. */
5388 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5389 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5390 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5391 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5392 }
5393
5394#else
5395# error "Port me"
5396#endif
5397 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5398 return off;
5399}
5400
5401
5402/**
5403 * Emits code for OR'ing a 32-bit GPR with a constant.
5404 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5405 * @note For ARM64 this only supports @a uImm values that can be expressed using
5406 *       the two 6-bit immediates of the ORR instruction. The caller must make
5407 * sure this is possible!
5408 */
5409DECL_FORCE_INLINE_THROW(uint32_t)
5410iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5411{
5412#if defined(RT_ARCH_AMD64)
5413 /* or Ev, imm */
5414 if (iGprDst >= 8)
5415 pCodeBuf[off++] = X86_OP_REX_B;
5416 if ((int32_t)uImm == (int8_t)uImm)
5417 {
5418 pCodeBuf[off++] = 0x83;
5419 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5420 pCodeBuf[off++] = (uint8_t)uImm;
5421 }
5422 else
5423 {
5424 pCodeBuf[off++] = 0x81;
5425 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5426 pCodeBuf[off++] = RT_BYTE1(uImm);
5427 pCodeBuf[off++] = RT_BYTE2(uImm);
5428 pCodeBuf[off++] = RT_BYTE3(uImm);
5429 pCodeBuf[off++] = RT_BYTE4(uImm);
5430 }
5431
5432#elif defined(RT_ARCH_ARM64)
5433 uint32_t uImmR = 0;
5434 uint32_t uImmNandS = 0;
5435 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5436 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5437 else
5438# ifdef IEM_WITH_THROW_CATCH
5439 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5440# else
5441 AssertReleaseFailedStmt(off = UINT32_MAX);
5442# endif
5443
5444#else
5445# error "Port me"
5446#endif
5447 return off;
5448}
5449
5450
5451/**
5452 * Emits code for OR'ing a 32-bit GPR with a constant.
5453 *
5454 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5455 */
5456DECL_INLINE_THROW(uint32_t)
5457iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5458{
5459#if defined(RT_ARCH_AMD64)
5460 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5461
5462#elif defined(RT_ARCH_ARM64)
5463 uint32_t uImmR = 0;
5464 uint32_t uImmNandS = 0;
5465 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5466 {
5467 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5468 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5469 }
5470 else
5471 {
5472 /* Use temporary register for the immediate. */
5473 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5474 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5475 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5476 }
5477
5478#else
5479# error "Port me"
5480#endif
5481 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5482 return off;
5483}
5484
5485
5486
5487/**
5488 * ORs two 64-bit GPRs together, storing the result in a third register.
5489 */
5490DECL_FORCE_INLINE(uint32_t)
5491iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5492{
5493#ifdef RT_ARCH_AMD64
5494 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5495 {
5496 /** @todo consider LEA */
5497 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5498 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5499 }
5500 else
5501 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5502
5503#elif defined(RT_ARCH_ARM64)
5504 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5505
5506#else
5507# error "Port me!"
5508#endif
5509 return off;
5510}
5511
5512
5513
5514/**
5515 * ORs two 32-bit GPRs together, storing the result in a third register.
5516 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5517 */
5518DECL_FORCE_INLINE(uint32_t)
5519iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5520{
5521#ifdef RT_ARCH_AMD64
5522 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5523 {
5524 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5525 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5526 }
5527 else
5528 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5529
5530#elif defined(RT_ARCH_ARM64)
5531 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5532
5533#else
5534# error "Port me!"
5535#endif
5536 return off;
5537}
5538
5539
5540/**
5541 * Emits code for XOR'ing two 64-bit GPRs.
5542 */
5543DECL_INLINE_THROW(uint32_t)
5544iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5545{
5546#if defined(RT_ARCH_AMD64)
5547 /* xor Gv, Ev */
5548 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5549 pCodeBuf[off++] = 0x33;
5550 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5551
5552#elif defined(RT_ARCH_ARM64)
5553 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5554
5555#else
5556# error "Port me"
5557#endif
5558 return off;
5559}
5560
5561
5562/**
5563 * Emits code for XOR'ing two 64-bit GPRs.
5564 */
5565DECL_INLINE_THROW(uint32_t)
5566iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5567{
5568#if defined(RT_ARCH_AMD64)
5569 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5570#elif defined(RT_ARCH_ARM64)
5571 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5572#else
5573# error "Port me"
5574#endif
5575 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5576 return off;
5577}
5578
5579
5580/**
5581 * Emits code for XOR'ing two 32-bit GPRs.
5582 */
5583DECL_INLINE_THROW(uint32_t)
5584iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5585{
5586#if defined(RT_ARCH_AMD64)
5587 /* xor Gv, Ev */
5588 if (iGprDst >= 8 || iGprSrc >= 8)
5589 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5590 pCodeBuf[off++] = 0x33;
5591 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5592
5593#elif defined(RT_ARCH_ARM64)
5594 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5595
5596#else
5597# error "Port me"
5598#endif
5599 return off;
5600}
5601
5602
5603/**
5604 * Emits code for XOR'ing two 32-bit GPRs.
5605 */
5606DECL_INLINE_THROW(uint32_t)
5607iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5608{
5609#if defined(RT_ARCH_AMD64)
5610 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5611#elif defined(RT_ARCH_ARM64)
5612 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5613#else
5614# error "Port me"
5615#endif
5616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5617 return off;
5618}
5619
5620
5621/**
5622 * Emits code for XOR'ing a 32-bit GPR with a constant.
5623 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5624 * @note For ARM64 this only supports @a uImm values that can be expressed using
5625 * the two 6-bit immediates of the EOR instructions. The caller must make
5626 * sure this is possible!
5627 */
5628DECL_FORCE_INLINE_THROW(uint32_t)
5629iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5630{
5631#if defined(RT_ARCH_AMD64)
5632 /* xor Ev, imm */
5633 if (iGprDst >= 8)
5634 pCodeBuf[off++] = X86_OP_REX_B;
5635 if ((int32_t)uImm == (int8_t)uImm)
5636 {
5637 pCodeBuf[off++] = 0x83;
5638 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5639 pCodeBuf[off++] = (uint8_t)uImm;
5640 }
5641 else
5642 {
5643 pCodeBuf[off++] = 0x81;
5644 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5645 pCodeBuf[off++] = RT_BYTE1(uImm);
5646 pCodeBuf[off++] = RT_BYTE2(uImm);
5647 pCodeBuf[off++] = RT_BYTE3(uImm);
5648 pCodeBuf[off++] = RT_BYTE4(uImm);
5649 }
5650
5651#elif defined(RT_ARCH_ARM64)
5652 uint32_t uImmR = 0;
5653 uint32_t uImmNandS = 0;
5654 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5655 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5656 else
5657# ifdef IEM_WITH_THROW_CATCH
5658 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5659# else
5660 AssertReleaseFailedStmt(off = UINT32_MAX);
5661# endif
5662
5663#else
5664# error "Port me"
5665#endif
5666 return off;
5667}
5668
5669
5670/**
5671 * Emits code for XOR'ing a 32-bit GPR with a constant.
5672 */
5673DECL_INLINE_THROW(uint32_t)
5674iemNativeEmitXorGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5675{
5676#if defined(RT_ARCH_AMD64)
5677 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5678#elif defined(RT_ARCH_ARM64)
5679 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, uImm);
5680#else
5681# error "Port me"
5682#endif
5683 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5684 return off;
5685}
5686
5687
5688/*********************************************************************************************************************************
5689* Shifting *
5690*********************************************************************************************************************************/
5691
5692/**
5693 * Emits code for shifting a GPR a fixed number of bits to the left.
5694 */
5695DECL_FORCE_INLINE(uint32_t)
5696iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5697{
5698 Assert(cShift > 0 && cShift < 64);
5699
5700#if defined(RT_ARCH_AMD64)
5701 /* shl dst, cShift */
5702 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5703 if (cShift != 1)
5704 {
5705 pCodeBuf[off++] = 0xc1;
5706 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5707 pCodeBuf[off++] = cShift;
5708 }
5709 else
5710 {
5711 pCodeBuf[off++] = 0xd1;
5712 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5713 }
5714
5715#elif defined(RT_ARCH_ARM64)
5716 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5717
5718#else
5719# error "Port me"
5720#endif
5721 return off;
5722}
5723
5724
5725/**
5726 * Emits code for shifting a GPR a fixed number of bits to the left.
5727 */
5728DECL_INLINE_THROW(uint32_t)
5729iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5730{
5731#if defined(RT_ARCH_AMD64)
5732 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5733#elif defined(RT_ARCH_ARM64)
5734 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5735#else
5736# error "Port me"
5737#endif
5738 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5739 return off;
5740}
5741
5742
5743/**
5744 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5745 */
5746DECL_FORCE_INLINE(uint32_t)
5747iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5748{
5749 Assert(cShift > 0 && cShift < 32);
5750
5751#if defined(RT_ARCH_AMD64)
5752 /* shl dst, cShift */
5753 if (iGprDst >= 8)
5754 pCodeBuf[off++] = X86_OP_REX_B;
5755 if (cShift != 1)
5756 {
5757 pCodeBuf[off++] = 0xc1;
5758 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5759 pCodeBuf[off++] = cShift;
5760 }
5761 else
5762 {
5763 pCodeBuf[off++] = 0xd1;
5764 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5765 }
5766
5767#elif defined(RT_ARCH_ARM64)
5768 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5769
5770#else
5771# error "Port me"
5772#endif
5773 return off;
5774}
5775
5776
5777/**
5778 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5779 */
5780DECL_INLINE_THROW(uint32_t)
5781iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5782{
5783#if defined(RT_ARCH_AMD64)
5784 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5785#elif defined(RT_ARCH_ARM64)
5786 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5787#else
5788# error "Port me"
5789#endif
5790 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5791 return off;
5792}
5793
5794
5795/**
5796 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5797 */
5798DECL_FORCE_INLINE(uint32_t)
5799iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5800{
5801 Assert(cShift > 0 && cShift < 64);
5802
5803#if defined(RT_ARCH_AMD64)
5804 /* shr dst, cShift */
5805 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5806 if (cShift != 1)
5807 {
5808 pCodeBuf[off++] = 0xc1;
5809 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5810 pCodeBuf[off++] = cShift;
5811 }
5812 else
5813 {
5814 pCodeBuf[off++] = 0xd1;
5815 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5816 }
5817
5818#elif defined(RT_ARCH_ARM64)
5819 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5820
5821#else
5822# error "Port me"
5823#endif
5824 return off;
5825}
5826
5827
5828/**
5829 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5830 */
5831DECL_INLINE_THROW(uint32_t)
5832iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5833{
5834#if defined(RT_ARCH_AMD64)
5835 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5836#elif defined(RT_ARCH_ARM64)
5837 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5838#else
5839# error "Port me"
5840#endif
5841 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5842 return off;
5843}
5844
5845
5846/**
5847 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5848 * right.
5849 */
5850DECL_FORCE_INLINE(uint32_t)
5851iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5852{
5853 Assert(cShift > 0 && cShift < 32);
5854
5855#if defined(RT_ARCH_AMD64)
5856 /* shr dst, cShift */
5857 if (iGprDst >= 8)
5858 pCodeBuf[off++] = X86_OP_REX_B;
5859 if (cShift != 1)
5860 {
5861 pCodeBuf[off++] = 0xc1;
5862 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5863 pCodeBuf[off++] = cShift;
5864 }
5865 else
5866 {
5867 pCodeBuf[off++] = 0xd1;
5868 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5869 }
5870
5871#elif defined(RT_ARCH_ARM64)
5872 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5873
5874#else
5875# error "Port me"
5876#endif
5877 return off;
5878}
5879
5880
5881/**
5882 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5883 * right.
5884 */
5885DECL_INLINE_THROW(uint32_t)
5886iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5887{
5888#if defined(RT_ARCH_AMD64)
5889 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5890#elif defined(RT_ARCH_ARM64)
5891 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5892#else
5893# error "Port me"
5894#endif
5895 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5896 return off;
5897}
5898
5899
5900/**
5901 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5902 * right and assigning it to a different GPR.
5903 */
5904DECL_INLINE_THROW(uint32_t)
5905iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5906{
5907 Assert(cShift > 0); Assert(cShift < 32);
5908#if defined(RT_ARCH_AMD64)
5909 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5910 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5911
5912#elif defined(RT_ARCH_ARM64)
5913 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5914
5915#else
5916# error "Port me"
5917#endif
5918 return off;
5919}
5920
5921
5922/**
5923 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5924 */
5925DECL_FORCE_INLINE(uint32_t)
5926iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5927{
5928 Assert(cShift > 0 && cShift < 64);
5929
5930#if defined(RT_ARCH_AMD64)
5931 /* sar dst, cShift */
5932 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5933 if (cShift != 1)
5934 {
5935 pCodeBuf[off++] = 0xc1;
5936 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5937 pCodeBuf[off++] = cShift;
5938 }
5939 else
5940 {
5941 pCodeBuf[off++] = 0xd1;
5942 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5943 }
5944
5945#elif defined(RT_ARCH_ARM64)
5946 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);
5947
5948#else
5949# error "Port me"
5950#endif
5951 return off;
5952}
5953
5954
5955/**
5956 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5957 */
5958DECL_INLINE_THROW(uint32_t)
5959iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5960{
5961#if defined(RT_ARCH_AMD64)
5962 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5963#elif defined(RT_ARCH_ARM64)
5964 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5965#else
5966# error "Port me"
5967#endif
5968 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5969 return off;
5970}
5971
5972
5973/**
5974 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5975 */
5976DECL_FORCE_INLINE(uint32_t)
5977iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5978{
5979 Assert(cShift > 0 && cShift < 32);
5980
5981#if defined(RT_ARCH_AMD64)
5982 /* sar dst, cShift */
5983 if (iGprDst >= 8)
5984 pCodeBuf[off++] = X86_OP_REX_B;
5985 if (cShift != 1)
5986 {
5987 pCodeBuf[off++] = 0xc1;
5988 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5989 pCodeBuf[off++] = cShift;
5990 }
5991 else
5992 {
5993 pCodeBuf[off++] = 0xd1;
5994 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5995 }
5996
5997#elif defined(RT_ARCH_ARM64)
5998 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5999
6000#else
6001# error "Port me"
6002#endif
6003 return off;
6004}
6005
6006
6007/**
6008 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
6009 */
6010DECL_INLINE_THROW(uint32_t)
6011iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6012{
6013#if defined(RT_ARCH_AMD64)
6014 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
6015#elif defined(RT_ARCH_ARM64)
6016 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
6017#else
6018# error "Port me"
6019#endif
6020 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6021 return off;
6022}
6023
6024
6025/**
6026 * Emits code for rotating a GPR a fixed number of bits to the left.
6027 */
6028DECL_FORCE_INLINE(uint32_t)
6029iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6030{
6031 Assert(cShift > 0 && cShift < 64);
6032
6033#if defined(RT_ARCH_AMD64)
6034 /* rol dst, cShift */
6035 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
6036 if (cShift != 1)
6037 {
6038 pCodeBuf[off++] = 0xc1;
6039 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
6040 pCodeBuf[off++] = cShift;
6041 }
6042 else
6043 {
6044 pCodeBuf[off++] = 0xd1;
6045 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
6046 }
6047
6048#elif defined(RT_ARCH_ARM64)
6049 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, 64 - cShift); /* rotate left by n == rotate right by (64 - n) */
6050
6051#else
6052# error "Port me"
6053#endif
6054 return off;
6055}
6056
6057
6058#if defined(RT_ARCH_AMD64)
6059/**
6060 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
6061 */
6062DECL_FORCE_INLINE(uint32_t)
6063iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6064{
6065 Assert(cShift > 0 && cShift < 32);
6066
6067 /* rcl dst, cShift */
6068 if (iGprDst >= 8)
6069 pCodeBuf[off++] = X86_OP_REX_B;
6070 if (cShift != 1)
6071 {
6072 pCodeBuf[off++] = 0xc1;
6073 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
6074 pCodeBuf[off++] = cShift;
6075 }
6076 else
6077 {
6078 pCodeBuf[off++] = 0xd1;
6079 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
6080 }
6081
6082 return off;
6083}
6084#endif /* RT_ARCH_AMD64 */
6085
6086
6087
6088/**
6089 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
6090 * @note ARM64: Bits 63:32 of the GPR will be cleared. AMD64: Bits 63:16 are left unchanged.
6091 */
6092DECL_FORCE_INLINE(uint32_t)
6093iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6094{
6095#if defined(RT_ARCH_AMD64)
6096 /*
6097 * There is no bswap r16 on x86 (the encoding exists but does not work).
6098 * So just use a rol (gcc -O2 is doing that).
6099 *
6100 * rol r16, 0x8
6101 */
6102 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6103 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6104 if (iGpr >= 8)
6105 pbCodeBuf[off++] = X86_OP_REX_B;
6106 pbCodeBuf[off++] = 0xc1;
6107 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
6108 pbCodeBuf[off++] = 0x08;
6109#elif defined(RT_ARCH_ARM64)
6110 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6111
6112 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
6113#else
6114# error "Port me"
6115#endif
6116
6117 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6118 return off;
6119}
6120
6121
6122/**
6123 * Emits code for reversing the byte order in a 32-bit GPR.
6124 * @note Bits 63:32 of the destination GPR will be cleared.
6125 */
6126DECL_FORCE_INLINE(uint32_t)
6127iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6128{
6129#if defined(RT_ARCH_AMD64)
6130 /* bswap r32 */
6131 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6132
6133 if (iGpr >= 8)
6134 pbCodeBuf[off++] = X86_OP_REX_B;
6135 pbCodeBuf[off++] = 0x0f;
6136 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
6137#elif defined(RT_ARCH_ARM64)
6138 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6139
6140 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
6141#else
6142# error "Port me"
6143#endif
6144
6145 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6146 return off;
6147}
6148
6149
6150/**
6151 * Emits code for reversing the byte order in a 64-bit GPR.
6152 */
6153DECL_FORCE_INLINE(uint32_t)
6154iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6155{
6156#if defined(RT_ARCH_AMD64)
6157 /* bswap r64 */
6158 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6159
6160 if (iGpr >= 8)
6161 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
6162 else
6163 pbCodeBuf[off++] = X86_OP_REX_W;
6164 pbCodeBuf[off++] = 0x0f;
6165 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
6166#elif defined(RT_ARCH_ARM64)
6167 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6168
6169 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
6170#else
6171# error "Port me"
6172#endif
6173
6174 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6175 return off;
6176}
6177
6178
6179/*********************************************************************************************************************************
6180* Bitfield manipulation *
6181*********************************************************************************************************************************/
6182
6183/**
6184 * Emits code for clearing a single bit in a 32-bit GPR.
6185 */
6186DECL_FORCE_INLINE(uint32_t)
6187iemNativeEmitBitClearInGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const iGpr, uint8_t iBit)
6188{
6189 Assert(iBit < 32);
6190
6191#if defined(RT_ARCH_AMD64)
6192 /* btr r32, imm8 */
6193 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6194
6195 if (iGpr >= 8)
6196 pbCodeBuf[off++] = X86_OP_REX_B;
6197 pbCodeBuf[off++] = 0x0f;
6198 pbCodeBuf[off++] = 0xba;
6199 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGpr & 7);
6200 pbCodeBuf[off++] = iBit;
6201#elif defined(RT_ARCH_ARM64)
6202 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6203
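 /* BFC clears a field of cBits width starting at bit offFirstBit; here a single bit. */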
6204 pu32CodeBuf[off++] = Armv8A64MkInstrBfc(iGpr, iBit /*offFirstBit*/, 1 /*cBits*/, true /*f64Bit*/);
6205#else
6206# error "Port me"
6207#endif
6208
6209 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6210 return off;
6211}
6212
6213
6214/*********************************************************************************************************************************
6215* Compare and Testing *
6216*********************************************************************************************************************************/
6217
6218
6219#ifdef RT_ARCH_ARM64
6220/**
6221 * Emits an ARM64 compare instruction.
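 * @note This is encoded as SUBS with XZR as the result register, optionally
 *       applying a shift to the right-hand register.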
6222 */
6223DECL_INLINE_THROW(uint32_t)
6224iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
6225 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
6226{
6227 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6228 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
6229 f64Bit, true /*fSetFlags*/, cShift, enmShift);
6230 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6231 return off;
6232}
6233#endif
6234
6235
6236/**
6237 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6238 * with conditional instructions.
6239 */
6240DECL_FORCE_INLINE(uint32_t)
6241iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6242{
6243#ifdef RT_ARCH_AMD64
6244 /* cmp Gv, Ev */
6245 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6246 pCodeBuf[off++] = 0x3b;
6247 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6248
6249#elif defined(RT_ARCH_ARM64)
6250 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
6251
6252#else
6253# error "Port me!"
6254#endif
6255 return off;
6256}
6257
6258
6259/**
6260 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6261 * with conditional instructions.
6262 */
6263DECL_INLINE_THROW(uint32_t)
6264iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6265{
6266#ifdef RT_ARCH_AMD64
6267 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6268#elif defined(RT_ARCH_ARM64)
6269 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6270#else
6271# error "Port me!"
6272#endif
6273 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6274 return off;
6275}
6276
6277
6278/**
6279 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6280 * with conditional instructions.
6281 */
6282DECL_FORCE_INLINE(uint32_t)
6283iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6284{
6285#ifdef RT_ARCH_AMD64
6286 /* cmp Gv, Ev */
6287 if (iGprLeft >= 8 || iGprRight >= 8)
6288 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6289 pCodeBuf[off++] = 0x3b;
6290 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6291
6292#elif defined(RT_ARCH_ARM64)
6293 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
6294
6295#else
6296# error "Port me!"
6297#endif
6298 return off;
6299}
6300
6301
6302/**
6303 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6304 * with conditional instructions.
6305 */
6306DECL_INLINE_THROW(uint32_t)
6307iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6308{
6309#ifdef RT_ARCH_AMD64
6310 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6311#elif defined(RT_ARCH_ARM64)
6312 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6313#else
6314# error "Port me!"
6315#endif
6316 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6317 return off;
6318}
6319
6320
6321/**
6322 * Emits a compare of a 64-bit GPR with a constant value, setting status
6323 * flags/whatever for use with a conditional instruction.
6324 */
6325DECL_INLINE_THROW(uint32_t)
6326iemNativeEmitCmpGprWithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft,
6327 uint64_t uImm, uint8_t idxTmpReg = UINT8_MAX)
6328{
6329#ifdef RT_ARCH_AMD64
6330 if ((int8_t)uImm == (int64_t)uImm)
6331 {
6332 /* cmp Ev, Ib */
6333 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6334 pCodeBuf[off++] = 0x83;
6335 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6336 pCodeBuf[off++] = (uint8_t)uImm;
6337 return off;
6338 }
6339 if ((int32_t)uImm == (int64_t)uImm)
6340 {
6341 /* cmp Ev, imm */
6342 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6343 pCodeBuf[off++] = 0x81;
6344 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6345 pCodeBuf[off++] = RT_BYTE1(uImm);
6346 pCodeBuf[off++] = RT_BYTE2(uImm);
6347 pCodeBuf[off++] = RT_BYTE3(uImm);
6348 pCodeBuf[off++] = RT_BYTE4(uImm);
6349 return off;
6350 }
6351
6352#elif defined(RT_ARCH_ARM64)
6353 if (uImm < _4K)
6354 {
6355 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6356 true /*64Bit*/, true /*fSetFlags*/);
6357 return off;
6358 }
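 /* The other ARM64 ADD/SUB immediate form: a 12-bit value shifted left by 12, i.e. 0x1000..0xfff000 with the low 12 bits all zero. */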
6359 if ((uImm & ~(uint64_t)0xfff000) == 0)
6360 {
6361 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6362 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6363 return off;
6364 }
6365
6366#else
6367# error "Port me!"
6368#endif
6369
6370 if (idxTmpReg != UINT8_MAX)
6371 {
6372 /* Use temporary register for the immediate. */
6373 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpReg, uImm);
6374 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, iGprLeft, idxTmpReg);
6375 }
6376 else
6377# ifdef IEM_WITH_THROW_CATCH
6378 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6379# else
6380 AssertReleaseFailedStmt(off = UINT32_MAX);
6381# endif
6382
6383 return off;
6384}
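/**
 * Usage sketch (illustrative, not from the original source): when @a uImm may
 * not fit any immediate form on either host, pass a pre-allocated temporary
 * register so the worker never has to fail. Note that
 * iemNativeRegAllocTmpImm() already loads the value, so the load emitted by
 * the worker is redundant in this simple sketch:
 * @code
 * uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
 * PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
 * off = iemNativeEmitCmpGprWithImmEx(pCodeBuf, off, iGprLeft, uImm, idxTmpReg);
 * IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 * iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
 * @endcode
 */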
6385
6386
6387/**
6388 * Emits a compare of a 64-bit GPR with a constant value, setting status
6389 * flags/whatever for use with a conditional instruction.
6390 */
6391DECL_INLINE_THROW(uint32_t)
6392iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
6393{
6394#ifdef RT_ARCH_AMD64
6395 if ((int8_t)uImm == (int64_t)uImm)
6396 {
6397 /* cmp Ev, Ib */
6398 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6399 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6400 pbCodeBuf[off++] = 0x83;
6401 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6402 pbCodeBuf[off++] = (uint8_t)uImm;
6403 }
6404 else if ((int32_t)uImm == (int64_t)uImm)
6405 {
6406 /* cmp Ev, imm */
6407 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6408 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6409 pbCodeBuf[off++] = 0x81;
6410 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6412 pbCodeBuf[off++] = RT_BYTE1(uImm);
6413 pbCodeBuf[off++] = RT_BYTE2(uImm);
6414 pbCodeBuf[off++] = RT_BYTE3(uImm);
6415 pbCodeBuf[off++] = RT_BYTE4(uImm);
6416 }
6417 else
6418 {
6419 /* Use temporary register for the immediate. */
6420 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6421 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6422 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6423 }
6424
6425#elif defined(RT_ARCH_ARM64)
6426 /** @todo guess there are cleverer things we can do here... */
6427 if (uImm < _4K)
6428 {
6429 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6430 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6431 true /*64Bit*/, true /*fSetFlags*/);
6432 }
6433 else if ((uImm & ~(uint64_t)0xfff000) == 0)
6434 {
6435 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6436 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6437 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6438 }
6439 else
6440 {
6441 /* Use temporary register for the immediate. */
6442 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6443 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6444 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6445 }
6446
6447#else
6448# error "Port me!"
6449#endif
6450
6451 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6452 return off;
6453}
6454
6455
6456/**
6457 * Emits a compare of a 32-bit GPR with a constant value, setting status
6458 * flags/whatever for use with a conditional instruction.
6459 *
6460 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6461 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
6462 * bits all zero). Will release assert or throw exception if the caller
6463 * violates this restriction.
6464 */
6465DECL_FORCE_INLINE_THROW(uint32_t)
6466iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6467{
6468#ifdef RT_ARCH_AMD64
6469 if (iGprLeft >= 8)
6470 pCodeBuf[off++] = X86_OP_REX_B;
6471 if (uImm <= UINT32_C(0x7f))
6472 {
6473 /* cmp Ev, Ib */
6474 pCodeBuf[off++] = 0x83;
6475 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6476 pCodeBuf[off++] = (uint8_t)uImm;
6477 }
6478 else
6479 {
6480 /* cmp Ev, imm */
6481 pCodeBuf[off++] = 0x81;
6482 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6483 pCodeBuf[off++] = RT_BYTE1(uImm);
6484 pCodeBuf[off++] = RT_BYTE2(uImm);
6485 pCodeBuf[off++] = RT_BYTE3(uImm);
6486 pCodeBuf[off++] = RT_BYTE4(uImm);
6487 }
6488
6489#elif defined(RT_ARCH_ARM64)
6490 /** @todo guess there are cleverer things we can do here... */
6491 if (uImm < _4K)
6492 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6493 false /*64Bit*/, true /*fSetFlags*/);
6494 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6495 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6496 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6497 else
6498# ifdef IEM_WITH_THROW_CATCH
6499 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6500# else
6501 AssertReleaseFailedStmt(off = UINT32_MAX);
6502# endif
6503
6504#else
6505# error "Port me!"
6506#endif
6507 return off;
6508}
6509
6510
6511/**
6512 * Emits a compare of a 32-bit GPR with a constant value, setting status
6513 * flags/whatever for use with a conditional instruction.
6514 */
6515DECL_INLINE_THROW(uint32_t)
6516iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6517{
6518#ifdef RT_ARCH_AMD64
6519 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6520
6521#elif defined(RT_ARCH_ARM64)
6522 /** @todo guess there are cleverer things we can do here... */
6523 if (uImm < _4K)
6524 {
6525 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6526 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6527 false /*64Bit*/, true /*fSetFlags*/);
6528 }
6529 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6530 {
6531 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6532 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6533 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6534 }
6535 else
6536 {
6537 /* Use temporary register for the immediate. */
6538 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6539 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6540 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6541 }
6542
6543#else
6544# error "Port me!"
6545#endif
6546
6547 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6548 return off;
6549}
6550
6551
6552/**
6553 * Emits a compare of a 16-bit GPR with a constant value, setting status
6554 * flags/whatever for use with a conditional instruction.
6555 *
6556 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
6557 * 16-bit value from @a iGprLeft.
6558 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6559 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
6560 * bits all zero). Will release assert or throw exception if the caller
6561 * violates this restriction.
6562 */
6563DECL_FORCE_INLINE_THROW(uint32_t)
6564iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6565 uint8_t idxTmpReg = UINT8_MAX)
6566{
6567#ifdef RT_ARCH_AMD64
6568 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6569 if (iGprLeft >= 8)
6570 pCodeBuf[off++] = X86_OP_REX_B;
6571 if (uImm <= UINT32_C(0x7f))
6572 {
6573 /* cmp Ev, Ib */
6574 pCodeBuf[off++] = 0x83;
6575 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6576 pCodeBuf[off++] = (uint8_t)uImm;
6577 }
6578 else
6579 {
6580 /* cmp Ev, imm */
6581 pCodeBuf[off++] = 0x81;
6582 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6583 pCodeBuf[off++] = RT_BYTE1(uImm);
6584 pCodeBuf[off++] = RT_BYTE2(uImm);
6585 }
6586 RT_NOREF(idxTmpReg);
6587
6588#elif defined(RT_ARCH_ARM64)
6589# ifdef IEM_WITH_THROW_CATCH
6590 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6591# else
6592 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6593# endif
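 /* ARM64 has no 16-bit flavour of SUBS, so isolate the low 16 bits first (UXTH equivalent via AND) and do a 32-bit compare. */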
6594 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6595 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6596 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6597
6598#else
6599# error "Port me!"
6600#endif
6601 return off;
6602}
6603
6604
6605/**
6606 * Emits a compare of a 16-bit GPR with a constant value, setting status
6607 * flags/whatever for use with a conditional instruction.
6608 *
6609 * @note ARM64: Helper register is required (idxTmpReg).
6610 */
6611DECL_INLINE_THROW(uint32_t)
6612iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6613 uint8_t idxTmpReg = UINT8_MAX)
6614{
6615#ifdef RT_ARCH_AMD64
6616 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6617#elif defined(RT_ARCH_ARM64)
6618 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6619#else
6620# error "Port me!"
6621#endif
6622 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6623 return off;
6624}
6625
6626
6627
6628/*********************************************************************************************************************************
6629* Branching *
6630*********************************************************************************************************************************/
6631
6632/**
6633 * Emits a JMP rel32 / B imm26 to the given label.
6634 */
6635DECL_FORCE_INLINE_THROW(uint32_t)
6636iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6637{
6638 Assert(idxLabel < pReNative->cLabels);
6639
6640#ifdef RT_ARCH_AMD64
6641 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6642 {
6643 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6644 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6645 {
6646 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6647 pCodeBuf[off++] = (uint8_t)offRel;
6648 }
6649 else
6650 {
6651 offRel -= 3;
6652 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6653 pCodeBuf[off++] = RT_BYTE1(offRel);
6654 pCodeBuf[off++] = RT_BYTE2(offRel);
6655 pCodeBuf[off++] = RT_BYTE3(offRel);
6656 pCodeBuf[off++] = RT_BYTE4(offRel);
6657 }
6658 }
6659 else
6660 {
6661 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6662 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6663 pCodeBuf[off++] = 0xfe;
6664 pCodeBuf[off++] = 0xff;
6665 pCodeBuf[off++] = 0xff;
6666 pCodeBuf[off++] = 0xff;
6667 }
6668 pCodeBuf[off++] = 0xcc; /* int3 poison */
6669
6670#elif defined(RT_ARCH_ARM64)
6671 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6672 {
6673 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6674 off++;
6675 }
6676 else
6677 {
6678 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6679 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6680 }
6681
6682#else
6683# error "Port me!"
6684#endif
6685 return off;
6686}
6687
6688
6689/**
6690 * Emits a JMP rel32 / B imm26 to the given label.
6691 */
6692DECL_INLINE_THROW(uint32_t)
6693iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6694{
6695#ifdef RT_ARCH_AMD64
6696 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6697#elif defined(RT_ARCH_ARM64)
6698 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6699#else
6700# error "Port me!"
6701#endif
6702 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6703 return off;
6704}
6705
6706
6707/**
6708 * Emits a JMP rel32 / B imm26 to a new undefined label.
6709 */
6710DECL_INLINE_THROW(uint32_t)
6711iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6712{
6713 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6714 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6715}
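/**
 * Flow sketch (illustrative, given some IEMNATIVELABELTYPE enmLabelType): a
 * forward jump to a label that is bound later. iemNativeLabelDefine() is
 * assumed to be provided by the recompiler core (it is not part of this
 * file):
 * @code
 * uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, 0);
 * off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel); // records a fixup
 * off = iemNativeEmitBrk(pReNative, off, 0xdead); // stand-in for the code being jumped over
 * iemNativeLabelDefine(pReNative, idxLabel, off); // binds the label, resolving the fixup
 * @endcode
 */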
6716
6717/** Condition type. */
6718#ifdef RT_ARCH_AMD64
6719typedef enum IEMNATIVEINSTRCOND : uint8_t
6720{
6721 kIemNativeInstrCond_o = 0,
6722 kIemNativeInstrCond_no,
6723 kIemNativeInstrCond_c,
6724 kIemNativeInstrCond_nc,
6725 kIemNativeInstrCond_e,
6726 kIemNativeInstrCond_z = kIemNativeInstrCond_e,
6727 kIemNativeInstrCond_ne,
6728 kIemNativeInstrCond_nz = kIemNativeInstrCond_ne,
6729 kIemNativeInstrCond_be,
6730 kIemNativeInstrCond_nbe,
6731 kIemNativeInstrCond_s,
6732 kIemNativeInstrCond_ns,
6733 kIemNativeInstrCond_p,
6734 kIemNativeInstrCond_np,
6735 kIemNativeInstrCond_l,
6736 kIemNativeInstrCond_nl,
6737 kIemNativeInstrCond_le,
6738 kIemNativeInstrCond_nle
6739} IEMNATIVEINSTRCOND;
6740#elif defined(RT_ARCH_ARM64)
6741typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6742# define kIemNativeInstrCond_o todo_conditional_codes
6743# define kIemNativeInstrCond_no todo_conditional_codes
6744# define kIemNativeInstrCond_c todo_conditional_codes
6745# define kIemNativeInstrCond_nc todo_conditional_codes
6746# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6747# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6748# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6749# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6750# define kIemNativeInstrCond_s todo_conditional_codes
6751# define kIemNativeInstrCond_ns todo_conditional_codes
6752# define kIemNativeInstrCond_p todo_conditional_codes
6753# define kIemNativeInstrCond_np todo_conditional_codes
6754# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6755# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6756# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6757# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6758#else
6759# error "Port me!"
6760#endif
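/** @note The unsigned below/above mappings hold because ARM64 inverts the
 * borrow convention relative to x86: x86 sets CF=1 on borrow, while ARM64
 * clears C on borrow. Hence x86 'be' (CF=1 || ZF=1) corresponds to ARM64
 * LS (C==0 || Z==1), and 'nbe' to HI. */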
6761
6762
6763/**
6764 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6765 */
6766DECL_FORCE_INLINE_THROW(uint32_t)
6767iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6768 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6769{
6770 Assert(idxLabel < pReNative->cLabels);
6771
6772 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6773#ifdef RT_ARCH_AMD64
6774 if (offLabel >= off)
6775 {
6776 /* jcc rel32 */
6777 pCodeBuf[off++] = 0x0f;
6778 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6779 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6780 pCodeBuf[off++] = 0x00;
6781 pCodeBuf[off++] = 0x00;
6782 pCodeBuf[off++] = 0x00;
6783 pCodeBuf[off++] = 0x00;
6784 }
6785 else
6786 {
6787 int32_t offDisp = offLabel - (off + 2);
6788 if ((int8_t)offDisp == offDisp)
6789 {
6790 /* jcc rel8 */
6791 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6792 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6793 }
6794 else
6795 {
6796 /* jcc rel32 */
6797 offDisp -= 4;
6798 pCodeBuf[off++] = 0x0f;
6799 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6800 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6801 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6802 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6803 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6804 }
6805 }
6806
6807#elif defined(RT_ARCH_ARM64)
6808 if (offLabel >= off)
6809 {
6810 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6811 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6812 }
6813 else
6814 {
6815 Assert(off - offLabel <= 0x3ffffU);
6816 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6817 off++;
6818 }
6819
6820#else
6821# error "Port me!"
6822#endif
6823 return off;
6824}
6825
6826
6827/**
6828 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6829 */
6830DECL_INLINE_THROW(uint32_t)
6831iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6832{
6833#ifdef RT_ARCH_AMD64
6834 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6835#elif defined(RT_ARCH_ARM64)
6836 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6837#else
6838# error "Port me!"
6839#endif
6840 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6841 return off;
6842}
6843
6844
6845/**
6846 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6847 */
6848DECL_INLINE_THROW(uint32_t)
6849iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6850 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6851{
6852 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6853 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6854}
6855
6856
6857/**
6858 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6859 */
6860DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6861{
6862#ifdef RT_ARCH_AMD64
6863 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6864#elif defined(RT_ARCH_ARM64)
6865 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6866#else
6867# error "Port me!"
6868#endif
6869}
6870
6871/**
6872 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6873 */
6874DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6875 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6876{
6877#ifdef RT_ARCH_AMD64
6878 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6879#elif defined(RT_ARCH_ARM64)
6880 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6881#else
6882# error "Port me!"
6883#endif
6884}
6885
6886
6887/**
6888 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6889 */
6890DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6891{
6892#ifdef RT_ARCH_AMD64
6893 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6894#elif defined(RT_ARCH_ARM64)
6895 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6896#else
6897# error "Port me!"
6898#endif
6899}
6900
6901/**
6902 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6903 */
6904DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6905 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6906{
6907#ifdef RT_ARCH_AMD64
6908 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6909#elif defined(RT_ARCH_ARM64)
6910 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6911#else
6912# error "Port me!"
6913#endif
6914}
6915
6916
6917/**
6918 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6919 */
6920DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6921{
6922#ifdef RT_ARCH_AMD64
6923 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6924#elif defined(RT_ARCH_ARM64)
6925 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6926#else
6927# error "Port me!"
6928#endif
6929}
6930
6931/**
6932 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6933 */
6934DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6935 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6936{
6937#ifdef RT_ARCH_AMD64
6938 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6939#elif defined(RT_ARCH_ARM64)
6940 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6941#else
6942# error "Port me!"
6943#endif
6944}
6945
6946
6947/**
6948 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6949 */
6950DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6951{
6952#ifdef RT_ARCH_AMD64
6953 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6954#elif defined(RT_ARCH_ARM64)
6955 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6956#else
6957# error "Port me!"
6958#endif
6959}
6960
6961/**
6962 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6963 */
6964DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6965 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6966{
6967#ifdef RT_ARCH_AMD64
6968 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6969#elif defined(RT_ARCH_ARM64)
6970 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6971#else
6972# error "Port me!"
6973#endif
6974}
6975
6976
6977/**
6978 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6979 */
6980DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6981{
6982#ifdef RT_ARCH_AMD64
6983 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6984#elif defined(RT_ARCH_ARM64)
6985 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6986#else
6987# error "Port me!"
6988#endif
6989}
6990
6991/**
6992 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6993 */
6994DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6995 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6996{
6997#ifdef RT_ARCH_AMD64
6998 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6999#elif defined(RT_ARCH_ARM64)
7000 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
7001#else
7002# error "Port me!"
7003#endif
7004}
7005
7006
7007/**
7008 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
7009 *
7010 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
7011 *
7012 * Only use hardcoded jumps forward when emitting for exactly one
7013 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
7014 * the right target address on all platforms!
7015 *
7016 * Please also note that on x86 it is necessary to pass off + 256 or higher
7017 * for @a offTarget if one believes the intervening code is more than 127
7018 * bytes long.
7019 */
7020DECL_FORCE_INLINE(uint32_t)
7021iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
7022{
7023#ifdef RT_ARCH_AMD64
7024 /* jcc rel8 / rel32 */
7025 int32_t offDisp = (int32_t)(offTarget - (off + 2));
7026 if (offDisp < 128 && offDisp >= -128)
7027 {
7028 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
7029 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7030 }
7031 else
7032 {
7033 offDisp -= 4;
7034 pCodeBuf[off++] = 0x0f;
7035 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
7036 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7037 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
7038 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
7039 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
7040 }
7041
7042#elif defined(RT_ARCH_ARM64)
7043 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
7044 off++;
7045#else
7046# error "Port me!"
7047#endif
7048 return off;
7049}
7050
7051
7052/**
7053 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
7054 *
7055 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
7056 *
7057 * Only use hardcoded jumps forward when emitting for exactly one
7058 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
7059 * the right target address on all platforms!
7060 *
7061 * Please also note that on x86 it is necessary to pass off + 256 or higher
7062 * for @a offTarget if one believes the intervening code is more than 127
7063 * bytes long.
7064 */
7065DECL_INLINE_THROW(uint32_t)
7066iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
7067{
7068#ifdef RT_ARCH_AMD64
7069 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
7070#elif defined(RT_ARCH_ARM64)
7071 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
7072#else
7073# error "Port me!"
7074#endif
7075 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7076 return off;
7077}
7078
7079
7080/**
7081 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
7082 *
7083 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7084 */
7085DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7086{
7087#ifdef RT_ARCH_AMD64
7088 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
7089#elif defined(RT_ARCH_ARM64)
7090 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
7091#else
7092# error "Port me!"
7093#endif
7094}
7095
7096
7097/**
7098 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
7099 *
7100 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7101 */
7102DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7103{
7104#ifdef RT_ARCH_AMD64
7105 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
7106#elif defined(RT_ARCH_ARM64)
7107 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
7108#else
7109# error "Port me!"
7110#endif
7111}
7112
7113
7114/**
7115 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
7116 *
7117 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7118 */
7119DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7120{
7121#ifdef RT_ARCH_AMD64
7122 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
7123#elif defined(RT_ARCH_ARM64)
7124 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
7125#else
7126# error "Port me!"
7127#endif
7128}
7129
7130
7131/**
7132 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
7133 *
7134 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7135 */
7136DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7137{
7138#ifdef RT_ARCH_AMD64
7139 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
7140#elif defined(RT_ARCH_ARM64)
7141 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
7142#else
7143# error "Port me!"
7144#endif
7145}
7146
7147
7148/**
7149 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
7150 *
7151 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7152 */
7153DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
7154{
7155#ifdef RT_ARCH_AMD64
7156 /* jmp rel8 or rel32 */
7157 int32_t offDisp = offTarget - (off + 2);
7158 if (offDisp < 128 && offDisp >= -128)
7159 {
7160 pCodeBuf[off++] = 0xeb;
7161 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7162 }
7163 else
7164 {
7165 offDisp -= 3;
7166 pCodeBuf[off++] = 0xe9;
7167 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7168 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
7169 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
7170 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
7171 }
7172
7173#elif defined(RT_ARCH_ARM64)
7174 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
7175 off++;
7176
7177#else
7178# error "Port me!"
7179#endif
7180 return off;
7181}
7182
7183
7184/**
7185 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
7186 *
7187 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7188 */
7189DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7190{
7191#ifdef RT_ARCH_AMD64
7192 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
7193#elif defined(RT_ARCH_ARM64)
7194 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
7195#else
7196# error "Port me!"
7197#endif
7198 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7199 return off;
7200}
7201
7202
7203/**
7204 * Fixes up a conditional jump to a fixed label.
7205 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
7206 * iemNativeEmitJzToFixed, ...
7207 */
7208DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
7209{
7210#ifdef RT_ARCH_AMD64
7211 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
7212 uint8_t const bOpcode = pbCodeBuf[offFixup];
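 /* Opcodes 0x70..0x7f are jcc rel8 and 0xeb is jmp rel8; both keep the 8-bit displacement in the second instruction byte. */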
7213 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
7214 {
7215 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
7216 AssertStmt((int8_t)pbCodeBuf[offFixup + 1] == (int32_t)(offTarget - (offFixup + 2)),
7217 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
7218 }
7219 else
7220 {
7221 if (bOpcode != 0x0f)
7222 Assert(bOpcode == 0xe9);
7223 else
7224 {
7225 offFixup += 1;
7226 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) < 0x10);
7227 }
7228 uint32_t const offRel32 = offTarget - (offFixup + 5);
7229 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
7230 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
7231 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
7232 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
7233 }
7234
7235#elif defined(RT_ARCH_ARM64)
7236 int32_t const offDisp = offTarget - offFixup;
7237 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
7238 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
7239 {
7240 /* B.COND + BC.COND */
7241 Assert(offDisp >= -262144 && offDisp < 262144);
7242 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
7243 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
7244 }
7245 else if ((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000))
7246 {
7247 /* B imm26 */
7248 Assert(offDisp >= -33554432 && offDisp < 33554432);
7249 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
7250 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
7251 }
7252 else if ((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x34000000))
7253 {
7254 /* CBZ / CBNZ reg, imm19 */
7255 Assert((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x34000000));
7256 Assert(offDisp >= -262144 && offDisp < 262144);
7257 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
7258 | (((uint32_t)offDisp << 5) & UINT32_C(0x00ffffe0));
7259 }
7260 else
7261 {
7262 /* TBZ / TBNZ reg, bit5, imm14 */
7263 Assert((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x36000000));
7264 Assert(offDisp >= -8192 && offDisp < 8192);
7265 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfff8001f))
7266 | (((uint32_t)offDisp << 5) & UINT32_C(0x0007ffe0));
7267 }
7268
7269#else
7270# error "Port me!"
7271#endif
7272}
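/**
 * Typical pattern (illustrative, not from the original source): emit a
 * conditional jump with a placeholder target far enough away to force the
 * rel32 encoding on x86, then patch it once the real target offset is known:
 * @code
 * uint32_t const offFixup = off;
 * off = iemNativeEmitJccToFixed(pReNative, off, off + 256 /*placeholder*/, kIemNativeInstrCond_e);
 * off = iemNativeEmitBrk(pReNative, off, 0xdead); // stand-in for the code being jumped over
 * iemNativeFixupFixedJump(pReNative, offFixup, off);
 * @endcode
 */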
7273
7274
7275#ifdef RT_ARCH_AMD64
7276/**
7277 * For doing bt on a register.
7278 */
7279DECL_INLINE_THROW(uint32_t)
7280iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
7281{
7282 Assert(iBitNo < 64);
7283 /* bt Ev, imm8 */
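 /* The immediate form of BT addresses bits modulo the operand size, so REX.W is only needed for bit numbers 32 and up. */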
7284 if (iBitNo >= 32)
7285 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7286 else if (iGprSrc >= 8)
7287 pCodeBuf[off++] = X86_OP_REX_B;
7288 pCodeBuf[off++] = 0x0f;
7289 pCodeBuf[off++] = 0xba;
7290 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7291 pCodeBuf[off++] = iBitNo;
7292 return off;
7293}
7294#endif /* RT_ARCH_AMD64 */
7295
7296
7297/**
7298 * Internal helper, don't call directly.
7299 */
7300DECL_INLINE_THROW(uint32_t)
7301iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7302 uint32_t offTarget, uint32_t *poffFixup, bool fJmpIfSet)
7303{
7304 Assert(iBitNo < 64);
7305#ifdef RT_ARCH_AMD64
7306 if (iBitNo < 8)
7307 {
7308 /* test Eb, imm8 */
7309 if (iGprSrc >= 4)
7310 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7311 pCodeBuf[off++] = 0xf6;
7312 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7313 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7314 if (poffFixup)
7315 *poffFixup = off;
7316 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7317 }
7318 else
7319 {
7320 /* bt Ev, imm8 */
7321 if (iBitNo >= 32)
7322 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7323 else if (iGprSrc >= 8)
7324 pCodeBuf[off++] = X86_OP_REX_B;
7325 pCodeBuf[off++] = 0x0f;
7326 pCodeBuf[off++] = 0xba;
7327 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7328 pCodeBuf[off++] = iBitNo;
7329 if (poffFixup)
7330 *poffFixup = off;
7331 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7332 }
7333
7334#elif defined(RT_ARCH_ARM64)
7335 /* Just use the TBNZ instruction here. */
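 /* TBZ/TBNZ encode a signed 14-bit instruction displacement (+/-8192 instructions), so distant targets must be fixed up by the caller. */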
7336 if (poffFixup)
7337 *poffFixup = off;
7338 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, (int32_t)(offTarget - off), iGprSrc, iBitNo);
7339
7340#else
7341# error "Port me!"
7342#endif
7343 return off;
7344}
7345
7346
7347/**
7348 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _set_
7349 * in @a iGprSrc.
7350 */
7351DECL_INLINE_THROW(uint32_t)
7352iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7353 uint32_t offTarget, uint32_t *poffFixup)
7354{
7355 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, true /*fJmpIfSet*/);
7356}
7357
7358
7359/**
7360 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _not_
7361 * _set_ in @a iGprSrc.
7362 */
7363DECL_INLINE_THROW(uint32_t)
7364iemNativeEmitTestBitInGprAndJmpToFixedIfNotSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7365 uint32_t offTarget, uint32_t *poffFixup)
7366{
7367 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, false /*fJmpIfSet*/);
7368}
7369
7370
7371
7372/**
7373 * Internal helper, don't call directly.
7374 */
7375DECL_INLINE_THROW(uint32_t)
7376iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7377 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7378{
7379 Assert(iBitNo < 64);
7380#ifdef RT_ARCH_AMD64
7381 if (iBitNo < 8)
7382 {
7383 /* test Eb, imm8 */
7384 if (iGprSrc >= 4)
7385 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7386 pCodeBuf[off++] = 0xf6;
7387 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7388 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7389 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7390 fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7391 }
7392 else
7393 {
7394 /* bt Ev, imm8 */
7395 if (iBitNo >= 32)
7396 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7397 else if (iGprSrc >= 8)
7398 pCodeBuf[off++] = X86_OP_REX_B;
7399 pCodeBuf[off++] = 0x0f;
7400 pCodeBuf[off++] = 0xba;
7401 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7402 pCodeBuf[off++] = iBitNo;
7403 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7404 fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7405 }
7406
7407#elif defined(RT_ARCH_ARM64)
7408 /* Use the TBNZ instruction here. */
7409 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
7410 {
7411 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
7412 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
7413 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
7414 //if (offLabel == UINT32_MAX)
7415 {
7416 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
7417 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
7418 }
7419 //else
7420 //{
7421 // RT_BREAKPOINT();
7422 // Assert(off - offLabel <= 0x1fffU);
7423 // pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
7424 //
7425 //}
7426 }
7427 else
7428 {
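 /* Encode RT_BIT_64(iBitNo) as a logical immediate: N=1 with a run length of one bit (the 0x40 size/len value), rotated right by (64 - iBitNo) & 63 so the bit lands at position iBitNo. */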
7429 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
7430 pCodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
7431 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7432 pCodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
7433 }
7434
7435#else
7436# error "Port me!"
7437#endif
7438 return off;
7439}
7440
7441
7442/**
7443 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7444 * @a iGprSrc.
7445 */
7446DECL_INLINE_THROW(uint32_t)
7447iemNativeEmitTestBitInGprAndJmpToLabelIfSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7448 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7449{
7450 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7451}
7452
7453
7454/**
7455 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7456 * _set_ in @a iGprSrc.
7457 */
7458DECL_INLINE_THROW(uint32_t)
7459iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7460 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7461{
7462 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7463}
7464
7465
7466/**
7467 * Internal helper, don't call directly.
7468 */
7469DECL_INLINE_THROW(uint32_t)
7470iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7471 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7472{
7473#ifdef RT_ARCH_AMD64
7474 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 5+6), off,
7475 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7476#elif defined(RT_ARCH_ARM64)
7477 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 2), off,
7478 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7479#else
7480# error "Port me!"
7481#endif
7482 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7483 return off;
7484}
7485
7486
7487/**
7488 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7489 * @a iGprSrc.
7490 */
7491DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7492 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7493{
7494 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7495}
7496
7497
7498/**
7499 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7500 * _set_ in @a iGprSrc.
7501 */
7502DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7503 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7504{
7505 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7506}
7507
7508
7509/**
7510 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7511 * flags accordingly.
7512 */
7513DECL_INLINE_THROW(uint32_t)
7514iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7515{
7516 Assert(fBits != 0);
7517#ifdef RT_ARCH_AMD64
7518
7519 if (fBits >= UINT32_MAX)
7520 {
7521 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7522
7523 /* test Ev,Gv */
7524 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7525 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7526 pbCodeBuf[off++] = 0x85;
7527 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7528
7529 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7530 }
7531 else if (fBits <= UINT32_MAX)
7532 {
7533 /* test Eb, imm8 or test Ev, imm32 */
7534 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7535 if (fBits <= UINT8_MAX)
7536 {
7537 if (iGprSrc >= 4)
7538 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7539 pbCodeBuf[off++] = 0xf6;
7540 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7541 pbCodeBuf[off++] = (uint8_t)fBits;
7542 }
7543 else
7544 {
7545 if (iGprSrc >= 8)
7546 pbCodeBuf[off++] = X86_OP_REX_B;
7547 pbCodeBuf[off++] = 0xf7;
7548 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7549 pbCodeBuf[off++] = RT_BYTE1(fBits);
7550 pbCodeBuf[off++] = RT_BYTE2(fBits);
7551 pbCodeBuf[off++] = RT_BYTE3(fBits);
7552 pbCodeBuf[off++] = RT_BYTE4(fBits);
7553 }
7554 }
7555 /** @todo implement me. */
7556 else
7557 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7558
7559#elif defined(RT_ARCH_ARM64)
7560 uint32_t uImmR = 0;
7561 uint32_t uImmNandS = 0;
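 /* Only repeating-run bit patterns (the ARM64 logical immediate class) are encodable; anything else falls back to a temporary register. */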
7562 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7563 {
7564 /* ands xzr, iGprSrc, #fBits */
7565 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7566 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7567 }
7568 else
7569 {
7570 /* ands xzr, iGprSrc, iTmpReg */
7571 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7572 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7573 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7574 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7575 }
7576
7577#else
7578# error "Port me!"
7579#endif
7580 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7581 return off;
7582}
7583
7584
7585/**
7586 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7587 * @a iGprSrc, setting CPU flags accordingly.
7588 *
7589 * @note For ARM64 this only supports @a fBits values that can be expressed
7590 * using the two 6-bit immediates of the ANDS instruction, unless a
7591 * temporary register is supplied in @a iTmpReg as a fallback.
7592 */
7593DECL_FORCE_INLINE_THROW(uint32_t)
7594iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits,
7595 uint8_t iTmpReg = UINT8_MAX)
7596{
7597 Assert(fBits != 0);
7598
7599#ifdef RT_ARCH_AMD64
7600 if (fBits <= UINT8_MAX)
7601 {
7602 /* test Eb, imm8 */
7603 if (iGprSrc >= 4)
7604 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7605 pCodeBuf[off++] = 0xf6;
7606 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7607 pCodeBuf[off++] = (uint8_t)fBits;
7608 }
7609 else
7610 {
7611 /* test Ev, imm32 */
7612 if (iGprSrc >= 8)
7613 pCodeBuf[off++] = X86_OP_REX_B;
7614 pCodeBuf[off++] = 0xf7;
7615 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7616 pCodeBuf[off++] = RT_BYTE1(fBits);
7617 pCodeBuf[off++] = RT_BYTE2(fBits);
7618 pCodeBuf[off++] = RT_BYTE3(fBits);
7619 pCodeBuf[off++] = RT_BYTE4(fBits);
7620 }
7621 RT_NOREF(iTmpReg);
7622
7623#elif defined(RT_ARCH_ARM64)
7624 /* ands xzr, src, #fBits */
7625 uint32_t uImmR = 0;
7626 uint32_t uImmNandS = 0;
7627 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7628 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7629 else if (iTmpReg != UINT8_MAX)
7630 {
7631 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iTmpReg, fBits);
7632 pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7633 }
7634 else
7635# ifdef IEM_WITH_THROW_CATCH
7636 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7637# else
7638 AssertReleaseFailedStmt(off = UINT32_MAX);
7639# endif
7640
7641#else
7642# error "Port me!"
7643#endif
7644 return off;
7645}
7646
7647
7648
7649/**
7650 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7651 * @a iGprSrc, setting CPU flags accordingly.
7652 *
7653 * @note For ARM64 this only supports @a fBits values that can be expressed
7654 * using the two 6-bit immediates of the ANDS instruction. The caller
7655 * must make sure this is possible!
7656 */
7657DECL_FORCE_INLINE_THROW(uint32_t)
7658iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7659{
7660 Assert(fBits != 0);
7661
7662#ifdef RT_ARCH_AMD64
7663 /* test Eb, imm8 */
7664 if (iGprSrc >= 4)
7665 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7666 pCodeBuf[off++] = 0xf6;
7667 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7668 pCodeBuf[off++] = fBits;
7669
7670#elif defined(RT_ARCH_ARM64)
7671 /* ands xzr, src, #fBits */
7672 uint32_t uImmR = 0;
7673 uint32_t uImmNandS = 0;
7674 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7675 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7676 else
7677# ifdef IEM_WITH_THROW_CATCH
7678 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7679# else
7680 AssertReleaseFailedStmt(off = UINT32_MAX);
7681# endif
7682
7683#else
7684# error "Port me!"
7685#endif
7686 return off;
7687}
7688
7689
7690/**
7691 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7692 * @a iGprSrc, setting CPU flags accordingly.
7693 */
7694DECL_INLINE_THROW(uint32_t)
7695iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7696{
7697 Assert(fBits != 0);
7698
7699#ifdef RT_ARCH_AMD64
7700 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7701
7702#elif defined(RT_ARCH_ARM64)
7703 /* ands xzr, src, [tmp|#imm] */
7704 uint32_t uImmR = 0;
7705 uint32_t uImmNandS = 0;
7706 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7707 {
7708 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7709 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7710 }
7711 else
7712 {
7713 /* Use temporary register for the 64-bit immediate. */
7714 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7715 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7716 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7717 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7718 }
7719
7720#else
7721# error "Port me!"
7722#endif
7723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7724 return off;
7725}
7726
7727
7728/**
7729 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
7730 * are set in @a iGprSrc.
7731 */
7732DECL_INLINE_THROW(uint32_t)
7733iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7734 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7735{
7736 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7737
7738 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7739 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7740
7741 return off;
7742}
7743
7744
7745/**
7746 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
7747 * are set in @a iGprSrc.
7748 */
7749DECL_INLINE_THROW(uint32_t)
7750iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7751 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7752{
7753 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7754
7755 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7756 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7757
7758 return off;
7759}
7760
7761
7762/**
7763 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7764 *
7765 * The operand size is given by @a f64Bit.
7766 */
7767DECL_FORCE_INLINE_THROW(uint32_t)
7768iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7769 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7770{
7771 Assert(idxLabel < pReNative->cLabels);
7772
7773#ifdef RT_ARCH_AMD64
7774 /* test reg32,reg32 / test reg64,reg64 */
7775 if (f64Bit)
7776 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7777 else if (iGprSrc >= 8)
7778 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7779 pCodeBuf[off++] = 0x85;
7780 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7781
7782 /* jnz idxLabel */
7783 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7784 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7785
7786#elif defined(RT_ARCH_ARM64)
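 /* CBZ/CBNZ fuse the zero test and the branch and leave the condition flags untouched, unlike the x86 TEST + Jcc sequence. */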
7787 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7788 {
7789 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7790 iGprSrc, f64Bit);
7791 off++;
7792 }
7793 else
7794 {
7795 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7796 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7797 }
7798
7799#else
7800# error "Port me!"
7801#endif
7802 return off;
7803}
7804
7805
7806/**
7807 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7808 *
7809 * The operand size is given by @a f64Bit.
7810 */
7811DECL_FORCE_INLINE_THROW(uint32_t)
7812iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7813 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7814{
7815#ifdef RT_ARCH_AMD64
7816 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7817 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7818#elif defined(RT_ARCH_ARM64)
7819 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7820 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7821#else
7822# error "Port me!"
7823#endif
7824 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7825 return off;
7826}
7827
7828
7829/**
7830 * Emits code that jumps to @a offTarget if @a iGprSrc is not zero.
7831 *
7832 * The operand size is given by @a f64Bit.
7833 */
7834DECL_FORCE_INLINE_THROW(uint32_t)
7835iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7836 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7837{
7838#ifdef RT_ARCH_AMD64
7839 /* test reg32,reg32 / test reg64,reg64 */
7840 if (f64Bit)
7841 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7842 else if (iGprSrc >= 8)
7843 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7844 pCodeBuf[off++] = 0x85;
7845 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7846
7847 /* jnz idxLabel */
7848 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget,
7849 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7850
7851#elif defined(RT_ARCH_ARM64)
7852 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(offTarget - off), iGprSrc, f64Bit);
7853 off++;
7854
7855#else
7856# error "Port me!"
7857#endif
7858 return off;
7859}
7860
7861
7862/**
7863 * Emits code that jumps to @a offTarget if @a iGprSrc is not zero.
7864 *
7865 * The operand size is given by @a f64Bit.
7866 */
7867DECL_FORCE_INLINE_THROW(uint32_t)
7868iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7869 bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7870{
7871#ifdef RT_ARCH_AMD64
7872 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7873 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7874#elif defined(RT_ARCH_ARM64)
7875 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1),
7876 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7877#else
7878# error "Port me!"
7879#endif
7880 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7881 return off;
7882}
7883
7884
7885/* if (Gpr1 == 0) Jmp idxLabel; */
7886
7887/**
7888 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7889 *
7890 * The operand size is given by @a f64Bit.
7891 */
7892DECL_FORCE_INLINE_THROW(uint32_t)
7893iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7894 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7895{
7896 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7897 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7898}
7899
7900
7901/**
7902 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7903 *
7904 * The operand size is given by @a f64Bit.
7905 */
7906DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7907 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7908{
7909 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7910}
7911
7912
7913/**
7914 * Emits code that jumps to a new label if @a iGprSrc is zero.
7915 *
7916 * The operand size is given by @a f64Bit.
7917 */
7918DECL_INLINE_THROW(uint32_t)
7919iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7920 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7921{
7922 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7923 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7924}
7925
7926
7927/**
7928 * Emits code that jumps to @a offTarget if @a iGprSrc is zero.
7929 *
7930 * The operand size is given by @a f64Bit.
7931 */
7932DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7933 uint8_t iGprSrc, bool f64Bit, uint32_t offTarget)
7934{
7935 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, offTarget);
7936}
7937
7938
7939/* if (Gpr1 != 0) Jmp idxLabel; */
7940
7941/**
7942 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7943 *
7944 * The operand size is given by @a f64Bit.
7945 */
7946DECL_FORCE_INLINE_THROW(uint32_t)
7947iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7948 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7949{
7950 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7951 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7952}
7953
7954
7955/**
7956 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7957 *
7958 * The operand size is given by @a f64Bit.
7959 */
7960DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7961 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7962{
7963 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7964}
7965
7966
7967/**
7968 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7969 *
7970 * The operand size is given by @a f64Bit.
7971 */
7972DECL_INLINE_THROW(uint32_t)
7973iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7974 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7975{
7976 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7977 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7978}
7979
7980
7981/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7982
7983/**
7984 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7985 * differ.
7986 */
7987DECL_INLINE_THROW(uint32_t)
7988iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7989 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7990{
7991 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7992 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7993 return off;
7994}
7995
7996
7997/**
7998 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
7999 */
8000DECL_INLINE_THROW(uint32_t)
8001iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8002 uint8_t iGprLeft, uint8_t iGprRight,
8003 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8004{
8005 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8006 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
8007}
8008
8009
8010/* if (Gpr != Imm) Jmp idxLabel; */
8011
8012/**
8013 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
8014 */
8015DECL_INLINE_THROW(uint32_t)
8016iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8017 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
8018{
8019 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8020 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8021 return off;
8022}
8023
8024
8025/**
8026 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
8027 */
8028DECL_INLINE_THROW(uint32_t)
8029iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8030 uint8_t iGprSrc, uint64_t uImm,
8031 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8032{
8033 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8034 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8035}
8036
8037
8038/**
8039 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
8040 * @a uImm.
8041 */
8042DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8043 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
8044{
8045 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8046 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8047 return off;
8048}
8049
8050
8051/**
8052 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
8053 * @a uImm.
8054 */
8055DECL_INLINE_THROW(uint32_t)
8056iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8057 uint8_t iGprSrc, uint32_t uImm,
8058 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8059{
8060 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8061 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8062}
8063
8064
8065/**
8066 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
8067 * @a uImm.
8068 */
8069DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8070 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
8071{
8072 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
8073 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8074 return off;
8075}
8076
8077
8078/**
8079 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
8080 * @a uImm.
8081 */
8082DECL_INLINE_THROW(uint32_t)
8083iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8084 uint8_t iGprSrc, uint16_t uImm,
8085 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8086{
8087 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8088 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8089}
8090
8091
8092/* if (Gpr == Imm) Jmp idxLabel; */
8093
8094/**
8095 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
8096 */
8097DECL_INLINE_THROW(uint32_t)
8098iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8099 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
8100{
8101 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8102 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8103 return off;
8104}
8105
8106
8107/**
8108 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
8109 */
8110DECL_INLINE_THROW(uint32_t)
8111iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
8112 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8113{
8114 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8115 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8116}
8117
8118
8119/**
8120 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
8121 */
8122DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8123 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
8124{
8125 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8126 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8127 return off;
8128}
8129
8130
8131/**
8132 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
8133 */
8134DECL_INLINE_THROW(uint32_t)
8135iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
8136 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8137{
8138 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8139 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8140}
8141
8142
8143/**
8144 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
8145 *
8146 * @note ARM64: Helper register is required (idxTmpReg).
8147 */
8148DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8149 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
8150 uint8_t idxTmpReg = UINT8_MAX)
8151{
8152 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
8153 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8154 return off;
8155}
8156
8157
8158/**
8159 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
8160 *
8161 * @note ARM64: Helper register is required (idxTmpReg).
8162 */
8163DECL_INLINE_THROW(uint32_t)
8164iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
8165 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
8166 uint8_t idxTmpReg = UINT8_MAX)
8167{
8168 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8169 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
8170}
8171
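/* Usage sketch (illustrative; the register indices and immediate are made up).
 * On ARM64 the 16-bit compare needs the scratch register named in the @note
 * above, while AMD64 can encode CMP r16,imm16 directly:
 *      off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxRegSel,
 *                                                           0x0008, idxLabel, idxRegTmp);
 */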
8172
8173
8174/*********************************************************************************************************************************
8175* Indirect Jumps. *
8176*********************************************************************************************************************************/
8177
8178/**
8179 * Emits an indirect jump to a 64-bit address in a GPR.
8180 */
8181DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpViaGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc)
8182{
8183#ifdef RT_ARCH_AMD64
8184 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
8185 if (iGprSrc >= 8)
8186 pCodeBuf[off++] = X86_OP_REX_B;
8187 pCodeBuf[off++] = 0xff;
8188 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8189
8190#elif defined(RT_ARCH_ARM64)
8191 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8192 pCodeBuf[off++] = Armv8A64MkInstrBr(iGprSrc);
8193
8194#else
8195# error "port me"
8196#endif
8197 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8198 return off;
8199}
8200
8201
8202/**
8203 * Emits an indirect jump to an immediate 64-bit address (uses the temporary GPR).
8204 */
8205DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
8206{
8207 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
8208 return iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP0);
8209}
8210
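/* Usage sketch (illustrative; pfnTarget is a hypothetical code pointer):
 *      off = iemNativeEmitJmpImm(pReNative, off, (uintptr_t)pfnTarget);
 * The address is materialized in IEMNATIVE_REG_FIXED_TMP0 and followed by
 * JMP reg (FF /4) on AMD64 or BR on ARM64, so the target does not have to be
 * within relative branch range. */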
8211
8212/*********************************************************************************************************************************
8213* Calls. *
8214*********************************************************************************************************************************/
8215
8216/**
8217 * Emits a call to a 64-bit address.
8218 */
8219DECL_FORCE_INLINE(uint32_t) iemNativeEmitCallImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uintptr_t uPfn,
8220#ifdef RT_ARCH_AMD64
8221 uint8_t idxRegTmp = X86_GREG_xAX
8222#elif defined(RT_ARCH_ARM64)
8223 uint8_t idxRegTmp = IEMNATIVE_REG_FIXED_TMP0
8224#else
8225# error "Port me"
8226#endif
8227 )
8228{
8229 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegTmp, uPfn);
8230
8231#ifdef RT_ARCH_AMD64
8232 /* call idxRegTmp */
8233 if (idxRegTmp >= 8)
8234 pCodeBuf[off++] = X86_OP_REX_B;
8235 pCodeBuf[off++] = 0xff;
8236 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, idxRegTmp & 7);
8237
8238#elif defined(RT_ARCH_ARM64)
8239 pCodeBuf[off++] = Armv8A64MkInstrBlr(idxRegTmp);
8240
8241#else
8242# error "port me"
8243#endif
8244 return off;
8245}
8246
8247
8248/**
8249 * Emits a call to a 64-bit address.
8250 */
8251template<bool const a_fSkipEflChecks = false>
8252DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
8253{
8254 if RT_CONSTEXPR_IF(!a_fSkipEflChecks)
8255 {
8256 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8257 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY( pReNative, X86_EFL_STATUS_BITS);
8258 }
8259
8260#ifdef RT_ARCH_AMD64
8261 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
8262
8263 /* call rax */
8264 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8265 pbCodeBuf[off++] = 0xff;
8266 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
8267
8268#elif defined(RT_ARCH_ARM64)
8269 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
8270
8271 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8272 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
8273
8274#else
8275# error "port me"
8276#endif
8277 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8278 return off;
8279}
8280
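/* Usage sketch (illustrative; the helper pointer is hypothetical and its
 * arguments must already sit in the calling-convention registers):
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 * The address goes via RAX / IEMNATIVE_REG_FIXED_TMP0 and is invoked with
 * CALL reg (FF /2) on AMD64 or BLR on ARM64. */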
8281
8282/**
8283 * Emits code to load a stack variable into an argument GPR.
8284 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
8285 */
8286DECL_FORCE_INLINE_THROW(uint32_t)
8287iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8288 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
8289 bool fSpilledVarsInVolatileRegs = false)
8290{
8291 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8292 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8293 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8294
8295 uint8_t const idxRegVar = pVar->idxReg;
8296 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
8297 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
8298 || !fSpilledVarsInVolatileRegs ))
8299 {
8300 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
8301 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
8302 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
8303 if (!offAddend)
8304 {
8305 if (idxRegArg != idxRegVar)
8306 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
8307 }
8308 else
8309 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
8310 }
8311 else
8312 {
8313 uint8_t const idxStackSlot = pVar->idxStackSlot;
8314 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8315 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
8316 if (offAddend)
8317 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
8318 }
8319 return off;
8320}
8321
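/* Rough shape of the two paths above (registers and displacements are
 * illustrative sketches only): a variable still held in an acceptable host
 * register becomes a plain MOV (plus an ADD when offAddend is non-zero),
 * whereas a spilled one is reloaded from its BP-relative stack slot:
 *      mov  rdi, [rbp + disp]       ; AMD64 spill path (sketch)
 *      ldur x0, [x29, #disp]        ; ARM64 spill path (sketch)
 */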
8322
8323/**
8324 * Emits code to load a stack or immediate variable value into an argument GPR,
8325 * optionally with an addend.
8326 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
8327 */
8328DECL_FORCE_INLINE_THROW(uint32_t)
8329iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8330 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
8331 bool fSpilledVarsInVolatileRegs = false)
8332{
8333 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8334 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8335 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8336 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
8337 else
8338 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
8339 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
8340 return off;
8341}
8342
8343
8344/**
8345 * Emits code to load the variable address into an argument GPR.
8346 *
8347 * This only works for uninitialized and stack variables.
8348 */
8349DECL_FORCE_INLINE_THROW(uint32_t)
8350iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8351 bool fFlushShadows)
8352{
8353 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8354 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8355 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8356 || pVar->enmKind == kIemNativeVarKind_Stack,
8357 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8358 AssertStmt(!pVar->fSimdReg,
8359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8360
8361 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8362 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8363
8364 uint8_t const idxRegVar = pVar->idxReg;
8365 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
8366 {
8367 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
8368 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
8369 Assert(pVar->idxReg == UINT8_MAX);
8370 }
8371 Assert( pVar->idxStackSlot != UINT8_MAX
8372 && pVar->idxReg == UINT8_MAX);
8373
8374 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8375}
8376
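/* Usage sketch (illustrative; the variable index is hypothetical): passing a
 * stack variable by reference to a C helper:
 *      off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off,
 *                                                IEMNATIVE_CALL_ARG1_GREG,
 *                                                idxVarValue, true);
 * Any cached register copy is flushed to the variable's stack slot first, so
 * the helper sees (and may update) the authoritative in-memory value. */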
8377
8378
8379/*********************************************************************************************************************************
8380* TB exiting helpers. *
8381*********************************************************************************************************************************/
8382
8383#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8384/* IEMAllN8veEmit-x86.h: */
8385template<uint32_t const a_bmInputRegs>
8386DECL_FORCE_INLINE_THROW(uint32_t)
8387iemNativeDoPostponedEFlagsAtTbExitEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVEINSTR pCodeBuf);
8388
8389template<uint32_t const a_bmInputRegs>
8390DECL_FORCE_INLINE_THROW(uint32_t)
8391iemNativeDoPostponedEFlagsAtTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off);
8392#endif
8393
8394
8395/**
8396 * Helper for marking the current conditional branch as exiting the TB.
8397 *
8398 * This simplifies the state consolidation later when we reach the IEM_MC_ENDIF.
8399 */
8400DECL_FORCE_INLINE(void) iemNativeMarkCurCondBranchAsExiting(PIEMRECOMPILERSTATE pReNative)
8401{
8402 uint8_t idxCondDepth = pReNative->cCondDepth;
8403 if (idxCondDepth)
8404 {
8405 idxCondDepth--;
8406 pReNative->aCondStack[idxCondDepth].afExitTb[pReNative->aCondStack[idxCondDepth].fInElse] = true;
8407 }
8408}
8409
8410
8411/**
8412 * Unconditionally exits the translation block via a branch instruction.
8413 *
8414 * @note In case a delayed EFLAGS calculation is pending, this may emit up to
8415 * IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS additional instructions.
8416 */
8417template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fActuallyExitingTb = true, bool const a_fPostponedEfl = true>
8418DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off)
8419{
8420 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8421 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8422
8423 if RT_CONSTEXPR_IF(a_fActuallyExitingTb)
8424 iemNativeMarkCurCondBranchAsExiting(pReNative);
8425
8426#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8427 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8428 off = iemNativeDoPostponedEFlagsAtTbExitEx<IEMNATIVELABELTYPE_GET_INPUT_REG_MASK(a_enmExitReason)>(pReNative, off,
8429 pCodeBuf);
8430#endif
8431
8432#ifdef RT_ARCH_AMD64
8433 /* jmp rel32 */
8434 pCodeBuf[off++] = 0xe9;
8435 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8436 pCodeBuf[off++] = 0xfe;
8437 pCodeBuf[off++] = 0xff;
8438 pCodeBuf[off++] = 0xff;
8439 pCodeBuf[off++] = 0xff;
8440
8441#elif defined(RT_ARCH_ARM64)
8442 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8443 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8444
8445#else
8446# error "Port me!"
8447#endif
8448 return off;
8449}
8450
8451
8452/**
8453 * Unconditionally exits the translation block via a branch instruction.
8454 *
8455 * @note In case a delayed EFLAGS calculation is pending, this may emit up to
8456 * IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS additional instructions.
8457 */
8458template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fActuallyExitingTb = true, bool const a_fPostponedEfl = true>
8459DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8460{
8461 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8462 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8463
8464 if RT_CONSTEXPR_IF(a_fActuallyExitingTb)
8465 iemNativeMarkCurCondBranchAsExiting(pReNative);
8466
8467#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8468 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8469 off = iemNativeDoPostponedEFlagsAtTbExit<IEMNATIVELABELTYPE_GET_INPUT_REG_MASK(a_enmExitReason)>(pReNative, off);
8470#endif
8471
8472#ifdef RT_ARCH_AMD64
8473 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8474
8475 /* jmp rel32 */
8476 pCodeBuf[off++] = 0xe9;
8477 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8478 pCodeBuf[off++] = 0xfe;
8479 pCodeBuf[off++] = 0xff;
8480 pCodeBuf[off++] = 0xff;
8481 pCodeBuf[off++] = 0xff;
8482
8483#elif defined(RT_ARCH_ARM64)
8484 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8485 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8486 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8487
8488#else
8489# error "Port me!"
8490#endif
8491 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8492 return off;
8493}
8494
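/* Usage sketch (illustrative; the exit reason merely stands in for one of the
 * IEMNATIVELABELTYPE exit values used by the recompiler):
 *      off = iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreak>(pReNative, off);
 * The branch target bytes emitted above are placeholders; iemNativeAddTbExitFixup()
 * records them so they can be patched to the shared per-exit-reason code when
 * the TB is finalized. */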
8495
8496/**
8497 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
8498 *
8499 * @note In case a delayed EFLAGS calculation is pending, this may emit up to
8500 * IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS additional instructions.
8501 */
8502template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8503DECL_FORCE_INLINE_THROW(uint32_t)
8504iemNativeEmitTbExitJccEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, IEMNATIVEINSTRCOND enmCond)
8505{
8506 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8507 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8508
8509#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8510 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8511 if (pReNative->PostponedEfl.fEFlags)
8512 {
8513 /* Jcc l_NonPrimaryCodeStreamTarget */
8514 uint32_t const offFixup1 = off;
8515 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, enmCond);
8516
8517 /* JMP l_PrimaryCodeStreamResume */
8518 uint32_t const offFixup2 = off;
8519 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, off + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8520
8521 /* l_NonPrimaryCodeStreamTarget: */
8522 iemNativeFixupFixedJump(pReNative, offFixup1, off);
8523 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8524
8525 /* l_PrimaryCodeStreamResume: */
8526 iemNativeFixupFixedJump(pReNative, offFixup2, off);
8527 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8528 return off;
8529 }
8530#endif
8531
8532#if defined(RT_ARCH_AMD64)
8533 /* jcc rel32 */
8534 pCodeBuf[off++] = 0x0f;
8535 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
8536 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8537 pCodeBuf[off++] = 0x00;
8538 pCodeBuf[off++] = 0x00;
8539 pCodeBuf[off++] = 0x00;
8540 pCodeBuf[off++] = 0x00;
8541
8542#else
8543 /* ARM64 doesn't have the necessary jump range, so we jump via a local label
8544 just like when we keep everything local. */
8545 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8546 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel, enmCond);
8547#endif
8548 return off;
8549}
8550
8551
8552/**
8553 * Emits a Jcc rel32 / B.cc imm19 to the TB exit with the given reason.
8554 */
8555template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8556DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJcc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEINSTRCOND enmCond)
8557{
8558 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8559 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8560
8561#ifdef RT_ARCH_AMD64
8562 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS + 5);
8563#elif defined(RT_ARCH_ARM64)
8564 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS + 1);
8565#else
8566# error "Port me!"
8567#endif
8568 off = iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, enmCond);
8569 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8570 return off;
8571}
8572
8573
8574/**
8575 * Emits a JNZ/JNE rel32 / B.NE imm19 to the TB exit routine with the given reason.
8576 */
8577template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8578DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJnz(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8579{
8580#ifdef RT_ARCH_AMD64
8581 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_ne);
8582#elif defined(RT_ARCH_ARM64)
8583 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Ne);
8584#else
8585# error "Port me!"
8586#endif
8587}
8588
8589
8590/**
8591 * Emits a JZ/JE rel32 / B.EQ imm19 to the TB exit routine with the given reason.
8592 */
8593template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8594DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJz(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8595{
8596#ifdef RT_ARCH_AMD64
8597 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_e);
8598#elif defined(RT_ARCH_ARM64)
8599 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Eq);
8600#else
8601# error "Port me!"
8602#endif
8603}
8604
8605
8606/**
8607 * Emits a JA/JNBE rel32 / B.HI imm19 to the TB exit.
8608 */
8609template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8610DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJa(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8611{
8612#ifdef RT_ARCH_AMD64
8613 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_nbe);
8614#elif defined(RT_ARCH_ARM64)
8615 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Hi);
8616#else
8617# error "Port me!"
8618#endif
8619}
8620
8621
8622/**
8623 * Emits a JL/JNGE rel32 / B.LT imm19 to the TB exit with the given reason.
8624 */
8625template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8626DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJl(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8627{
8628#ifdef RT_ARCH_AMD64
8629 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_l);
8630#elif defined(RT_ARCH_ARM64)
8631 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Lt);
8632#else
8633# error "Port me!"
8634#endif
8635}
8636
8637
8638/**
8639 * Emits a jump to the TB exit with @a a_enmExitReason on the condition _any_ of
8640 * the bits in @a fBits are set in @a iGprSrc.
8641 */
8642template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8643DECL_INLINE_THROW(uint32_t)
8644iemNativeEmitTbExitIfAnyBitsSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
8645{
8646 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8647
8648 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8649 return iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8650}
8651
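/* Usage sketch (illustrative; the mask, register and exit reason are made up):
 * leave the TB when any of a set of status bits is pending:
 *      off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_ReturnBreak>(
 *                pReNative, off, idxRegFlags, fBitsToCheck);
 * Note the asserts above: the mask must contain more than one bit; for a
 * single bit the dedicated bit-test exit emitter further down is the right
 * tool. */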
8652
8653#if 0 /* unused */
8654/**
8655 * Emits a jump to the TB exit with @a a_enmExitReason on the condition
8656 * _none_ of the bits in @a fBits are set in @a iGprSrc.
8657 */
8658template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8659DECL_INLINE_THROW(uint32_t)
8660iemNativeEmitTbExitIfNoBitsSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
8661{
8662 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8663
8664 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8665 return iemNativeEmitTbExitJz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8666}
8667#endif
8668
8669
8670#if 0 /* unused */
8671/**
8672 * Emits code that exits the TB with the given reason if @a iGprLeft and @a iGprRight
8673 * differ.
8674 */
8675template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8676DECL_INLINE_THROW(uint32_t)
8677iemNativeEmitTbExitIfGprNotEqualGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
8678{
8679 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
8680 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8681 return off;
8682}
8683#endif
8684
8685
8686/**
8687 * Emits code that exits the current TB if 32-bit @a iGprSrc differs from
8688 * @a uImm.
8689 */
8690template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8691DECL_INLINE_THROW(uint32_t)
8692iemNativeEmitTbExitIfGpr32NotEqualImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm)
8693{
8694 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8695 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8696 return off;
8697}
8698
8699
8700/**
8701 * Emits code that exits the current TB if @a iGprSrc differs from @a uImm.
8702 */
8703template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8704DECL_INLINE_THROW(uint32_t)
8705iemNativeEmitTbExitIfGprNotEqualImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm)
8706{
8707 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8708 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8709 return off;
8710}
8711
8712
8713/**
8714 * Emits code that exits the current TB with the given reason if 32-bit @a iGprSrc equals @a uImm.
8715 */
8716template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8717DECL_INLINE_THROW(uint32_t)
8718iemNativeEmitTbExitIfGpr32EqualsImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm)
8719{
8720 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8721 off = iemNativeEmitTbExitJz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8722 return off;
8723}
8724
8725
8726/**
8727 * Emits code to exit the current TB with the reason @a a_enmExitReason on the
8728 * condition that bit @a iBitNo _is_ _set_ in @a iGprSrc.
8729 *
8730 * @note On ARM64 the range is only +/-8191 instructions.
8731 */
8732template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8733DECL_INLINE_THROW(uint32_t)
8734iemNativeEmitTbExitIfBitSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
8735{
8736 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8737
8738#if defined(RT_ARCH_AMD64)
8739 Assert(iBitNo < 64);
8740 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8741 if (iBitNo < 8)
8742 {
8743 /* test Eb, imm8 */
8744 if (iGprSrc >= 4)
8745 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
8746 pbCodeBuf[off++] = 0xf6;
8747 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
8748 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
8749 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8750 off = iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_ne);
8751 }
8752 else
8753 {
8754 /* bt Ev, imm8 */
8755 if (iBitNo >= 32)
8756 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8757 else if (iGprSrc >= 8)
8758 pbCodeBuf[off++] = X86_OP_REX_B;
8759 pbCodeBuf[off++] = 0x0f;
8760 pbCodeBuf[off++] = 0xba;
8761 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8762 pbCodeBuf[off++] = iBitNo;
8763 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8764 off = iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_c);
8765 }
8766 return off;
8767
8768#elif defined(RT_ARCH_ARM64)
8769 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8770 /** @todo Perhaps we should always apply the PostponedEfl code pattern here,
8771 * it's the same number of instructions as the TST + B.CC stuff? */
8772# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8773 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8774 if (pReNative->PostponedEfl.fEFlags)
8775 {
8776 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
8777 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8778 pCodeBuf[off++] = Armv8A64MkInstrTbnz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, iBitNo);
8779 uint32_t const offFixup = off;
8780 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8781 /* l_NonPrimaryCodeStreamTarget: */
8782 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8783 /* l_PrimaryCodeStreamResume: */
8784 iemNativeFixupFixedJump(pReNative, offFixup, off);
8785 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8786 return off;
8787 }
8788# endif
8789 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8790 we go via a local trampoline. */
8791 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8792 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
8793#else
8794# error "port me"
8795#endif
8796}
8797
8798
8799/**
8800 * Emits code that exits the current TB with @a a_enmExitReason if @a iGprSrc is
8801 * not zero.
8802 *
8803 * The operand size is given by @a f64Bit.
8804 */
8805template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8806DECL_FORCE_INLINE_THROW(uint32_t)
8807iemNativeEmitTbExitIfGprIsNotZeroEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8808 uint8_t iGprSrc, bool f64Bit)
8809{
8810 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8811
8812#if defined(RT_ARCH_AMD64)
8813 /* test reg32,reg32 / test reg64,reg64 */
8814 if (f64Bit)
8815 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8816 else if (iGprSrc >= 8)
8817 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8818 pCodeBuf[off++] = 0x85;
8819 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8820
8821 /* jnz <TB exit> */
8822 return iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
8823
8824#elif defined(RT_ARCH_ARM64)
8825 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8826# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8827 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8828 if (pReNative->PostponedEfl.fEFlags)
8829 {
8830 pCodeBuf[off++] = Armv8A64MkInstrCbnz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, f64Bit);
8831 uint32_t const offFixup = off;
8832 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8833 /* l_NonPrimaryCodeStreamTarget: */
8834 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8835 /* l_PrimaryCodeStreamResume: */
8836 iemNativeFixupFixedJump(pReNative, offFixup, off);
8837 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8838 return off;
8839 }
8840# endif
8841 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8842 we go via a local trampoline. */
8843 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8844 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8845 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8846#else
8847# error "port me"
8848#endif
8849}
8850
8851
8852/**
8853 * Emits code to exit the current TB with the given reason @a a_enmExitReason if
8854 * @a iGprSrc is not zero.
8855 *
8856 * The operand size is given by @a f64Bit.
8857 */
8858template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8859DECL_INLINE_THROW(uint32_t)
8860iemNativeEmitTbExitIfGprIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit)
8861{
8862#if defined(RT_ARCH_AMD64)
8863 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8864
8865#else
8866 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8867#endif
8868 off = iemNativeEmitTbExitIfGprIsNotZeroEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, iGprSrc, f64Bit);
8869 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8870 return off;
8871}
8872
8873
8874/**
8875 * Emits code that exits the current TB with @a a_enmExitReason if @a iGprSrc is
8876 * zero.
8877 *
8878 * The operand size is given by @a f64Bit.
8879 */
8880template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8881DECL_FORCE_INLINE_THROW(uint32_t)
8882iemNativeEmitTbExitIfGprIsZeroEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8883 uint8_t iGprSrc, bool f64Bit)
8884{
8885 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8886
8887#if defined(RT_ARCH_AMD64)
8888 /* test reg32,reg32 / test reg64,reg64 */
8889 if (f64Bit)
8890 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8891 else if (iGprSrc >= 8)
8892 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8893 pCodeBuf[off++] = 0x85;
8894 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8895
8896 /* jz <TB exit> */
8897 return iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, kIemNativeInstrCond_e);
8898
8899#elif defined(RT_ARCH_ARM64)
8900 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8901# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8902 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8903 if (pReNative->PostponedEfl.fEFlags)
8904 {
8905 pCodeBuf[off++] = Armv8A64MkInstrCbz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, f64Bit);
8906 uint32_t const offFixup = off;
8907 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8908 /* l_NonPrimaryCodeStreamTarget: */
8909 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8910 /* l_PrimaryCodeStreamResume: */
8911 iemNativeFixupFixedJump(pReNative, offFixup, off);
8912 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8913 return off;
8914 }
8915# endif
8916 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8917 we go via a local trampoline. */
8918 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8919 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8920 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
8921#else
8922# error "port me"
8923#endif
8924}
8925
8926
8927/**
8928 * Emits code to exit the current TB with the given reason @a a_enmExitReason if @a iGprSrc is zero.
8929 *
8930 * The operand size is given by @a f64Bit.
8931 */
8932template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8933DECL_INLINE_THROW(uint32_t)
8934iemNativeEmitTbExitIfGprIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit)
8935{
8936#if defined(RT_ARCH_AMD64)
8937 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8938
8939#else
8940 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8941#endif
8942 off = iemNativeEmitTbExitIfGprIsZeroEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, iGprSrc, f64Bit);
8943 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8944 return off;
8945}
8946
8947
8948#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8949/*********************************************************************************************************************************
8950* SIMD helpers. *
8951*********************************************************************************************************************************/
8952
8953
8954/**
8955 * Emits code to load the variable address into an argument GPR.
8956 *
8957 * This is a special variant intended for SIMD variables only and only called
8958 * by the TLB miss path in the memory fetch/store code because there we pass
8959 * the value by reference and need both the register and stack depending on which
8960 * path is taken (TLB hit vs. miss).
8961 */
8962DECL_FORCE_INLINE_THROW(uint32_t)
8963iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8964 bool fSyncRegWithStack = true)
8965{
8966 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8967 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8968 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8969 || pVar->enmKind == kIemNativeVarKind_Stack,
8970 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8971 AssertStmt(pVar->fSimdReg,
8972 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8973 Assert( pVar->idxStackSlot != UINT8_MAX
8974 && pVar->idxReg != UINT8_MAX);
8975
8976 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8977 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8978
8979 uint8_t const idxRegVar = pVar->idxReg;
8980 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8981 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
8982
8983 if (fSyncRegWithStack)
8984 {
8985 if (pVar->cbVar == sizeof(RTUINT128U))
8986 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
8987 else
8988 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
8989 }
8990
8991 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8992}
8993
8994
8995/**
8996 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
8997 *
8998 * This is a special helper used only by the TLB miss path in the memory
8999 * fetch/store code: the value is passed by reference there, so after a TLB
9000 * miss leaves the updated value on the stack it must be synced back into
9001 * the assigned host SIMD register.
9002 */
9003DECL_FORCE_INLINE_THROW(uint32_t)
9004iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
9005{
9006 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9007 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9008 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
9009 || pVar->enmKind == kIemNativeVarKind_Stack,
9010 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9011 AssertStmt(pVar->fSimdReg,
9012 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9013 Assert( pVar->idxStackSlot != UINT8_MAX
9014 && pVar->idxReg != UINT8_MAX);
9015
9016 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9017 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
9018
9019 uint8_t const idxRegVar = pVar->idxReg;
9020 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9021 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
9022
9023 if (pVar->cbVar == sizeof(RTUINT128U))
9024 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
9025 else
9026 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
9027
9028 return off;
9029}
9030
9031
9032/**
9033 * Emits a gprdst = ~gprsrc operation (bitwise NOT).
9034 */
9035DECL_FORCE_INLINE_THROW(uint32_t)
9036iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
9037{
9038#ifdef RT_ARCH_AMD64
9039 if (iGprDst != iGprSrc)
9040 {
9041 /* mov gprdst, gprsrc. */
9042 if (f64Bit)
9043 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
9044 else
9045 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
9046 }
9047
9048 /* not gprdst */
9049 if (f64Bit || iGprDst >= 8)
9050 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
9051 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
9052 pCodeBuf[off++] = 0xf7;
9053 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
9054#elif defined(RT_ARCH_ARM64)
9055 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
9056#else
9057# error "port me"
9058#endif
9059 return off;
9060}
9061
9062
9063/**
9064 * Emits a gprdst = ~gprsrc operation (bitwise NOT).
9065 */
9066DECL_INLINE_THROW(uint32_t)
9067iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
9068{
9069#ifdef RT_ARCH_AMD64
9070 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
9071#elif defined(RT_ARCH_ARM64)
9072 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
9073#else
9074# error "port me"
9075#endif
9076 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9077 return off;
9078}
9079
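/* Implementation note on the ARM64 path above: ORN with the zero register
 * computes 0 | ~src, so the single instruction
 *      orn  dst, xzr, src
 * covers both the move and the complement, whereas AMD64 needs a MOV (when
 * the registers differ) followed by NOT (F7 /2). */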
9080
9081/**
9082 * Emits a 128-bit vector register store to a VCpu value.
9083 */
9084DECL_FORCE_INLINE_THROW(uint32_t)
9085iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9086{
9087#ifdef RT_ARCH_AMD64
9088 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
9089 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9090 if (iVecReg >= 8)
9091 pCodeBuf[off++] = X86_OP_REX_R;
9092 pCodeBuf[off++] = 0x0f;
9093 pCodeBuf[off++] = 0x7f;
9094 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9095#elif defined(RT_ARCH_ARM64)
9096 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
9097
9098#else
9099# error "port me"
9100#endif
9101 return off;
9102}
9103
9104
9105/**
9106 * Emits a 128-bit vector register store to a VCpu value.
9107 */
9108DECL_INLINE_THROW(uint32_t)
9109iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9110{
9111#ifdef RT_ARCH_AMD64
9112 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
9113#elif defined(RT_ARCH_ARM64)
9114 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
9115#else
9116# error "port me"
9117#endif
9118 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9119 return off;
9120}
9121
9122
9123/**
9124 * Emits a high 128-bit vector register store to a VCpu value.
9125 */
9126DECL_FORCE_INLINE_THROW(uint32_t)
9127iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9128{
9129#ifdef RT_ARCH_AMD64
9130 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
9131 pCodeBuf[off++] = X86_OP_VEX3;
9132 if (iVecReg >= 8)
9133 pCodeBuf[off++] = 0x63;
9134 else
9135 pCodeBuf[off++] = 0xe3;
9136 pCodeBuf[off++] = 0x7d;
9137 pCodeBuf[off++] = 0x39;
9138 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9139 pCodeBuf[off++] = 0x01; /* Immediate */
9140#elif defined(RT_ARCH_ARM64)
9141 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
9142#else
9143# error "port me"
9144#endif
9145 return off;
9146}
9147
9148
9149/**
9150 * Emits a high 128-bit vector register store to a VCpu value.
9151 */
9152DECL_INLINE_THROW(uint32_t)
9153iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9154{
9155#ifdef RT_ARCH_AMD64
9156 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
9157#elif defined(RT_ARCH_ARM64)
9158 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9159 Assert(!(iVecReg & 0x1));
9160 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
9161#else
9162# error "port me"
9163#endif
9164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9165 return off;
9166}
9167
9168
9169/**
9170 * Emits a 128-bit vector register load of a VCpu value.
9171 */
9172DECL_FORCE_INLINE_THROW(uint32_t)
9173iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9174{
9175#ifdef RT_ARCH_AMD64
9176 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
9177 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9178 if (iVecReg >= 8)
9179 pCodeBuf[off++] = X86_OP_REX_R;
9180 pCodeBuf[off++] = 0x0f;
9181 pCodeBuf[off++] = 0x6f;
9182 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9183#elif defined(RT_ARCH_ARM64)
9184 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
9185
9186#else
9187# error "port me"
9188#endif
9189 return off;
9190}
9191
9192
9193/**
9194 * Emits a 128-bit vector register load of a VCpu value.
9195 */
9196DECL_INLINE_THROW(uint32_t)
9197iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9198{
9199#ifdef RT_ARCH_AMD64
9200 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
9201#elif defined(RT_ARCH_ARM64)
9202 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
9203#else
9204# error "port me"
9205#endif
9206 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9207 return off;
9208}
9209
9210
9211/**
9212 * Emits a high 128-bit vector register load of a VCpu value.
9213 */
9214DECL_FORCE_INLINE_THROW(uint32_t)
9215iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9216{
9217#ifdef RT_ARCH_AMD64
9218 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
9219 pCodeBuf[off++] = X86_OP_VEX3;
9220 if (iVecReg >= 8)
9221 pCodeBuf[off++] = 0x63;
9222 else
9223 pCodeBuf[off++] = 0xe3;
9224 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9225 pCodeBuf[off++] = 0x38;
9226 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9227 pCodeBuf[off++] = 0x01; /* Immediate */
9228#elif defined(RT_ARCH_ARM64)
9229 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
9230#else
9231# error "port me"
9232#endif
9233 return off;
9234}
9235
9236
9237/**
9238 * Emits a high 128-bit vector register load of a VCpu value.
9239 */
9240DECL_INLINE_THROW(uint32_t)
9241iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9242{
9243#ifdef RT_ARCH_AMD64
9244 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
9245#elif defined(RT_ARCH_ARM64)
9246 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9247 Assert(!(iVecReg & 0x1));
9248 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
9249#else
9250# error "port me"
9251#endif
9252 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9253 return off;
9254}
9255
9256
9257/**
9258 * Emits a vecdst = vecsrc load.
9259 */
9260DECL_FORCE_INLINE(uint32_t)
9261iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9262{
9263#ifdef RT_ARCH_AMD64
9264 /* movdqu vecdst, vecsrc */
9265 pCodeBuf[off++] = 0xf3;
9266
9267 if ((iVecRegDst | iVecRegSrc) >= 8)
9268 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
9269 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
9270 : X86_OP_REX_R;
9271 pCodeBuf[off++] = 0x0f;
9272 pCodeBuf[off++] = 0x6f;
9273 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9274
9275#elif defined(RT_ARCH_ARM64)
9276 /* mov dst, src; alias for: orr dst, src, src */
9277 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9278
9279#else
9280# error "port me"
9281#endif
9282 return off;
9283}
9284
9285
9286/**
9287 * Emits a vecdst = vecsrc load, 128-bit.
9288 */
9289DECL_INLINE_THROW(uint32_t)
9290iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9291{
9292#ifdef RT_ARCH_AMD64
9293 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
9294#elif defined(RT_ARCH_ARM64)
9295 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
9296#else
9297# error "port me"
9298#endif
9299 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9300 return off;
9301}
9302
9303
9304/**
9305 * Emits a vecdst[128:255] = vecsrc[128:255] load.
9306 */
9307DECL_FORCE_INLINE_THROW(uint32_t)
9308iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9309{
9310#ifdef RT_ARCH_AMD64
9311 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
9312 pCodeBuf[off++] = X86_OP_VEX3;
9313 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9314 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9315 pCodeBuf[off++] = 0x46;
9316 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9317 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
9318
9319#elif defined(RT_ARCH_ARM64)
9320 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
9321
9322 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
9323# ifdef IEM_WITH_THROW_CATCH
9324 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
9325# else
9326 AssertReleaseFailedStmt(off = UINT32_MAX);
9327# endif
9328#else
9329# error "port me"
9330#endif
9331 return off;
9332}
9333
9334
9335/**
9336 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
9337 */
9338DECL_INLINE_THROW(uint32_t)
9339iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9340{
9341#ifdef RT_ARCH_AMD64
9342 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
9343#elif defined(RT_ARCH_ARM64)
9344 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9345 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
9346#else
9347# error "port me"
9348#endif
9349 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9350 return off;
9351}
9352
9353
9354/**
9355 * Emits a vecdst[0:127] = vecsrc[128:255] load.
9356 */
9357DECL_FORCE_INLINE_THROW(uint32_t)
9358iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9359{
9360#ifdef RT_ARCH_AMD64
9361 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
9362 pCodeBuf[off++] = X86_OP_VEX3;
9363 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
9364 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9365 pCodeBuf[off++] = 0x39;
9366 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
9367 pCodeBuf[off++] = 0x1;
9368
9369#elif defined(RT_ARCH_ARM64)
9370 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
9371
9372 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
9373# ifdef IEM_WITH_THROW_CATCH
9374 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
9375# else
9376 AssertReleaseFailedStmt(off = UINT32_MAX);
9377# endif
9378#else
9379# error "port me"
9380#endif
9381 return off;
9382}
9383
9384
9385/**
9386 * Emits a vecdst[0:127] = vecsrc[128:255] load (extracts the high 128-bit half of the source).
9387 */
9388DECL_INLINE_THROW(uint32_t)
9389iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9390{
9391#ifdef RT_ARCH_AMD64
9392 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
9393#elif defined(RT_ARCH_ARM64)
9394 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9395 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
9396#else
9397# error "port me"
9398#endif
9399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9400 return off;
9401}
9402
9403
9404/**
9405 * Emits a vecdst = vecsrc load, 256-bit.
9406 */
9407DECL_INLINE_THROW(uint32_t)
9408iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9409{
9410#ifdef RT_ARCH_AMD64
9411 /* vmovdqa ymm, ymm */
9412 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9413 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
9414 {
9415 pbCodeBuf[off++] = X86_OP_VEX3;
9416 pbCodeBuf[off++] = 0x41;
9417 pbCodeBuf[off++] = 0x7d;
9418 pbCodeBuf[off++] = 0x6f;
9419 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9420 }
9421 else
9422 {
9423 pbCodeBuf[off++] = X86_OP_VEX2;
9424 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
9425 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
9426 pbCodeBuf[off++] = iVecRegSrc >= 8
9427 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
9428 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9429 }
9430#elif defined(RT_ARCH_ARM64)
9431 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9432 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
9433 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
9434 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
9435#else
9436# error "port me"
9437#endif
9438 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9439 return off;
9440}
9441
9442
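/*
 * Editor's note: the ARM64 code in this file models a 256-bit guest
 * register as two adjacent host NEON registers - the even register holds
 * bits [0:127] and the odd one bits [128:255].  This is what the
 * Assert(!(iVecReg & 0x1)) checks and the 'iVecReg + 1' accesses in the
 * emitters below rely on; e.g. a guest YMM value kept in v10/v11 (a
 * hypothetical allocation) has its high half in v11.
 */

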
9443/**
9444 * Emits a vecdst[128:255] = vecsrc[0:127] load.
9445 */
9446DECL_FORCE_INLINE(uint32_t)
9447iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9448{
9449#ifdef RT_ARCH_AMD64
9450 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
9451 pCodeBuf[off++] = X86_OP_VEX3;
9452 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9453 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9454 pCodeBuf[off++] = 0x38;
9455 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9456 pCodeBuf[off++] = 0x01; /* Immediate */
9457
9458#elif defined(RT_ARCH_ARM64)
9459 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9460 /* mov dst, src; alias for: orr dst, src, src */
9461 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
9462
9463#else
9464# error "port me"
9465#endif
9466 return off;
9467}
9468
9469
9470/**
9471 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
9472 */
9473DECL_INLINE_THROW(uint32_t)
9474iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9475{
9476#ifdef RT_ARCH_AMD64
9477 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
9478#elif defined(RT_ARCH_ARM64)
9479 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
9480#else
9481# error "port me"
9482#endif
9483 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9484 return off;
9485}
9486
9487
9488/**
9489 * Emits a gprdst = vecsrc[x] load, 64-bit.
9490 */
9491DECL_FORCE_INLINE(uint32_t)
9492iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
9493{
9494#ifdef RT_ARCH_AMD64
9495 if (iQWord >= 2)
9496 {
9497 /*
9498 * vpextrq doesn't work on the upper 128-bits.
9499 * So we use the following sequence:
9500 * vextracti128 vectmp0, vecsrc, 1
9501 * pextrq gpr, vectmp0, #(iQWord - 2)
9502 */
9503 /* vextracti128 */
9504 pCodeBuf[off++] = X86_OP_VEX3;
9505 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
9506 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9507 pCodeBuf[off++] = 0x39;
9508 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9509 pCodeBuf[off++] = 0x1;
9510
9511 /* pextrq */
9512 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9513 pCodeBuf[off++] = X86_OP_REX_W
9514 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9515 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9516 pCodeBuf[off++] = 0x0f;
9517 pCodeBuf[off++] = 0x3a;
9518 pCodeBuf[off++] = 0x16;
9519 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
9520 pCodeBuf[off++] = iQWord - 2;
9521 }
9522 else
9523 {
9524 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
9525 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9526 pCodeBuf[off++] = X86_OP_REX_W
9527 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9528 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9529 pCodeBuf[off++] = 0x0f;
9530 pCodeBuf[off++] = 0x3a;
9531 pCodeBuf[off++] = 0x16;
9532 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9533 pCodeBuf[off++] = iQWord;
9534 }
9535#elif defined(RT_ARCH_ARM64)
9536 /* umov gprdst, vecsrc[iQWord] */
9537 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9538#else
9539# error "port me"
9540#endif
9541 return off;
9542}
9543
9544
9545/**
9546 * Emits a gprdst = vecsrc[x] load, 64-bit.
9547 */
9548DECL_INLINE_THROW(uint32_t)
9549iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
9550{
9551 Assert(iQWord <= 3);
9552
9553#ifdef RT_ARCH_AMD64
9554 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iVecRegSrc, iQWord);
9555#elif defined(RT_ARCH_ARM64)
9556 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9557 Assert(!(iVecRegSrc & 0x1));
9558 /* Need to access the "high" 128-bit vector register. */
9559 if (iQWord >= 2)
9560 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
9561 else
9562 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
9563#else
9564# error "port me"
9565#endif
9566 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9567 return off;
9568}
9569
9570
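/*
 * Illustrative sketch (not from the original source): fetching the top
 * qword of a 256-bit value into a host GPR, with hypothetical register
 * indexes.  On ARM64 this resolves to a single umov from the odd register
 * of the pair; on AMD64 it is the vextracti128 + pextrq sequence above,
 * which clobbers IEMNATIVE_SIMD_REG_FIXED_TMP0.
 *
 *     off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off,
 *                                                 idxGprDst, idxVecRegSrc, 3);
 */

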
9571/**
9572 * Emits a gprdst = vecsrc[x] load, 32-bit.
9573 */
9574DECL_FORCE_INLINE(uint32_t)
9575iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
9576{
9577#ifdef RT_ARCH_AMD64
9578 if (iDWord >= 4)
9579 {
9580 /*
9581 * vpextrd doesn't work on the upper 128-bits.
9582 * So we use the following sequence:
9583 * vextracti128 vectmp0, vecsrc, 1
9584 * pextrd gpr, vectmp0, #(iDWord - 4)
9585 */
9586 /* vextracti128 */
9587 pCodeBuf[off++] = X86_OP_VEX3;
9588 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
9589 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9590 pCodeBuf[off++] = 0x39;
9591 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9592 pCodeBuf[off++] = 0x1;
9593
9594 /* pextrd gpr, vectmp0, #(iDWord - 4) (ASSUMES SSE4.1). */
9595 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9596 if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
9597 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9598 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9599 pCodeBuf[off++] = 0x0f;
9600 pCodeBuf[off++] = 0x3a;
9601 pCodeBuf[off++] = 0x16;
9602 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
9603 pCodeBuf[off++] = iDWord - 4;
9604 }
9605 else
9606 {
9607 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
9608 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9609 if (iGprDst >= 8 || iVecRegSrc >= 8)
9610 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9611 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9612 pCodeBuf[off++] = 0x0f;
9613 pCodeBuf[off++] = 0x3a;
9614 pCodeBuf[off++] = 0x16;
9615 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9616 pCodeBuf[off++] = iDWord;
9617 }
9618#elif defined(RT_ARCH_ARM64)
9619 Assert(iDWord < 4);
9620
9621 /* umov gprdst, vecsrc[iDWord] */
9622 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
9623#else
9624# error "port me"
9625#endif
9626 return off;
9627}
9628
9629
9630/**
9631 * Emits a gprdst = vecsrc[x] load, 32-bit.
9632 */
9633DECL_INLINE_THROW(uint32_t)
9634iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
9635{
9636 Assert(iDWord <= 7);
9637
9638#ifdef RT_ARCH_AMD64
9639 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
9640#elif defined(RT_ARCH_ARM64)
9641 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9642 Assert(!(iVecRegSrc & 0x1));
9643 /* Need to access the "high" 128-bit vector register. */
9644 if (iDWord >= 4)
9645 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
9646 else
9647 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
9648#else
9649# error "port me"
9650#endif
9651 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9652 return off;
9653}
9654
9655
9656/**
9657 * Emits a gprdst = vecsrc[x] load, 16-bit.
9658 */
9659DECL_FORCE_INLINE(uint32_t)
9660iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9661{
9662#ifdef RT_ARCH_AMD64
9663 if (iWord >= 8)
9664 {
9665 /** @todo Currently not used. */
9666 AssertReleaseFailed();
9667 }
9668 else
9669 {
9670 /* pextrw gpr, vecsrc, #iWord */
9671 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9672 if (iGprDst >= 8 || iVecRegSrc >= 8)
9673 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
9674 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
9675 pCodeBuf[off++] = 0x0f;
9676 pCodeBuf[off++] = 0xc5;
9677 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
9678 pCodeBuf[off++] = iWord;
9679 }
9680#elif defined(RT_ARCH_ARM64)
9681 /* umov gprdst, vecsrc[iWord] */
9682 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
9683#else
9684# error "port me"
9685#endif
9686 return off;
9687}
9688
9689
9690/**
9691 * Emits a gprdst = vecsrc[x] load, 16-bit.
9692 */
9693DECL_INLINE_THROW(uint32_t)
9694iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9695{
9696 Assert(iWord <= 15);
9697
9698#ifdef RT_ARCH_AMD64
9699 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
9700#elif defined(RT_ARCH_ARM64)
9701 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9702 Assert(!(iVecRegSrc & 0x1));
9703 /* Need to access the "high" 128-bit vector register. */
9704 if (iWord >= 8)
9705 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
9706 else
9707 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
9708#else
9709# error "port me"
9710#endif
9711 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9712 return off;
9713}
9714
9715
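/*
 * Editor's note: unlike the byte/dword/qword extractors, pextrw with a GPR
 * destination dates back to SSE2, which is why the emitter above carries no
 * "ASSUMES SSE4.1" remark.
 */

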
9716/**
9717 * Emits a gprdst = vecsrc[x] load, 8-bit.
9718 */
9719DECL_FORCE_INLINE(uint32_t)
9720iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9721{
9722#ifdef RT_ARCH_AMD64
9723 if (iByte >= 16)
9724 {
9725 /** @todo Currently not used. */
9726 AssertReleaseFailed();
9727 }
9728 else
9729 {
9730 /* pextrb gpr, vecsrc, #iByte */
9731 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9732 if (iGprDst >= 8 || iVecRegSrc >= 8)
9733 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9734 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9735 pCodeBuf[off++] = 0x0f;
9736 pCodeBuf[off++] = 0x3a;
9737 pCodeBuf[off++] = 0x14;
9738 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9739 pCodeBuf[off++] = iByte;
9740 }
9741#elif defined(RT_ARCH_ARM64)
9742 /* umov gprdst, vecsrc[iByte] */
9743 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
9744#else
9745# error "port me"
9746#endif
9747 return off;
9748}
9749
9750
9751/**
9752 * Emits a gprdst = vecsrc[x] load, 8-bit.
9753 */
9754DECL_INLINE_THROW(uint32_t)
9755iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9756{
9757 Assert(iByte <= 31);
9758
9759#ifdef RT_ARCH_AMD64
9760 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
9761#elif defined(RT_ARCH_ARM64)
9762 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9763 Assert(!(iVecRegSrc & 0x1));
9764 /* Need to access the "high" 128-bit vector register. */
9765 if (iByte >= 16)
9766 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
9767 else
9768 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
9769#else
9770# error "port me"
9771#endif
9772 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9773 return off;
9774}
9775
9776
9777/**
9778 * Emits a vecdst[x] = gprsrc store, 64-bit.
9779 */
9780DECL_FORCE_INLINE(uint32_t)
9781iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9782{
9783#ifdef RT_ARCH_AMD64
9784 if (iQWord >= 2)
9785 {
9786 /*
9787 * vpinsrq doesn't work on the upper 128-bits.
9788 * So we use the following sequence:
9789 * vextracti128 vectmp0, vecdst, 1
9790 * pinsrq vectmp0, gpr, #(iQWord - 2)
9791 * vinserti128 vecdst, vecdst, vectmp0, 1
9792 */
9793 /* vextracti128 */
9794 pCodeBuf[off++] = X86_OP_VEX3;
9795 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9796 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9797 pCodeBuf[off++] = 0x39;
9798 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9799 pCodeBuf[off++] = 0x1;
9800
9801 /* pinsrq */
9802 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9803 pCodeBuf[off++] = X86_OP_REX_W
9804 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9805 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9806 pCodeBuf[off++] = 0x0f;
9807 pCodeBuf[off++] = 0x3a;
9808 pCodeBuf[off++] = 0x22;
9809 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9810 pCodeBuf[off++] = iQWord - 2;
9811
9812 /* vinserti128 */
9813 pCodeBuf[off++] = X86_OP_VEX3;
9814 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9815 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9816 pCodeBuf[off++] = 0x38;
9817 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9818 pCodeBuf[off++] = 0x01; /* Immediate */
9819 }
9820 else
9821 {
9822 /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
9823 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9824 pCodeBuf[off++] = X86_OP_REX_W
9825 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9826 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9827 pCodeBuf[off++] = 0x0f;
9828 pCodeBuf[off++] = 0x3a;
9829 pCodeBuf[off++] = 0x22;
9830 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9831 pCodeBuf[off++] = iQWord;
9832 }
9833#elif defined(RT_ARCH_ARM64)
9834 /* ins vecdst[iQWord], gpr */
9835 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9836#else
9837# error "port me"
9838#endif
9839 return off;
9840}
9841
9842
9843/**
9844 * Emits a vecdst[x] = gprsrc store, 64-bit.
9845 */
9846DECL_INLINE_THROW(uint32_t)
9847iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9848{
9849 Assert(iQWord <= 3);
9850
9851#ifdef RT_ARCH_AMD64
9852 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
9853#elif defined(RT_ARCH_ARM64)
9854 Assert(!(iVecRegDst & 0x1));
9855 if (iQWord >= 2)
9856 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
9857 else
9858 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
9859#else
9860# error "port me"
9861#endif
9862 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9863 return off;
9864}
9865
9866
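/*
 * Illustrative sketch (not from the original source): storing a GPR into
 * qword 2 of a 256-bit value, with hypothetical register indexes.  The
 * AMD64 upper-lane path is a read-modify-write
 * (vextracti128 / pinsrq / vinserti128) that clobbers
 * IEMNATIVE_SIMD_REG_FIXED_TMP0, whereas ARM64 inserts directly into the
 * odd register of the pair.
 *
 *     off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off,
 *                                                idxVecRegDst, idxGprSrc, 2);
 */

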
9867/**
9868 * Emits a vecdst[x] = gprsrc store, 32-bit.
9869 */
9870DECL_FORCE_INLINE(uint32_t)
9871iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9872{
9873#ifdef RT_ARCH_AMD64
9874 if (iDWord >= 4)
9875 {
9876 /*
9877 * vpinsrd doesn't work on the upper 128-bits.
9878 * So we use the following sequence:
9879 * vextracti128 vectmp0, vecdst, 1
9880 * pinsrd vectmp0, gpr, #(iDWord - 4)
9881 * vinserti128 vecdst, vecdst, vectmp0, 1
9882 */
9883 /* vextracti128 */
9884 pCodeBuf[off++] = X86_OP_VEX3;
9885 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9886 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9887 pCodeBuf[off++] = 0x39;
9888 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9889 pCodeBuf[off++] = 0x1;
9890
9891 /* pinsrd */
9892 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9893 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
9894 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9895 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9896 pCodeBuf[off++] = 0x0f;
9897 pCodeBuf[off++] = 0x3a;
9898 pCodeBuf[off++] = 0x22;
9899 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9900 pCodeBuf[off++] = iDWord - 4;
9901
9902 /* vinserti128 */
9903 pCodeBuf[off++] = X86_OP_VEX3;
9904 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9905 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9906 pCodeBuf[off++] = 0x38;
9907 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9908 pCodeBuf[off++] = 0x01; /* Immediate */
9909 }
9910 else
9911 {
9912 /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
9913 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9914 if (iVecRegDst >= 8 || iGprSrc >= 8)
9915 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9916 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9917 pCodeBuf[off++] = 0x0f;
9918 pCodeBuf[off++] = 0x3a;
9919 pCodeBuf[off++] = 0x22;
9920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9921 pCodeBuf[off++] = iDWord;
9922 }
9923#elif defined(RT_ARCH_ARM64)
9924 /* ins vecdst[iDWord], gpr */
9925 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
9926#else
9927# error "port me"
9928#endif
9929 return off;
9930}
9931
9932
9933/**
9934 * Emits a vecdst[x] = gprsrc store, 32-bit.
9935 */
9936DECL_INLINE_THROW(uint32_t)
9937iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9938{
9939 Assert(iDWord <= 7);
9940
9941#ifdef RT_ARCH_AMD64
9942 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
9943#elif defined(RT_ARCH_ARM64)
9944 Assert(!(iVecRegDst & 0x1));
9945 if (iDWord >= 4)
9946 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
9947 else
9948 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
9949#else
9950# error "port me"
9951#endif
9952 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9953 return off;
9954}
9955
9956
9957/**
9958 * Emits a vecdst[x] = gprsrc store, 16-bit.
9959 */
9960DECL_FORCE_INLINE(uint32_t)
9961iemNativeEmitSimdStoreGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9962{
9963#ifdef RT_ARCH_AMD64
9964 /* pinsrw vecdst, gpr, #iWord. */
9965 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9966 if (iVecRegDst >= 8 || iGprSrc >= 8)
9967 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9968 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9969 pCodeBuf[off++] = 0x0f;
9970 pCodeBuf[off++] = 0xc4;
9971 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9972 pCodeBuf[off++] = iWord;
9973#elif defined(RT_ARCH_ARM64)
9974 /* ins vecdst[iWord], gpr */
9975 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iWord, kArmv8InstrUmovInsSz_U16);
9976#else
9977# error "port me"
9978#endif
9979 return off;
9980}
9981
9982
9983/**
9984 * Emits a vecdst[x] = gprsrc store, 16-bit.
9985 */
9986DECL_INLINE_THROW(uint32_t)
9987iemNativeEmitSimdStoreGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9988{
9989 Assert(iWord <= 7); /* Both paths only address the low 128-bit half. */
9990
9991#ifdef RT_ARCH_AMD64
9992 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iGprSrc, iWord);
9993#elif defined(RT_ARCH_ARM64)
9994 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iWord);
9995#else
9996# error "port me"
9997#endif
9998 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9999 return off;
10000}
10001
10002
10003/**
10004 * Emits a vecdst[x] = gprsrc store, 8-bit.
10005 */
10006DECL_FORCE_INLINE(uint32_t)
10007iemNativeEmitSimdStoreGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
10008{
10009#ifdef RT_ARCH_AMD64
10010 /* pinsrb vecdst, gpr, #iByte (ASSUMES SSE4.1). */
10011 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10012 if (iVecRegDst >= 8 || iGprSrc >= 8)
10013 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10014 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10015 pCodeBuf[off++] = 0x0f;
10016 pCodeBuf[off++] = 0x3a;
10017 pCodeBuf[off++] = 0x20;
10018 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10019 pCodeBuf[off++] = iByte;
10020#elif defined(RT_ARCH_ARM64)
10021 /* ins vecdst[iByte], gpr */
10022 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iByte, kArmv8InstrUmovInsSz_U8);
10023#else
10024# error "port me"
10025#endif
10026 return off;
10027}
10028
10029
10030/**
10031 * Emits a vecdst[x] = gprsrc store, 8-bit.
10032 */
10033DECL_INLINE_THROW(uint32_t)
10034iemNativeEmitSimdStoreGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
10035{
10036 Assert(iByte <= 15);
10037
10038#ifdef RT_ARCH_AMD64
10039 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iByte);
10040#elif defined(RT_ARCH_ARM64)
10041 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iByte);
10042#else
10043# error "port me"
10044#endif
10045 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10046 return off;
10047}
10048
10049
10050/**
10051 * Emits a vecdst.au32[iDWord] = 0 store.
10052 */
10053DECL_FORCE_INLINE(uint32_t)
10054iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
10055{
10056 Assert(iDWord <= 7);
10057
10058#ifdef RT_ARCH_AMD64
10059 /*
10060 * xor tmp0, tmp0
10061 * pinsrd xmm, tmp0, iDword
10062 */
10063 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
10064 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
10065 pCodeBuf[off++] = 0x33;
10066 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
10067 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
10068#elif defined(RT_ARCH_ARM64)
10069 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10070 Assert(!(iVecReg & 0x1));
10071 /* ins vecreg[iDWord], wzr */
10072 if (iDWord >= 4)
10073 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
10074 else
10075 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
10076#else
10077# error "port me"
10078#endif
10079 return off;
10080}
10081
10082
10083/**
10084 * Emits a vecdst.au32[iDWord] = 0 store.
10085 */
10086DECL_INLINE_THROW(uint32_t)
10087iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
10088{
10090#ifdef RT_ARCH_AMD64
10091 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 22), off, iVecReg, iDWord); /* 22 = worst case: xor + vextracti128/pinsrd/vinserti128 */
10092#elif defined(RT_ARCH_ARM64)
10093 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
10094#else
10095# error "port me"
10096#endif
10097 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10098 return off;
10099}
10100
10101
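/*
 * Rough shape of the code the AMD64 path above emits for a low dword
 * (sketch; prefixes vary with the actual registers, tmp0d standing for the
 * 32-bit form of IEMNATIVE_REG_FIXED_TMP0):
 *
 *     xor    tmp0d, tmp0d
 *     pinsrd xmmN, tmp0d, iDWord
 *
 * For iDWord >= 4 the pinsrd becomes the three instruction
 * vextracti128 / pinsrd / vinserti128 upper-lane sequence instead.
 */

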
10102/**
10103 * Emits a vecdst[0:127] = 0 store.
10104 */
10105DECL_FORCE_INLINE(uint32_t)
10106iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
10107{
10108#ifdef RT_ARCH_AMD64
10109 /* pxor xmm, xmm */
10110 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10111 if (iVecReg >= 8)
10112 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
10113 pCodeBuf[off++] = 0x0f;
10114 pCodeBuf[off++] = 0xef;
10115 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
10116#elif defined(RT_ARCH_ARM64)
10117 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10118 Assert(!(iVecReg & 0x1));
10119 /* eor vecreg, vecreg, vecreg */
10120 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
10121#else
10122# error "port me"
10123#endif
10124 return off;
10125}
10126
10127
10128/**
10129 * Emits a vecdst[0:127] = 0 store.
10130 */
10131DECL_INLINE_THROW(uint32_t)
10132iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
10133{
10134#ifdef RT_ARCH_AMD64
10135 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
10136#elif defined(RT_ARCH_ARM64)
10137 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
10138#else
10139# error "port me"
10140#endif
10141 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10142 return off;
10143}
10144
10145
10146/**
10147 * Emits a vecdst[128:255] = 0 store.
10148 */
10149DECL_FORCE_INLINE(uint32_t)
10150iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
10151{
10152#ifdef RT_ARCH_AMD64
10153 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
10154 if (iVecReg < 8)
10155 {
10156 pCodeBuf[off++] = X86_OP_VEX2;
10157 pCodeBuf[off++] = 0xf9;
10158 }
10159 else
10160 {
10161 pCodeBuf[off++] = X86_OP_VEX3;
10162 pCodeBuf[off++] = 0x41;
10163 pCodeBuf[off++] = 0x79;
10164 }
10165 pCodeBuf[off++] = 0x6f;
10166 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
10167#elif defined(RT_ARCH_ARM64)
10168 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10169 Assert(!(iVecReg & 0x1));
10170 /* eor vecreg, vecreg, vecreg */
10171 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
10172#else
10173# error "port me"
10174#endif
10175 return off;
10176}
10177
10178
10179/**
10180 * Emits a vecdst[128:255] = 0 store.
10181 */
10182DECL_INLINE_THROW(uint32_t)
10183iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
10184{
10185#ifdef RT_ARCH_AMD64
10186 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
10187#elif defined(RT_ARCH_ARM64)
10188 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
10189#else
10190# error "port me"
10191#endif
10192 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10193 return off;
10194}
10195
10196
10197/**
10198 * Emits a vecdst[0:255] = 0 store.
10199 */
10200DECL_FORCE_INLINE(uint32_t)
10201iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
10202{
10203#ifdef RT_ARCH_AMD64
10204 /* vpxor ymm, ymm, ymm */
10205 if (iVecReg < 8)
10206 {
10207 pCodeBuf[off++] = X86_OP_VEX2;
10208 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
10209 }
10210 else
10211 {
10212 pCodeBuf[off++] = X86_OP_VEX3;
10213 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
10214 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
10215 }
10216 pCodeBuf[off++] = 0xef;
10217 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
10218#elif defined(RT_ARCH_ARM64)
10219 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10220 Assert(!(iVecReg & 0x1));
10221 /* eor vecreg, vecreg, vecreg */
10222 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
10223 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
10224#else
10225# error "port me"
10226#endif
10227 return off;
10228}
10229
10230
10231/**
10232 * Emits a vecdst[0:255] = 0 store.
10233 */
10234DECL_INLINE_THROW(uint32_t)
10235iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
10236{
10237#ifdef RT_ARCH_AMD64
10238 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
10239#elif defined(RT_ARCH_ARM64)
10240 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
10241#else
10242# error "port me"
10243#endif
10244 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10245 return off;
10246}
10247
10248
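/*
 * Sketch of the emitted code (register numbers illustrative):
 *
 *     AMD64:  vpxor ymmN, ymmN, ymmN          ; clears all 256 bits at once
 *     ARM64:  eor   vN.16b, vN.16b, vN.16b    ; low half of the pair
 *             eor   vM.16b, vM.16b, vM.16b    ; high half, M = N + 1
 */

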
10249/**
10250 * Emits a vecdst = gprsrc broadcast, 8-bit.
10251 */
10252DECL_FORCE_INLINE(uint32_t)
10253iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10254{
10255#ifdef RT_ARCH_AMD64
10256 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE4.1) */
10257 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10258 if (iVecRegDst >= 8 || iGprSrc >= 8)
10259 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10260 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10261 pCodeBuf[off++] = 0x0f;
10262 pCodeBuf[off++] = 0x3a;
10263 pCodeBuf[off++] = 0x20;
10264 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10265 pCodeBuf[off++] = 0x00;
10266
10267 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
10268 pCodeBuf[off++] = X86_OP_VEX3;
10269 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10270 | 0x02 /* opcode map. */
10271 | ( iVecRegDst >= 8
10272 ? 0
10273 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10274 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10275 pCodeBuf[off++] = 0x78;
10276 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10277#elif defined(RT_ARCH_ARM64)
10278 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10279 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10280
10281 /* dup vecsrc, gpr */
10282 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
10283 if (f256Bit)
10284 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
10285#else
10286# error "port me"
10287#endif
10288 return off;
10289}
10290
10291
10292/**
10293 * Emits a vecdst = gprsrc broadcast, 8-bit.
10294 */
10295DECL_INLINE_THROW(uint32_t)
10296iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10297{
10298#ifdef RT_ARCH_AMD64
10299 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10300#elif defined(RT_ARCH_ARM64)
10301 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10302#else
10303# error "port me"
10304#endif
10305 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10306 return off;
10307}
10308
10309
10310/**
10311 * Emits a vecdst = gprsrc broadcast, 16-bit.
10312 */
10313DECL_FORCE_INLINE(uint32_t)
10314iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10315{
10316#ifdef RT_ARCH_AMD64
10317 /* pinsrw vecdst, gpr, #0 */
10318 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10319 if (iVecRegDst >= 8 || iGprSrc >= 8)
10320 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10321 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10322 pCodeBuf[off++] = 0x0f;
10323 pCodeBuf[off++] = 0xc4;
10324 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10325 pCodeBuf[off++] = 0x00;
10326
10327 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
10328 pCodeBuf[off++] = X86_OP_VEX3;
10329 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10330 | 0x02 /* opcode map. */
10331 | ( iVecRegDst >= 8
10332 ? 0
10333 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10334 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10335 pCodeBuf[off++] = 0x79;
10336 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10337#elif defined(RT_ARCH_ARM64)
10338 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10339 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10340
10341 /* dup vecsrc, gpr */
10342 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
10343 if (f256Bit)
10344 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
10345#else
10346# error "port me"
10347#endif
10348 return off;
10349}
10350
10351
10352/**
10353 * Emits a vecdst = gprsrc broadcast, 16-bit.
10354 */
10355DECL_INLINE_THROW(uint32_t)
10356iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10357{
10358#ifdef RT_ARCH_AMD64
10359 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10360#elif defined(RT_ARCH_ARM64)
10361 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10362#else
10363# error "port me"
10364#endif
10365 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10366 return off;
10367}
10368
10369
10370/**
10371 * Emits a vecdst = gprsrc broadcast, 32-bit.
10372 */
10373DECL_FORCE_INLINE(uint32_t)
10374iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10375{
10376#ifdef RT_ARCH_AMD64
10377 /** @todo If anyone has a better idea on how to do this more efficiently, I'm all ears;
10378 * vbroadcast needs a memory operand or another xmm register to work... */
10379
10380 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
10381 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10382 if (iVecRegDst >= 8 || iGprSrc >= 8)
10383 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10384 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10385 pCodeBuf[off++] = 0x0f;
10386 pCodeBuf[off++] = 0x3a;
10387 pCodeBuf[off++] = 0x22;
10388 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10389 pCodeBuf[off++] = 0x00;
10390
10391 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
10392 pCodeBuf[off++] = X86_OP_VEX3;
10393 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10394 | 0x02 /* opcode map. */
10395 | ( iVecRegDst >= 8
10396 ? 0
10397 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10398 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10399 pCodeBuf[off++] = 0x58;
10400 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10401#elif defined(RT_ARCH_ARM64)
10402 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10403 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10404
10405 /* dup vecsrc, gpr */
10406 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
10407 if (f256Bit)
10408 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
10409#else
10410# error "port me"
10411#endif
10412 return off;
10413}
10414
10415
10416/**
10417 * Emits a vecdst = gprsrc broadcast, 32-bit.
10418 */
10419DECL_INLINE_THROW(uint32_t)
10420iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10421{
10422#ifdef RT_ARCH_AMD64
10423 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10424#elif defined(RT_ARCH_ARM64)
10425 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10426#else
10427# error "port me"
10428#endif
10429 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10430 return off;
10431}
10432
10433
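/*
 * All four GPR broadcast emitters in this group share the same two-step
 * AMD64 pattern (sketch; prefixes depend on the registers involved):
 *
 *     pinsrd       xmmN, r32, 0     ; seed element 0 from the GPR
 *     vpbroadcastd ymmN, xmmN       ; replicate to all elements (AVX2)
 *
 * as vpbroadcast* cannot take a GPR source outside AVX-512, while the ARM64
 * dup instruction broadcasts straight from a GPR in one go.
 */

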
10434/**
10435 * Emits a vecdst = gprsrc broadcast, 64-bit.
10436 */
10437DECL_FORCE_INLINE(uint32_t)
10438iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10439{
10440#ifdef RT_ARCH_AMD64
10441 /** @todo If anyone has a better idea on how to do this more efficiently, I'm all ears;
10442 * vbroadcast needs a memory operand or another xmm register to work... */
10443
10444 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
10445 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10446 pCodeBuf[off++] = X86_OP_REX_W
10447 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10448 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10449 pCodeBuf[off++] = 0x0f;
10450 pCodeBuf[off++] = 0x3a;
10451 pCodeBuf[off++] = 0x22;
10452 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10453 pCodeBuf[off++] = 0x00;
10454
10455 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
10456 pCodeBuf[off++] = X86_OP_VEX3;
10457 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10458 | 0x02 /* opcode map. */
10459 | ( iVecRegDst >= 8
10460 ? 0
10461 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10462 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10463 pCodeBuf[off++] = 0x59;
10464 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10465#elif defined(RT_ARCH_ARM64)
10466 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10467 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10468
10469 /* dup vecsrc, gpr */
10470 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
10471 if (f256Bit)
10472 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
10473#else
10474# error "port me"
10475#endif
10476 return off;
10477}
10478
10479
10480/**
10481 * Emits a vecdst = gprsrc broadcast, 64-bit.
10482 */
10483DECL_INLINE_THROW(uint32_t)
10484iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10485{
10486#ifdef RT_ARCH_AMD64
10487 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
10488#elif defined(RT_ARCH_ARM64)
10489 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10490#else
10491# error "port me"
10492#endif
10493 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10494 return off;
10495}
10496
10497
10498/**
10499 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
10500 */
10501DECL_FORCE_INLINE(uint32_t)
10502iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
10503{
10504#ifdef RT_ARCH_AMD64
10505 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
10506
10507 /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
10508 pCodeBuf[off++] = X86_OP_VEX3;
10509 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
10510 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
10511 pCodeBuf[off++] = 0x38;
10512 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
10513 pCodeBuf[off++] = 0x01; /* Immediate */
10514#elif defined(RT_ARCH_ARM64)
10515 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10516 Assert(!(iVecRegDst & 0x1));
10517
10518 /* mov dst, src; alias for: orr dst, src, src */
10519 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
10520 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
10521#else
10522# error "port me"
10523#endif
10524 return off;
10525}
10526
10527
10528/**
10529 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
10530 */
10531DECL_INLINE_THROW(uint32_t)
10532iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
10533{
10534#ifdef RT_ARCH_AMD64
10535 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
10536#elif defined(RT_ARCH_ARM64)
10537 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
10538#else
10539# error "port me"
10540#endif
10541 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10542 return off;
10543}
10544
10545#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
10546
10547/** @} */
10548
10549#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
10550