VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@ 106192

Last change on this file since 106192 was 106187, checked in by vboxsync, 5 months ago

VMM/IEM: Injecting postponed eflags calculations into the TLB miss code paths. Fixed if constexpr for gcc. bugref:10720

1/* $Id: IEMN8veRecompilerEmit.h 106187 2024-10-01 09:05:44Z vboxsync $ */
2/** @file
3 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34#include "IEMN8veRecompiler.h"
35
36
37/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
38 * @ingroup grp_iem_n8ve_re
39 * @{
40 */
41
42/**
43 * Emit a simple marker instruction to more easily tell where something starts
44 * in the disassembly.
45 */
46DECL_INLINE_THROW(uint32_t)
47iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
48{
49#ifdef RT_ARCH_AMD64
50 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
51 if (uInfo == 0)
52 {
53 /* nop */
54 pbCodeBuf[off++] = 0x90;
55 }
56 else
57 {
58 /* nop [disp32] */
59 pbCodeBuf[off++] = 0x0f;
60 pbCodeBuf[off++] = 0x1f;
61 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
62 pbCodeBuf[off++] = RT_BYTE1(uInfo);
63 pbCodeBuf[off++] = RT_BYTE2(uInfo);
64 pbCodeBuf[off++] = RT_BYTE3(uInfo);
65 pbCodeBuf[off++] = RT_BYTE4(uInfo);
66 }
67#elif defined(RT_ARCH_ARM64)
68 /* nop */
69 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
70 if (uInfo == 0)
71 pu32CodeBuf[off++] = ARMV8_A64_INSTR_NOP;
72 else
73 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(ARMV8_A64_REG_XZR, (uint16_t)uInfo);
74
75 RT_NOREF(uInfo);
76#else
77# error "port me"
78#endif
79 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
80 return off;
81}
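/* Editorial sketch (not part of the original source): all emitters in this
   file follow the same pattern - they append encoded instructions at 'off'
   and return the updated offset, so calls chain naturally:

       off = iemNativeEmitMarker(pReNative, off, 0x1234);   // tag a spot
       off = iemNativeEmitGprZero(pReNative, off, 0);       // gpr0 = 0

   assuming pReNative and off come from the surrounding recompiler loop. */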
82
83
84/**
85 * Emit a breakpoint instruction.
86 */
87DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
88{
89#ifdef RT_ARCH_AMD64
90 pCodeBuf[off++] = 0xcc;
91 RT_NOREF(uInfo); /** @todo use multibyte nop for info? */
92
93#elif defined(RT_ARCH_ARM64)
94 pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));
95
96#else
97# error "error"
98#endif
99 return off;
100}
101
102
103/**
104 * Emit a breakpoint instruction.
105 */
106DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
107{
108#ifdef RT_ARCH_AMD64
109 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
110#elif defined(RT_ARCH_ARM64)
111 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
112#else
113# error "error"
114#endif
115 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
116 return off;
117}
118
119
120/*********************************************************************************************************************************
121* Loads, Stores and Related Stuff. *
122*********************************************************************************************************************************/
123
124#ifdef RT_ARCH_AMD64
125/**
126 * Common bit of iemNativeEmitLoadGprByGpr and friends.
127 */
128DECL_FORCE_INLINE(uint32_t)
129iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
130{
131 if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
132 {
133 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
134 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
135 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
136 }
137 else if (offDisp == (int8_t)offDisp)
138 {
139 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
140 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
141 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
142 pbCodeBuf[off++] = (uint8_t)offDisp;
143 }
144 else
145 {
146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
147 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
148 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
149 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
150 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
151 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
152 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
153 }
154 return off;
155}
156#endif /* RT_ARCH_AMD64 */
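/* Editorial note (illustrative, not part of the original source): the three
   branches above are the standard x86-64 ModR/M displacement forms.  For
   'mov rax, [rbx+disp]' (REX/opcode emitted by the caller) they yield:
       disp == 0      -> 03                (mod=00, no displacement)
       disp == 0x10   -> 43 10             (mod=01, disp8)
       disp == 0x1000 -> 83 00 10 00 00    (mod=10, disp32, little endian)
   RBP/R13 cannot use mod=00 (that encoding means RIP-relative/disp32),
   hence the X86_GREG_xBP check, and RSP/R12 always requires a SIB byte. */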
157
158/**
159 * Emits setting a GPR to zero.
160 */
161DECL_INLINE_THROW(uint32_t)
162iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
163{
164#ifdef RT_ARCH_AMD64
165 /* xor gpr32, gpr32 */
166 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
167 if (iGpr >= 8)
168 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
169 pbCodeBuf[off++] = 0x33;
170 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
171
172#elif defined(RT_ARCH_ARM64)
173 /* mov gpr, #0x0 */
174 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
175 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;
176
177#else
178# error "port me"
179#endif
180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
181 return off;
182}
183
184
185/**
186 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
187 * buffer space.
188 *
189 * Max buffer consumption:
190 * - AMD64: 6 instruction bytes.
191 * - ARM64: 2 instruction words (8 bytes).
192 *
193 * @note The top 32 bits will be cleared.
194 */
195DECL_FORCE_INLINE(uint32_t)
196iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
197{
198#ifdef RT_ARCH_AMD64
199 if (uImm32 == 0)
200 {
201 /* xor gpr, gpr */
202 if (iGpr >= 8)
203 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
204 pCodeBuf[off++] = 0x33;
205 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
206 }
207 else
208 {
209 /* mov gpr, imm32 */
210 if (iGpr >= 8)
211 pCodeBuf[off++] = X86_OP_REX_B;
212 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
213 pCodeBuf[off++] = RT_BYTE1(uImm32);
214 pCodeBuf[off++] = RT_BYTE2(uImm32);
215 pCodeBuf[off++] = RT_BYTE3(uImm32);
216 pCodeBuf[off++] = RT_BYTE4(uImm32);
217 }
218
219#elif defined(RT_ARCH_ARM64)
220 if ((uImm32 >> 16) == 0)
221 /* movz gpr, imm16 */
222 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
223 else if ((uImm32 & UINT32_C(0xffff)) == 0)
224 /* movz gpr, imm16, lsl #16 */
225 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
226 else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
227 /* movn gpr, imm16, lsl #16 */
228 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
229 else if ((uImm32 >> 16) == UINT32_C(0xffff))
230 /* movn gpr, imm16 */
231 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
232 else
233 {
234 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
235 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
236 }
237
238#else
239# error "port me"
240#endif
241 return off;
242}
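/* Editorial examples (illustrative, not part of the original source) of the
   ARM64 branch selection above:
       uImm32 == 0x00001234 -> movz gpr, #0x1234
       uImm32 == 0x56780000 -> movz gpr, #0x5678, lsl #16
       uImm32 == 0x1234ffff -> movn gpr, #0xedcb, lsl #16   (~imm >> 16)
       uImm32 == 0xffff1234 -> movn gpr, #0xedcb            (~imm & 0xffff)
   Anything else takes the two-instruction movz+movk pair at the end. */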
243
244
245/**
246 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
247 * buffer space.
248 *
249 * Max buffer consumption:
250 * - AMD64: 6 instruction bytes.
251 * - ARM64: 2 instruction words (8 bytes).
252 *
253 * @note The top 32 bits will be cleared.
254 */
255template<uint32_t const a_uImm32>
256DECL_FORCE_INLINE(uint32_t) iemNativeEmitLoadGpr32ImmExT(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr)
257{
258#ifdef RT_ARCH_AMD64
259 if (a_uImm32 == 0)
260 {
261 /* xor gpr, gpr */
262 if (iGpr >= 8)
263 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
264 pCodeBuf[off++] = 0x33;
265 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
266 }
267 else
268 {
269 /* mov gpr, imm32 */
270 if (iGpr >= 8)
271 pCodeBuf[off++] = X86_OP_REX_B;
272 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
273 pCodeBuf[off++] = RT_BYTE1(a_uImm32);
274 pCodeBuf[off++] = RT_BYTE2(a_uImm32);
275 pCodeBuf[off++] = RT_BYTE3(a_uImm32);
276 pCodeBuf[off++] = RT_BYTE4(a_uImm32);
277 }
278
279#elif defined(RT_ARCH_ARM64)
280 if RT_CONSTEXPR_IF((a_uImm32 >> 16) == 0)
281 /* movz gpr, imm16 */
282 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32, 0, false /*f64Bit*/);
283 else if RT_CONSTEXPR_IF((a_uImm32 & UINT32_C(0xffff)) == 0)
284 /* movz gpr, imm16, lsl #16 */
285 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32 >> 16, 1, false /*f64Bit*/);
286 else if RT_CONSTEXPR_IF((a_uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
287 /* movn gpr, imm16, lsl #16 */
288 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~a_uImm32 >> 16, 1, false /*f64Bit*/);
289 else if RT_CONSTEXPR_IF((a_uImm32 >> 16) == UINT32_C(0xffff))
290 /* movn gpr, imm16 */
291 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~a_uImm32, 0, false /*f64Bit*/);
292 else
293 {
294 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
295 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, a_uImm32 >> 16, 1, false /*f64Bit*/);
296 }
297
298#else
299# error "port me"
300#endif
301 return off;
302}
303
304
305/**
306 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
307 * buffer space.
308 *
309 * Max buffer consumption:
310 * - AMD64: 10 instruction bytes.
311 * - ARM64: 4 instruction words (16 bytes).
312 */
313DECL_FORCE_INLINE(uint32_t)
314iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
315{
316#ifdef RT_ARCH_AMD64
317 if (uImm64 == 0)
318 {
319 /* xor gpr, gpr */
320 if (iGpr >= 8)
321 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
322 pCodeBuf[off++] = 0x33;
323 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
324 }
325 else if (uImm64 <= UINT32_MAX)
326 {
327 /* mov gpr, imm32 */
328 if (iGpr >= 8)
329 pCodeBuf[off++] = X86_OP_REX_B;
330 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
331 pCodeBuf[off++] = RT_BYTE1(uImm64);
332 pCodeBuf[off++] = RT_BYTE2(uImm64);
333 pCodeBuf[off++] = RT_BYTE3(uImm64);
334 pCodeBuf[off++] = RT_BYTE4(uImm64);
335 }
336 else if (uImm64 == (uint64_t)(int32_t)uImm64)
337 {
338 /* mov gpr, sx(imm32) */
339 if (iGpr < 8)
340 pCodeBuf[off++] = X86_OP_REX_W;
341 else
342 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
343 pCodeBuf[off++] = 0xc7;
344 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
345 pCodeBuf[off++] = RT_BYTE1(uImm64);
346 pCodeBuf[off++] = RT_BYTE2(uImm64);
347 pCodeBuf[off++] = RT_BYTE3(uImm64);
348 pCodeBuf[off++] = RT_BYTE4(uImm64);
349 }
350 else
351 {
352 /* mov gpr, imm64 */
353 if (iGpr < 8)
354 pCodeBuf[off++] = X86_OP_REX_W;
355 else
356 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
357 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
358 pCodeBuf[off++] = RT_BYTE1(uImm64);
359 pCodeBuf[off++] = RT_BYTE2(uImm64);
360 pCodeBuf[off++] = RT_BYTE3(uImm64);
361 pCodeBuf[off++] = RT_BYTE4(uImm64);
362 pCodeBuf[off++] = RT_BYTE5(uImm64);
363 pCodeBuf[off++] = RT_BYTE6(uImm64);
364 pCodeBuf[off++] = RT_BYTE7(uImm64);
365 pCodeBuf[off++] = RT_BYTE8(uImm64);
366 }
367
368#elif defined(RT_ARCH_ARM64)
369 /*
370 * Quick simplification: Do 32-bit load if top half is zero.
371 */
372 if (uImm64 <= UINT32_MAX)
373 return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);
374
375 /*
376 * We need to start this sequence with a 'mov grp, imm16, lsl #x' and
377 * supply remaining bits using 'movk grp, imm16, lsl #x'.
378 *
379 * The mov instruction is encoded 0xd2800000 + shift + imm16 + grp,
380 * while the movk is 0xf2800000 + shift + imm16 + grp, meaning the diff
381 * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
382 * after the first non-zero immediate component so we switch to movk for
383 * the remainder.
384 */
385 unsigned cZeroHalfWords = !( uImm64 & UINT16_MAX)
386 + !((uImm64 >> 16) & UINT16_MAX)
387 + !((uImm64 >> 32) & UINT16_MAX)
388 + !((uImm64 >> 48) & UINT16_MAX);
389 unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
390 : ( (uImm64 & UINT16_MAX) == UINT16_MAX)
391 + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
392 + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
393 + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
394 if (cFfffHalfWords <= cZeroHalfWords)
395 {
396 uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
397
398 /* movz gpr, imm16 */
399 uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
400 if (uImmPart || cZeroHalfWords == 4)
401 {
402 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
403 fMovBase |= RT_BIT_32(29);
404 }
405 /* mov[z/k] gpr, imm16, lsl #16 */
406 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
407 if (uImmPart)
408 {
409 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
410 fMovBase |= RT_BIT_32(29);
411 }
412 /* mov[z/k] gpr, imm16, lsl #32 */
413 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
414 if (uImmPart)
415 {
416 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
417 fMovBase |= RT_BIT_32(29);
418 }
419 /* mov[z/k] gpr, imm16, lsl #48 */
420 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
421 if (uImmPart)
422 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
423 }
424 else
425 {
426 uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
427
428 /* find the first half-word that isn't UINT16_MAX. */
429 uint32_t const iHwNotFfff = (uImm64 & UINT16_MAX) != UINT16_MAX ? 0
430 : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
431 : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
432
433 /* movn gpr, imm16, lsl #iHwNotFfff*16 */
434 uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
435 pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
436 fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
437 /* movk gpr, imm16 */
438 if (iHwNotFfff != 0)
439 {
440 uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
441 if (uImmPart != UINT32_C(0xffff))
442 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
443 }
444 /* movk gpr, imm16, lsl #16 */
445 if (iHwNotFfff != 1)
446 {
447 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
448 if (uImmPart != UINT32_C(0xffff))
449 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
450 }
451 /* movk gpr, imm16, lsl #32 */
452 if (iHwNotFfff != 2)
453 {
454 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
455 if (uImmPart != UINT32_C(0xffff))
456 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
457 }
458 /* movk gpr, imm16, lsl #48 */
459 if (iHwNotFfff != 3)
460 {
461 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
462 if (uImmPart != UINT32_C(0xffff))
463 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
464 }
465 }
466
467#else
468# error "port me"
469#endif
470 return off;
471}
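/* Editorial example (illustrative, not part of the original source): for
   uImm64 == 0x0000123400005678 two half-words are zero, so the movz path
   wins and emits just:
       movz gpr, #0x5678
       movk gpr, #0x1234, lsl #32
   whereas uImm64 == 0xffffffff0000ffff has three 0xffff half-words and goes
   down the movn branch instead.  On AMD64 the 10-byte 'mov r64, imm64' form
   is the last resort, used only when neither zero- nor sign-extending an
   imm32 reproduces the value. */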
472
473
474/**
475 * Emits loading a constant into a 64-bit GPR
476 */
477DECL_INLINE_THROW(uint32_t)
478iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
479{
480#ifdef RT_ARCH_AMD64
481 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
482#elif defined(RT_ARCH_ARM64)
483 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
484#else
485# error "port me"
486#endif
487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
488 return off;
489}
490
491
492/**
493 * Emits loading a constant into a 32-bit GPR.
494 * @note The top 32 bits will be cleared.
495 */
496DECL_INLINE_THROW(uint32_t)
497iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
498{
499#ifdef RT_ARCH_AMD64
500 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
501#elif defined(RT_ARCH_ARM64)
502 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
503#else
504# error "port me"
505#endif
506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
507 return off;
508}
509
510
511/**
512 * Emits loading a constant into a 8-bit GPR
513 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
514 * only the ARM64 version does that.
515 */
516DECL_INLINE_THROW(uint32_t)
517iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
518{
519#ifdef RT_ARCH_AMD64
520 /* mov gpr, imm8 */
521 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
522 if (iGpr >= 8)
523 pbCodeBuf[off++] = X86_OP_REX_B;
524 else if (iGpr >= 4)
525 pbCodeBuf[off++] = X86_OP_REX;
526 pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
527 pbCodeBuf[off++] = RT_BYTE1(uImm8);
528
529#elif defined(RT_ARCH_ARM64)
530 /* movz gpr, imm16, lsl #0 */
531 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
532 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;
533
534#else
535# error "port me"
536#endif
537 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
538 return off;
539}
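/* Editorial note (not part of the original source): the bare REX prefix
   (X86_OP_REX) emitted for iGpr 4..7 is what selects SPL/BPL/SIL/DIL
   rather than the legacy AH/CH/DH/BH byte-register encodings. */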
540
541
542#ifdef RT_ARCH_AMD64
543/**
544 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
545 */
546DECL_FORCE_INLINE(uint32_t)
547iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
548{
549 if (offVCpu < 128)
550 {
551 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
552 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
553 }
554 else
555 {
556 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
557 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
558 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
559 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
560 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
561 }
562 return off;
563}
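/* Editorial note (not part of the original source): unlike
   iemNativeEmitGprByGprDisp above, no SIB byte is handled here.  This
   relies on IEMNATIVE_REG_FIXED_PVMCPU being a plain base register on
   AMD64 - RBX, going by the 'lea gprdst, [rbx + offDisp]' comment further
   down - so neither the RSP/R12 nor the RBP/R13 special case can occur;
   mod=01 with a disp8 is used even when offVCpu is zero. */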
564
565/**
566 * Special variant of iemNativeEmitGprByVCpuDisp for accessing the VM structure.
567 */
568DECL_FORCE_INLINE(uint32_t)
569iemNativeEmitGprByVCpuSignedDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu)
570{
571 if (offVCpu < 128 && offVCpu >= -128)
572 {
573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
574 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
575 }
576 else
577 {
578 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
579 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
580 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
581 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
582 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
583 }
584 return off;
585}
586
587#elif defined(RT_ARCH_ARM64)
588
589/**
590 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
591 *
592 * @note Loads can use @a iGprReg for large offsets; stores require a temporary
593 * register (@a iGprTmp).
594 * @note DON'T try this with prefetch.
595 */
596DECL_FORCE_INLINE_THROW(uint32_t)
597iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
598 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
599{
600 /*
601 * There are a couple of ldr variants that take an immediate offset, so
602 * try to use those if we can; otherwise we have to use the temporary
603 * register to help with the addressing.
604 */
605 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
606 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
607 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
608 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
609 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
610 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
611 else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
612 {
613 /* The offset is too large, so we must load it into a register and use
614 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
615 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
616 if (iGprTmp == UINT8_MAX)
617 iGprTmp = iGprReg;
618 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
619 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
620 }
621 else
622# ifdef IEM_WITH_THROW_CATCH
623 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
624# else
625 AssertReleaseFailedStmt(off = UINT32_MAX);
626# endif
627
628 return off;
629}
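/* Editorial note (not part of the original source): the scaled unsigned
   immediate gives a per-size reach from pVCpu - e.g. 0..32760 for 8-byte
   accesses (4095*8) and 0..16380 for 4-byte ones - and the second branch
   re-bases the same encoding on IEMNATIVE_REG_FIXED_PCPUMCTX to reach
   CPUMCTX members that lie beyond that window. */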
630
631/**
632 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
633 */
634DECL_FORCE_INLINE_THROW(uint32_t)
635iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
636 uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
637{
638 /*
639 * There are a couple of ldr variants that take an immediate offset, so
640 * try to use those if we can; otherwise we have to use the temporary
641 * register to help with the addressing.
642 */
643 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
644 {
645 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
646 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
647 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
648 }
649 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
650 {
651 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
652 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
653 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
654 }
655 else
656 {
657 /* The offset is too large, so we must load it into a register and use
658 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
659 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
660 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
661 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
662 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
663 IEMNATIVE_REG_FIXED_TMP0);
664 }
665 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
666 return off;
667}
668
669
670/**
671 * Special variant of iemNativeEmitGprByVCpuLdStEx for accessing the VM
672 * structure.
673 *
674 * @note Loads can use @a iGprReg for large offsets; stores require a temporary
675 * register (@a iGprTmp).
676 * @note DON'T try this with prefetch.
677 */
678DECL_FORCE_INLINE_THROW(uint32_t)
679iemNativeEmitGprBySignedVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu,
680 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
681{
682 Assert((uint32_t)RT_ABS(offVCpu) < RT_BIT_32(28)); /* we should be way out of range for problematic sign extending issues. */
683 Assert(!((uint32_t)RT_ABS(offVCpu) & (cbData - 1)));
684
685 /*
686 * For negative offsets we need to put the displacement in a register
687 * as the two variants with signed immediates will either post- or
688 * pre-increment the base address register.
689 */
690 if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
691 {
692 uint8_t const idxIndexReg = !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) ? iGprReg : IEMNATIVE_REG_FIXED_TMP0;
693 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxIndexReg, offVCpu / (int32_t)cbData);
694 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, idxIndexReg,
695 kArmv8A64InstrLdStExtend_Sxtw, cbData > 1 /*fShifted*/);
696 }
697 else
698# ifdef IEM_WITH_THROW_CATCH
699 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
700# else
701 AssertReleaseFailedStmt(off = UINT32_MAX);
702# endif
703
704 return off;
705}
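/* Editorial example (illustrative, not part of the original source): for
   offVCpu == -8 and cbData == 8 the index register is loaded with -1
   (0xffffffff as a 32-bit value) and the register-indexed form applies
   SXTW plus 'LSL #3', so the effective address becomes pVCpu - 8. */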
706
707/**
708 * Special variant of iemNativeEmitGprByVCpuLdSt for accessing the VM structure.
709 */
710DECL_FORCE_INLINE_THROW(uint32_t)
711iemNativeEmitGprBySignedVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
712 int32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
713{
714 off = iemNativeEmitGprBySignedVCpuLdStEx(iemNativeInstrBufEnsure(pReNative, off, 2 + 1), off, iGprReg,
715 offVCpu, enmOperation, cbData, IEMNATIVE_REG_FIXED_TMP0);
716 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
717 return off;
718}
719
720#endif /* RT_ARCH_ARM64 */
721
722
723/**
724 * Emits a 64-bit GPR load of a VCpu value.
725 */
726DECL_FORCE_INLINE_THROW(uint32_t)
727iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
728{
729#ifdef RT_ARCH_AMD64
730 /* mov reg64, mem64 */
731 if (iGpr < 8)
732 pCodeBuf[off++] = X86_OP_REX_W;
733 else
734 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
735 pCodeBuf[off++] = 0x8b;
736 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
737
738#elif defined(RT_ARCH_ARM64)
739 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
740
741#else
742# error "port me"
743#endif
744 return off;
745}
746
747
748/**
749 * Emits a 64-bit GPR load of a VCpu value.
750 */
751DECL_INLINE_THROW(uint32_t)
752iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
753{
754#ifdef RT_ARCH_AMD64
755 off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
756 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
757
758#elif defined(RT_ARCH_ARM64)
759 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
760
761#else
762# error "port me"
763#endif
764 return off;
765}
766
767/**
768 * Emits a 32-bit GPR load of a VCpu value.
769 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
770 */
771DECL_FORCE_INLINE_THROW(uint32_t)
772iemNativeEmitLoadGprFromVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
773{
774#ifdef RT_ARCH_AMD64
775 /* mov reg32, mem32 */
776 if (iGpr >= 8)
777 pCodeBuf[off++] = X86_OP_REX_R;
778 pCodeBuf[off++] = 0x8b;
779 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
780
781#elif defined(RT_ARCH_ARM64)
782 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
783
784#else
785# error "port me"
786#endif
787 return off;
788}
789
790
791/**
792 * Emits a 32-bit GPR load of a VCpu value.
793 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
794 */
795DECL_INLINE_THROW(uint32_t)
796iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
797{
798#ifdef RT_ARCH_AMD64
799 off = iemNativeEmitLoadGprFromVCpuU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
800 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
801
802#elif defined(RT_ARCH_ARM64)
803 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
804
805#else
806# error "port me"
807#endif
808 return off;
809}
810
811
812/**
813 * Emits a 16-bit GPR load of a VCpu value.
814 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
815 */
816DECL_FORCE_INLINE_THROW(uint32_t)
817iemNativeEmitLoadGprFromVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
818{
819#ifdef RT_ARCH_AMD64
820 /* movzx reg32, mem16 */
821 if (iGpr >= 8)
822 pCodeBuf[off++] = X86_OP_REX_R;
823 pCodeBuf[off++] = 0x0f;
824 pCodeBuf[off++] = 0xb7;
825 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
826
827#elif defined(RT_ARCH_ARM64)
828 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
829
830#else
831# error "port me"
832#endif
833 return off;
834}
835
836
837/**
838 * Emits a 16-bit GPR load of a VCpu value.
839 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
840 */
841DECL_INLINE_THROW(uint32_t)
842iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
843{
844#ifdef RT_ARCH_AMD64
845 off = iemNativeEmitLoadGprFromVCpuU16Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGpr, offVCpu);
846 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
847
848#elif defined(RT_ARCH_ARM64)
849 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
850
851#else
852# error "port me"
853#endif
854 return off;
855}
856
857
858/**
859 * Emits an 8-bit GPR load of a VCpu value.
860 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
861 */
862DECL_INLINE_THROW(uint32_t)
863iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
864{
865#ifdef RT_ARCH_AMD64
866 /* movzx reg32, mem8 */
867 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
868 if (iGpr >= 8)
869 pbCodeBuf[off++] = X86_OP_REX_R;
870 pbCodeBuf[off++] = 0x0f;
871 pbCodeBuf[off++] = 0xb6;
872 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
873 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
874
875#elif defined(RT_ARCH_ARM64)
876 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
877
878#else
879# error "port me"
880#endif
881 return off;
882}
883
884
885/**
886 * Emits a store of a GPR value to a 64-bit VCpu field.
887 */
888DECL_FORCE_INLINE_THROW(uint32_t)
889iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
890 uint8_t iGprTmp = UINT8_MAX)
891{
892#ifdef RT_ARCH_AMD64
893 /* mov mem64, reg64 */
894 if (iGpr < 8)
895 pCodeBuf[off++] = X86_OP_REX_W;
896 else
897 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
898 pCodeBuf[off++] = 0x89;
899 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
900 RT_NOREF(iGprTmp);
901
902#elif defined(RT_ARCH_ARM64)
903 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
904
905#else
906# error "port me"
907#endif
908 return off;
909}
910
911
912/**
913 * Emits a store of a GPR value to a 64-bit VCpu field.
914 */
915DECL_INLINE_THROW(uint32_t)
916iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
917{
918#ifdef RT_ARCH_AMD64
919 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
920#elif defined(RT_ARCH_ARM64)
921 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
922 IEMNATIVE_REG_FIXED_TMP0);
923#else
924# error "port me"
925#endif
926 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
927 return off;
928}
929
930
931/**
932 * Emits a store of a GPR value to a 32-bit VCpu field.
933 *
934 * @note Limited range on ARM64.
935 */
936DECL_INLINE_THROW(uint32_t)
937iemNativeEmitStoreGprToVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
938{
939#ifdef RT_ARCH_AMD64
940 /* mov mem32, reg32 */
941 if (iGpr >= 8)
942 pCodeBuf[off++] = X86_OP_REX_R;
943 pCodeBuf[off++] = 0x89;
944 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
945
946#elif defined(RT_ARCH_ARM64)
947 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
948
949#else
950# error "port me"
951#endif
952 return off;
953}
954
955
956/**
957 * Emits a store of a GPR value to a 32-bit VCpu field.
958 */
959DECL_INLINE_THROW(uint32_t)
960iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
961{
962#ifdef RT_ARCH_AMD64
963 /* mov mem32, reg32 */
964 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
965 if (iGpr >= 8)
966 pbCodeBuf[off++] = X86_OP_REX_R;
967 pbCodeBuf[off++] = 0x89;
968 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
969 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
970
971#elif defined(RT_ARCH_ARM64)
972 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
973
974#else
975# error "port me"
976#endif
977 return off;
978}
979
980
981/**
982 * Emits a store of a GPR value to a 16-bit VCpu field.
983 */
984DECL_INLINE_THROW(uint32_t)
985iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
986{
987#ifdef RT_ARCH_AMD64
988 /* mov mem16, reg16 */
989 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
990 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
991 if (iGpr >= 8)
992 pbCodeBuf[off++] = X86_OP_REX_R;
993 pbCodeBuf[off++] = 0x89;
994 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
995 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
996
997#elif defined(RT_ARCH_ARM64)
998 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));
999
1000#else
1001# error "port me"
1002#endif
1003 return off;
1004}
1005
1006
1007/**
1008 * Emits a store of a GPR value to an 8-bit VCpu field.
1009 */
1010DECL_INLINE_THROW(uint32_t)
1011iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
1012{
1013#ifdef RT_ARCH_AMD64
1014 /* mov mem8, reg8 */
1015 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1016 if (iGpr >= 8)
1017 pbCodeBuf[off++] = X86_OP_REX_R;
1018 pbCodeBuf[off++] = 0x88;
1019 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
1020 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1021
1022#elif defined(RT_ARCH_ARM64)
1023 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
1024
1025#else
1026# error "port me"
1027#endif
1028 return off;
1029}
1030
1031
1032/**
1033 * Emits a store of an immediate value to a 64-bit VCpu field.
1034 *
1035 * @note Will allocate temporary registers on both ARM64 and AMD64.
1036 */
1037DECL_FORCE_INLINE_THROW(uint32_t)
1038iemNativeEmitStoreImmToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm, uint32_t offVCpu)
1039{
1040#ifdef RT_ARCH_AMD64
1041 /* mov mem64, reg64 (the immediate is loaded into a temporary register) */
1042 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
1043 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxRegImm, offVCpu);
1044 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1045 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1046
1047#elif defined(RT_ARCH_ARM64)
1048 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
1049 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
1050 if (idxRegImm != ARMV8_A64_REG_XZR)
1051 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1052
1053#else
1054# error "port me"
1055#endif
1056 return off;
1057}
1058
1059
1060/**
1061 * Emits a store of an immediate value to a 32-bit VCpu field.
1062 *
1063 * @note ARM64: Will allocate temporary registers.
1064 */
1065DECL_FORCE_INLINE_THROW(uint32_t)
1066iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
1067{
1068#ifdef RT_ARCH_AMD64
1069 /* mov mem32, imm32 */
1070 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1071 pCodeBuf[off++] = 0xc7;
1072 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1073 pCodeBuf[off++] = RT_BYTE1(uImm);
1074 pCodeBuf[off++] = RT_BYTE2(uImm);
1075 pCodeBuf[off++] = RT_BYTE3(uImm);
1076 pCodeBuf[off++] = RT_BYTE4(uImm);
1077 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1078
1079#elif defined(RT_ARCH_ARM64)
1080 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
1081 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
1082 if (idxRegImm != ARMV8_A64_REG_XZR)
1083 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1084
1085#else
1086# error "port me"
1087#endif
1088 return off;
1089}
1090
1091
1092
1093/**
1094 * Emits a store of an immediate value to a 16-bit VCpu field.
1095 *
1096 * @note ARM64: @a idxTmp1 is always required! Whether @a idxTmp2 is needed
1097 * depends on whether the offset can be encoded as an immediate. The @a offVCpu
1098 * immediate range is 0..8190 bytes from VMCPU and the same from CPUMCPU.
1099 */
1100DECL_FORCE_INLINE_THROW(uint32_t)
1101iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
1102 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
1103{
1104#ifdef RT_ARCH_AMD64
1105 /* mov mem16, imm16 */
1106 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1107 pCodeBuf[off++] = 0xc7;
1108 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1109 pCodeBuf[off++] = RT_BYTE1(uImm);
1110 pCodeBuf[off++] = RT_BYTE2(uImm);
1111 RT_NOREF(idxTmp1, idxTmp2);
1112
1113#elif defined(RT_ARCH_ARM64)
1114 if (idxTmp1 != UINT8_MAX)
1115 {
1116 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
1117 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
1118 sizeof(uint16_t), idxTmp2);
1119 }
1120 else
1121# ifdef IEM_WITH_THROW_CATCH
1122 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
1123# else
1124 AssertReleaseFailedStmt(off = UINT32_MAX);
1125# endif
1126
1127#else
1128# error "port me"
1129#endif
1130 return off;
1131}
1132
1133
1134/**
1135 * Emits a store of an immediate value to an 8-bit VCpu field.
1136 */
1137DECL_INLINE_THROW(uint32_t)
1138iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu,
1139 uint8_t idxRegTmp = UINT8_MAX)
1140{
1141#ifdef RT_ARCH_AMD64
1142 /* mov mem8, imm8 */
1143 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1144 pbCodeBuf[off++] = 0xc6;
1145 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
1146 pbCodeBuf[off++] = bImm;
1147 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1148 RT_NOREF(idxRegTmp);
1149
1150#elif defined(RT_ARCH_ARM64)
1151 /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
1152 if (idxRegTmp != UINT8_MAX)
1153 {
1154 Assert(idxRegTmp != IEMNATIVE_REG_FIXED_TMP0);
1155 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegTmp, bImm);
1156 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegTmp, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
1157 }
1158 else
1159 {
1160 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
1161 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
1162 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1163 }
1164
1165#else
1166# error "port me"
1167#endif
1168 return off;
1169}
1170
1171
1172/**
1173 * Emits a load of the effective address of a VCpu field into a GPR.
1174 */
1175DECL_INLINE_THROW(uint32_t)
1176iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
1177{
1178#ifdef RT_ARCH_AMD64
1179 /* lea gprdst, [rbx + offDisp] */
1180 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1181 if (iGprDst < 8)
1182 pbCodeBuf[off++] = X86_OP_REX_W;
1183 else
1184 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
1185 pbCodeBuf[off++] = 0x8d;
1186 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);
1187
1188#elif defined(RT_ARCH_ARM64)
1189 if (offVCpu < (unsigned)_4K)
1190 {
1191 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1192 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
1193 }
1194 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
1195 {
1196 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1197 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
1198 offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
1199 }
1200 else if (offVCpu <= 0xffffffU)
1201 {
1202 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1203 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu >> 12,
1204 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1205 if (offVCpu & 0xfffU)
1206 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, offVCpu & 0xfff);
1207 }
1208 else
1209 {
1210 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
1211 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
1212 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1213 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
1214 }
1215
1216#else
1217# error "port me"
1218#endif
1219 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1220 return off;
1221}
1222
1223
1224/** This is just a typesafe alternative to RT_UOFFSETOF. */
1225DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
1226{
1227 uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
1228 Assert(off < sizeof(VMCPU));
1229 return off;
1230}
1231
1232
1233/** This is just a typesafe alternative to RT_UOFFSETOF. */
1234DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
1235{
1236 uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
1237 Assert(off < sizeof(VMCPU));
1238 return off;
1239}
1240
1241
1242/**
1243 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1244 *
1245 * @note The two temp registers are not required for AMD64. ARM64 always
1246 * requires the first, and the 2nd is needed if the offset cannot be
1247 * encoded as an immediate.
1248 */
1249DECL_FORCE_INLINE(uint32_t)
1250iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1251{
1252#ifdef RT_ARCH_AMD64
1253 /* inc qword [pVCpu + off] */
1254 pCodeBuf[off++] = X86_OP_REX_W;
1255 pCodeBuf[off++] = 0xff;
1256 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1257 RT_NOREF(idxTmp1, idxTmp2);
1258
1259#elif defined(RT_ARCH_ARM64)
1260 /* Determine how we're to access pVCpu first. */
1261 uint32_t const cbData = sizeof(STAMCOUNTER);
1262 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
1263 {
1264 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1265 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1266 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1267 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1268 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1269 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1270 }
1271 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
1272 {
1273 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1274 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1275 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1276 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1277 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1278 }
1279 else
1280 {
1281 /* The offset is too large, so we must load it into a register and use
1282 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
1283 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1284 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1285 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1286 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1287 }
1288
1289#else
1290# error "port me"
1291#endif
1292 return off;
1293}
1294
1295
1296/**
1297 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1298 *
1299 * @note The two temp registers are not required for AMD64. ARM64 always
1300 * requires the first, and the 2nd is needed if the offset cannot be
1301 * encoded as an immediate.
1302 */
1303DECL_FORCE_INLINE(uint32_t)
1304iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1305{
1306#ifdef RT_ARCH_AMD64
1307 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
1308#elif defined(RT_ARCH_ARM64)
1309 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1310#else
1311# error "port me"
1312#endif
1313 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1314 return off;
1315}
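/* Editorial sketch (hypothetical counter name, not part of the original
   source): combined with the typesafe offset helpers above, usage looks
   roughly like:

       off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
                 iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomething));

   i.e. a member pointer is turned into the byte offset the emitter expects,
   with the Assert in the helper catching pointers outside the VMCPU. */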
1316
1317
1318/**
1319 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1320 *
1321 * @note The two temp registers are not required for AMD64. ARM64 always
1322 * requires the first, and the 2nd is needed if the offset cannot be
1323 * encoded as an immediate.
1324 */
1325DECL_FORCE_INLINE(uint32_t)
1326iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1327{
1328 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1329#ifdef RT_ARCH_AMD64
1330 /* inc dword [pVCpu + offVCpu] */
1331 pCodeBuf[off++] = 0xff;
1332 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1333 RT_NOREF(idxTmp1, idxTmp2);
1334
1335#elif defined(RT_ARCH_ARM64)
1336 /* Determine how we're to access pVCpu first. */
1337 uint32_t const cbData = sizeof(uint32_t);
1338 if (offVCpu < (unsigned)(_4K * cbData))
1339 {
1340 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1341 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
1342 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1343 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1344 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
1345 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1346 }
1347 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1348 {
1349 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1350 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1351 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1352 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1353 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1354 }
1355 else
1356 {
1357 /* The offset is too large, so we must load it into a register and use
1358 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1359 of the instruction if that'll reduce the constant to 16-bits. */
1360 if (offVCpu / cbData < (unsigned)UINT16_MAX)
1361 {
1362 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
1363 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1364 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1365 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1366 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1367 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1368 }
1369 else
1370 {
1371 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1372 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1373 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1374 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1375 }
1376 }
1377
1378#else
1379# error "port me"
1380#endif
1381 return off;
1382}
1383
1384
1385/**
1386 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1387 *
1388 * @note The two temp registers are not required for AMD64. ARM64 always
1389 * requires the first, and the 2nd is needed if the offset cannot be
1390 * encoded as an immediate.
1391 */
1392DECL_FORCE_INLINE(uint32_t)
1393iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1394{
1395#ifdef RT_ARCH_AMD64
1396 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
1397#elif defined(RT_ARCH_ARM64)
1398 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1399#else
1400# error "port me"
1401#endif
1402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1403 return off;
1404}
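/* Editorial example (illustrative values, not part of the original source):
   with offVCpu == 0x12340 (beyond the first immediate window, assuming it is
   also outside the CPUMCTX one) a single 'movz idxTmp2, #0x48d0' loads
   offVCpu / 4, and the register-indexed form scales it back via 'LSL #2',
   saving the extra movk a full 32-bit constant would cost. */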
1405
1406
1407/**
1408 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
1409 *
1410 * @note May allocate temporary registers (not AMD64).
1411 */
1412DECL_FORCE_INLINE(uint32_t)
1413iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1414{
1415 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1416#ifdef RT_ARCH_AMD64
1417 /* or dword [pVCpu + offVCpu], imm8/32 */
1418 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1419 if (fMask < 0x80)
1420 {
1421 pCodeBuf[off++] = 0x83;
1422 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1423 pCodeBuf[off++] = (uint8_t)fMask;
1424 }
1425 else
1426 {
1427 pCodeBuf[off++] = 0x81;
1428 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1429 pCodeBuf[off++] = RT_BYTE1(fMask);
1430 pCodeBuf[off++] = RT_BYTE2(fMask);
1431 pCodeBuf[off++] = RT_BYTE3(fMask);
1432 pCodeBuf[off++] = RT_BYTE4(fMask);
1433 }
1434
1435#elif defined(RT_ARCH_ARM64)
1436 /* If the constant is unwieldy we'll need a register to hold it as well. */
1437 uint32_t uImmSizeLen, uImmRotate;
1438 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1439 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1440
1441 /* We need a temp register for holding the member value we're modifying. */
1442 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1443
1444 /* Determine how we're to access pVCpu first. */
1445 uint32_t const cbData = sizeof(uint32_t);
1446 if (offVCpu < (unsigned)(_4K * cbData))
1447 {
1448 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1449 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1450 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1451 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1452 if (idxTmpMask == UINT8_MAX)
1453 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1454 else
1455 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1456 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1457 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1458 }
1459 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1460 {
1461 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1462 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1463 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1464 if (idxTmpMask == UINT8_MAX)
1465 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1466 else
1467 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1468 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1469 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1470 }
1471 else
1472 {
1473 /* The offset is too large, so we must load it into a register and use
1474 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1475 of the instruction if that'll reduce the constant to 16-bits. */
1476 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1477 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1478 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1479 if (fShifted)
1480 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1481 else
1482 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1483
1484 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1485 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1486
1487 if (idxTmpMask == UINT8_MAX)
1488 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1489 else
1490 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1491
1492 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1493 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1494 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1495 }
1496 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1497 if (idxTmpMask != UINT8_MAX)
1498 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1499
1500#else
1501# error "port me"
1502#endif
1503 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1504 return off;
1505}
1506
1507
1508/**
1509 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1510 *
1511 * @note May allocate temporary registers (not AMD64).
1512 */
1513DECL_FORCE_INLINE(uint32_t)
1514iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1515{
1516 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1517#ifdef RT_ARCH_AMD64
1518 /* and dword [pVCpu + offVCpu], imm8/32 */
1519 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1520 if (fMask < 0x80)
1521 {
1522 pCodeBuf[off++] = 0x83;
1523 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1524 pCodeBuf[off++] = (uint8_t)fMask;
1525 }
1526 else
1527 {
1528 pCodeBuf[off++] = 0x81;
1529 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1530 pCodeBuf[off++] = RT_BYTE1(fMask);
1531 pCodeBuf[off++] = RT_BYTE2(fMask);
1532 pCodeBuf[off++] = RT_BYTE3(fMask);
1533 pCodeBuf[off++] = RT_BYTE4(fMask);
1534 }
1535
1536#elif defined(RT_ARCH_ARM64)
1537 /* If the constant is unwieldy we'll need a register to hold it as well. */
1538 uint32_t uImmSizeLen, uImmRotate;
1539 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1540 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1541
1542 /* We need a temp register for holding the member value we're modifying. */
1543 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1544
1545 /* Determine how we're to access pVCpu first. */
1546 uint32_t const cbData = sizeof(uint32_t);
1547 if (offVCpu < (unsigned)(_4K * cbData))
1548 {
1549 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1550 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1551 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1552 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1553 if (idxTmpMask == UINT8_MAX)
1554 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1555 else
1556 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1557 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1558 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1559 }
1560 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1561 {
1562 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1563 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1564 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1565 if (idxTmpMask == UINT8_MAX)
1566 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1567 else
1568 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1569 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1570 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1571 }
1572 else
1573 {
1574 /* The offset is too large, so we must load it into a register and use
1575       ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1576       of the instruction if that will reduce the constant to 16 bits. */
1577 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1578 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1579 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1580 if (fShifted)
1581 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1582 else
1583 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1584
1585 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1586 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1587
1588 if (idxTmpMask == UINT8_MAX)
1589 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1590 else
1591 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1592
1593 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1594 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1595 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1596 }
1597 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1598 if (idxTmpMask != UINT8_MAX)
1599 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1600
1601#else
1602# error "port me"
1603#endif
1604 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1605 return off;
1606}
1607
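/* Usage sketch (hypothetical caller and member, not from the original source):
   clearing the low twelve bits of some uint32_t field inside VMCPU whose byte
   offset is offMember:

        off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, UINT32_C(0xfffff000), offMember);

   On AMD64 this folds into a single 'and dword [pVCpu + offMember], imm32',
   while on ARM64 it expands to a load/and/store triple using whichever of the
   three addressing strategies above fits offMember. */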
1608
1609/**
1610 * Emits a gprdst = gprsrc load.
1611 */
1612DECL_FORCE_INLINE(uint32_t)
1613iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1614{
1615#ifdef RT_ARCH_AMD64
1616 /* mov gprdst, gprsrc */
1617 if ((iGprDst | iGprSrc) >= 8)
1618 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1619 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1620 : X86_OP_REX_W | X86_OP_REX_R;
1621 else
1622 pCodeBuf[off++] = X86_OP_REX_W;
1623 pCodeBuf[off++] = 0x8b;
1624 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1625
1626#elif defined(RT_ARCH_ARM64)
1627 /* mov dst, src; alias for: orr dst, xzr, src */
1628 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1629
1630#else
1631# error "port me"
1632#endif
1633 return off;
1634}
1635
1636
1637/**
1638 * Emits a gprdst = gprsrc load.
1639 */
1640DECL_INLINE_THROW(uint32_t)
1641iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1642{
1643#ifdef RT_ARCH_AMD64
1644 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1645#elif defined(RT_ARCH_ARM64)
1646 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1647#else
1648# error "port me"
1649#endif
1650 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1651 return off;
1652}
1653
1654
1655/**
1656 * Emits a gprdst = gprsrc[31:0] load.
1657 * @note Bits 63 thru 32 are cleared.
1658 */
1659DECL_FORCE_INLINE(uint32_t)
1660iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1661{
1662#ifdef RT_ARCH_AMD64
1663 /* mov gprdst, gprsrc */
1664 if ((iGprDst | iGprSrc) >= 8)
1665 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1666 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1667 : X86_OP_REX_R;
1668 pCodeBuf[off++] = 0x8b;
1669 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1670
1671#elif defined(RT_ARCH_ARM64)
1672 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1673 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1674
1675#else
1676# error "port me"
1677#endif
1678 return off;
1679}
1680
1681
1682/**
1683 * Emits a gprdst = gprsrc[31:0] load.
1684 * @note Bits 63 thru 32 are cleared.
1685 */
1686DECL_INLINE_THROW(uint32_t)
1687iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1688{
1689#ifdef RT_ARCH_AMD64
1690 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1691#elif defined(RT_ARCH_ARM64)
1692 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1693#else
1694# error "port me"
1695#endif
1696 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1697 return off;
1698}
1699
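/* Semantics sketch (hypothetical value): with iGprSrc holding
   0xffffffff12345678, the 32-bit copy above leaves iGprDst as
   0x0000000012345678 on both targets, since a 32-bit 'mov r32, r32' on AMD64
   and a 32-bit 'orr Wd, WZR, Wm' on ARM64 both implicitly zero bits 63:32 of
   the destination. */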
1700
1701/**
1702 * Emits a gprdst = gprsrc[15:0] load.
1703 * @note Bits 63 thru 16 are cleared.
1704 */
1705DECL_INLINE_THROW(uint32_t)
1706iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1707{
1708#ifdef RT_ARCH_AMD64
1709 /* movzx Gv,Ew */
1710 if ((iGprDst | iGprSrc) >= 8)
1711 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1712 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1713 : X86_OP_REX_R;
1714 pCodeBuf[off++] = 0x0f;
1715 pCodeBuf[off++] = 0xb7;
1716 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1717
1718#elif defined(RT_ARCH_ARM64)
1719 /* and gprdst, gprsrc, #0xffff */
1720# if 1
1721 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1722 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1723# else
1724 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1725 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1726# endif
1727
1728#else
1729# error "port me"
1730#endif
1731 return off;
1732}
1733
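/* Background sketch of the logical-immediate encoding asserted above (decode
   per the A64 rules, not code from the source): with N=0 and imms=0b001111
   the element size decodes to 32 bits and S=15, i.e. a run of S+1 = 16
   consecutive one bits rotated by immr=0 - exactly 0x0000ffff.  The 0x4f
   alternative is the same 16-bit run with the N bit set, selecting a 64-bit
   element. */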
1734
1735/**
1736 * Emits a gprdst = gprsrc[15:0] load.
1737 * @note Bits 63 thru 16 are cleared.
1738 */
1739DECL_INLINE_THROW(uint32_t)
1740iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1741{
1742#ifdef RT_ARCH_AMD64
1743 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1744#elif defined(RT_ARCH_ARM64)
1745 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1746#else
1747# error "port me"
1748#endif
1749 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1750 return off;
1751}
1752
1753
1754/**
1755 * Emits a gprdst = gprsrc[7:0] load.
1756 * @note Bits 63 thru 8 are cleared.
1757 */
1758DECL_FORCE_INLINE(uint32_t)
1759iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1760{
1761#ifdef RT_ARCH_AMD64
1762 /* movzx Gv,Eb */
1763 if (iGprDst >= 8 || iGprSrc >= 8)
1764 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1765 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1766 : X86_OP_REX_R;
1767 else if (iGprSrc >= 4)
1768 pCodeBuf[off++] = X86_OP_REX;
1769 pCodeBuf[off++] = 0x0f;
1770 pCodeBuf[off++] = 0xb6;
1771 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1772
1773#elif defined(RT_ARCH_ARM64)
1774 /* and gprdst, gprsrc, #0xff */
1775 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1776 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1777
1778#else
1779# error "port me"
1780#endif
1781 return off;
1782}
1783
1784
1785/**
1786 * Emits a gprdst = gprsrc[7:0] load.
1787 * @note Bits 63 thru 8 are cleared.
1788 */
1789DECL_INLINE_THROW(uint32_t)
1790iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1791{
1792#ifdef RT_ARCH_AMD64
1793 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1794#elif defined(RT_ARCH_ARM64)
1795 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1796#else
1797# error "port me"
1798#endif
1799 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1800 return off;
1801}
1802
1803
1804/**
1805 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1806 * @note Bits 63 thru 8 are cleared.
1807 */
1808DECL_INLINE_THROW(uint32_t)
1809iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1810{
1811#ifdef RT_ARCH_AMD64
1812 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1813
1814 /* movzx Gv,Ew */
1815 if ((iGprDst | iGprSrc) >= 8)
1816 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1817 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1818 : X86_OP_REX_R;
1819 pbCodeBuf[off++] = 0x0f;
1820 pbCodeBuf[off++] = 0xb7;
1821 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1822
1823 /* shr Ev,8 */
1824 if (iGprDst >= 8)
1825 pbCodeBuf[off++] = X86_OP_REX_B;
1826 pbCodeBuf[off++] = 0xc1;
1827 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1828 pbCodeBuf[off++] = 8;
1829
1830#elif defined(RT_ARCH_ARM64)
1831 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1832 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1833 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1834
1835#else
1836# error "port me"
1837#endif
1838 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1839 return off;
1840}
1841
1842
1843/**
1844 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1845 */
1846DECL_INLINE_THROW(uint32_t)
1847iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1848{
1849#ifdef RT_ARCH_AMD64
1850 /* movsxd r64, r/m32 */
1851 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1852 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1853 pbCodeBuf[off++] = 0x63;
1854 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1855
1856#elif defined(RT_ARCH_ARM64)
1857 /* sxtw dst, src */
1858 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1859 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1860
1861#else
1862# error "port me"
1863#endif
1864 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1865 return off;
1866}
1867
1868
1869/**
1870 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1871 */
1872DECL_INLINE_THROW(uint32_t)
1873iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1874{
1875#ifdef RT_ARCH_AMD64
1876 /* movsx r64, r/m16 */
1877 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1878 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1879 pbCodeBuf[off++] = 0x0f;
1880 pbCodeBuf[off++] = 0xbf;
1881 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1882
1883#elif defined(RT_ARCH_ARM64)
1884 /* sxth dst, src */
1885 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1886 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1887
1888#else
1889# error "port me"
1890#endif
1891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1892 return off;
1893}
1894
1895
1896/**
1897 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1898 */
1899DECL_INLINE_THROW(uint32_t)
1900iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1901{
1902#ifdef RT_ARCH_AMD64
1903    /* movsx r32, r/m16 */
1904 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1905 if (iGprDst >= 8 || iGprSrc >= 8)
1906 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1907 pbCodeBuf[off++] = 0x0f;
1908 pbCodeBuf[off++] = 0xbf;
1909 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1910
1911#elif defined(RT_ARCH_ARM64)
1912 /* sxth dst32, src */
1913 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1914 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1915
1916#else
1917# error "port me"
1918#endif
1919 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1920 return off;
1921}
1922
1923
1924/**
1925 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1926 */
1927DECL_INLINE_THROW(uint32_t)
1928iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1929{
1930#ifdef RT_ARCH_AMD64
1931 /* movsx r64, r/m8 */
1932 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1933 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1934 pbCodeBuf[off++] = 0x0f;
1935 pbCodeBuf[off++] = 0xbe;
1936 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1937
1938#elif defined(RT_ARCH_ARM64)
1939 /* sxtb dst, src */
1940 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1941 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1942
1943#else
1944# error "port me"
1945#endif
1946 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1947 return off;
1948}
1949
1950
1951/**
1952 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1953 * @note Bits 63 thru 32 are cleared.
1954 */
1955DECL_INLINE_THROW(uint32_t)
1956iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1957{
1958#ifdef RT_ARCH_AMD64
1959 /* movsx r32, r/m8 */
1960 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1961 if (iGprDst >= 8 || iGprSrc >= 8)
1962 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1963 else if (iGprSrc >= 4)
1964 pbCodeBuf[off++] = X86_OP_REX;
1965 pbCodeBuf[off++] = 0x0f;
1966 pbCodeBuf[off++] = 0xbe;
1967 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1968
1969#elif defined(RT_ARCH_ARM64)
1970 /* sxtb dst32, src32 */
1971 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1972 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1973
1974#else
1975# error "port me"
1976#endif
1977 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1978 return off;
1979}
1980
1981
1982/**
1983 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1984 * @note Bits 63 thru 16 are cleared.
1985 */
1986DECL_INLINE_THROW(uint32_t)
1987iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1988{
1989#ifdef RT_ARCH_AMD64
1990 /* movsx r16, r/m8 */
1991 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1992 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1993 if (iGprDst >= 8 || iGprSrc >= 8)
1994 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1995 else if (iGprSrc >= 4)
1996 pbCodeBuf[off++] = X86_OP_REX;
1997 pbCodeBuf[off++] = 0x0f;
1998 pbCodeBuf[off++] = 0xbe;
1999 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
2000
2001 /* movzx r32, r/m16 */
2002 if (iGprDst >= 8)
2003 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
2004 pbCodeBuf[off++] = 0x0f;
2005 pbCodeBuf[off++] = 0xb7;
2006 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2007
2008#elif defined(RT_ARCH_ARM64)
2009 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
2010 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2011 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
2012 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2013 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2014
2015#else
2016# error "port me"
2017#endif
2018 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2019 return off;
2020}
2021
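/* Worked example for the ARM64 pair above (hypothetical value): for
   iGprSrc = 0x80, 'sxtb' produces 0xffffff80 in the 32-bit destination and
   the 'and ..., #0xffff' trims it to 0x0000ff80 - the 8-bit value
   sign-extended to 16 bits with bits 63:16 clear, matching what the AMD64
   movsx + movzx sequence yields. */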
2022
2023/**
2024 * Emits a gprdst = gprsrc + addend load.
2025 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
2026 */
2027#ifdef RT_ARCH_AMD64
2028DECL_INLINE_THROW(uint32_t)
2029iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2030 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2031{
2032 Assert(iAddend != 0);
2033
2034 /* lea gprdst, [gprsrc + iAddend] */
2035 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2036 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
2037 pbCodeBuf[off++] = 0x8d;
2038 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
2039 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2040 return off;
2041}
2042
2043#elif defined(RT_ARCH_ARM64)
2044DECL_INLINE_THROW(uint32_t)
2045iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2046 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
2047{
2048 if ((uint32_t)iAddend < 4096)
2049 {
2050 /* add dst, src, uimm12 */
2051 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2052 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
2053 }
2054 else if ((uint32_t)-iAddend < 4096)
2055 {
2056 /* sub dst, src, uimm12 */
2057 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2058 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
2059 }
2060 else
2061 {
2062 Assert(iGprSrc != iGprDst);
2063 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
2064 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2065 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
2066 }
2067 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2068 return off;
2069}
2070#else
2071# error "port me"
2072#endif
2073
2074/**
2075 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
2076 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
2077 */
2078#ifdef RT_ARCH_AMD64
2079DECL_INLINE_THROW(uint32_t)
2080iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2081 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2082#else
2083DECL_INLINE_THROW(uint32_t)
2084iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2085 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
2086#endif
2087{
2088 if (iAddend != 0)
2089 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2090 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
2091}
2092
2093
2094/**
2095 * Emits a gprdst = gprsrc32 + addend load.
2096 * @note Bits 63 thru 32 are cleared.
2097 */
2098DECL_INLINE_THROW(uint32_t)
2099iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2100 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2101{
2102 Assert(iAddend != 0);
2103
2104#ifdef RT_ARCH_AMD64
2105 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
2106 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2107 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
2108 if ((iGprDst | iGprSrc) >= 8)
2109 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
2110 pbCodeBuf[off++] = 0x8d;
2111 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
2112
2113#elif defined(RT_ARCH_ARM64)
2114 if ((uint32_t)iAddend < 4096)
2115 {
2116 /* add dst, src, uimm12 */
2117 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2118 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
2119 }
2120 else if ((uint32_t)-iAddend < 4096)
2121 {
2122 /* sub dst, src, uimm12 */
2123 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2124 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
2125 }
2126 else
2127 {
2128 Assert(iGprSrc != iGprDst);
2129 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
2130 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2131 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
2132 }
2133
2134#else
2135# error "port me"
2136#endif
2137 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2138 return off;
2139}
2140
2141
2142/**
2143 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
2144 */
2145DECL_INLINE_THROW(uint32_t)
2146iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2147 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2148{
2149 if (iAddend != 0)
2150 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2151 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
2152}
2153
2154
2155/**
2156 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2157 * destination.
2158 */
2159DECL_FORCE_INLINE(uint32_t)
2160iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2161{
2162#ifdef RT_ARCH_AMD64
2163 /* mov reg16, r/m16 */
2164 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2165 if (idxDst >= 8 || idxSrc >= 8)
2166 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
2167 pCodeBuf[off++] = 0x8b;
2168 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
2169
2170#elif defined(RT_ARCH_ARM64)
2171 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
2172 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
2173
2174#else
2175# error "Port me!"
2176#endif
2177 return off;
2178}
2179
2180
2181/**
2182 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2183 * destination.
2184 */
2185DECL_INLINE_THROW(uint32_t)
2186iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2187{
2188#ifdef RT_ARCH_AMD64
2189 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
2190#elif defined(RT_ARCH_ARM64)
2191 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
2192#else
2193# error "Port me!"
2194#endif
2195 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2196 return off;
2197}
2198
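/* Worked example (hypothetical values): with idxDst = 0xaaaabbbbcccc5555 and
   idxSrc holding 0x1234 in its low 16 bits, the merge above leaves
   idxDst = 0xaaaabbbbcccc1234.  The AMD64 16-bit 'mov' preserves bits 63:16
   by operand-size rules; the ARM64 'bfi' does the same explicitly as a
   bitfield insert. */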
2199
2200#ifdef RT_ARCH_AMD64
2201/**
2202 * Common bit of iemNativeEmitLoadGprByBp and friends.
2203 */
2204DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
2205 PIEMRECOMPILERSTATE pReNativeAssert)
2206{
2207 if (offDisp < 128 && offDisp >= -128)
2208 {
2209 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
2210 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
2211 }
2212 else
2213 {
2214 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
2215 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2216 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2217 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2218 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2219 }
2220 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
2221 return off;
2222}
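
/* Encoding sketch (hypothetical operands): for iGprReg=RAX and offDisp=-40
   this helper picks the short form, emitting modrm byte 0x45 followed by the
   single displacement byte 0xd8; a displacement outside -128..127, say
   offDisp=0x1234, switches to the mod=2 form with a little-endian dword
   displacement 34 12 00 00. */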
2223#elif defined(RT_ARCH_ARM64)
2224/**
2225 * Common bit of iemNativeEmitLoadGprByBp and friends.
2226 */
2227DECL_FORCE_INLINE_THROW(uint32_t)
2228iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2229 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2230{
2231 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
2232 {
2233 /* str w/ unsigned imm12 (scaled) */
2234 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2235 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
2236 }
2237 else if (offDisp >= -256 && offDisp <= 256)
2238 {
2239 /* stur w/ signed imm9 (unscaled) */
2240 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2241 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
2242 }
2243 else
2244 {
2245 /* Use temporary indexing register. */
2246 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2247 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2248 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2249 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2250 }
2251 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2252 return off;
2253}
2254#endif
2255
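/* Dispatch sketch for the ARM64 helper above (hypothetical displacements,
   cbData=8): offDisp=0x40 is aligned and in range, so it encodes with the
   scaled unsigned imm12 (0x40/8 = 8); offDisp=-16 falls back to the unscaled
   ldur/stur form with its signed imm9; and something like offDisp=0x9001
   takes the temporary-register path with a register-indexed ldr/str. */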
2256
2257/**
2258 * Emits a 64-bit GPR load instruction with a BP relative source address.
2259 */
2260DECL_INLINE_THROW(uint32_t)
2261iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2262{
2263#ifdef RT_ARCH_AMD64
2264 /* mov gprdst, qword [rbp + offDisp] */
2265 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2266 if (iGprDst < 8)
2267 pbCodeBuf[off++] = X86_OP_REX_W;
2268 else
2269 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2270 pbCodeBuf[off++] = 0x8b;
2271 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2272
2273#elif defined(RT_ARCH_ARM64)
2274 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2275
2276#else
2277# error "port me"
2278#endif
2279}
2280
2281
2282/**
2283 * Emits a 32-bit GPR load instruction with a BP relative source address.
2284 * @note Bits 63 thru 32 of the GPR will be cleared.
2285 */
2286DECL_INLINE_THROW(uint32_t)
2287iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2288{
2289#ifdef RT_ARCH_AMD64
2290 /* mov gprdst, dword [rbp + offDisp] */
2291 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2292 if (iGprDst >= 8)
2293 pbCodeBuf[off++] = X86_OP_REX_R;
2294 pbCodeBuf[off++] = 0x8b;
2295 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2296
2297#elif defined(RT_ARCH_ARM64)
2298 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2299
2300#else
2301# error "port me"
2302#endif
2303}
2304
2305
2306/**
2307 * Emits a 16-bit GPR load instruction with a BP relative source address.
2308 * @note Bits 63 thru 16 of the GPR will be cleared.
2309 */
2310DECL_INLINE_THROW(uint32_t)
2311iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2312{
2313#ifdef RT_ARCH_AMD64
2314 /* movzx gprdst, word [rbp + offDisp] */
2315 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2316 if (iGprDst >= 8)
2317 pbCodeBuf[off++] = X86_OP_REX_R;
2318 pbCodeBuf[off++] = 0x0f;
2319 pbCodeBuf[off++] = 0xb7;
2320 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2321
2322#elif defined(RT_ARCH_ARM64)
2323    return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2324
2325#else
2326# error "port me"
2327#endif
2328}
2329
2330
2331/**
2332 * Emits an 8-bit GPR load instruction with a BP relative source address.
2333 * @note Bits 63 thru 8 of the GPR will be cleared.
2334 */
2335DECL_INLINE_THROW(uint32_t)
2336iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2337{
2338#ifdef RT_ARCH_AMD64
2339 /* movzx gprdst, byte [rbp + offDisp] */
2340 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2341 if (iGprDst >= 8)
2342 pbCodeBuf[off++] = X86_OP_REX_R;
2343 pbCodeBuf[off++] = 0x0f;
2344 pbCodeBuf[off++] = 0xb6;
2345 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2346
2347#elif defined(RT_ARCH_ARM64)
2348    return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2349
2350#else
2351# error "port me"
2352#endif
2353}
2354
2355
2356#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2357/**
2358 * Emits a 128-bit vector register load instruction with a BP relative source address.
2359 */
2360DECL_FORCE_INLINE_THROW(uint32_t)
2361iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2362{
2363#ifdef RT_ARCH_AMD64
2364 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2365
2366 /* movdqu reg128, mem128 */
2367 pbCodeBuf[off++] = 0xf3;
2368 if (iVecRegDst >= 8)
2369 pbCodeBuf[off++] = X86_OP_REX_R;
2370 pbCodeBuf[off++] = 0x0f;
2371 pbCodeBuf[off++] = 0x6f;
2372 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2373#elif defined(RT_ARCH_ARM64)
2374 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2375#else
2376# error "port me"
2377#endif
2378}
2379
2380
2381/**
2382 * Emits a 256-bit vector register load instruction with a BP relative source address.
2383 */
2384DECL_FORCE_INLINE_THROW(uint32_t)
2385iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2386{
2387#ifdef RT_ARCH_AMD64
2388 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2389
2390 /* vmovdqu reg256, mem256 */
2391 pbCodeBuf[off++] = X86_OP_VEX2;
2392 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2393 pbCodeBuf[off++] = 0x6f;
2394 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2395#elif defined(RT_ARCH_ARM64)
2396 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2397 Assert(!(iVecRegDst & 0x1));
2398 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2399 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2400#else
2401# error "port me"
2402#endif
2403}
2404
2405#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
2406
2407
2408/**
2409 * Emits a load effective address to a GPR with a BP relative source address.
2410 */
2411DECL_INLINE_THROW(uint32_t)
2412iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2413{
2414#ifdef RT_ARCH_AMD64
2415 /* lea gprdst, [rbp + offDisp] */
2416 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2417 if (iGprDst < 8)
2418 pbCodeBuf[off++] = X86_OP_REX_W;
2419 else
2420 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2421 pbCodeBuf[off++] = 0x8d;
2422 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2423
2424#elif defined(RT_ARCH_ARM64)
2425 bool const fSub = offDisp < 0;
2426 uint32_t const offAbsDisp = (uint32_t)RT_ABS(offDisp);
2427 if (offAbsDisp <= 0xffffffU)
2428 {
2429 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2430 if (offAbsDisp <= 0xfffU)
2431 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp);
2432 else
2433 {
2434 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp >> 12,
2435 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2436 if (offAbsDisp & 0xfffU)
2437 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, offAbsDisp & 0xfff);
2438 }
2439 }
2440 else
2441 {
2442 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2443 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offAbsDisp);
2444 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2445 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2446 }
2447
2448#else
2449# error "port me"
2450#endif
2451
2452 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2453 return off;
2454}
2455
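/* Worked example for the two-instruction ARM64 path above (hypothetical
   displacement): offDisp=0x12345 exceeds the 12-bit immediate, so it is
   split into 'add iGprDst, bp, #0x12, lsl #12' followed by
   'add iGprDst, iGprDst, #0x345', i.e. 0x12000 + 0x345 = 0x12345. */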
2456
2457/**
2458 * Emits a 64-bit GPR store with a BP relative destination address.
2459 *
2460 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2461 */
2462DECL_INLINE_THROW(uint32_t)
2463iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2464{
2465#ifdef RT_ARCH_AMD64
2466 /* mov qword [rbp + offDisp], gprdst */
2467 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2468 if (iGprSrc < 8)
2469 pbCodeBuf[off++] = X86_OP_REX_W;
2470 else
2471 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2472 pbCodeBuf[off++] = 0x89;
2473 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2474
2475#elif defined(RT_ARCH_ARM64)
2476 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2477 {
2478 /* str w/ unsigned imm12 (scaled) */
2479 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2480 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2481 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2482 }
2483 else if (offDisp >= -256 && offDisp <= 256)
2484 {
2485 /* stur w/ signed imm9 (unscaled) */
2486 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2487 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2488 }
2489 else if ((uint32_t)-offDisp < (unsigned)_4K)
2490 {
2491 /* Use temporary indexing register w/ sub uimm12. */
2492 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2493 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2494 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2495 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2496 }
2497 else
2498 {
2499 /* Use temporary indexing register. */
2500 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2501 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2502 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2503 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2504 }
2505 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2506 return off;
2507
2508#else
2509# error "Port me!"
2510#endif
2511}
2512
2513
2514/**
2515 * Emits a 64-bit immediate store with a BP relative destination address.
2516 *
2517 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2518 */
2519DECL_INLINE_THROW(uint32_t)
2520iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2521{
2522#ifdef RT_ARCH_AMD64
2523 if ((int64_t)uImm64 == (int32_t)uImm64)
2524 {
2525 /* mov qword [rbp + offDisp], imm32 - sign extended */
2526 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2527 pbCodeBuf[off++] = X86_OP_REX_W;
2528 pbCodeBuf[off++] = 0xc7;
2529 if (offDisp < 128 && offDisp >= -128)
2530 {
2531 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2532 pbCodeBuf[off++] = (uint8_t)offDisp;
2533 }
2534 else
2535 {
2536 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2537 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2538 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2539 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2540 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2541 }
2542 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2543 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2544 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2545 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2546 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2547 return off;
2548 }
2549#endif
2550
2551 /* Load tmp0, imm64; Store tmp to bp+disp. */
2552 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2553 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2554}
2555
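/* Usage sketch (hypothetical values): UINT64_C(0xfffffffffffffff0)
   sign-extends from 32 bits, so on AMD64 it is stored directly as
   'mov qword [rbp + offDisp], 0xfffffff0'; a value like
   UINT64_C(0x1234567890) does not, and takes the generic
   load-into-tmp0-then-store path instead. */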
2556#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2557
2558/**
2559 * Emits a 128-bit vector register store with a BP relative destination address.
2560 *
2561 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2562 */
2563DECL_INLINE_THROW(uint32_t)
2564iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2565{
2566#ifdef RT_ARCH_AMD64
2567 /* movdqu [rbp + offDisp], vecsrc */
2568    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2569 pbCodeBuf[off++] = 0xf3;
2570 if (iVecRegSrc >= 8)
2571 pbCodeBuf[off++] = X86_OP_REX_R;
2572 pbCodeBuf[off++] = 0x0f;
2573 pbCodeBuf[off++] = 0x7f;
2574 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2575
2576#elif defined(RT_ARCH_ARM64)
2577    if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2578 {
2579 /* str w/ unsigned imm12 (scaled) */
2580 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2581 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2582                                                      ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2583 }
2584 else if (offDisp >= -256 && offDisp <= 256)
2585 {
2586 /* stur w/ signed imm9 (unscaled) */
2587 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2588 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2589 }
2590 else if ((uint32_t)-offDisp < (unsigned)_4K)
2591 {
2592 /* Use temporary indexing register w/ sub uimm12. */
2593 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2594 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2595 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2596 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2597 }
2598 else
2599 {
2600 /* Use temporary indexing register. */
2601 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2602 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2603 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2604 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2605 }
2606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2607 return off;
2608
2609#else
2610# error "Port me!"
2611#endif
2612}
2613
2614
2615/**
2616 * Emits a 256-bit vector register store with a BP relative destination address.
2617 *
2618 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2619 */
2620DECL_INLINE_THROW(uint32_t)
2621iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2622{
2623#ifdef RT_ARCH_AMD64
2624 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2625
2626 /* vmovdqu mem256, reg256 */
2627 pbCodeBuf[off++] = X86_OP_VEX2;
2628 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2629 pbCodeBuf[off++] = 0x7f;
2630 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2631#elif defined(RT_ARCH_ARM64)
2632 Assert(!(iVecRegSrc & 0x1));
2633 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2634 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2635#else
2636# error "Port me!"
2637#endif
2638}
2639
2640#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
2641#if defined(RT_ARCH_ARM64)
2642
2643/**
2644 * Common bit of iemNativeEmitLoadGprByGprU64Ex and friends.
2645 *
2646 * @note Odd and large @a offDisp values require a temporary, unless it's a
2647 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2648 * caller does not heed this.
2649 *
2650 * @note DON'T try this with prefetch.
2651 */
2652DECL_FORCE_INLINE_THROW(uint32_t)
2653iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2654 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2655{
2656 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2657 {
2658 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2659 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2660 }
2661 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2662 && iGprReg != iGprBase)
2663 || iGprTmp != UINT8_MAX)
2664 {
2665 /* The offset is too large, so we must load it into a register and use
2666 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2667        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2668 if (iGprTmp == UINT8_MAX)
2669 iGprTmp = iGprReg;
2670 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2671 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2672 }
2673 else
2674# ifdef IEM_WITH_THROW_CATCH
2675 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2676# else
2677 AssertReleaseFailedStmt(off = UINT32_MAX);
2678# endif
2679 return off;
2680}
2681
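/* Call sketch (hypothetical register indices): a 64-bit load of
   [base + 0x11223] into the base register itself cannot reuse the
   destination as scratch, so the caller must supply a spare GPR:

        off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, idxRegBase, idxRegBase, 0x11223,
                                          kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), idxRegTmp);

   Omitting idxRegTmp in that situation ends up in the assert / longjmp
   branch above. */
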
2682/**
2683 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2684 */
2685DECL_FORCE_INLINE_THROW(uint32_t)
2686iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2687 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2688{
2689 /*
2690     * There are a couple of ldr variants that take an immediate offset, so we
2691     * try to use those if we can; otherwise we have to use a temporary register
2692     * to help with the addressing.
2693 */
2694 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2695 {
2696 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2697 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2698 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2699 }
2700 else
2701 {
2702 /* The offset is too large, so we must load it into a register and use
2703 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2704        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2705 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2706
2707 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2708 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2709
2710 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2711 }
2712 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2713 return off;
2714}
2715
2716# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2717/**
2718 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2719 *
2720 * @note Odd and large @a offDisp values require a temporary (@a iGprTmp),
2721 * since a vector register cannot double as the address scratch register.
2722 * Will assert / throw if caller does not heed this.
2723 *
2724 * @note DON'T try this with prefetch.
2725 */
2726DECL_FORCE_INLINE_THROW(uint32_t)
2727iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2728 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2729{
2730 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2731 {
2732 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2733 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2734 }
2735    else if (iGprTmp != UINT8_MAX)
2737 {
2738 /* The offset is too large, so we must load it into a register and use
2739 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2740        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2741 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2742 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2743 }
2744 else
2745# ifdef IEM_WITH_THROW_CATCH
2746 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2747# else
2748 AssertReleaseFailedStmt(off = UINT32_MAX);
2749# endif
2750 return off;
2751}
2752# endif
2753
2754
2755/**
2756 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2757 */
2758DECL_FORCE_INLINE_THROW(uint32_t)
2759iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2760 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2761{
2762 /*
2763     * There are a couple of ldr variants that take an immediate offset, so we
2764     * try to use those if we can; otherwise we have to use a temporary register
2765     * to help with the addressing.
2766 */
2767 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2768 {
2769 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2770 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2771 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2772 }
2773 else
2774 {
2775 /* The offset is too large, so we must load it into a register and use
2776 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2777        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2778 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2779
2780 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2781 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2782
2783 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2784 }
2785 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2786 return off;
2787}
2788#endif /* RT_ARCH_ARM64 */
2789
2790/**
2791 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2792 *
2793 * @note ARM64: Misaligned @a offDisp values and values not in the
2794 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2795 *       @a iGprDst and @a iGprBase are the same. Will assert / throw if caller
2796 * does not heed this.
2797 */
2798DECL_FORCE_INLINE_THROW(uint32_t)
2799iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2800 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2801{
2802#ifdef RT_ARCH_AMD64
2803 /* mov reg64, mem64 */
2804 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2805 pCodeBuf[off++] = 0x8b;
2806 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2807 RT_NOREF(iGprTmp);
2808
2809#elif defined(RT_ARCH_ARM64)
2810 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2811 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2812
2813#else
2814# error "port me"
2815#endif
2816 return off;
2817}
2818
2819
2820/**
2821 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2822 */
2823DECL_INLINE_THROW(uint32_t)
2824iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2825{
2826#ifdef RT_ARCH_AMD64
2827 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2828 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2829
2830#elif defined(RT_ARCH_ARM64)
2831 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2832
2833#else
2834# error "port me"
2835#endif
2836 return off;
2837}
2838
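/* Usage sketch (hypothetical register indices): fetching a qword field at
   offset 0x18 of a structure whose address sits in idxRegBase:

        off = iemNativeEmitLoadGprByGprU64(pReNative, off, idxRegDst, idxRegBase, 0x18);

   On AMD64 this is a plain 'mov idxRegDst, [idxRegBase + 0x18]'; on ARM64
   the small, 8-byte aligned displacement encodes directly as a scaled
   unsigned imm12. */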
2839
2840/**
2841 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2842 *
2843 * @note ARM64: Misaligned @a offDisp values and values not in the
2844 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2845 *       if @a iGprDst and @a iGprBase are the same. Will assert / throw if
2846 * caller does not heed this.
2847 *
2848 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2849 */
2850DECL_FORCE_INLINE_THROW(uint32_t)
2851iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2852 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2853{
2854#ifdef RT_ARCH_AMD64
2855 /* mov reg32, mem32 */
2856 if (iGprDst >= 8 || iGprBase >= 8)
2857 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2858 pCodeBuf[off++] = 0x8b;
2859 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2860 RT_NOREF(iGprTmp);
2861
2862#elif defined(RT_ARCH_ARM64)
2863 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2864 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2865
2866#else
2867# error "port me"
2868#endif
2869 return off;
2870}
2871
2872
2873/**
2874 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2875 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2876 */
2877DECL_INLINE_THROW(uint32_t)
2878iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2879{
2880#ifdef RT_ARCH_AMD64
2881 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2882 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2883
2884#elif defined(RT_ARCH_ARM64)
2885 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2886
2887#else
2888# error "port me"
2889#endif
2890 return off;
2891}
2892
2893
2894/**
2895 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2896 * sign-extending the value to 64 bits.
2897 *
2898 * @note ARM64: Misaligned @a offDisp values and values not in the
2899 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2900 *       if @a iGprDst and @a iGprBase are the same. Will assert / throw if
2901 * caller does not heed this.
2902 */
2903DECL_FORCE_INLINE_THROW(uint32_t)
2904iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2905 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2906{
2907#ifdef RT_ARCH_AMD64
2908 /* movsxd reg64, mem32 */
2909 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2910 pCodeBuf[off++] = 0x63;
2911 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2912 RT_NOREF(iGprTmp);
2913
2914#elif defined(RT_ARCH_ARM64)
2915 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2916 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2917
2918#else
2919# error "port me"
2920#endif
2921 return off;
2922}
2923
2924
2925/**
2926 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2927 *
2928 * @note ARM64: Misaligned @a offDisp values and values not in the
2929 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2930 *       if @a iGprDst and @a iGprBase are the same. Will assert / throw if
2931 * caller does not heed this.
2932 *
2933 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2934 */
2935DECL_FORCE_INLINE_THROW(uint32_t)
2936iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2937 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2938{
2939#ifdef RT_ARCH_AMD64
2940 /* movzx reg32, mem16 */
2941 if (iGprDst >= 8 || iGprBase >= 8)
2942 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2943 pCodeBuf[off++] = 0x0f;
2944 pCodeBuf[off++] = 0xb7;
2945 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2946 RT_NOREF(iGprTmp);
2947
2948#elif defined(RT_ARCH_ARM64)
2949 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2950 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2951
2952#else
2953# error "port me"
2954#endif
2955 return off;
2956}
2957
2958
2959/**
2960 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2961 * sign-extending the value to 64 bits.
2962 *
2963 * @note ARM64: Misaligned @a offDisp values and values not in the
2964 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2965 *       if @a iGprDst and @a iGprBase are the same. Will assert / throw if
2966 * caller does not heed this.
2967 */
2968DECL_FORCE_INLINE_THROW(uint32_t)
2969iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2970 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2971{
2972#ifdef RT_ARCH_AMD64
2973 /* movsx reg64, mem16 */
2974 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2975 pCodeBuf[off++] = 0x0f;
2976 pCodeBuf[off++] = 0xbf;
2977 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2978 RT_NOREF(iGprTmp);
2979
2980#elif defined(RT_ARCH_ARM64)
2981 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2982 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2983
2984#else
2985# error "port me"
2986#endif
2987 return off;
2988}
2989
2990
2991/**
2992 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2993 * sign-extending the value to 32 bits.
2994 *
2995 * @note ARM64: Misaligned @a offDisp values and values not in the
2996 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2997 *       if @a iGprDst and @a iGprBase are the same. Will assert / throw if
2998 * caller does not heed this.
2999 *
3000 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
3001 */
3002DECL_FORCE_INLINE_THROW(uint32_t)
3003iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3004 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3005{
3006#ifdef RT_ARCH_AMD64
3007 /* movsx reg32, mem16 */
3008 if (iGprDst >= 8 || iGprBase >= 8)
3009 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3010 pCodeBuf[off++] = 0x0f;
3011 pCodeBuf[off++] = 0xbf;
3012 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3013 RT_NOREF(iGprTmp);
3014
3015#elif defined(RT_ARCH_ARM64)
3016 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3017 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
3018
3019#else
3020# error "port me"
3021#endif
3022 return off;
3023}
3024
3025
3026/**
3027 * Emits an 8-bit GPR load via a GPR base address with a displacement.
3028 *
3029 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
3030 *       temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3031 * same. Will assert / throw if caller does not heed this.
3032 *
3033 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
3034 */
3035DECL_FORCE_INLINE_THROW(uint32_t)
3036iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3037 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3038{
3039#ifdef RT_ARCH_AMD64
3040 /* movzx reg32, mem8 */
3041 if (iGprDst >= 8 || iGprBase >= 8)
3042 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3043 pCodeBuf[off++] = 0x0f;
3044 pCodeBuf[off++] = 0xb6;
3045 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3046 RT_NOREF(iGprTmp);
3047
3048#elif defined(RT_ARCH_ARM64)
3049 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3050 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
3051
3052#else
3053# error "port me"
3054#endif
3055 return off;
3056}
3057
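/*
 * Encoding sketch for the byte load above (operands invented for
 * illustration, not taken from actual recompiler output): iGprDst=2 (rdx),
 * iGprBase=5 (rbp), offDisp=0 emits on AMD64:
 *      0f b6 55 00             movzx edx, byte [rbp+0]
 * (rbp as base register forces a disp8 byte even for a zero displacement.)
 */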
3058
3059/**
3060 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3061 * sign-extending the value to 64 bits.
3062 *
3063 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require
3064 * a temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3065 * same. Will assert / throw if caller does not heed this.
3066 */
3067DECL_FORCE_INLINE_THROW(uint32_t)
3068iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3069 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3070{
3071#ifdef RT_ARCH_AMD64
3072 /* movsx reg64, mem8 */
3073 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3074 pCodeBuf[off++] = 0x0f;
3075 pCodeBuf[off++] = 0xbe;
3076 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3077 RT_NOREF(iGprTmp);
3078
3079#elif defined(RT_ARCH_ARM64)
3080 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3081 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
3082
3083#else
3084# error "port me"
3085#endif
3086 return off;
3087}
3088
3089
3090/**
3091 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3092 * sign-extending the value to 32 bits.
3093 *
3094 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require
3095 * a temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3096 * same. Will assert / throw if caller does not heed this.
3097 *
3098 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
3099 */
3100DECL_FORCE_INLINE_THROW(uint32_t)
3101iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3102 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3103{
3104#ifdef RT_ARCH_AMD64
3105 /* movsx reg32, mem8 */
3106 if (iGprDst >= 8 || iGprBase >= 8)
3107 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3108 pCodeBuf[off++] = 0x0f;
3109 pCodeBuf[off++] = 0xbe;
3110 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3111 RT_NOREF(iGprTmp);
3112
3113#elif defined(RT_ARCH_ARM64)
3114 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3115 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
3116
3117#else
3118# error "port me"
3119#endif
3120 return off;
3121}
3122
3123
3124/**
3125 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3126 * sign-extending the value to 16 bits.
3127 *
3128 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require
3129 * a temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3130 * same. Will assert / throw if caller does not heed this.
3131 *
3132 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
3133 */
3134DECL_FORCE_INLINE_THROW(uint32_t)
3135iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3136 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3137{
3138#ifdef RT_ARCH_AMD64
3139 /* movsx reg32, mem8 */
3140 if (iGprDst >= 8 || iGprBase >= 8)
3141 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3142 pCodeBuf[off++] = 0x0f;
3143 pCodeBuf[off++] = 0xbe;
3144 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3145# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
3146 /* and reg32, 0xffff */
3147 if (iGprDst >= 8)
3148 pCodeBuf[off++] = X86_OP_REX_B;
3149 pCodeBuf[off++] = 0x81;
3150 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
3151 pCodeBuf[off++] = 0xff;
3152 pCodeBuf[off++] = 0xff;
3153 pCodeBuf[off++] = 0;
3154 pCodeBuf[off++] = 0;
3155# else
3156 /* movzx reg32, reg16 */
3157 if (iGprDst >= 8)
3158 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
3159 pCodeBuf[off++] = 0x0f;
3160 pCodeBuf[off++] = 0xb7;
3161 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
3162# endif
3163 RT_NOREF(iGprTmp);
3164
3165#elif defined(RT_ARCH_ARM64)
3166 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3167 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
3168 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
3169 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
3170
3171#else
3172# error "port me"
3173#endif
3174 return off;
3175}
3176
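/*
 * Worked example for the S8->16 load above (operands invented for
 * illustration): iGprDst=0 (rax), iGprBase=3 (rbx), offDisp=4 emits on AMD64:
 *      0f be 43 04             movsx eax, byte [rbx+4]
 *      81 e0 ff ff 00 00       and   eax, 0xffff
 * and on ARM64:
 *      ldrsb w0, [x3, #4]
 *      and   w0, w0, #0xffff
 */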
3177
3178#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3179/**
3180 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3181 *
3182 * @note ARM64: Misaligned @a offDisp values and values not in the
3183 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp).
3184 * Will assert / throw if the caller does not heed this.
3186 */
3187DECL_FORCE_INLINE_THROW(uint32_t)
3188iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3189 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3190{
3191#ifdef RT_ARCH_AMD64
3192 /* movdqu reg128, mem128 */
3193 pCodeBuf[off++] = 0xf3;
3194 if (iVecRegDst >= 8 || iGprBase >= 8)
3195 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3196 pCodeBuf[off++] = 0x0f;
3197 pCodeBuf[off++] = 0x6f;
3198 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3199 RT_NOREF(iGprTmp);
3200
3201#elif defined(RT_ARCH_ARM64)
3202 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3203 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3204
3205#else
3206# error "port me"
3207#endif
3208 return off;
3209}
3210
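/*
 * Encoding sketch for the 128-bit load above (illustrative operands):
 * iVecRegDst=2 (xmm2), iGprBase=8 (r8), offDisp=0x40 emits on AMD64:
 *      f3 41 0f 6f 50 40       movdqu xmm2, [r8+0x40]
 */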
3211
3212/**
3213 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3214 */
3215DECL_INLINE_THROW(uint32_t)
3216iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3217{
3218#ifdef RT_ARCH_AMD64
3219 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3220 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3221
3222#elif defined(RT_ARCH_ARM64)
3223 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3224
3225#else
3226# error "port me"
3227#endif
3228 return off;
3229}
3230
3231
3232/**
3233 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3234 *
3235 * @note ARM64: Misaligned @a offDisp values and values not in the
3236 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp).
3237 * Will assert / throw if the caller does not heed this.
3239 */
3240DECL_FORCE_INLINE_THROW(uint32_t)
3241iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3242 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3243{
3244#ifdef RT_ARCH_AMD64
3245 /* vmovdqu reg256, mem256 */
3246 pCodeBuf[off++] = X86_OP_VEX3;
3247 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3248 | X86_OP_VEX3_BYTE1_X
3249 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3250 | UINT8_C(0x01);
3251 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3252 pCodeBuf[off++] = 0x6f;
3253 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3254 RT_NOREF(iGprTmp);
3255
3256#elif defined(RT_ARCH_ARM64)
3257 Assert(!(iVecRegDst & 0x1));
3258 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3259 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3260 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3261 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3262#else
3263# error "port me"
3264#endif
3265 return off;
3266}
3267
3268
3269/**
3270 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3271 */
3272DECL_INLINE_THROW(uint32_t)
3273iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3274{
3275#ifdef RT_ARCH_AMD64
3276 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3277 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3278
3279#elif defined(RT_ARCH_ARM64)
3280 Assert(!(iVecRegDst & 0x1));
3281 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3282 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3283 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3284 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3285
3286#else
3287# error "port me"
3288#endif
3289 return off;
3290}
3291#endif
3292
3293
3294/**
3295 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3296 *
3297 * @note ARM64: Misaligned @a offDisp values and values not in the
3298 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3299 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3300 * does not heed this.
3301 */
3302DECL_FORCE_INLINE_THROW(uint32_t)
3303iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3304 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3305{
3306#ifdef RT_ARCH_AMD64
3307 /* mov mem64, reg64 */
3308 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3309 pCodeBuf[off++] = 0x89;
3310 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3311 RT_NOREF(iGprTmp);
3312
3313#elif defined(RT_ARCH_ARM64)
3314 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3315 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3316
3317#else
3318# error "port me"
3319#endif
3320 return off;
3321}
3322
3323
3324/**
3325 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3326 *
3327 * @note ARM64: Misaligned @a offDisp values and values not in the
3328 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3329 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3330 * does not heed this.
3331 */
3332DECL_FORCE_INLINE_THROW(uint32_t)
3333iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3334 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3335{
3336#ifdef RT_ARCH_AMD64
3337 /* mov mem32, reg32 */
3338 if (iGprSrc >= 8 || iGprBase >= 8)
3339 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3340 pCodeBuf[off++] = 0x89;
3341 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3342 RT_NOREF(iGprTmp);
3343
3344#elif defined(RT_ARCH_ARM64)
3345 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3346 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3347
3348#else
3349# error "port me"
3350#endif
3351 return off;
3352}
3353
3354
3355/**
3356 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3357 *
3358 * @note ARM64: Misaligned @a offDisp values and values not in the
3359 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3360 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3361 * does not heed this.
3362 */
3363DECL_FORCE_INLINE_THROW(uint32_t)
3364iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3365 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3366{
3367#ifdef RT_ARCH_AMD64
3368 /* mov mem16, reg16 */
3369 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3370 if (iGprSrc >= 8 || iGprBase >= 8)
3371 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3372 pCodeBuf[off++] = 0x89;
3373 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3374 RT_NOREF(iGprTmp);
3375
3376#elif defined(RT_ARCH_ARM64)
3377 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3378 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3379
3380#else
3381# error "port me"
3382#endif
3383 return off;
3384}
3385
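/*
 * Encoding sketch for the 16-bit store above (illustrative operands):
 * iGprSrc=1 (cx), iGprBase=0 (rax), offDisp=8 emits on AMD64:
 *      66 89 48 08             mov word [rax+8], cx
 * i.e. the operand size prefix turns the dword store into a word store.
 */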
3386
3387/**
3388 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3389 *
3390 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require
3391 * a temporary register (@a iGprTmp) if @a iGprSrc and @a iGprBase are the
3392 * same. Will assert / throw if caller does not heed this.
3393 */
3394DECL_FORCE_INLINE_THROW(uint32_t)
3395iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3396 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3397{
3398#ifdef RT_ARCH_AMD64
3399 /* mov mem8, reg8 */
3400 if (iGprSrc >= 8 || iGprBase >= 8)
3401 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3402 else if (iGprSrc >= 4)
3403 pCodeBuf[off++] = X86_OP_REX;
3404 pCodeBuf[off++] = 0x88;
3405 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3406 RT_NOREF(iGprTmp);
3407
3408#elif defined(RT_ARCH_ARM64)
3409 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3410 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3411
3412#else
3413# error "port me"
3414#endif
3415 return off;
3416}
3417
3418
3419/**
3420 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3421 *
3422 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0);
3423 * on AMD64 it depends on the immediate value.
3424 *
3425 * @note ARM64: Misaligned @a offDisp values and values not in the
3426 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp).
3427 * Will assert / throw if the caller does not heed this.
3429 */
3430DECL_FORCE_INLINE_THROW(uint32_t)
3431iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3432 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3433{
3434#ifdef RT_ARCH_AMD64
3435 if ((int32_t)uImm == (int64_t)uImm)
3436 {
3437 /* mov mem64, imm32 (sign-extended) */
3438 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3439 pCodeBuf[off++] = 0xc7;
3440 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3441 pCodeBuf[off++] = RT_BYTE1(uImm);
3442 pCodeBuf[off++] = RT_BYTE2(uImm);
3443 pCodeBuf[off++] = RT_BYTE3(uImm);
3444 pCodeBuf[off++] = RT_BYTE4(uImm);
3445 }
3446 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3447 {
3448 /* require temporary register. */
3449 if (iGprImmTmp == UINT8_MAX)
3450 iGprImmTmp = iGprTmp;
3451 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3452 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3453 }
3454 else
3455# ifdef IEM_WITH_THROW_CATCH
3456 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3457# else
3458 AssertReleaseFailedStmt(off = UINT32_MAX);
3459# endif
3460
3461#elif defined(RT_ARCH_ARM64)
3462 if (uImm == 0)
3463 iGprImmTmp = ARMV8_A64_REG_XZR;
3464 else
3465 {
3466 Assert(iGprImmTmp < 31);
3467 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3468 }
3469 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3470
3471#else
3472# error "port me"
3473#endif
3474 return off;
3475}
3476
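/*
 * Usage sketch for the immediate store above, with hypothetical register
 * indexes (real callers get these from the register allocator): storing a
 * constant that does not fit a sign-extended imm32 needs the immediate
 * temporary on both hosts, e.g.:
 *
 *      off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x8899aabbccddeeff),
 *                                           idxRegBase, idxRegImmTmp, 16);
 *
 * whereas a small constant like 42 is encoded directly on AMD64.
 */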
3477
3478/**
3479 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3480 *
3481 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3482 *
3483 * @note ARM64: Misaligned @a offDisp values and values not in the
3484 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp).
3485 * Will assert / throw if the caller does not heed this.
3487 */
3488DECL_FORCE_INLINE_THROW(uint32_t)
3489iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3490 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3491{
3492#ifdef RT_ARCH_AMD64
3493 /* mov mem32, imm32 */
3494 if (iGprBase >= 8)
3495 pCodeBuf[off++] = X86_OP_REX_B;
3496 pCodeBuf[off++] = 0xc7;
3497 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3498 pCodeBuf[off++] = RT_BYTE1(uImm);
3499 pCodeBuf[off++] = RT_BYTE2(uImm);
3500 pCodeBuf[off++] = RT_BYTE3(uImm);
3501 pCodeBuf[off++] = RT_BYTE4(uImm);
3502 RT_NOREF(iGprImmTmp, iGprTmp);
3503
3504#elif defined(RT_ARCH_ARM64)
3506 if (uImm == 0)
3507 iGprImmTmp = ARMV8_A64_REG_XZR;
3508 else
3509 {
3510 Assert(iGprImmTmp < 31);
3511 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3512 }
3513 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3514 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3515
3516#else
3517# error "port me"
3518#endif
3519 return off;
3520}
3521
3522
3523/**
3524 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3525 *
3526 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3527 *
3528 * @note ARM64: Misaligned @a offDisp values and values not in the
3529 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp).
3530 * Will assert / throw if the caller does not heed this.
3532 */
3533DECL_FORCE_INLINE_THROW(uint32_t)
3534iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3535 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3536{
3537#ifdef RT_ARCH_AMD64
3538 /* mov mem16, imm16 */
3539 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3540 if (iGprBase >= 8)
3541 pCodeBuf[off++] = X86_OP_REX_B;
3542 pCodeBuf[off++] = 0xc7;
3543 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3544 pCodeBuf[off++] = RT_BYTE1(uImm);
3545 pCodeBuf[off++] = RT_BYTE2(uImm);
3546 RT_NOREF(iGprImmTmp, iGprTmp);
3547
3548#elif defined(RT_ARCH_ARM64)
3549 if (uImm == 0)
3550 iGprImmTmp = ARMV8_A64_REG_XZR;
3551 else
3552 {
3553 Assert(iGprImmTmp < 31);
3554 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3555 }
3556 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3557 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3558
3559#else
3560# error "port me"
3561#endif
3562 return off;
3563}
3564
3565
3566/**
3567 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3568 *
3569 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3570 *
3571 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require
3572 * a temporary register (@a iGprTmp). Will assert / throw if the caller
3573 * does not heed this.
3574 */
3575DECL_FORCE_INLINE_THROW(uint32_t)
3576iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3577 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3578{
3579#ifdef RT_ARCH_AMD64
3580 /* mov mem8, imm8 */
3582 if (iGprBase >= 8)
3583 pCodeBuf[off++] = X86_OP_REX_B;
3584 pCodeBuf[off++] = 0xc6;
3585 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3586 pCodeBuf[off++] = uImm;
3587 RT_NOREF(iGprImmTmp, iGprTmp);
3588
3589#elif defined(RT_ARCH_ARM64)
3590 if (uImm == 0)
3591 iGprImmTmp = ARMV8_A64_REG_XZR;
3592 else
3593 {
3594 Assert(iGprImmTmp < 31);
3595 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3596 }
3597 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3598 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3599
3600#else
3601# error "port me"
3602#endif
3603 return off;
3604}
3605
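/*
 * Note the ARM64 zero-register trick used by the immediate stores above: a
 * zero immediate is stored straight from XZR, so no immediate temporary is
 * needed, e.g. for a zero byte at offDisp=0 (illustrative base register):
 *      strb wzr, [x4]
 */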
3606
3607#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3608/**
3609 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3610 *
3611 * @note ARM64: Misaligned @a offDisp values and values not in the
3612 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp).
3613 * Will assert / throw if the caller does not heed this.
3615 */
3616DECL_FORCE_INLINE_THROW(uint32_t)
3617iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3618 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3619{
3620#ifdef RT_ARCH_AMD64
3621 /* movdqu mem128, reg128 */
3622 pCodeBuf[off++] = 0xf3;
3623 if (iVecRegDst >= 8 || iGprBase >= 8)
3624 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3625 pCodeBuf[off++] = 0x0f;
3626 pCodeBuf[off++] = 0x7f;
3627 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3628 RT_NOREF(iGprTmp);
3629
3630#elif defined(RT_ARCH_ARM64)
3631 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3632 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3633
3634#else
3635# error "port me"
3636#endif
3637 return off;
3638}
3639
3640
3641/**
3642 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3643 */
3644DECL_INLINE_THROW(uint32_t)
3645iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3646{
3647#ifdef RT_ARCH_AMD64
3648 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3649 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3650
3651#elif defined(RT_ARCH_ARM64)
3652 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3653
3654#else
3655# error "port me"
3656#endif
3657 return off;
3658}
3659
3660
3661/**
3662 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3663 *
3664 * @note ARM64: Misaligned @a offDisp values and values not in the
3665 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp).
3666 * Will assert / throw if the caller does not heed this.
3668 */
3669DECL_FORCE_INLINE_THROW(uint32_t)
3670iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3671 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3672{
3673#ifdef RT_ARCH_AMD64
3674 /* vmovdqu mem256, reg256 */
3675 pCodeBuf[off++] = X86_OP_VEX3;
3676 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3677 | X86_OP_VEX3_BYTE1_X
3678 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3679 | UINT8_C(0x01);
3680 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3681 pCodeBuf[off++] = 0x7f;
3682 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3683 RT_NOREF(iGprTmp);
3684
3685#elif defined(RT_ARCH_ARM64)
3686 Assert(!(iVecRegDst & 0x1));
3687 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3688 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3689 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3690 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3691#else
3692# error "port me"
3693#endif
3694 return off;
3695}
3696
3697
3698/**
3699 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3700 */
3701DECL_INLINE_THROW(uint32_t)
3702iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3703{
3704#ifdef RT_ARCH_AMD64
3705 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3706 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3707
3708#elif defined(RT_ARCH_ARM64)
3709 Assert(!(iVecRegDst & 0x1));
3710 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3711 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3712 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3713 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3714
3715#else
3716# error "port me"
3717#endif
3718 return off;
3719}
3720#endif
3721
3722
3723
3724/*********************************************************************************************************************************
3725* Subtraction and Additions *
3726*********************************************************************************************************************************/
3727
3728/**
3729 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3730 * @note The AMD64 version sets flags.
3731 */
3732DECL_INLINE_THROW(uint32_t)
3733iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3734{
3735#if defined(RT_ARCH_AMD64)
3736 /* sub Gv,Ev */
3737 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3738 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3739 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3740 pbCodeBuf[off++] = 0x2b;
3741 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3742
3743#elif defined(RT_ARCH_ARM64)
3744 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3745 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3746
3747#else
3748# error "Port me"
3749#endif
3750 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3751 return off;
3752}
3753
3754
3755/**
3756 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3757 * @note The AMD64 version sets flags.
3758 */
3759DECL_FORCE_INLINE(uint32_t)
3760iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3761{
3762#if defined(RT_ARCH_AMD64)
3763 /* sub Gv,Ev */
3764 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3765 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3766 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3767 pCodeBuf[off++] = 0x2b;
3768 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3769
3770#elif defined(RT_ARCH_ARM64)
3771 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3772
3773#else
3774# error "Port me"
3775#endif
3776 return off;
3777}
3778
3779
3780/**
3781 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3782 * @note The AMD64 version sets flags.
3783 */
3784DECL_INLINE_THROW(uint32_t)
3785iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3786{
3787#if defined(RT_ARCH_AMD64)
3788 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3789#elif defined(RT_ARCH_ARM64)
3790 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3791#else
3792# error "Port me"
3793#endif
3794 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3795 return off;
3796}
3797
3798
3799/**
3800 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3801 *
3802 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3803 *
3804 * @note Larger constants will require a temporary register. Failing to specify
3805 * one when needed will trigger fatal assertion / throw.
3806 */
3807DECL_FORCE_INLINE_THROW(uint32_t)
3808iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3809 uint8_t iGprTmp = UINT8_MAX)
3810{
3811#ifdef RT_ARCH_AMD64
3812 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3813 if (iSubtrahend == 1)
3814 {
3815 /* dec r/m64 */
3816 pCodeBuf[off++] = 0xff;
3817 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3818 }
3819 else if (iSubtrahend == -1)
3820 {
3821 /* inc r/m64 */
3822 pCodeBuf[off++] = 0xff;
3823 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3824 }
3825 else if ((int8_t)iSubtrahend == iSubtrahend)
3826 {
3827 /* sub r/m64, imm8 */
3828 pCodeBuf[off++] = 0x83;
3829 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3830 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3831 }
3832 else if ((int32_t)iSubtrahend == iSubtrahend)
3833 {
3834 /* sub r/m64, imm32 */
3835 pCodeBuf[off++] = 0x81;
3836 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3837 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3838 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3839 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3840 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3841 }
3842 else if (iGprTmp != UINT8_MAX)
3843 {
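 /* Overwrites the REX.W prefix emitted unconditionally above, hence the off - 1. */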
3844 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3845 /* sub r/m64, r64 */
3846 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3847 pCodeBuf[off++] = 0x29;
3848 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3849 }
3850 else
3851# ifdef IEM_WITH_THROW_CATCH
3852 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3853# else
3854 AssertReleaseFailedStmt(off = UINT32_MAX);
3855# endif
3856
3857#elif defined(RT_ARCH_ARM64)
3858 uint64_t uAbsSubtrahend = (uint64_t)RT_ABS(iSubtrahend); /* 64-bit so large values take the iGprTmp path below */
3859 if (uAbsSubtrahend < 4096)
3860 {
3861 if (iSubtrahend >= 0)
3862 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3863 else
3864 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3865 }
3866 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3867 {
3868 if (iSubtrahend >= 0)
3869 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3870 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3871 else
3872 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3873 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3874 }
3875 else if (iGprTmp != UINT8_MAX)
3876 {
3877 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3878 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3879 }
3880 else
3881# ifdef IEM_WITH_THROW_CATCH
3882 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3883# else
3884 AssertReleaseFailedStmt(off = UINT32_MAX);
3885# endif
3886
3887#else
3888# error "Port me"
3889#endif
3890 return off;
3891}
3892
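/*
 * Illustrative AMD64 encodings for the immediate subtraction above, for
 * iGprDst=0 (rax; values invented for the example):
 *      iSubtrahend=1           48 ff c8                dec rax
 *      iSubtrahend=-1          48 ff c0                inc rax
 *      iSubtrahend=16          48 83 e8 10             sub rax, 16
 *      iSubtrahend=0x12345678  48 81 e8 78 56 34 12    sub rax, 0x12345678
 * Anything outside the sign-extended imm32 range goes through @a iGprTmp.
 */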
3893
3894/**
3895 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3896 *
3897 * @note Larger constants will require a temporary register. Failing to specify
3898 * one when needed will trigger fatal assertion / throw.
3899 */
3900DECL_INLINE_THROW(uint32_t)
3901iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3902 uint8_t iGprTmp = UINT8_MAX)
3903
3904{
3905#ifdef RT_ARCH_AMD64
3906 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3907#elif defined(RT_ARCH_ARM64)
3908 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3909#else
3910# error "Port me"
3911#endif
3912 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3913 return off;
3914}
3915
3916
3917/**
3918 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3919 *
3920 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3921 *
3922 * @note ARM64: Larger constants will require a temporary register. Failing to
3923 * specify one when needed will trigger fatal assertion / throw.
3924 */
3925DECL_FORCE_INLINE_THROW(uint32_t)
3926iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3927 uint8_t iGprTmp = UINT8_MAX)
3928{
3929#ifdef RT_ARCH_AMD64
3930 if (iGprDst >= 8)
3931 pCodeBuf[off++] = X86_OP_REX_B;
3932 if (iSubtrahend == 1)
3933 {
3934 /* dec r/m32 */
3935 pCodeBuf[off++] = 0xff;
3936 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3937 }
3938 else if (iSubtrahend == -1)
3939 {
3940 /* inc r/m32 */
3941 pCodeBuf[off++] = 0xff;
3942 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3943 }
3944 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3945 {
3946 /* sub r/m32, imm8 */
3947 pCodeBuf[off++] = 0x83;
3948 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3949 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3950 }
3951 else
3952 {
3953 /* sub r/m32, imm32 */
3954 pCodeBuf[off++] = 0x81;
3955 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3956 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3957 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3958 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3959 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3960 }
3961 RT_NOREF(iGprTmp);
3962
3963#elif defined(RT_ARCH_ARM64)
3964 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3965 if (uAbsSubtrahend < 4096)
3966 {
3967 if (iSubtrahend >= 0)
3968 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3969 else
3970 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3971 }
3972 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3973 {
3974 if (iSubtrahend >= 0)
3975 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3976 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3977 else
3978 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3979 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3980 }
3981 else if (iGprTmp != UINT8_MAX)
3982 {
3983 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3984 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3985 }
3986 else
3987# ifdef IEM_WITH_THROW_CATCH
3988 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3989# else
3990 AssertReleaseFailedStmt(off = UINT32_MAX);
3991# endif
3992
3993#else
3994# error "Port me"
3995#endif
3996 return off;
3997}
3998
3999
4000/**
4001 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
4002 *
4003 * @note ARM64: Larger constants will require a temporary register. Failing to
4004 * specify one when needed will trigger fatal assertion / throw.
4005 */
4006DECL_INLINE_THROW(uint32_t)
4007iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
4008 uint8_t iGprTmp = UINT8_MAX)
4009
4010{
4011#ifdef RT_ARCH_AMD64
4012 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
4013#elif defined(RT_ARCH_ARM64)
4014 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
4015#else
4016# error "Port me"
4017#endif
4018 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4019 return off;
4020}
4021
4022
4023/**
4024 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
4025 *
4026 * This will optimize using DEC/INC/whatever, and the ARM64 version will not
4027 * set flags, so it is not suitable as a base for conditional jumps.
4028 *
4029 * @note AMD64: Will only update the lower 16 bits of the register.
4030 * @note ARM64: Will update the entire register.
4031 * @note ARM64: Larger constants will require a temporary register. Failing to
4032 * specify one when needed will trigger fatal assertion / throw.
4033 */
4034DECL_FORCE_INLINE_THROW(uint32_t)
4035iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
4036 uint8_t iGprTmp = UINT8_MAX)
4037{
4038#ifdef RT_ARCH_AMD64
4039 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4040 if (iGprDst >= 8)
4041 pCodeBuf[off++] = X86_OP_REX_B;
4042 if (iSubtrahend == 1)
4043 {
4044 /* dec r/m16 */
4045 pCodeBuf[off++] = 0xff;
4046 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4047 }
4048 else if (iSubtrahend == -1)
4049 {
4050 /* inc r/m16 */
4051 pCodeBuf[off++] = 0xff;
4052 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4053 }
4054 else if ((int8_t)iSubtrahend == iSubtrahend)
4055 {
4056 /* sub r/m16, imm8 */
4057 pCodeBuf[off++] = 0x83;
4058 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4059 pCodeBuf[off++] = (uint8_t)iSubtrahend;
4060 }
4061 else
4062 {
4063 /* sub r/m16, imm16 */
4064 pCodeBuf[off++] = 0x81;
4065 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4066 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
4067 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
4068 }
4069 RT_NOREF(iGprTmp);
4070
4071#elif defined(RT_ARCH_ARM64)
4072 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
4073 if (uAbsSubtrahend < 4096)
4074 {
4075 if (iSubtrahend >= 0)
4076 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4077 else
4078 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4079 }
4080 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
4081 {
4082 if (iSubtrahend >= 0)
4083 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4084 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4085 else
4086 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4087 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4088 }
4089 else if (iGprTmp != UINT8_MAX)
4090 {
4091 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
4092 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4093 }
4094 else
4095# ifdef IEM_WITH_THROW_CATCH
4096 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4097# else
4098 AssertReleaseFailedStmt(off = UINT32_MAX);
4099# endif
4100 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4101
4102#else
4103# error "Port me"
4104#endif
4105 return off;
4106}
4107
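/*
 * ARM64 sketch of the 16-bit subtraction above for iGprDst=5 and
 * iSubtrahend=2 (illustrative values):
 *      sub w5, w5, #2
 *      and w5, w5, #0xffff     ; emulates the 16-bit wrap-around
 */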
4108
4109/**
4110 * Emits adding a 64-bit GPR to another, storing the result in the first.
4111 * @note The AMD64 version sets flags.
4112 */
4113DECL_FORCE_INLINE(uint32_t)
4114iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4115{
4116#if defined(RT_ARCH_AMD64)
4117 /* add Gv,Ev */
4118 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4119 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
4120 pCodeBuf[off++] = 0x03;
4121 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4122
4123#elif defined(RT_ARCH_ARM64)
4124 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
4125
4126#else
4127# error "Port me"
4128#endif
4129 return off;
4130}
4131
4132
4133/**
4134 * Emits adding a 64-bit GPR to another, storing the result in the first.
4135 * @note The AMD64 version sets flags.
4136 */
4137DECL_INLINE_THROW(uint32_t)
4138iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4139{
4140#if defined(RT_ARCH_AMD64)
4141 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4142#elif defined(RT_ARCH_ARM64)
4143 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4144#else
4145# error "Port me"
4146#endif
4147 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4148 return off;
4149}
4150
4151
4152/**
4153 * Emits adding a 32-bit GPR to another, storing the result in the first.
4154 * @note The AMD64 version sets flags.
4155 */
4156DECL_FORCE_INLINE(uint32_t)
4157iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4158{
4159#if defined(RT_ARCH_AMD64)
4160 /* add Gv,Ev */
4161 if (iGprDst >= 8 || iGprAddend >= 8)
4162 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
4163 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
4164 pCodeBuf[off++] = 0x03;
4165 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4166
4167#elif defined(RT_ARCH_ARM64)
4168 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
4169
4170#else
4171# error "Port me"
4172#endif
4173 return off;
4174}
4175
4176
4177/**
4178 * Emits adding a 32-bit GPR to another, storing the result in the first.
4179 * @note The AMD64 version sets flags.
4180 */
4181DECL_INLINE_THROW(uint32_t)
4182iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4183{
4184#if defined(RT_ARCH_AMD64)
4185 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4186#elif defined(RT_ARCH_ARM64)
4187 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4188#else
4189# error "Port me"
4190#endif
4191 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4192 return off;
4193}
4194
4195
4196/**
4197 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4198 */
4199DECL_INLINE_THROW(uint32_t)
4200iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4201{
4202#if defined(RT_ARCH_AMD64)
4203 /* add or inc */
4204 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4205 if (iImm8 != 1)
4206 {
4207 pCodeBuf[off++] = 0x83;
4208 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4209 pCodeBuf[off++] = (uint8_t)iImm8;
4210 }
4211 else
4212 {
4213 pCodeBuf[off++] = 0xff;
4214 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4215 }
4216
4217#elif defined(RT_ARCH_ARM64)
4218 if (iImm8 >= 0)
4219 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
4220 else
4221 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
4222
4223#else
4224# error "Port me"
4225#endif
4226 return off;
4227}
4228
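/*
 * Example encodings for the 8-bit-immediate addition above, with iGprDst=0
 * (rax) and invented values:
 *      iImm8=1     48 ff c0        inc rax
 *      iImm8=-5    48 83 c0 fb     add rax, -5
 * On ARM64 the sign picks between 'add x0, x0, #1' and 'sub x0, x0, #5'.
 */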
4229
4230/**
4231 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4232 */
4233DECL_INLINE_THROW(uint32_t)
4234iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4235{
4236#if defined(RT_ARCH_AMD64)
4237 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4238#elif defined(RT_ARCH_ARM64)
4239 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4240#else
4241# error "Port me"
4242#endif
4243 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4244 return off;
4245}
4246
4247
4248/**
4249 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4250 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4251 */
4252DECL_FORCE_INLINE(uint32_t)
4253iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4254{
4255#if defined(RT_ARCH_AMD64)
4256 /* add or inc */
4257 if (iGprDst >= 8)
4258 pCodeBuf[off++] = X86_OP_REX_B;
4259 if (iImm8 != 1)
4260 {
4261 pCodeBuf[off++] = 0x83;
4262 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4263 pCodeBuf[off++] = (uint8_t)iImm8;
4264 }
4265 else
4266 {
4267 pCodeBuf[off++] = 0xff;
4268 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4269 }
4270
4271#elif defined(RT_ARCH_ARM64)
4272 if (iImm8 >= 0)
4273 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4274 else
4275 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4276
4277#else
4278# error "Port me"
4279#endif
4280 return off;
4281}
4282
4283
4284/**
4285 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4286 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4287 */
4288DECL_INLINE_THROW(uint32_t)
4289iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4290{
4291#if defined(RT_ARCH_AMD64)
4292 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4293#elif defined(RT_ARCH_ARM64)
4294 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4295#else
4296# error "Port me"
4297#endif
4298 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4299 return off;
4300}
4301
4302
4303/**
4304 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4305 *
4306 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4307 */
4308DECL_FORCE_INLINE_THROW(uint32_t)
4309iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4310{
4311#if defined(RT_ARCH_AMD64)
4312 if ((int8_t)iAddend == iAddend)
4313 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4314
4315 if ((int32_t)iAddend == iAddend)
4316 {
4317 /* add grp, imm32 */
4318 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4319 pCodeBuf[off++] = 0x81;
4320 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4321 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4322 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4323 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4324 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4325 }
4326 else if (iGprTmp != UINT8_MAX)
4327 {
4328 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4329
4330 /* add dst, tmpreg */
4331 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4332 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4333 pCodeBuf[off++] = 0x03;
4334 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4335 }
4336 else
4337# ifdef IEM_WITH_THROW_CATCH
4338 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4339# else
4340 AssertReleaseFailedStmt(off = UINT32_MAX);
4341# endif
4342
4343#elif defined(RT_ARCH_ARM64)
4344 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4345 if (uAbsAddend <= 0xffffffU)
4346 {
4347 bool const fSub = iAddend < 0;
4348 if (uAbsAddend > 0xfffU)
4349 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4350 false /*fSetFlags*/, true /*fShift12*/);
4351 if (uAbsAddend & 0xfffU)
4352 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4353 }
4354 else if (iGprTmp != UINT8_MAX)
4355 {
4356 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4357 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4358 }
4359 else
4360# ifdef IEM_WITH_THROW_CATCH
4361 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4362# else
4363 AssertReleaseFailedStmt(off = UINT32_MAX);
4364# endif
4365
4366#else
4367# error "Port me"
4368#endif
4369 return off;
4370}
4371
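/*
 * ARM64 sketch of the 24-bit addend split done by the emitter above
 * (illustrative values): iAddend=0x123456 with iGprDst=2 becomes
 *      add x2, x2, #0x123, lsl #12
 *      add x2, x2, #0x456
 * while larger addends are loaded into @a iGprTmp and added as a register.
 */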
4372
4373/**
4374 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4375 */
4376DECL_INLINE_THROW(uint32_t)
4377iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4378{
4379#if defined(RT_ARCH_AMD64)
4380 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4381 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4382
4383 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4384 {
4385 /* add grp, imm32 */
4386 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4387 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4388 pbCodeBuf[off++] = 0x81;
4389 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4390 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4391 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4392 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4393 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4394 }
4395 else
4396 {
4397 /* Best to use a temporary register to deal with this in the simplest way: */
4398 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4399
4400 /* add dst, tmpreg */
4401 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4402 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4403 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4404 pbCodeBuf[off++] = 0x03;
4405 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4406
4407 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4408 }
4409
4410#elif defined(RT_ARCH_ARM64)
4411 bool const fSub = iAddend < 0;
4412 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4413 if (uAbsAddend <= 0xffffffU)
4414 {
4415 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4416 if (uAbsAddend > 0xfffU)
4417 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4418 false /*fSetFlags*/, true /*fShift12*/);
4419 if (uAbsAddend & 0xfffU)
4420 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4421 }
4422 else
4423 {
4424 /* Use temporary register for the immediate. */
4425 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4426
4427 /* add gprdst, gprdst, tmpreg */
4428 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4429 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg);
4430
4431 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4432 }
4433
4434#else
4435# error "Port me"
4436#endif
4437 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4438 return off;
4439}
4440
4441
4442/**
4443 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4444 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4445 * @note ARM64: Addends with an absolute value in the range 0x000000..0xffffff
4446 * are emitted directly (negative values behave like a subtraction); larger
4447 * constants require @a iGprTmp and will assert / throw if it is missing.
4448 */
4449DECL_FORCE_INLINE_THROW(uint32_t)
4450iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4451{
4452#if defined(RT_ARCH_AMD64)
4453 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4454 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4455
4456 /* add grp, imm32 */
4457 if (iGprDst >= 8)
4458 pCodeBuf[off++] = X86_OP_REX_B;
4459 pCodeBuf[off++] = 0x81;
4460 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4461 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4462 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4463 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4464 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4465 RT_NOREF(iGprTmp);
4466
4467#elif defined(RT_ARCH_ARM64)
4468 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4469 if (uAbsAddend <= 0xffffffU)
4470 {
4471 bool const fSub = iAddend < 0;
4472 if (uAbsAddend > 0xfffU)
4473 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4474 false /*fSetFlags*/, true /*fShift12*/);
4475 if (uAbsAddend & 0xfffU)
4476 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4477 }
4478 else if (iGprTmp != UINT8_MAX)
4479 {
4480 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, iAddend);
4481 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4482 }
4483 else
4484# ifdef IEM_WITH_THROW_CATCH
4485 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4486# else
4487 AssertReleaseFailedStmt(off = UINT32_MAX);
4488# endif
4489
4490#else
4491# error "Port me"
4492#endif
4493 return off;
4494}
4495
4496
4497/**
4498 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4499 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4500 */
4501DECL_INLINE_THROW(uint32_t)
4502iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4503{
4504#if defined(RT_ARCH_AMD64)
4505 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4506
4507#elif defined(RT_ARCH_ARM64)
4508 bool const fSub = iAddend < 0;
4509 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4510 if (uAbsAddend <= 0xffffffU)
4511 {
4512 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4513 if (uAbsAddend > 0xfffU)
4514 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4515 false /*fSetFlags*/, true /*fShift12*/);
4516 if (uAbsAddend & 0xfffU)
4517 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4518 }
4519 else
4520 {
4521 /* Use temporary register for the immediate. */
4522 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4523
4524 /* add gprdst, gprdst, tmpreg */
4525 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4526 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4527
4528 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4529 }
4530
4531#else
4532# error "Port me"
4533#endif
4534 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4535 return off;
4536}
4537
4538
4539/**
4540 * Emits a 16-bit GPR add with a signed immediate addend.
4541 *
4542 * This will optimize using INC/DEC/whatever, and the ARM64 version will not
4543 * set flags, so it is not suitable as a base for conditional jumps.
4544 *
4545 * @note AMD64: Will only update the lower 16 bits of the register.
4546 * @note ARM64: Will update the entire register.
4547 * @sa iemNativeEmitSubGpr16ImmEx
4548 */
4549DECL_FORCE_INLINE(uint32_t)
4550iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend)
4551{
4552#ifdef RT_ARCH_AMD64
4553 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4554 if (iGprDst >= 8)
4555 pCodeBuf[off++] = X86_OP_REX_B;
4556 if (iAddend == 1)
4557 {
4558 /* inc r/m16 */
4559 pCodeBuf[off++] = 0xff;
4560 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4561 }
4562 else if (iAddend == -1)
4563 {
4564 /* dec r/m16 */
4565 pCodeBuf[off++] = 0xff;
4566 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4567 }
4568 else if ((int8_t)iAddend == iAddend)
4569 {
4570 /* add r/m16, imm8 */
4571 pCodeBuf[off++] = 0x83;
4572 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4573 pCodeBuf[off++] = (uint8_t)iAddend;
4574 }
4575 else
4576 {
4577 /* add r/m16, imm16 */
4578 pCodeBuf[off++] = 0x81;
4579 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4580 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4581 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4582 }
4583
4584#elif defined(RT_ARCH_ARM64)
4585 bool const fSub = iAddend < 0;
4586 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4587 if (uAbsAddend > 0xfffU)
4588 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4589 false /*fSetFlags*/, true /*fShift12*/);
4590 if (uAbsAddend & 0xfffU)
4591 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4592 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4593
4594#else
4595# error "Port me"
4596#endif
4597 return off;
4598}
4599
4600
4601
4602/**
4603 * Adds two 64-bit GPRs together, storing the result in a third register.
4604 */
4605DECL_FORCE_INLINE(uint32_t)
4606iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4607{
4608#ifdef RT_ARCH_AMD64
4609 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4610 {
4611 /** @todo consider LEA */
4612 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4613 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4614 }
4615 else
4616 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4617
4618#elif defined(RT_ARCH_ARM64)
4619 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4620
4621#else
4622# error "Port me!"
4623#endif
4624 return off;
4625}
4626
4627
4628
4629/**
4630 * Adds two 32-bit GPRs together, storing the result in a third register.
4631 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4632 */
4633DECL_FORCE_INLINE(uint32_t)
4634iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4635{
4636#ifdef RT_ARCH_AMD64
4637 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4638 {
4639 /** @todo consider LEA */
4640 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4641 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4642 }
4643 else
4644 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4645
4646#elif defined(RT_ARCH_ARM64)
4647 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4648
4649#else
4650# error "Port me!"
4651#endif
4652 return off;
4653}
4654
4655
4656/**
4657 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4658 * third register.
4659 *
4660 * @note The ARM64 version does not work for non-trivial constants if the
4661 * two registers are the same. Will assert / throw exception.
4662 */
4663DECL_FORCE_INLINE_THROW(uint32_t)
4664iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4665{
4666#ifdef RT_ARCH_AMD64
4667 /** @todo consider LEA */
4668 if ((int8_t)iImmAddend == iImmAddend)
4669 {
4670 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4671 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4672 }
4673 else
4674 {
4675 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4676 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4677 }
4678
4679#elif defined(RT_ARCH_ARM64)
4680 bool const fSub = iImmAddend < 0;
4681 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4682 if (uAbsImmAddend <= 0xfffU)
4683 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend);
4684 else if (uAbsImmAddend <= 0xffffffU)
4685 {
4686 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4687 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4688 if (uAbsImmAddend & 0xfffU)
4689 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & UINT32_C(0xfff));
4690 }
4691 else if (iGprDst != iGprAddend)
4692 {
4693 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4694 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4695 }
4696 else
4697# ifdef IEM_WITH_THROW_CATCH
4698 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4699# else
4700 AssertReleaseFailedStmt(off = UINT32_MAX);
4701# endif
4702
4703#else
4704# error "Port me!"
4705#endif
4706 return off;
4707}
4708
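/*
 * Editor's note on the ARM64 restriction above: when the constant fits
 * neither the plain nor the shifted 12-bit immediate form, the destination
 * register doubles as the staging register for the immediate, so with
 * iGprDst == iGprAddend there is no scratch register left and the code
 * asserts.  Hypothetical sketch of the fallback for distinct registers:
 *
 *      mov  x0, #<imm>             ; iemNativeEmitLoadGprImmEx
 *      add  x0, x0, x1             ; iemNativeEmitAddTwoGprsEx
 */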
4709
4710/**
4711 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4712 * third register.
4713 *
4714 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4715 *
4716 * @note The ARM64 version does not work for non-trivial constants if the
4717 * two registers are the same. Will assert / throw exception.
4718 */
4719DECL_FORCE_INLINE_THROW(uint32_t)
4720iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4721{
4722#ifdef RT_ARCH_AMD64
4723 /** @todo consider LEA */
4724 if ((int8_t)iImmAddend == iImmAddend)
4725 {
4726 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4727 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4728 }
4729 else
4730 {
4731 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4732 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4733 }
4734
4735#elif defined(RT_ARCH_ARM64)
4736 bool const fSub = iImmAddend < 0;
4737 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4738 if (uAbsImmAddend <= 0xfffU)
4739 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4740 else if (uAbsImmAddend <= 0xffffffU)
4741 {
4742 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4743 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4744 if (uAbsImmAddend & 0xfffU)
4745 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & 0xfff, false /*f64Bit*/);
4746 }
4747 else if (iGprDst != iGprAddend)
4748 {
4749 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4750 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4751 }
4752 else
4753# ifdef IEM_WITH_THROW_CATCH
4754 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4755# else
4756 AssertReleaseFailedStmt(off = UINT32_MAX);
4757# endif
4758
4759#else
4760# error "Port me!"
4761#endif
4762 return off;
4763}
4764
4765
4766/*********************************************************************************************************************************
4767* Unary Operations *
4768*********************************************************************************************************************************/
4769
4770/**
4771 * Emits code for two's complement negation of a 64-bit GPR.
4772 */
4773DECL_FORCE_INLINE_THROW(uint32_t)
4774iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4775{
4776#if defined(RT_ARCH_AMD64)
4777 /* neg Ev */
4778 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4779 pCodeBuf[off++] = 0xf7;
4780 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4781
4782#elif defined(RT_ARCH_ARM64)
4783 /* sub dst, xzr, dst */
4784 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4785
4786#else
4787# error "Port me"
4788#endif
4789 return off;
4790}
4791
4792
4793/**
4794 * Emits code for two's complement negation of a 64-bit GPR.
4795 */
4796DECL_INLINE_THROW(uint32_t)
4797iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4798{
4799#if defined(RT_ARCH_AMD64)
4800 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4801#elif defined(RT_ARCH_ARM64)
4802 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4803#else
4804# error "Port me"
4805#endif
4806 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4807 return off;
4808}
4809
4810
4811/**
4812 * Emits code for two's complement negation of a 32-bit GPR.
4813 * @note Bits 32 thru 63 are set to zero.
4814 */
4815DECL_FORCE_INLINE_THROW(uint32_t)
4816iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4817{
4818#if defined(RT_ARCH_AMD64)
4819 /* neg Ev */
4820 if (iGprDst >= 8)
4821 pCodeBuf[off++] = X86_OP_REX_B;
4822 pCodeBuf[off++] = 0xf7;
4823 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4824
4825#elif defined(RT_ARCH_ARM64)
4826 /* sub dst, xzr, dst */
4827 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4828
4829#else
4830# error "Port me"
4831#endif
4832 return off;
4833}
4834
4835
4836/**
4837 * Emits code for two's complement negation of a 32-bit GPR.
4838 * @note Bits 32 thru 63 are set to zero.
4839 */
4840DECL_INLINE_THROW(uint32_t)
4841iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4842{
4843#if defined(RT_ARCH_AMD64)
4844 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4845#elif defined(RT_ARCH_ARM64)
4846 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4847#else
4848# error "Port me"
4849#endif
4850 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4851 return off;
4852}
4853
4854
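/*
 * Editor's illustration (hypothetical value): two's complement negation is
 * emitted as NEG on AMD64 and as a subtraction from the zero register on
 * ARM64 (sub dst, xzr, dst).  E.g. a 32-bit register holding 5 becomes
 * 0xfffffffb, and with the 32-bit variants the upper half ends up zero.
 */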
4855
4856/*********************************************************************************************************************************
4857* Bit Operations *
4858*********************************************************************************************************************************/
4859
4860/**
4861 * Emits code for clearing bits 16 thru 63 in the GPR.
4862 */
4863DECL_INLINE_THROW(uint32_t)
4864iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4865{
4866#if defined(RT_ARCH_AMD64)
4867 /* movzx Gv,Ew */
4868 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4869 if (iGprDst >= 8)
4870 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4871 pbCodeBuf[off++] = 0x0f;
4872 pbCodeBuf[off++] = 0xb7;
4873 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4874
4875#elif defined(RT_ARCH_ARM64)
4876 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4877# if 1
4878 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4879# else
4880 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4881 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4882# endif
4883#else
4884# error "Port me"
4885#endif
4886 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4887 return off;
4888}
4889
4890
4891/**
4892 * Emits code for AND'ing two 64-bit GPRs.
4893 *
4894 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4895 * and ARM64 hosts.
4896 */
4897DECL_FORCE_INLINE(uint32_t)
4898iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4899{
4900#if defined(RT_ARCH_AMD64)
4901 /* and Gv, Ev */
4902 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4903 pCodeBuf[off++] = 0x23;
4904 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4905 RT_NOREF(fSetFlags);
4906
4907#elif defined(RT_ARCH_ARM64)
4908 if (!fSetFlags)
4909 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4910 else
4911 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4912
4913#else
4914# error "Port me"
4915#endif
4916 return off;
4917}
4918
4919
4920/**
4921 * Emits code for AND'ing two 64-bit GPRs.
4922 *
4923 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4924 * and ARM64 hosts.
4925 */
4926DECL_INLINE_THROW(uint32_t)
4927iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4928{
4929#if defined(RT_ARCH_AMD64)
4930 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4931#elif defined(RT_ARCH_ARM64)
4932 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4933#else
4934# error "Port me"
4935#endif
4936 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4937 return off;
4938}
4939
4940
4941/**
4942 * Emits code for AND'ing two 32-bit GPRs.
4943 */
4944DECL_FORCE_INLINE(uint32_t)
4945iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4946{
4947#if defined(RT_ARCH_AMD64)
4948 /* and Gv, Ev */
4949 if (iGprDst >= 8 || iGprSrc >= 8)
4950 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4951 pCodeBuf[off++] = 0x23;
4952 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4953 RT_NOREF(fSetFlags);
4954
4955#elif defined(RT_ARCH_ARM64)
4956 if (!fSetFlags)
4957 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4958 else
4959 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4960
4961#else
4962# error "Port me"
4963#endif
4964 return off;
4965}
4966
4967
4968/**
4969 * Emits code for AND'ing two 32-bit GPRs.
4970 */
4971DECL_INLINE_THROW(uint32_t)
4972iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4973{
4974#if defined(RT_ARCH_AMD64)
4975 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4976#elif defined(RT_ARCH_ARM64)
4977 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4978#else
4979# error "Port me"
4980#endif
4981 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4982 return off;
4983}
4984
4985
4986/**
4987 * Emits code for AND'ing a 64-bit GPR with a constant.
4988 *
4989 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4990 * and ARM64 hosts.
4991 */
4992DECL_INLINE_THROW(uint32_t)
4993iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4994{
4995#if defined(RT_ARCH_AMD64)
4996 if ((int64_t)uImm == (int8_t)uImm)
4997 {
4998 /* and Ev, imm8 */
4999 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5000 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5001 pbCodeBuf[off++] = 0x83;
5002 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5003 pbCodeBuf[off++] = (uint8_t)uImm;
5004 }
5005 else if ((int64_t)uImm == (int32_t)uImm)
5006 {
5007 /* and Ev, imm32 */
5008 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5009 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5010 pbCodeBuf[off++] = 0x81;
5011 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5012 pbCodeBuf[off++] = RT_BYTE1(uImm);
5013 pbCodeBuf[off++] = RT_BYTE2(uImm);
5014 pbCodeBuf[off++] = RT_BYTE3(uImm);
5015 pbCodeBuf[off++] = RT_BYTE4(uImm);
5016 }
5017 else
5018 {
5019 /* Use temporary register for the 64-bit immediate. */
5020 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5021 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
5022 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5023 }
5024 RT_NOREF(fSetFlags);
5025
5026#elif defined(RT_ARCH_ARM64)
5027 uint32_t uImmR = 0;
5028 uint32_t uImmNandS = 0;
5029 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5030 {
5031 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5032 if (!fSetFlags)
5033 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
5034 else
5035 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
5036 }
5037 else
5038 {
5039 /* Use temporary register for the 64-bit immediate. */
5040 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5041 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
5042 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5043 }
5044
5045#else
5046# error "Port me"
5047#endif
5048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5049 return off;
5050}
5051
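/*
 * Editor's summary (informal, hypothetical values): the
 * Armv8A64ConvertMask64ToImmRImmS / Armv8A64ConvertMask32ToImmRImmS calls in
 * this section test whether a constant is an ARM64 "logical immediate", i.e.
 * a rotated, replicated run of consecutive one bits.  0xffff and
 * 0x0000fffffffff000 qualify; 0x1234 does not, which is why every
 * immediate-operand path keeps a temporary-register fallback.
 */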
5052
5053/**
5054 * Emits code for AND'ing a 32-bit GPR with a constant.
5055 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5056 * @note For ARM64 this only supports @a uImm values that can be expressed using
5057 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
5058 * make sure this is possible!
5059 */
5060DECL_FORCE_INLINE_THROW(uint32_t)
5061iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
5062{
5063#if defined(RT_ARCH_AMD64)
5064 /* and Ev, imm */
5065 if (iGprDst >= 8)
5066 pCodeBuf[off++] = X86_OP_REX_B;
5067 if ((int32_t)uImm == (int8_t)uImm)
5068 {
5069 pCodeBuf[off++] = 0x83;
5070 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5071 pCodeBuf[off++] = (uint8_t)uImm;
5072 }
5073 else
5074 {
5075 pCodeBuf[off++] = 0x81;
5076 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5077 pCodeBuf[off++] = RT_BYTE1(uImm);
5078 pCodeBuf[off++] = RT_BYTE2(uImm);
5079 pCodeBuf[off++] = RT_BYTE3(uImm);
5080 pCodeBuf[off++] = RT_BYTE4(uImm);
5081 }
5082 RT_NOREF(fSetFlags);
5083
5084#elif defined(RT_ARCH_ARM64)
5085 uint32_t uImmR = 0;
5086 uint32_t uImmNandS = 0;
5087 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5088 {
5089 if (!fSetFlags)
5090 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5091 else
5092 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5093 }
5094 else
5095# ifdef IEM_WITH_THROW_CATCH
5096 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5097# else
5098 AssertReleaseFailedStmt(off = UINT32_MAX);
5099# endif
5100
5101#else
5102# error "Port me"
5103#endif
5104 return off;
5105}
5106
5107
5108/**
5109 * Emits code for AND'ing a 32-bit GPR with a constant.
5110 *
5111 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5112 */
5113DECL_INLINE_THROW(uint32_t)
5114iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
5115{
5116#if defined(RT_ARCH_AMD64)
5117 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
5118
5119#elif defined(RT_ARCH_ARM64)
5120 uint32_t uImmR = 0;
5121 uint32_t uImmNandS = 0;
5122 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5123 {
5124 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5125 if (!fSetFlags)
5126 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5127 else
5128 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5129 }
5130 else
5131 {
5132 /* Use temporary register for the 32-bit immediate. */
5133 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5134 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
5135 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5136 }
5137
5138#else
5139# error "Port me"
5140#endif
5141 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5142 return off;
5143}
5144
5145
5146/**
5147 * Emits code for AND'ing a 64-bit GPR with a constant.
5148 *
5149 * @note For ARM64, any complicated immediate w/o an AND/ANDS-compatible
5150 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc are
5151 * the same.
5152 */
5153DECL_FORCE_INLINE_THROW(uint32_t)
5154iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
5155 bool fSetFlags = false)
5156{
5157#if defined(RT_ARCH_AMD64)
5158 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
5159 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
5160 RT_NOREF(fSetFlags);
5161
5162#elif defined(RT_ARCH_ARM64)
5163 uint32_t uImmR = 0;
5164 uint32_t uImmNandS = 0;
5165 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5166 {
5167 if (!fSetFlags)
5168 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
5169 else
5170 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
5171 }
5172 else if (iGprDst != iGprSrc)
5173 {
5174 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
5175 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5176 }
5177 else
5178# ifdef IEM_WITH_THROW_CATCH
5179 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5180# else
5181 AssertReleaseFailedStmt(off = UINT32_MAX);
5182# endif
5183
5184#else
5185# error "Port me"
5186#endif
5187 return off;
5188}
5189
5190/**
5191 * Emits code for AND'ing a 32-bit GPR with a constant.
5192 *
5193 * @note For ARM64, any complicated immediate w/o an AND/ANDS-compatible
5194 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc are
5195 * the same.
5196 *
5197 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5198 */
5199DECL_FORCE_INLINE_THROW(uint32_t)
5200iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
5201 bool fSetFlags = false)
5202{
5203#if defined(RT_ARCH_AMD64)
5204 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5205 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5206 RT_NOREF(fSetFlags);
5207
5208#elif defined(RT_ARCH_ARM64)
5209 uint32_t uImmR = 0;
5210 uint32_t uImmNandS = 0;
5211 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5212 {
5213 if (!fSetFlags)
5214 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5215 else
5216 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5217 }
5218 else if (iGprDst != iGprSrc)
5219 {
5220 /* If a value of 64K or above has no more than 16 significant bits once the
5221 trailing zeros are shifted out, we can save an instruction by materializing
5222 the shifted value with MOVZ and folding the shift into the AND. We prefer
5223 the builtin ctz to our own, since the compiler folds it when uImm is a
5224 compile-time constant (often the case). Useful for the TLB lookup code. */
5225 if (uImm > 0xffffU)
5226 {
5227# if defined(__GNUC__)
5228 unsigned cTrailingZeros = __builtin_ctz(uImm);
5229# else
5230 unsigned cTrailingZeros = ASMBitFirstSetU32(uImm) - 1;
5231# endif
5232 if ((uImm >> cTrailingZeros) <= 0xffffU)
5233 {
5234 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprDst, uImm >> cTrailingZeros);
5235 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprSrc,
5236 iGprDst, true /*f64Bit*/, cTrailingZeros, kArmv8A64InstrShift_Lsl);
5237 return off;
5238 }
5239 }
5240 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5241 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5242 }
5243 else
5244# ifdef IEM_WITH_THROW_CATCH
5245 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5246# else
5247 AssertReleaseFailedStmt(off = UINT32_MAX);
5248# endif
5249
5250#else
5251# error "Port me"
5252#endif
5253 return off;
5254}
5255
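/*
 * Editor's worked example for the shifting trick above (hypothetical
 * registers): with uImm = 0x00ff0000 the trailing zero count is 16 and
 * uImm >> 16 == 0xff fits MOVZ, so instead of a multi-instruction immediate
 * load plus an AND the emitter produces just:
 *
 *      movz w0, #0xff
 *      and  x0, x1, x0, lsl #16
 */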
5256
5257/**
5258 * Emits code for OR'ing two 64-bit GPRs.
5259 */
5260DECL_FORCE_INLINE(uint32_t)
5261iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5262{
5263#if defined(RT_ARCH_AMD64)
5264 /* or Gv, Ev */
5265 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5266 pCodeBuf[off++] = 0x0b;
5267 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5268
5269#elif defined(RT_ARCH_ARM64)
5270 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5271
5272#else
5273# error "Port me"
5274#endif
5275 return off;
5276}
5277
5278
5279/**
5280 * Emits code for OR'ing two 64-bit GPRs.
5281 */
5282DECL_INLINE_THROW(uint32_t)
5283iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5284{
5285#if defined(RT_ARCH_AMD64)
5286 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5287#elif defined(RT_ARCH_ARM64)
5288 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5289#else
5290# error "Port me"
5291#endif
5292 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5293 return off;
5294}
5295
5296
5297/**
5298 * Emits code for OR'ing two 32-bit GPRs.
5299 * @note Bits 63:32 of the destination GPR will be cleared.
5300 */
5301DECL_FORCE_INLINE(uint32_t)
5302iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5303{
5304#if defined(RT_ARCH_AMD64)
5305 /* or Gv, Ev */
5306 if (iGprDst >= 8 || iGprSrc >= 8)
5307 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5308 pCodeBuf[off++] = 0x0b;
5309 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5310
5311#elif defined(RT_ARCH_ARM64)
5312 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5313
5314#else
5315# error "Port me"
5316#endif
5317 return off;
5318}
5319
5320
5321/**
5322 * Emits code for OR'ing two 32-bit GPRs.
5323 * @note Bits 63:32 of the destination GPR will be cleared.
5324 */
5325DECL_INLINE_THROW(uint32_t)
5326iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5327{
5328#if defined(RT_ARCH_AMD64)
5329 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5330#elif defined(RT_ARCH_ARM64)
5331 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5332#else
5333# error "Port me"
5334#endif
5335 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5336 return off;
5337}
5338
5339
5340/**
5341 * Emits code for OR'ing a 64-bit GPR with a constant.
5342 */
5343DECL_INLINE_THROW(uint32_t)
5344iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5345{
5346#if defined(RT_ARCH_AMD64)
5347 if ((int64_t)uImm == (int8_t)uImm)
5348 {
5349 /* or Ev, imm8 */
5350 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5351 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5352 pbCodeBuf[off++] = 0x83;
5353 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5354 pbCodeBuf[off++] = (uint8_t)uImm;
5355 }
5356 else if ((int64_t)uImm == (int32_t)uImm)
5357 {
5358 /* or Ev, imm32 */
5359 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5360 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5361 pbCodeBuf[off++] = 0x81;
5362 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5363 pbCodeBuf[off++] = RT_BYTE1(uImm);
5364 pbCodeBuf[off++] = RT_BYTE2(uImm);
5365 pbCodeBuf[off++] = RT_BYTE3(uImm);
5366 pbCodeBuf[off++] = RT_BYTE4(uImm);
5367 }
5368 else
5369 {
5370 /* Use temporary register for the 64-bit immediate. */
5371 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5372 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5373 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5374 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5375 }
5376
5377#elif defined(RT_ARCH_ARM64)
5378 uint32_t uImmR = 0;
5379 uint32_t uImmNandS = 0;
5380 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5381 {
5382 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5383 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5384 }
5385 else
5386 {
5387 /* Use temporary register for the 64-bit immediate. */
5388 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5389 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5390 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5391 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5392 }
5393
5394#else
5395# error "Port me"
5396#endif
5397 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5398 return off;
5399}
5400
5401
5402/**
5403 * Emits code for OR'ing a 32-bit GPR with a constant.
5404 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5405 * @note For ARM64 this only supports @a uImm values that can be expressed using
5406 * the two 6-bit immediates of the ORR instruction. The caller must make
5407 * sure this is possible!
5408 */
5409DECL_FORCE_INLINE_THROW(uint32_t)
5410iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5411{
5412#if defined(RT_ARCH_AMD64)
5413 /* or Ev, imm */
5414 if (iGprDst >= 8)
5415 pCodeBuf[off++] = X86_OP_REX_B;
5416 if ((int32_t)uImm == (int8_t)uImm)
5417 {
5418 pCodeBuf[off++] = 0x83;
5419 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5420 pCodeBuf[off++] = (uint8_t)uImm;
5421 }
5422 else
5423 {
5424 pCodeBuf[off++] = 0x81;
5425 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5426 pCodeBuf[off++] = RT_BYTE1(uImm);
5427 pCodeBuf[off++] = RT_BYTE2(uImm);
5428 pCodeBuf[off++] = RT_BYTE3(uImm);
5429 pCodeBuf[off++] = RT_BYTE4(uImm);
5430 }
5431
5432#elif defined(RT_ARCH_ARM64)
5433 uint32_t uImmR = 0;
5434 uint32_t uImmNandS = 0;
5435 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5436 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5437 else
5438# ifdef IEM_WITH_THROW_CATCH
5439 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5440# else
5441 AssertReleaseFailedStmt(off = UINT32_MAX);
5442# endif
5443
5444#else
5445# error "Port me"
5446#endif
5447 return off;
5448}
5449
5450
5451/**
5452 * Emits code for OR'ing a 32-bit GPR with a constant.
5453 *
5454 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5455 */
5456DECL_INLINE_THROW(uint32_t)
5457iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5458{
5459#if defined(RT_ARCH_AMD64)
5460 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5461
5462#elif defined(RT_ARCH_ARM64)
5463 uint32_t uImmR = 0;
5464 uint32_t uImmNandS = 0;
5465 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5466 {
5467 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5468 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5469 }
5470 else
5471 {
5472 /* Use temporary register for the 32-bit immediate. */
5473 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5474 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5475 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5476 }
5477
5478#else
5479# error "Port me"
5480#endif
5481 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5482 return off;
5483}
5484
5485
5486
5487/**
5488 * ORs two 64-bit GPRs together, storing the result in a third register.
5489 */
5490DECL_FORCE_INLINE(uint32_t)
5491iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5492{
5493#ifdef RT_ARCH_AMD64
5494 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5495 {
5496 /** @todo consider LEA */
5497 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5498 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5499 }
5500 else
5501 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5502
5503#elif defined(RT_ARCH_ARM64)
5504 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5505
5506#else
5507# error "Port me!"
5508#endif
5509 return off;
5510}
5511
5512
5513
5514/**
5515 * ORs two 32-bit GPRs together, storing the result in a third register.
5516 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5517 */
5518DECL_FORCE_INLINE(uint32_t)
5519iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5520{
5521#ifdef RT_ARCH_AMD64
5522 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5523 {
5524 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5525 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5526 }
5527 else
5528 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5529
5530#elif defined(RT_ARCH_ARM64)
5531 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5532
5533#else
5534# error "Port me!"
5535#endif
5536 return off;
5537}
5538
5539
5540/**
5541 * Emits code for XOR'ing two 64-bit GPRs.
5542 */
5543DECL_INLINE_THROW(uint32_t)
5544iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5545{
5546#if defined(RT_ARCH_AMD64)
5547 /* xor Gv, Ev */
5548 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5549 pCodeBuf[off++] = 0x33;
5550 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5551
5552#elif defined(RT_ARCH_ARM64)
5553 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5554
5555#else
5556# error "Port me"
5557#endif
5558 return off;
5559}
5560
5561
5562/**
5563 * Emits code for XOR'ing two 64-bit GPRs.
5564 */
5565DECL_INLINE_THROW(uint32_t)
5566iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5567{
5568#if defined(RT_ARCH_AMD64)
5569 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5570#elif defined(RT_ARCH_ARM64)
5571 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5572#else
5573# error "Port me"
5574#endif
5575 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5576 return off;
5577}
5578
5579
5580/**
5581 * Emits code for XOR'ing two 32-bit GPRs.
5582 */
5583DECL_INLINE_THROW(uint32_t)
5584iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5585{
5586#if defined(RT_ARCH_AMD64)
5587 /* xor Gv, Ev */
5588 if (iGprDst >= 8 || iGprSrc >= 8)
5589 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5590 pCodeBuf[off++] = 0x33;
5591 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5592
5593#elif defined(RT_ARCH_ARM64)
5594 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5595
5596#else
5597# error "Port me"
5598#endif
5599 return off;
5600}
5601
5602
5603/**
5604 * Emits code for XOR'ing two 32-bit GPRs.
5605 */
5606DECL_INLINE_THROW(uint32_t)
5607iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5608{
5609#if defined(RT_ARCH_AMD64)
5610 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5611#elif defined(RT_ARCH_ARM64)
5612 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5613#else
5614# error "Port me"
5615#endif
5616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5617 return off;
5618}
5619
5620
5621/**
5622 * Emits code for XOR'ing a 32-bit GPR with a constant.
5623 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5624 * @note For ARM64 this only supports @a uImm values that can be expressed using
5625 * the two 6-bit immediates of the EOR instruction. The caller must make
5626 * sure this is possible!
5627 */
5628DECL_FORCE_INLINE_THROW(uint32_t)
5629iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5630{
5631#if defined(RT_ARCH_AMD64)
5632 /* xor Ev, imm */
5633 if (iGprDst >= 8)
5634 pCodeBuf[off++] = X86_OP_REX_B;
5635 if ((int32_t)uImm == (int8_t)uImm)
5636 {
5637 pCodeBuf[off++] = 0x83;
5638 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5639 pCodeBuf[off++] = (uint8_t)uImm;
5640 }
5641 else
5642 {
5643 pCodeBuf[off++] = 0x81;
5644 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5645 pCodeBuf[off++] = RT_BYTE1(uImm);
5646 pCodeBuf[off++] = RT_BYTE2(uImm);
5647 pCodeBuf[off++] = RT_BYTE3(uImm);
5648 pCodeBuf[off++] = RT_BYTE4(uImm);
5649 }
5650
5651#elif defined(RT_ARCH_ARM64)
5652 uint32_t uImmR = 0;
5653 uint32_t uImmNandS = 0;
5654 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5655 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5656 else
5657# ifdef IEM_WITH_THROW_CATCH
5658 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5659# else
5660 AssertReleaseFailedStmt(off = UINT32_MAX);
5661# endif
5662
5663#else
5664# error "Port me"
5665#endif
5666 return off;
5667}
5668
5669
5670/**
5671 * Emits code for XOR'ing a 32-bit GPR with a constant.
5672 */
5673DECL_INLINE_THROW(uint32_t)
5674iemNativeEmitXorGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5675{
5676#if defined(RT_ARCH_AMD64)
5677 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5678#elif defined(RT_ARCH_ARM64)
5679 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, uImm);
5680#else
5681# error "Port me"
5682#endif
5683 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5684 return off;
5685}
5686
5687
5688/*********************************************************************************************************************************
5689* Shifting *
5690*********************************************************************************************************************************/
5691
5692/**
5693 * Emits code for shifting a GPR a fixed number of bits to the left.
5694 */
5695DECL_FORCE_INLINE(uint32_t)
5696iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5697{
5698 Assert(cShift > 0 && cShift < 64);
5699
5700#if defined(RT_ARCH_AMD64)
5701 /* shl dst, cShift */
5702 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5703 if (cShift != 1)
5704 {
5705 pCodeBuf[off++] = 0xc1;
5706 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5707 pCodeBuf[off++] = cShift;
5708 }
5709 else
5710 {
5711 pCodeBuf[off++] = 0xd1;
5712 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5713 }
5714
5715#elif defined(RT_ARCH_ARM64)
5716 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5717
5718#else
5719# error "Port me"
5720#endif
5721 return off;
5722}
5723
5724
5725/**
5726 * Emits code for shifting a GPR a fixed number of bits to the left.
5727 */
5728DECL_INLINE_THROW(uint32_t)
5729iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5730{
5731#if defined(RT_ARCH_AMD64)
5732 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5733#elif defined(RT_ARCH_ARM64)
5734 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5735#else
5736# error "Port me"
5737#endif
5738 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5739 return off;
5740}
5741
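/*
 * Editor's encoding note (illustrative bytes): on AMD64 the shift-by-one
 * form 0xD1 is one byte shorter than the 0xC1 ib form, which is why the
 * emitters in this section special-case cShift == 1:
 *
 *      48 d1 e0        shl rax, 1
 *      48 c1 e0 05     shl rax, 5
 */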
5742
5743/**
5744 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5745 */
5746DECL_FORCE_INLINE(uint32_t)
5747iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5748{
5749 Assert(cShift > 0 && cShift < 32);
5750
5751#if defined(RT_ARCH_AMD64)
5752 /* shl dst, cShift */
5753 if (iGprDst >= 8)
5754 pCodeBuf[off++] = X86_OP_REX_B;
5755 if (cShift != 1)
5756 {
5757 pCodeBuf[off++] = 0xc1;
5758 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5759 pCodeBuf[off++] = cShift;
5760 }
5761 else
5762 {
5763 pCodeBuf[off++] = 0xd1;
5764 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5765 }
5766
5767#elif defined(RT_ARCH_ARM64)
5768 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5769
5770#else
5771# error "Port me"
5772#endif
5773 return off;
5774}
5775
5776
5777/**
5778 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5779 */
5780DECL_INLINE_THROW(uint32_t)
5781iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5782{
5783#if defined(RT_ARCH_AMD64)
5784 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5785#elif defined(RT_ARCH_ARM64)
5786 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5787#else
5788# error "Port me"
5789#endif
5790 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5791 return off;
5792}
5793
5794
5795/**
5796 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5797 */
5798DECL_FORCE_INLINE(uint32_t)
5799iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5800{
5801 Assert(cShift > 0 && cShift < 64);
5802
5803#if defined(RT_ARCH_AMD64)
5804 /* shr dst, cShift */
5805 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5806 if (cShift != 1)
5807 {
5808 pCodeBuf[off++] = 0xc1;
5809 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5810 pCodeBuf[off++] = cShift;
5811 }
5812 else
5813 {
5814 pCodeBuf[off++] = 0xd1;
5815 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5816 }
5817
5818#elif defined(RT_ARCH_ARM64)
5819 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5820
5821#else
5822# error "Port me"
5823#endif
5824 return off;
5825}
5826
5827
5828/**
5829 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5830 */
5831DECL_INLINE_THROW(uint32_t)
5832iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5833{
5834#if defined(RT_ARCH_AMD64)
5835 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5836#elif defined(RT_ARCH_ARM64)
5837 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5838#else
5839# error "Port me"
5840#endif
5841 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5842 return off;
5843}
5844
5845
5846/**
5847 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5848 * right.
5849 */
5850DECL_FORCE_INLINE(uint32_t)
5851iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5852{
5853 Assert(cShift > 0 && cShift < 32);
5854
5855#if defined(RT_ARCH_AMD64)
5856 /* shr dst, cShift */
5857 if (iGprDst >= 8)
5858 pCodeBuf[off++] = X86_OP_REX_B;
5859 if (cShift != 1)
5860 {
5861 pCodeBuf[off++] = 0xc1;
5862 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5863 pCodeBuf[off++] = cShift;
5864 }
5865 else
5866 {
5867 pCodeBuf[off++] = 0xd1;
5868 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5869 }
5870
5871#elif defined(RT_ARCH_ARM64)
5872 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5873
5874#else
5875# error "Port me"
5876#endif
5877 return off;
5878}
5879
5880
5881/**
5882 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5883 * right.
5884 */
5885DECL_INLINE_THROW(uint32_t)
5886iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5887{
5888#if defined(RT_ARCH_AMD64)
5889 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5890#elif defined(RT_ARCH_ARM64)
5891 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5892#else
5893# error "Port me"
5894#endif
5895 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5896 return off;
5897}
5898
5899
5900/**
5901 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5902 * right and assigning it to a different GPR.
5903 */
5904DECL_INLINE_THROW(uint32_t)
5905iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5906{
5907 Assert(cShift > 0); Assert(cShift < 32);
5908#if defined(RT_ARCH_AMD64)
5909 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5910 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5911
5912#elif defined(RT_ARCH_ARM64)
5913 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5914
5915#else
5916# error "Port me"
5917#endif
5918 return off;
5919}
5920
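/*
 * Editor's note: on ARM64 the copy-and-shift above folds into a single LSR
 * with distinct source and destination (lsr wDst, wSrc, #cShift), whereas
 * the plain AMD64 SHR shifts in place, hence the mov + shr pair.
 */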
5921
5922/**
5923 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5924 */
5925DECL_FORCE_INLINE(uint32_t)
5926iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5927{
5928 Assert(cShift > 0 && cShift < 64);
5929
5930#if defined(RT_ARCH_AMD64)
5931 /* sar dst, cShift */
5932 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5933 if (cShift != 1)
5934 {
5935 pCodeBuf[off++] = 0xc1;
5936 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5937 pCodeBuf[off++] = cShift;
5938 }
5939 else
5940 {
5941 pCodeBuf[off++] = 0xd1;
5942 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5943 }
5944
5945#elif defined(RT_ARCH_ARM64)
5946 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);
5947
5948#else
5949# error "Port me"
5950#endif
5951 return off;
5952}
5953
5954
5955/**
5956 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5957 */
5958DECL_INLINE_THROW(uint32_t)
5959iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5960{
5961#if defined(RT_ARCH_AMD64)
5962 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5963#elif defined(RT_ARCH_ARM64)
5964 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5965#else
5966# error "Port me"
5967#endif
5968 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5969 return off;
5970}
5971
5972
5973/**
5974 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5975 */
5976DECL_FORCE_INLINE(uint32_t)
5977iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5978{
5979 Assert(cShift > 0 && cShift < 32);
5980
5981#if defined(RT_ARCH_AMD64)
5982 /* sar dst, cShift */
5983 if (iGprDst >= 8)
5984 pCodeBuf[off++] = X86_OP_REX_B;
5985 if (cShift != 1)
5986 {
5987 pCodeBuf[off++] = 0xc1;
5988 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5989 pCodeBuf[off++] = cShift;
5990 }
5991 else
5992 {
5993 pCodeBuf[off++] = 0xd1;
5994 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5995 }
5996
5997#elif defined(RT_ARCH_ARM64)
5998 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5999
6000#else
6001# error "Port me"
6002#endif
6003 return off;
6004}
6005
6006
6007/**
6008 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
6009 */
6010DECL_INLINE_THROW(uint32_t)
6011iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6012{
6013#if defined(RT_ARCH_AMD64)
6014 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
6015#elif defined(RT_ARCH_ARM64)
6016 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
6017#else
6018# error "Port me"
6019#endif
6020 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6021 return off;
6022}
6023
6024
6025/**
6026 * Emits code for rotating a GPR a fixed number of bits to the left.
6027 */
6028DECL_FORCE_INLINE(uint32_t)
6029iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6030{
6031 Assert(cShift > 0 && cShift < 64);
6032
6033#if defined(RT_ARCH_AMD64)
6034 /* rol dst, cShift */
6035 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
6036 if (cShift != 1)
6037 {
6038 pCodeBuf[off++] = 0xc1;
6039 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
6040 pCodeBuf[off++] = cShift;
6041 }
6042 else
6043 {
6044 pCodeBuf[off++] = 0xd1;
6045 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
6046 }
6047
6048#elif defined(RT_ARCH_ARM64)
6049 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
6050
6051#else
6052# error "Port me"
6053#endif
6054 return off;
6055}
6056
6057
6058#if defined(RT_ARCH_AMD64)
6059/**
6060 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
6061 */
6062DECL_FORCE_INLINE(uint32_t)
6063iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6064{
6065 Assert(cShift > 0 && cShift < 32);
6066
6067 /* rcl dst, cShift */
6068 if (iGprDst >= 8)
6069 pCodeBuf[off++] = X86_OP_REX_B;
6070 if (cShift != 1)
6071 {
6072 pCodeBuf[off++] = 0xc1;
6073 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
6074 pCodeBuf[off++] = cShift;
6075 }
6076 else
6077 {
6078 pCodeBuf[off++] = 0xd1;
6079 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
6080 }
6081
6082 return off;
6083}
6084#endif /* RT_ARCH_AMD64 */
6085
6086
6087
6088/**
6089 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
6090 * @note Only bits 15:0 hold the result; AMD64 leaves bits 63:16 untouched, while ARM64 also byte-swaps bits 31:16 and clears bits 63:32.
6091 */
6092DECL_FORCE_INLINE(uint32_t)
6093iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6094{
6095#if defined(RT_ARCH_AMD64)
6096 /*
6097 * There is no bswap r16 on x86 (the encoding exists, but its result is
6098 * undefined), so use a rol instead (gcc -O2 does the same).
6099 *
6100 * rol r16, 0x8
6101 */
6102 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6103 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6104 if (iGpr >= 8)
6105 pbCodeBuf[off++] = X86_OP_REX_B;
6106 pbCodeBuf[off++] = 0xc1;
6107 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
6108 pbCodeBuf[off++] = 0x08;
6109#elif defined(RT_ARCH_ARM64)
6110 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6111
6112 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
6113#else
6114# error "Port me"
6115#endif
6116
6117 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6118 return off;
6119}
6120
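/*
 * Editor's illustration (hypothetical value): with ax = 0x1234 the emitted
 * "rol ax, 8" (bytes 66 c1 c0 08 for ax) swaps the two low bytes to give
 * 0x3412, exactly what a 16-bit byte-order reversal should produce.
 */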
6121
6122/**
6123 * Emits code for reversing the byte order in a 32-bit GPR.
6124 * @note Bits 63:32 of the destination GPR will be cleared.
6125 */
6126DECL_FORCE_INLINE(uint32_t)
6127iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6128{
6129#if defined(RT_ARCH_AMD64)
6130 /* bswap r32 */
6131 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6132
6133 if (iGpr >= 8)
6134 pbCodeBuf[off++] = X86_OP_REX_B;
6135 pbCodeBuf[off++] = 0x0f;
6136 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
6137#elif defined(RT_ARCH_ARM64)
6138 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6139
6140 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
6141#else
6142# error "Port me"
6143#endif
6144
6145 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6146 return off;
6147}
6148
6149
6150/**
6151 * Emits code for reversing the byte order in a 64-bit GPR.
6152 */
6153DECL_FORCE_INLINE(uint32_t)
6154iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6155{
6156#if defined(RT_ARCH_AMD64)
6157 /* bswap r64 */
6158 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6159
6160 if (iGpr >= 8)
6161 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
6162 else
6163 pbCodeBuf[off++] = X86_OP_REX_W;
6164 pbCodeBuf[off++] = 0x0f;
6165 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
6166#elif defined(RT_ARCH_ARM64)
6167 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6168
6169 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
6170#else
6171# error "Port me"
6172#endif
6173
6174 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6175 return off;
6176}
6177
6178
6179/*********************************************************************************************************************************
6180* Bitfield manipulation *
6181*********************************************************************************************************************************/
6182
6183/**
6184 * Emits code for clearing bit @a iBit in 32-bit GPR @a iGpr.
6185 */
6186DECL_FORCE_INLINE(uint32_t)
6187iemNativeEmitBitClearInGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const iGpr, uint8_t iBit)
6188{
6189 Assert(iBit < 32);
6190
6191#if defined(RT_ARCH_AMD64)
6192 /* btr r32, imm8 */
6193 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6194
6195 if (iGpr >= 8)
6196 pbCodeBuf[off++] = X86_OP_REX_B;
6197 pbCodeBuf[off++] = 0x0f;
6198 pbCodeBuf[off++] = 0xba;
6199 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGpr & 7);
6200 pbCodeBuf[off++] = iBit;
6201#elif defined(RT_ARCH_ARM64)
6202 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6203
6204 pu32CodeBuf[off++] = Armv8A64MkInstrBfc(iGpr, iBit /*offFirstBit*/, 1 /*cBits*/, true /*f64Bit*/);
6205#else
6206# error "Port me"
6207#endif
6208
6209 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6210 return off;
6211}
6212
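/*
 * Editor's illustration (hypothetical register and bit): clearing bit 5
 * turns 0x3f into 0x1f; AMD64 does this with "btr r32, imm8" while ARM64
 * uses a one-bit BFC (bitfield clear) at the same position.
 */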
6213
6214/*********************************************************************************************************************************
6215* Compare and Testing *
6216*********************************************************************************************************************************/
6217
6218
6219#ifdef RT_ARCH_ARM64
6220/**
6221 * Emits an ARM64 compare instruction.
6222 */
6223DECL_INLINE_THROW(uint32_t)
6224iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
6225 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
6226{
6227 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6228 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
6229 f64Bit, true /*fSetFlags*/, cShift, enmShift);
6230 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6231 return off;
6232}
6233#endif
6234
6235
6236/**
6237 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6238 * with conditional instructions.
6239 */
6240DECL_FORCE_INLINE(uint32_t)
6241iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6242{
6243#ifdef RT_ARCH_AMD64
6244 /* cmp Gv, Ev */
6245 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6246 pCodeBuf[off++] = 0x3b;
6247 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6248
6249#elif defined(RT_ARCH_ARM64)
6250 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
6251
6252#else
6253# error "Port me!"
6254#endif
6255 return off;
6256}
6257
6258
6259/**
6260 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6261 * with conditional instructions.
6262 */
6263DECL_INLINE_THROW(uint32_t)
6264iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6265{
6266#ifdef RT_ARCH_AMD64
6267 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6268#elif defined(RT_ARCH_ARM64)
6269 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6270#else
6271# error "Port me!"
6272#endif
6273 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6274 return off;
6275}
6276
6277
6278/**
6279 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6280 * with conditional instructions.
6281 */
6282DECL_FORCE_INLINE(uint32_t)
6283iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6284{
6285#ifdef RT_ARCH_AMD64
6286 /* cmp Gv, Ev */
6287 if (iGprLeft >= 8 || iGprRight >= 8)
6288 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6289 pCodeBuf[off++] = 0x3b;
6290 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6291
6292#elif defined(RT_ARCH_ARM64)
6293 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
6294
6295#else
6296# error "Port me!"
6297#endif
6298 return off;
6299}
6300
6301
6302/**
6303 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6304 * with conditional instructions.
6305 */
6306DECL_INLINE_THROW(uint32_t)
6307iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6308{
6309#ifdef RT_ARCH_AMD64
6310 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6311#elif defined(RT_ARCH_ARM64)
6312 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6313#else
6314# error "Port me!"
6315#endif
6316 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6317 return off;
6318}
6319
6320
6321/**
 * Emits a compare of a 64-bit GPR with a constant value, setting status
 * flags/whatever for use with a conditional instruction.
6324 */
6325DECL_INLINE_THROW(uint32_t)
6326iemNativeEmitCmpGprWithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft,
6327 uint64_t uImm, uint8_t idxTmpReg = UINT8_MAX)
6328{
6329#ifdef RT_ARCH_AMD64
6330 if ((int8_t)uImm == (int64_t)uImm)
6331 {
6332 /* cmp Ev, Ib */
6333 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6334 pCodeBuf[off++] = 0x83;
6335 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6336 pCodeBuf[off++] = (uint8_t)uImm;
6337 return off;
6338 }
6339 if ((int32_t)uImm == (int64_t)uImm)
6340 {
6341 /* cmp Ev, imm */
6342 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6343 pCodeBuf[off++] = 0x81;
6344 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6345 pCodeBuf[off++] = RT_BYTE1(uImm);
6346 pCodeBuf[off++] = RT_BYTE2(uImm);
6347 pCodeBuf[off++] = RT_BYTE3(uImm);
6348 pCodeBuf[off++] = RT_BYTE4(uImm);
6349 return off;
6350 }
6351
6352#elif defined(RT_ARCH_ARM64)
6353 if (uImm < _4K)
6354 {
6355 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6356 true /*64Bit*/, true /*fSetFlags*/);
6357 return off;
6358 }
6359 if ((uImm & ~(uint64_t)0xfff000) == 0)
6360 {
6361 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6362 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6363 return off;
6364 }
6365
6366#else
6367# error "Port me!"
6368#endif
6369
6370 if (idxTmpReg != UINT8_MAX)
6371 {
6372 /* Use temporary register for the immediate. */
6373 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpReg, uImm);
6374 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, iGprLeft, idxTmpReg);
6375 }
6376 else
6377# ifdef IEM_WITH_THROW_CATCH
6378 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6379# else
6380 AssertReleaseFailedStmt(off = UINT32_MAX);
6381# endif
6382
6383 return off;
6384}
6385
6386
6387/**
 * Emits a compare of a 64-bit GPR with a constant value, setting status
 * flags/whatever for use with a conditional instruction.
6390 */
6391DECL_INLINE_THROW(uint32_t)
6392iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
6393{
6394#ifdef RT_ARCH_AMD64
6395 if ((int8_t)uImm == (int64_t)uImm)
6396 {
6397 /* cmp Ev, Ib */
6398 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6399 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6400 pbCodeBuf[off++] = 0x83;
6401 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6402 pbCodeBuf[off++] = (uint8_t)uImm;
6403 }
6404 else if ((int32_t)uImm == (int64_t)uImm)
6405 {
6406 /* cmp Ev, imm */
6407 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6408 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6409 pbCodeBuf[off++] = 0x81;
6410 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6411 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6412 pbCodeBuf[off++] = RT_BYTE1(uImm);
6413 pbCodeBuf[off++] = RT_BYTE2(uImm);
6414 pbCodeBuf[off++] = RT_BYTE3(uImm);
6415 pbCodeBuf[off++] = RT_BYTE4(uImm);
6416 }
6417 else
6418 {
6419 /* Use temporary register for the immediate. */
6420 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6421 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6422 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6423 }
6424
6425#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are clever things we can do here... */
6427 if (uImm < _4K)
6428 {
6429 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6430 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6431 true /*64Bit*/, true /*fSetFlags*/);
6432 }
6433 else if ((uImm & ~(uint64_t)0xfff000) == 0)
6434 {
6435 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6436 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6437 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6438 }
6439 else
6440 {
6441 /* Use temporary register for the immediate. */
6442 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6443 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6444 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6445 }
6446
6447#else
6448# error "Port me!"
6449#endif
6450
6451 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6452 return off;
6453}
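
/**
 * A minimal usage sketch for the above (not from the original source): the
 * size of the immediate picks the encoding automatically.  The GPR index
 * @c idxRegValue is hypothetical.
 * @code
 *    off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegValue, 0x42);       // AMD64: cmp r64, imm8;  ARM64: subs xzr, xN, #0x42
 *    off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegValue, 0x12345678); // AMD64: cmp r64, imm32; ARM64: temp register + cmp
 *    off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegValue, UINT64_C(0x8877665544332211)); // temp register + cmp on both
 * @endcode
 */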
6454
6455
6456/**
 * Emits a compare of a 32-bit GPR with a constant value, setting status
 * flags/whatever for use with a conditional instruction.
 *
 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
 *       bits all zero).  Will release assert or throw exception if the caller
 *       violates this restriction.
6464 */
6465DECL_FORCE_INLINE_THROW(uint32_t)
6466iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6467{
6468#ifdef RT_ARCH_AMD64
6469 if (iGprLeft >= 8)
6470 pCodeBuf[off++] = X86_OP_REX_B;
6471 if (uImm <= UINT32_C(0x7f))
6472 {
6473 /* cmp Ev, Ib */
6474 pCodeBuf[off++] = 0x83;
6475 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6476 pCodeBuf[off++] = (uint8_t)uImm;
6477 }
6478 else
6479 {
6480 /* cmp Ev, imm */
6481 pCodeBuf[off++] = 0x81;
6482 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6483 pCodeBuf[off++] = RT_BYTE1(uImm);
6484 pCodeBuf[off++] = RT_BYTE2(uImm);
6485 pCodeBuf[off++] = RT_BYTE3(uImm);
6486 pCodeBuf[off++] = RT_BYTE4(uImm);
6487 }
6488
6489#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are clever things we can do here... */
6491 if (uImm < _4K)
6492 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6493 false /*64Bit*/, true /*fSetFlags*/);
6494 else if ((uImm & ~(uint32_t)0xfff000) == 0)
        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, uImm >> 12,
                                                      false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6497 else
6498# ifdef IEM_WITH_THROW_CATCH
6499 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6500# else
6501 AssertReleaseFailedStmt(off = UINT32_MAX);
6502# endif
6503
6504#else
6505# error "Port me!"
6506#endif
6507 return off;
6508}
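
/**
 * A minimal sketch (not from the original source) of the ARM64 immediate
 * restriction documented above; @c pCodeBuf, @c off and @c idxRegValue are
 * assumed to be set up by the caller.
 * @code
 *    off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxRegValue, 0xfff);    // okay: plain 12-bit immediate
 *    off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxRegValue, 0x123000); // okay: 12-bit immediate shifted left by 12
 *    off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxRegValue, 0x1001);   // asserts/throws on ARM64, fine on AMD64
 * @endcode
 */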
6509
6510
6511/**
 * Emits a compare of a 32-bit GPR with a constant value, setting status
 * flags/whatever for use with a conditional instruction.
6514 */
6515DECL_INLINE_THROW(uint32_t)
6516iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6517{
6518#ifdef RT_ARCH_AMD64
6519 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6520
6521#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are clever things we can do here... */
6523 if (uImm < _4K)
6524 {
6525 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6526 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6527 false /*64Bit*/, true /*fSetFlags*/);
6528 }
6529 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6530 {
6531 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, uImm >> 12,
                                                         false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6534 }
6535 else
6536 {
6537 /* Use temporary register for the immediate. */
6538 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6539 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6540 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6541 }
6542
6543#else
6544# error "Port me!"
6545#endif
6546
6547 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6548 return off;
6549}
6550
6551
6552/**
 * Emits a compare of a 16-bit GPR with a constant value, setting status
 * flags/whatever for use with a conditional instruction.
 *
 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
 *       16-bit value from @a iGprLeft.
 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
 *       bits all zero).  Will release assert or throw exception if the caller
 *       violates this restriction.
6562 */
6563DECL_FORCE_INLINE_THROW(uint32_t)
6564iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6565 uint8_t idxTmpReg = UINT8_MAX)
6566{
6567#ifdef RT_ARCH_AMD64
6568 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6569 if (iGprLeft >= 8)
6570 pCodeBuf[off++] = X86_OP_REX_B;
6571 if (uImm <= UINT32_C(0x7f))
6572 {
6573 /* cmp Ev, Ib */
6574 pCodeBuf[off++] = 0x83;
6575 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6576 pCodeBuf[off++] = (uint8_t)uImm;
6577 }
6578 else
6579 {
6580 /* cmp Ev, imm */
6581 pCodeBuf[off++] = 0x81;
6582 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6583 pCodeBuf[off++] = RT_BYTE1(uImm);
6584 pCodeBuf[off++] = RT_BYTE2(uImm);
6585 }
6586 RT_NOREF(idxTmpReg);
6587
6588#elif defined(RT_ARCH_ARM64)
6589# ifdef IEM_WITH_THROW_CATCH
6590 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6591# else
6592 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6593# endif
6594 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6595 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6596 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6597
6598#else
6599# error "Port me!"
6600#endif
6601 return off;
6602}
6603
6604
6605/**
 * Emits a compare of a 16-bit GPR with a constant value, setting status
 * flags/whatever for use with a conditional instruction.
 *
 * @note ARM64: Helper register is required (@a idxTmpReg).
6610 */
6611DECL_INLINE_THROW(uint32_t)
6612iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6613 uint8_t idxTmpReg = UINT8_MAX)
6614{
6615#ifdef RT_ARCH_AMD64
6616 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6617#elif defined(RT_ARCH_ARM64)
6618 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6619#else
6620# error "Port me!"
6621#endif
6622 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6623 return off;
6624}
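
/**
 * A minimal sketch (not from the original source): on ARM64 the 16-bit
 * compare needs a scratch register for isolating the low 16 bits, so one is
 * allocated up front here.  iemNativeRegAllocTmp() and iemNativeRegFreeTmp()
 * are assumed to be the usual temporary register helpers from
 * IEMN8veRecompiler.h; @c idxRegValue is hypothetical.
 * @code
 *    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *    off = iemNativeEmitCmpGpr16WithImm(pReNative, off, idxRegValue, UINT16_C(0x0fff), idxRegTmp);
 *    iemNativeRegFreeTmp(pReNative, idxRegTmp);
 * @endcode
 */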
6625
6626
6627
6628/*********************************************************************************************************************************
6629* Branching *
6630*********************************************************************************************************************************/
6631
6632/**
6633 * Emits a JMP rel32 / B imm19 to the given label.
6634 */
6635DECL_FORCE_INLINE_THROW(uint32_t)
6636iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6637{
6638 Assert(idxLabel < pReNative->cLabels);
6639
6640#ifdef RT_ARCH_AMD64
6641 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6642 {
6643 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6644 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6645 {
6646 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6647 pCodeBuf[off++] = (uint8_t)offRel;
6648 }
6649 else
6650 {
6651 offRel -= 3;
6652 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6653 pCodeBuf[off++] = RT_BYTE1(offRel);
6654 pCodeBuf[off++] = RT_BYTE2(offRel);
6655 pCodeBuf[off++] = RT_BYTE3(offRel);
6656 pCodeBuf[off++] = RT_BYTE4(offRel);
6657 }
6658 }
6659 else
6660 {
6661 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6662 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6663 pCodeBuf[off++] = 0xfe;
6664 pCodeBuf[off++] = 0xff;
6665 pCodeBuf[off++] = 0xff;
6666 pCodeBuf[off++] = 0xff;
6667 }
6668 pCodeBuf[off++] = 0xcc; /* int3 poison */
6669
6670#elif defined(RT_ARCH_ARM64)
6671 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6672 {
6673 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6674 off++;
6675 }
6676 else
6677 {
6678 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6679 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6680 }
6681
6682#else
6683# error "Port me!"
6684#endif
6685 return off;
6686}
6687
6688
6689/**
6690 * Emits a JMP rel32 / B imm19 to the given label.
6691 */
6692DECL_INLINE_THROW(uint32_t)
6693iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6694{
6695#ifdef RT_ARCH_AMD64
6696 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6697#elif defined(RT_ARCH_ARM64)
6698 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6699#else
6700# error "Port me!"
6701#endif
6702 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6703 return off;
6704}
6705
6706
6707/**
6708 * Emits a JMP rel32 / B imm19 to a new undefined label.
6709 */
6710DECL_INLINE_THROW(uint32_t)
6711iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6712{
6713 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6714 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6715}
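
/**
 * A minimal sketch (not from the original source) of the forward-label
 * pattern the jump emitters above serve: branch to a label that is defined
 * later; the fixup recorded at emit time is applied once the label gets its
 * final offset.  iemNativeLabelDefine() is assumed to be the usual label
 * helper from IEMN8veRecompiler.h, and the label type is illustrative.
 * @code
 *    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX /*offWhere*/, 0 /*uData*/);
 *    off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel); // records a fixup
 *    // ... code being jumped over ...
 *    iemNativeLabelDefine(pReNative, idxLabel, off);          // fixup resolved against this offset
 * @endcode
 */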
6716
6717/** Condition type. */
6718#ifdef RT_ARCH_AMD64
6719typedef enum IEMNATIVEINSTRCOND : uint8_t
6720{
6721 kIemNativeInstrCond_o = 0,
6722 kIemNativeInstrCond_no,
6723 kIemNativeInstrCond_c,
6724 kIemNativeInstrCond_nc,
6725 kIemNativeInstrCond_e,
6726 kIemNativeInstrCond_z = kIemNativeInstrCond_e,
6727 kIemNativeInstrCond_ne,
6728 kIemNativeInstrCond_nz = kIemNativeInstrCond_ne,
6729 kIemNativeInstrCond_be,
6730 kIemNativeInstrCond_nbe,
6731 kIemNativeInstrCond_s,
6732 kIemNativeInstrCond_ns,
6733 kIemNativeInstrCond_p,
6734 kIemNativeInstrCond_np,
6735 kIemNativeInstrCond_l,
6736 kIemNativeInstrCond_nl,
6737 kIemNativeInstrCond_le,
6738 kIemNativeInstrCond_nle
6739} IEMNATIVEINSTRCOND;
6740#elif defined(RT_ARCH_ARM64)
6741typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6742# define kIemNativeInstrCond_o todo_conditional_codes
6743# define kIemNativeInstrCond_no todo_conditional_codes
6744# define kIemNativeInstrCond_c todo_conditional_codes
6745# define kIemNativeInstrCond_nc todo_conditional_codes
6746# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6747# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6748# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6749# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6750# define kIemNativeInstrCond_s todo_conditional_codes
6751# define kIemNativeInstrCond_ns todo_conditional_codes
6752# define kIemNativeInstrCond_p todo_conditional_codes
6753# define kIemNativeInstrCond_np todo_conditional_codes
6754# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6755# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6756# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6757# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6758#else
6759# error "Port me!"
6760#endif
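
/**
 * A minimal sketch (not from the original source): the condition names that
 * are defined on both hosts (e/ne/be/nbe/l/nl/le/nle) can be used portably
 * with the Jcc emitters below.  @c idxRegValue and @c idxLabelSkip are
 * hypothetical.
 * @code
 *    off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegValue, 0);
 *    off = iemNativeEmitJccToLabel(pReNative, off, idxLabelSkip, kIemNativeInstrCond_e); // je / b.eq
 * @endcode
 */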
6761
6762
6763/**
6764 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6765 */
6766DECL_FORCE_INLINE_THROW(uint32_t)
6767iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6768 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6769{
6770 Assert(idxLabel < pReNative->cLabels);
6771
6772 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6773#ifdef RT_ARCH_AMD64
6774 if (offLabel >= off)
6775 {
6776 /* jcc rel32 */
6777 pCodeBuf[off++] = 0x0f;
6778 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6779 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6780 pCodeBuf[off++] = 0x00;
6781 pCodeBuf[off++] = 0x00;
6782 pCodeBuf[off++] = 0x00;
6783 pCodeBuf[off++] = 0x00;
6784 }
6785 else
6786 {
6787 int32_t offDisp = offLabel - (off + 2);
6788 if ((int8_t)offDisp == offDisp)
6789 {
6790 /* jcc rel8 */
6791 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6792 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6793 }
6794 else
6795 {
6796 /* jcc rel32 */
6797 offDisp -= 4;
6798 pCodeBuf[off++] = 0x0f;
6799 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6800 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6801 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6802 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6803 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6804 }
6805 }
6806
6807#elif defined(RT_ARCH_ARM64)
6808 if (offLabel >= off)
6809 {
6810 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6811 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6812 }
6813 else
6814 {
6815 Assert(off - offLabel <= 0x3ffffU);
6816 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6817 off++;
6818 }
6819
6820#else
6821# error "Port me!"
6822#endif
6823 return off;
6824}
6825
6826
6827/**
6828 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6829 */
6830DECL_INLINE_THROW(uint32_t)
6831iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6832{
6833#ifdef RT_ARCH_AMD64
6834 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6835#elif defined(RT_ARCH_ARM64)
6836 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6837#else
6838# error "Port me!"
6839#endif
6840 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6841 return off;
6842}
6843
6844
6845/**
6846 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6847 */
6848DECL_INLINE_THROW(uint32_t)
6849iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6850 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6851{
6852 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6853 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6854}
6855
6856
6857/**
6858 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6859 */
6860DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6861{
6862#ifdef RT_ARCH_AMD64
6863 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6864#elif defined(RT_ARCH_ARM64)
6865 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6866#else
6867# error "Port me!"
6868#endif
6869}
6870
6871/**
6872 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6873 */
6874DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6875 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6876{
6877#ifdef RT_ARCH_AMD64
6878 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6879#elif defined(RT_ARCH_ARM64)
6880 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6881#else
6882# error "Port me!"
6883#endif
6884}
6885
6886
6887/**
6888 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6889 */
6890DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6891{
6892#ifdef RT_ARCH_AMD64
6893 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6894#elif defined(RT_ARCH_ARM64)
6895 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6896#else
6897# error "Port me!"
6898#endif
6899}
6900
6901/**
6902 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6903 */
6904DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6905 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6906{
6907#ifdef RT_ARCH_AMD64
6908 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6909#elif defined(RT_ARCH_ARM64)
6910 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6911#else
6912# error "Port me!"
6913#endif
6914}
6915
6916
6917/**
6918 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6919 */
6920DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6921{
6922#ifdef RT_ARCH_AMD64
6923 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6924#elif defined(RT_ARCH_ARM64)
6925 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6926#else
6927# error "Port me!"
6928#endif
6929}
6930
6931/**
6932 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6933 */
6934DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6935 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6936{
6937#ifdef RT_ARCH_AMD64
6938 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6939#elif defined(RT_ARCH_ARM64)
6940 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6941#else
6942# error "Port me!"
6943#endif
6944}
6945
6946
6947/**
6948 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6949 */
6950DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6951{
6952#ifdef RT_ARCH_AMD64
6953 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6954#elif defined(RT_ARCH_ARM64)
6955 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6956#else
6957# error "Port me!"
6958#endif
6959}
6960
6961/**
6962 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6963 */
6964DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6965 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6966{
6967#ifdef RT_ARCH_AMD64
6968 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6969#elif defined(RT_ARCH_ARM64)
6970 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6971#else
6972# error "Port me!"
6973#endif
6974}
6975
6976
6977/**
6978 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6979 */
6980DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6981{
6982#ifdef RT_ARCH_AMD64
6983 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6984#elif defined(RT_ARCH_ARM64)
6985 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6986#else
6987# error "Port me!"
6988#endif
6989}
6990
6991/**
 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6993 */
6994DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6995 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6996{
6997#ifdef RT_ARCH_AMD64
6998 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6999#elif defined(RT_ARCH_ARM64)
7000 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
7001#else
7002# error "Port me!"
7003#endif
7004}
7005
7006
7007/**
7008 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
7009 *
7010 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
7011 *
7012 * Only use hardcoded jumps forward when emitting for exactly one
7013 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
7014 * the right target address on all platforms!
7015 *
 *          Please also note that on x86 it is necessary to pass off + 256 or
 *          higher for @a offTarget if one believes the intervening code is
 *          more than 127 bytes long.
7019 */
7020DECL_FORCE_INLINE(uint32_t)
7021iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
7022{
7023#ifdef RT_ARCH_AMD64
7024 /* jcc rel8 / rel32 */
7025 int32_t offDisp = (int32_t)(offTarget - (off + 2));
7026 if (offDisp < 128 && offDisp >= -128)
7027 {
7028 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
7029 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7030 }
7031 else
7032 {
7033 offDisp -= 4;
7034 pCodeBuf[off++] = 0x0f;
7035 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
7036 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7037 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
7038 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
7039 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
7040 }
7041
7042#elif defined(RT_ARCH_ARM64)
7043 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
7044 off++;
7045#else
7046# error "Port me!"
7047#endif
7048 return off;
7049}
7050
7051
7052/**
7053 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
7054 *
7055 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
7056 *
7057 * Only use hardcoded jumps forward when emitting for exactly one
7058 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
7059 * the right target address on all platforms!
7060 *
 *          Please also note that on x86 it is necessary to pass off + 256 or
 *          higher for @a offTarget if one believes the intervening code is
 *          more than 127 bytes long.
7064 */
7065DECL_INLINE_THROW(uint32_t)
7066iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
7067{
7068#ifdef RT_ARCH_AMD64
7069 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
7070#elif defined(RT_ARCH_ARM64)
7071 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
7072#else
7073# error "Port me!"
7074#endif
7075 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7076 return off;
7077}
7078
7079
7080/**
7081 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
7082 *
7083 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7084 */
7085DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7086{
7087#ifdef RT_ARCH_AMD64
7088 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
7089#elif defined(RT_ARCH_ARM64)
7090 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
7091#else
7092# error "Port me!"
7093#endif
7094}
7095
7096
7097/**
7098 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
7099 *
7100 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7101 */
7102DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7103{
7104#ifdef RT_ARCH_AMD64
7105 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
7106#elif defined(RT_ARCH_ARM64)
7107 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
7108#else
7109# error "Port me!"
7110#endif
7111}
7112
7113
7114/**
7115 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
7116 *
7117 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7118 */
7119DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7120{
7121#ifdef RT_ARCH_AMD64
7122 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
7123#elif defined(RT_ARCH_ARM64)
7124 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
7125#else
7126# error "Port me!"
7127#endif
7128}
7129
7130
7131/**
7132 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
7133 *
7134 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7135 */
7136DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7137{
7138#ifdef RT_ARCH_AMD64
7139 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
7140#elif defined(RT_ARCH_ARM64)
7141 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
7142#else
7143# error "Port me!"
7144#endif
7145}
7146
7147
7148/**
7149 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
7150 *
7151 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7152 */
7153DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
7154{
7155#ifdef RT_ARCH_AMD64
7156 /* jmp rel8 or rel32 */
7157 int32_t offDisp = offTarget - (off + 2);
7158 if (offDisp < 128 && offDisp >= -128)
7159 {
7160 pCodeBuf[off++] = 0xeb;
7161 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7162 }
7163 else
7164 {
7165 offDisp -= 3;
7166 pCodeBuf[off++] = 0xe9;
7167 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7168 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
7169 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
7170 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
7171 }
7172
7173#elif defined(RT_ARCH_ARM64)
7174 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
7175 off++;
7176
7177#else
7178# error "Port me!"
7179#endif
7180 return off;
7181}
7182
7183
7184/**
7185 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
7186 *
7187 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7188 */
7189DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7190{
7191#ifdef RT_ARCH_AMD64
7192 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
7193#elif defined(RT_ARCH_ARM64)
7194 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
7195#else
7196# error "Port me!"
7197#endif
7198 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7199 return off;
7200}
7201
7202
7203/**
7204 * Fixes up a conditional jump to a fixed label.
7205 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
7206 * iemNativeEmitJzToFixed, ...
7207 */
7208DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
7209{
7210#ifdef RT_ARCH_AMD64
7211 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
7212 uint8_t const bOpcode = pbCodeBuf[offFixup];
7213 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
7214 {
7215 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
7216 AssertStmt((int8_t)pbCodeBuf[offFixup + 1] == (int32_t)(offTarget - (offFixup + 2)),
7217 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
7218 }
7219 else
7220 {
7221 if (bOpcode != 0x0f)
7222 Assert(bOpcode == 0xe9);
7223 else
7224 {
7225 offFixup += 1;
            Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) < 0x10);
7227 }
7228 uint32_t const offRel32 = offTarget - (offFixup + 5);
7229 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
7230 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
7231 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
7232 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
7233 }
7234
7235#elif defined(RT_ARCH_ARM64)
7236 int32_t const offDisp = offTarget - offFixup;
7237 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
7238 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
7239 {
7240 /* B.COND + BC.COND */
7241 Assert(offDisp >= -262144 && offDisp < 262144);
7242 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
7243 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
7244 }
7245 else if ((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000))
7246 {
7247 /* B imm26 */
7248 Assert(offDisp >= -33554432 && offDisp < 33554432);
7249 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
7250 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
7251 }
    else if ((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x36000000))
    {
        /* TBZ / TBNZ reg, bit, imm14 - as emitted by the fixed variants of the bit-test-and-jump helpers below. */
        Assert(offDisp >= -8192 && offDisp < 8192);
        pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfff8001f))
                              | (((uint32_t)offDisp << 5) & UINT32_C(0x0007ffe0));
    }
    else
    {
        /* CBZ / CBNZ reg, imm19 */
        Assert((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x34000000));
        Assert(offDisp >= -1048576 && offDisp < 1048576);
        pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
                              | (((uint32_t)offDisp << 5) & UINT32_C(0x00ffffe0));
    }
7261
7262#else
7263# error "Port me!"
7264#endif
7265}
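
/**
 * A minimal sketch (not from the original source) of the emit-then-fixup
 * pattern this function completes: emit the conditional jump with a
 * provisional target, remember where it sits, and patch it once the real
 * target is known.  Passing off + 256 as the provisional target forces the
 * rel32 form on AMD64, as per the note on iemNativeEmitJccToFixed().
 * @code
 *    uint32_t const offFixup = off;
 *    off = iemNativeEmitJnzToFixed(pReNative, off, off + 256 /*provisional*/);
 *    // ... code being jumped over ...
 *    iemNativeFixupFixedJump(pReNative, offFixup, off /*real target*/);
 * @endcode
 */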
7266
7267
7268#ifdef RT_ARCH_AMD64
7269/**
7270 * For doing bt on a register.
7271 */
7272DECL_INLINE_THROW(uint32_t)
7273iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
7274{
7275 Assert(iBitNo < 64);
7276 /* bt Ev, imm8 */
7277 if (iBitNo >= 32)
7278 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7279 else if (iGprSrc >= 8)
7280 pCodeBuf[off++] = X86_OP_REX_B;
7281 pCodeBuf[off++] = 0x0f;
7282 pCodeBuf[off++] = 0xba;
7283 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7284 pCodeBuf[off++] = iBitNo;
7285 return off;
7286}
7287#endif /* RT_ARCH_AMD64 */
7288
7289
7290/**
7291 * Internal helper, don't call directly.
7292 */
7293DECL_INLINE_THROW(uint32_t)
7294iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7295 uint32_t offTarget, uint32_t *poffFixup, bool fJmpIfSet)
7296{
7297 Assert(iBitNo < 64);
7298#ifdef RT_ARCH_AMD64
7299 if (iBitNo < 8)
7300 {
7301 /* test Eb, imm8 */
7302 if (iGprSrc >= 4)
7303 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7304 pCodeBuf[off++] = 0xf6;
7305 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7306 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7307 if (poffFixup)
7308 *poffFixup = off;
7309 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7310 }
7311 else
7312 {
7313 /* bt Ev, imm8 */
7314 if (iBitNo >= 32)
7315 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7316 else if (iGprSrc >= 8)
7317 pCodeBuf[off++] = X86_OP_REX_B;
7318 pCodeBuf[off++] = 0x0f;
7319 pCodeBuf[off++] = 0xba;
7320 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7321 pCodeBuf[off++] = iBitNo;
7322 if (poffFixup)
7323 *poffFixup = off;
7324 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7325 }
7326
7327#elif defined(RT_ARCH_ARM64)
    /* Just use the TBZ/TBNZ instruction here. */
    if (poffFixup)
        *poffFixup = off;
    pCodeBuf[off] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, (int32_t)(offTarget - off), iGprSrc, iBitNo);
    off++;
7332
7333#else
7334# error "Port me!"
7335#endif
7336 return off;
7337}
7338
7339
7340/**
 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _set_
 * in @a iGprSrc.
7343 */
7344DECL_INLINE_THROW(uint32_t)
7345iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7346 uint32_t offTarget, uint32_t *poffFixup)
7347{
7348 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, true /*fJmpIfSet*/);
7349}
7350
7351
7352/**
 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _not_
 * _set_ in @a iGprSrc.
7355 */
7356DECL_INLINE_THROW(uint32_t)
7357iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7358 uint32_t offTarget, uint32_t *poffFixup)
7359{
7360 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, false /*fJmpIfSet*/);
7361}
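
/**
 * A minimal sketch (not from the original source) for the two fixed-target
 * bit-test emitters above: the @a poffFixup out parameter exists because on
 * AMD64 the branch does not start at the first emitted byte (the test/bt
 * comes first), so the caller cannot simply record @a off before the call.
 * @c idxRegEfl is a hypothetical register holding the guest EFLAGS.
 * @code
 *    uint32_t        offFixup = 0;
 *    PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 6); // AMD64 worst case: bt + jcc rel32
 *    off = iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(pCodeBuf, off, idxRegEfl, X86_EFL_ZF_BIT,
 *                                                        off + 256 /*provisional*/, &offFixup);
 *    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 *    // ... code being jumped over ...
 *    iemNativeFixupFixedJump(pReNative, offFixup, off /*real target*/);
 * @endcode
 */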
7362
7363
7364
7365/**
7366 * Internal helper, don't call directly.
7367 */
7368DECL_INLINE_THROW(uint32_t)
7369iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7370 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7371{
7372 Assert(iBitNo < 64);
7373#ifdef RT_ARCH_AMD64
7374 if (iBitNo < 8)
7375 {
7376 /* test Eb, imm8 */
7377 if (iGprSrc >= 4)
7378 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7379 pCodeBuf[off++] = 0xf6;
7380 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7381 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7382 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7383 fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7384 }
7385 else
7386 {
7387 /* bt Ev, imm8 */
7388 if (iBitNo >= 32)
7389 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7390 else if (iGprSrc >= 8)
7391 pCodeBuf[off++] = X86_OP_REX_B;
7392 pCodeBuf[off++] = 0x0f;
7393 pCodeBuf[off++] = 0xba;
7394 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7395 pCodeBuf[off++] = iBitNo;
7396 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7397 fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7398 }
7399
7400#elif defined(RT_ARCH_ARM64)
    /* Use the TBZ/TBNZ instruction here. */
7402 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
7403 {
7404 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
7405 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
7406 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
7407 //if (offLabel == UINT32_MAX)
7408 {
7409 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
7410 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
7411 }
7412 //else
7413 //{
7414 // RT_BREAKPOINT();
7415 // Assert(off - offLabel <= 0x1fffU);
7416 // pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
7417 //
7418 //}
7419 }
7420 else
7421 {
7422 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
7423 pCodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
7424 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7425 pCodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
7426 }
7427
7428#else
7429# error "Port me!"
7430#endif
7431 return off;
7432}
7433
7434
7435/**
7436 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7437 * @a iGprSrc.
7438 */
7439DECL_INLINE_THROW(uint32_t)
7440iemNativeEmitTestBitInGprAndJmpToLabelIfSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7441 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7442{
7443 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7444}
7445
7446
7447/**
7448 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7449 * _set_ in @a iGprSrc.
7450 */
7451DECL_INLINE_THROW(uint32_t)
7452iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7453 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7454{
7455 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7456}
7457
7458
7459/**
7460 * Internal helper, don't call directly.
7461 */
7462DECL_INLINE_THROW(uint32_t)
7463iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7464 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7465{
7466#ifdef RT_ARCH_AMD64
7467 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 5+6), off,
7468 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7469#elif defined(RT_ARCH_ARM64)
7470 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 2), off,
7471 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7472#else
7473# error "Port me!"
7474#endif
7475 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7476 return off;
7477}
7478
7479
7480/**
7481 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7482 * @a iGprSrc.
7483 */
7484DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7485 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7486{
7487 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7488}
7489
7490
7491/**
7492 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7493 * _set_ in @a iGprSrc.
7494 */
7495DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7496 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7497{
7498 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7499}
7500
7501
7502/**
7503 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7504 * flags accordingly.
7505 */
7506DECL_INLINE_THROW(uint32_t)
7507iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7508{
7509 Assert(fBits != 0);
7510#ifdef RT_ARCH_AMD64
7511
7512 if (fBits >= UINT32_MAX)
7513 {
7514 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7515
7516 /* test Ev,Gv */
7517 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7518 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7519 pbCodeBuf[off++] = 0x85;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7521
7522 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7523 }
7524 else if (fBits <= UINT32_MAX)
7525 {
7526 /* test Eb, imm8 or test Ev, imm32 */
7527 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7528 if (fBits <= UINT8_MAX)
7529 {
7530 if (iGprSrc >= 4)
7531 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7532 pbCodeBuf[off++] = 0xf6;
7533 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7534 pbCodeBuf[off++] = (uint8_t)fBits;
7535 }
7536 else
7537 {
7538 if (iGprSrc >= 8)
7539 pbCodeBuf[off++] = X86_OP_REX_B;
7540 pbCodeBuf[off++] = 0xf7;
7541 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7542 pbCodeBuf[off++] = RT_BYTE1(fBits);
7543 pbCodeBuf[off++] = RT_BYTE2(fBits);
7544 pbCodeBuf[off++] = RT_BYTE3(fBits);
7545 pbCodeBuf[off++] = RT_BYTE4(fBits);
7546 }
7547 }
7548 /** @todo implement me. */
7549 else
7550 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7551
7552#elif defined(RT_ARCH_ARM64)
7553 uint32_t uImmR = 0;
7554 uint32_t uImmNandS = 0;
7555 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7556 {
7557 /* ands xzr, iGprSrc, #fBits */
7558 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7559 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7560 }
7561 else
7562 {
7563 /* ands xzr, iGprSrc, iTmpReg */
7564 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7565 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7566 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7567 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7568 }
7569
7570#else
7571# error "Port me!"
7572#endif
7573 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7574 return off;
7575}
7576
7577
7578/**
7579 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7580 * @a iGprSrc, setting CPU flags accordingly.
7581 *
 * @note For ARM64 this only supports @a fBits values that can be expressed
 *       using the two 6-bit immediates of the ANDS instruction, unless a
 *       temporary register is supplied via @a iTmpReg.
7585 */
7586DECL_FORCE_INLINE_THROW(uint32_t)
7587iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits,
7588 uint8_t iTmpReg = UINT8_MAX)
7589{
7590 Assert(fBits != 0);
7591
7592#ifdef RT_ARCH_AMD64
7593 if (fBits <= UINT8_MAX)
7594 {
7595 /* test Eb, imm8 */
7596 if (iGprSrc >= 4)
7597 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7598 pCodeBuf[off++] = 0xf6;
7599 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7600 pCodeBuf[off++] = (uint8_t)fBits;
7601 }
7602 else
7603 {
7604 /* test Ev, imm32 */
7605 if (iGprSrc >= 8)
7606 pCodeBuf[off++] = X86_OP_REX_B;
7607 pCodeBuf[off++] = 0xf7;
7608 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7609 pCodeBuf[off++] = RT_BYTE1(fBits);
7610 pCodeBuf[off++] = RT_BYTE2(fBits);
7611 pCodeBuf[off++] = RT_BYTE3(fBits);
7612 pCodeBuf[off++] = RT_BYTE4(fBits);
7613 }
7614 RT_NOREF(iTmpReg);
7615
7616#elif defined(RT_ARCH_ARM64)
7617 /* ands xzr, src, #fBits */
7618 uint32_t uImmR = 0;
7619 uint32_t uImmNandS = 0;
7620 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7621 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7622 else if (iTmpReg != UINT8_MAX)
7623 {
7624 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iTmpReg, fBits);
7625 pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7626 }
7627 else
7628# ifdef IEM_WITH_THROW_CATCH
7629 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7630# else
7631 AssertReleaseFailedStmt(off = UINT32_MAX);
7632# endif
7633
7634#else
7635# error "Port me!"
7636#endif
7637 return off;
7638}
7639
7640
7641
7642/**
7643 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7644 * @a iGprSrc, setting CPU flags accordingly.
7645 *
7646 * @note For ARM64 this only supports @a fBits values that can be expressed
7647 * using the two 6-bit immediates of the ANDS instruction. The caller
7648 * must make sure this is possible!
7649 */
7650DECL_FORCE_INLINE_THROW(uint32_t)
7651iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7652{
7653 Assert(fBits != 0);
7654
7655#ifdef RT_ARCH_AMD64
7656 /* test Eb, imm8 */
7657 if (iGprSrc >= 4)
7658 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7659 pCodeBuf[off++] = 0xf6;
7660 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7661 pCodeBuf[off++] = fBits;
7662
7663#elif defined(RT_ARCH_ARM64)
7664 /* ands xzr, src, #fBits */
7665 uint32_t uImmR = 0;
7666 uint32_t uImmNandS = 0;
7667 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7668 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7669 else
7670# ifdef IEM_WITH_THROW_CATCH
7671 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7672# else
7673 AssertReleaseFailedStmt(off = UINT32_MAX);
7674# endif
7675
7676#else
7677# error "Port me!"
7678#endif
7679 return off;
7680}
7681
7682
7683/**
7684 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7685 * @a iGprSrc, setting CPU flags accordingly.
7686 */
7687DECL_INLINE_THROW(uint32_t)
7688iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7689{
7690 Assert(fBits != 0);
7691
7692#ifdef RT_ARCH_AMD64
7693 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7694
7695#elif defined(RT_ARCH_ARM64)
7696 /* ands xzr, src, [tmp|#imm] */
7697 uint32_t uImmR = 0;
7698 uint32_t uImmNandS = 0;
7699 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7700 {
7701 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7702 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7703 }
7704 else
7705 {
        /* Use temporary register for the immediate. */
7707 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7708 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7709 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7710 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7711 }
7712
7713#else
7714# error "Port me!"
7715#endif
7716 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7717 return off;
7718}
7719
7720
7721/**
7722 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
7723 * are set in @a iGprSrc.
7724 */
7725DECL_INLINE_THROW(uint32_t)
7726iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7727 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7728{
7729 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7730
7731 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7732 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7733
7734 return off;
7735}
7736
7737
7738/**
7739 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
7740 * are set in @a iGprSrc.
7741 */
7742DECL_INLINE_THROW(uint32_t)
7743iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7744 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7745{
7746 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7747
7748 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7749 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7750
7751 return off;
7752}
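
/**
 * A minimal sketch (not from the original source): branch to a label when
 * none of a set of bits are set; @c idxRegFlags and @c idxLabelNothingToDo
 * are hypothetical.  Note the assertions above: a single-bit mask should use
 * the bit-test-and-jump helpers instead.
 * @code
 *    off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxRegFlags,
 *                                                              RT_BIT_64(3) | RT_BIT_64(9), idxLabelNothingToDo);
 * @endcode
 */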
7753
7754
7755/**
 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as selected by @a fJmpIfNotZero.
7757 *
7758 * The operand size is given by @a f64Bit.
7759 */
7760DECL_FORCE_INLINE_THROW(uint32_t)
7761iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7762 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7763{
7764 Assert(idxLabel < pReNative->cLabels);
7765
7766#ifdef RT_ARCH_AMD64
7767 /* test reg32,reg32 / test reg64,reg64 */
7768 if (f64Bit)
7769 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7770 else if (iGprSrc >= 8)
7771 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7772 pCodeBuf[off++] = 0x85;
7773 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7774
    /* jnz/jz idxLabel */
7776 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7777 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7778
7779#elif defined(RT_ARCH_ARM64)
7780 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7781 {
7782 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7783 iGprSrc, f64Bit);
7784 off++;
7785 }
7786 else
7787 {
7788 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7789 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7790 }
7791
7792#else
7793# error "Port me!"
7794#endif
7795 return off;
7796}
7797
7798
7799/**
 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as selected by @a fJmpIfNotZero.
7801 *
7802 * The operand size is given by @a f64Bit.
7803 */
7804DECL_FORCE_INLINE_THROW(uint32_t)
7805iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7806 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7807{
7808#ifdef RT_ARCH_AMD64
7809 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7810 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7811#elif defined(RT_ARCH_ARM64)
7812 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7813 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7814#else
7815# error "Port me!"
7816#endif
7817 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7818 return off;
7819}
7820
7821
7822/**
 * Emits code that jumps to @a offTarget if @a iGprSrc is zero or not zero, as selected by @a fJmpIfNotZero.
7824 *
7825 * The operand size is given by @a f64Bit.
7826 */
7827DECL_FORCE_INLINE_THROW(uint32_t)
7828iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7829 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7830{
7831#ifdef RT_ARCH_AMD64
7832 /* test reg32,reg32 / test reg64,reg64 */
7833 if (f64Bit)
7834 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7835 else if (iGprSrc >= 8)
7836 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7837 pCodeBuf[off++] = 0x85;
7838 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7839
    /* jnz/jz offTarget */
7841 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget,
7842 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7843
7844#elif defined(RT_ARCH_ARM64)
7845 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(offTarget - off), iGprSrc, f64Bit);
7846 off++;
7847
7848#else
7849# error "Port me!"
7850#endif
7851 return off;
7852}
7853
7854
7855/**
 * Emits code that jumps to @a offTarget if @a iGprSrc is zero or not zero, as selected by @a fJmpIfNotZero.
7857 *
7858 * The operand size is given by @a f64Bit.
7859 */
7860DECL_FORCE_INLINE_THROW(uint32_t)
7861iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7862 bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7863{
7864#ifdef RT_ARCH_AMD64
7865 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7866 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7867#elif defined(RT_ARCH_ARM64)
7868 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1),
7869 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7870#else
7871# error "Port me!"
7872#endif
7873 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7874 return off;
7875}
7876
7877
/* if (Gpr1 == 0) Jmp idxLabel; */
7879
7880/**
7881 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7882 *
7883 * The operand size is given by @a f64Bit.
7884 */
7885DECL_FORCE_INLINE_THROW(uint32_t)
7886iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7887 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7888{
7889 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7890 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7891}
7892
7893
7894/**
7895 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7896 *
7897 * The operand size is given by @a f64Bit.
7898 */
7899DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7900 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7901{
7902 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7903}
7904
7905
7906/**
7907 * Emits code that jumps to a new label if @a iGprSrc is zero.
7908 *
7909 * The operand size is given by @a f64Bit.
7910 */
7911DECL_INLINE_THROW(uint32_t)
7912iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7913 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7914{
7915 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7916 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7917}
7918
7919
7920/**
7921 * Emits code that jumps to @a offTarget if @a iGprSrc is zero.
7922 *
7923 * The operand size is given by @a f64Bit.
7924 */
7925DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7926 uint8_t iGprSrc, bool f64Bit, uint32_t offTarget)
7927{
7928 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, offTarget);
7929}
7930
7931
/* if (Gpr1 != 0) Jmp idxLabel; */
7933
7934/**
7935 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7936 *
7937 * The operand size is given by @a f64Bit.
7938 */
7939DECL_FORCE_INLINE_THROW(uint32_t)
7940iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7941 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7942{
7943 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7944 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7945}
7946
7947
7948/**
7949 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7950 *
7951 * The operand size is given by @a f64Bit.
7952 */
7953DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7954 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7955{
7956 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7957}
7958
7959
7960/**
7961 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7962 *
7963 * The operand size is given by @a f64Bit.
7964 */
7965DECL_INLINE_THROW(uint32_t)
7966iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7967 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7968{
7969 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7970 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7971}
7972
7973
7974/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7975
7976/**
7977 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7978 * differ.
7979 */
7980DECL_INLINE_THROW(uint32_t)
7981iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7982 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7983{
7984 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7985 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7986 return off;
7987}
7988
7989
7990/**
7991 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
7992 */
7993DECL_INLINE_THROW(uint32_t)
7994iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7995 uint8_t iGprLeft, uint8_t iGprRight,
7996 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7997{
7998 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7999 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
8000}
8001
8002
8003/* if (Gpr != Imm) Jmp idxLabel; */
8004
8005/**
8006 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
8007 */
8008DECL_INLINE_THROW(uint32_t)
8009iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8010 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
8011{
8012 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8013 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8014 return off;
8015}
8016
8017
8018/**
8019 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
8020 */
8021DECL_INLINE_THROW(uint32_t)
8022iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8023 uint8_t iGprSrc, uint64_t uImm,
8024 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8025{
8026 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8027 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8028}
8029
8030
8031/**
8032 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
8033 * @a uImm.
8034 */
8035DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8036 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
8037{
8038 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8039 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8040 return off;
8041}
8042
8043
8044/**
8045 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
8046 * @a uImm.
8047 */
8048DECL_INLINE_THROW(uint32_t)
8049iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8050 uint8_t iGprSrc, uint32_t uImm,
8051 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8052{
8053 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8054 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8055}
8056
8057
8058/**
8059 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
8060 * @a uImm.
8061 */
8062DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8063 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
8064{
8065 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
8066 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8067 return off;
8068}
8069
8070
8071/**
8072 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
8073 * @a uImm.
8074 */
8075DECL_INLINE_THROW(uint32_t)
8076iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8077 uint8_t iGprSrc, uint16_t uImm,
8078 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8079{
8080 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8081 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8082}
8083
8084
8085/* if (Gpr == Imm) Jmp idxLabel; */
8086
8087/**
8088 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
8089 */
8090DECL_INLINE_THROW(uint32_t)
8091iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8092 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
8093{
8094 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8095 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8096 return off;
8097}
8098
8099
8100/**
8101 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
8102 */
8103DECL_INLINE_THROW(uint32_t)
8104iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
8105 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8106{
8107 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8108 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8109}
8110
8111
8112/**
8113 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
8114 */
8115DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8116 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
8117{
8118 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8119 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8120 return off;
8121}
8122
8123
8124/**
8125 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
8126 */
8127DECL_INLINE_THROW(uint32_t)
8128iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
8129 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8130{
8131 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8132 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8133}
8134
8135
8136/**
8137 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
8138 *
8139 * @note ARM64: Helper register is required (idxTmpReg).
8140 */
8141DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8142 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
8143 uint8_t idxTmpReg = UINT8_MAX)
8144{
8145 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
8146 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8147 return off;
8148}
8149
8150
8151/**
8152 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
8153 *
8154 * @note ARM64: Helper register is required (idxTmpReg).
8155 */
8156DECL_INLINE_THROW(uint32_t)
8157iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
8158 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
8159 uint8_t idxTmpReg = UINT8_MAX)
8160{
8161 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8162 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
8163}
8164
8165
8166
8167/*********************************************************************************************************************************
8168* Indirect Jumps. *
8169*********************************************************************************************************************************/
8170
8171/**
8172 * Emits an indirect jump to a 64-bit address in a GPR.
8173 */
8174DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpViaGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc)
8175{
8176#ifdef RT_ARCH_AMD64
8177 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
8178 if (iGprSrc >= 8)
8179 pCodeBuf[off++] = X86_OP_REX_B;
8180 pCodeBuf[off++] = 0xff;
8181 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8182
8183#elif defined(RT_ARCH_ARM64)
8184 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8185 pCodeBuf[off++] = Armv8A64MkInstrBr(iGprSrc);
8186
8187#else
8188# error "port me"
8189#endif
8190 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8191 return off;
8192}
8193
8194
8195/**
8196 * Emits an indirect jump to an immediate 64-bit address (uses the temporary GPR).
8197 */
8198DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
8199{
8200 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
8201 return iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP0);
8202}
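/*
 * Note (added): the immediate variant goes via a register on purpose, since
 * neither the AMD64 rel32 nor the ARM64 imm26 direct jump can reach an
 * arbitrary 64-bit target.  Minimal sketch, with pfnHelper as a hypothetical
 * function pointer:
 *
 * @code
 *     off = iemNativeEmitJmpImm(pReNative, off, (uintptr_t)pfnHelper);
 * @endcode
 */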
8203
8204
8205/*********************************************************************************************************************************
8206* Calls. *
8207*********************************************************************************************************************************/
8208
8209/**
8210 * Emits a call to a 64-bit address.
8211 */
8212DECL_FORCE_INLINE(uint32_t) iemNativeEmitCallImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uintptr_t uPfn,
8213#ifdef RT_ARCH_AMD64
8214 uint8_t idxRegTmp = X86_GREG_xAX
8215#elif defined(RT_ARCH_ARM64)
8216 uint8_t idxRegTmp = IEMNATIVE_REG_FIXED_TMP0
8217#else
8218# error "Port me"
8219#endif
8220 )
8221{
8222 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegTmp, uPfn);
8223
8224#ifdef RT_ARCH_AMD64
8225 /* call idxRegTmp */
8226 if (idxRegTmp >= 8)
8227 pCodeBuf[off++] = X86_OP_REX_B;
8228 pCodeBuf[off++] = 0xff;
8229 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, idxRegTmp & 7);
8230
8231#elif defined(RT_ARCH_ARM64)
8232 pCodeBuf[off++] = Armv8A64MkInstrBlr(idxRegTmp);
8233
8234#else
8235# error "port me"
8236#endif
8237 return off;
8238}
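/*
 * Design note (added): the default scratch register is rAX on AMD64 since it
 * is caller-saved and not an integer argument register in either calling
 * convention, and the fixed temporary on ARM64, so loading the target
 * address will not clobber already-loaded call arguments.  Sketch (the
 * instruction count passed to the buffer-ensure call is a guess):
 *
 * @code
 *     PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
 *     off = iemNativeEmitCallImmEx(pCodeBuf, off, (uintptr_t)pfnHelper); // hypothetical target
 *     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 * @endcode
 */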
8239
8240
8241/**
8242 * Emits a call to a 64-bit address.
8243 */
8244template<bool const a_fSkipEflChecks = false>
8245DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
8246{
8247 if RT_CONSTEXPR_IF(!a_fSkipEflChecks)
8248 {
8249 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8250 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY( pReNative, X86_EFL_STATUS_BITS);
8251 }
8252
8253#ifdef RT_ARCH_AMD64
8254 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
8255
8256 /* call rax */
8257 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8258 pbCodeBuf[off++] = 0xff;
8259 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
8260
8261#elif defined(RT_ARCH_ARM64)
8262 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
8263
8264 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8265 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
8266
8267#else
8268# error "port me"
8269#endif
8270 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8271 return off;
8272}
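/*
 * Usage sketch (added): a typical helper call, assuming the fixed pVCpu
 * register and the ARG0 convention register defined elsewhere in the
 * recompiler headers; iemNativeHlpSomething is a hypothetical helper.
 *
 * @code
 *     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpSomething);
 * @endcode
 */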
8273
8274
8275/**
8276 * Emits code to load a stack variable into an argument GPR.
8277 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
8278 */
8279DECL_FORCE_INLINE_THROW(uint32_t)
8280iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8281 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
8282 bool fSpilledVarsInVolatileRegs = false)
8283{
8284 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8285 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8286 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8287
8288 uint8_t const idxRegVar = pVar->idxReg;
8289 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
8290 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
8291 || !fSpilledVarsInVolatileRegs ))
8292 {
8293 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
8294 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
8295 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
8296 if (!offAddend)
8297 {
8298 if (idxRegArg != idxRegVar)
8299 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
8300 }
8301 else
8302 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
8303 }
8304 else
8305 {
8306 uint8_t const idxStackSlot = pVar->idxStackSlot;
8307 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8308 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
8309 if (offAddend)
8310 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
8311 }
8312 return off;
8313}
8314
8315
8316/**
8317 * Emits code to load a stack or immediate variable value into an argument GPR,
8318 * optionally with an addend.
8319 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
8320 */
8321DECL_FORCE_INLINE_THROW(uint32_t)
8322iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8323 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
8324 bool fSpilledVarsInVolatileRegs = false)
8325{
8326 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8327 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8328 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8329 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
8330 else
8331 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
8332 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
8333 return off;
8334}
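/*
 * Sketch (added) of how the argument loaders combine when setting up a call;
 * idxVarValue is a placeholder variable index, pfnHelper a hypothetical
 * target, and IEMNATIVE_CALL_ARG1_GREG the assumed second-argument register.
 *
 * @code
 *     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarValue);
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 * @endcode
 */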
8335
8336
8337/**
8338 * Emits code to load the variable address into an argument GPR.
8339 *
8340 * This only works for uninitialized and stack variables.
8341 */
8342DECL_FORCE_INLINE_THROW(uint32_t)
8343iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8344 bool fFlushShadows)
8345{
8346 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8347 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8348 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8349 || pVar->enmKind == kIemNativeVarKind_Stack,
8350 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8351 AssertStmt(!pVar->fSimdReg,
8352 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8353
8354 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8355 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8356
8357 uint8_t const idxRegVar = pVar->idxReg;
8358 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
8359 {
8360 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
8361 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
8362 Assert(pVar->idxReg == UINT8_MAX);
8363 }
8364 Assert( pVar->idxStackSlot != UINT8_MAX
8365 && pVar->idxReg == UINT8_MAX);
8366
8367 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8368}
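/*
 * Design note (added): the variable is first flushed to its stack slot and
 * the register copy freed, so the address handed to the callee always refers
 * to the single live copy.  Sketch, with idxVarDst as a placeholder and
 * IEMNATIVE_CALL_ARG2_GREG as the assumed third-argument register:
 *
 * @code
 *     off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
 *                                               idxVarDst, true); // fFlushShadows=true
 * @endcode
 */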
8369
8370
8371
8372/*********************************************************************************************************************************
8373* TB exiting helpers. *
8374*********************************************************************************************************************************/
8375
8376#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8377/* IEMAllN8veEmit-x86.h: */
8378template<uint32_t const a_bmInputRegs>
8379DECL_FORCE_INLINE_THROW(uint32_t)
8380iemNativeDoPostponedEFlagsAtTbExitEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVEINSTR pCodeBuf);
8381
8382template<uint32_t const a_bmInputRegs>
8383DECL_FORCE_INLINE_THROW(uint32_t)
8384iemNativeDoPostponedEFlagsAtTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off);
8385#endif
8386
8387
8388/**
8389 * Helper for marking the current conditional branch as exiting the TB.
8390 *
8391 * This simplifies the state consolidation later when we reach the IEM_MC_ENDIF.
8392 */
8393DECL_FORCE_INLINE(void) iemNativeMarkCurCondBranchAsExiting(PIEMRECOMPILERSTATE pReNative)
8394{
8395 uint8_t idxCondDepth = pReNative->cCondDepth;
8396 if (idxCondDepth)
8397 {
8398 idxCondDepth--;
8399 pReNative->aCondStack[idxCondDepth].afExitTb[pReNative->aCondStack[idxCondDepth].fInElse] = true;
8400 }
8401}
8402
8403
8404/**
8405 * Unconditionally exits the translation block via a branch instruction.
8406 *
8407 * @note In case a delayed EFLAGS calculation is pending, this may emit an
8408 * additional IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS instructions.
8409 */
8410template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fActuallyExitingTb = true, bool const a_fPostponedEfl = true>
8411DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off)
8412{
8413 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8414 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8415
8416 if RT_CONSTEXPR_IF(a_fActuallyExitingTb)
8417 iemNativeMarkCurCondBranchAsExiting(pReNative);
8418
8419#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8420 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8421 off = iemNativeDoPostponedEFlagsAtTbExitEx<IEMNATIVELABELTYPE_GET_INPUT_REG_MASK(a_enmExitReason)>(pReNative, off,
8422 pCodeBuf);
8423#endif
8424
8425#ifdef RT_ARCH_AMD64
8426 /* jmp rel32 */
8427 pCodeBuf[off++] = 0xe9;
8428 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8429 pCodeBuf[off++] = 0xfe;
8430 pCodeBuf[off++] = 0xff;
8431 pCodeBuf[off++] = 0xff;
8432 pCodeBuf[off++] = 0xff;
8433
8434#elif defined(RT_ARCH_ARM64)
8435 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8436 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8437
8438#else
8439# error "Port me!"
8440#endif
8441 return off;
8442}
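/*
 * Added commentary: the rel32 bytes 0xfe,0xff,0xff,0xff and the
 * Armv8A64MkInstrB(-1) encoding are placeholder displacements;
 * iemNativeAddTbExitFixup records the position and the real target is
 * patched in when the TB exit stubs are laid down.  Typical use, assuming
 * kIemNativeLabelType_ReturnBreak is among the defined exit reasons:
 *
 * @code
 *     off = iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreak>(pReNative, off);
 * @endcode
 */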
8443
8444
8445/**
8446 * Unconditionally exits the translation block via a branch instruction.
8447 *
8448 * @note In case a delayed EFLAGS calculation is pending, this may emit an
8449 * additional IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS instructions.
8450 */
8451template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fActuallyExitingTb = true, bool const a_fPostponedEfl = true>
8452DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8453{
8454 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8455 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8456
8457 if RT_CONSTEXPR_IF(a_fActuallyExitingTb)
8458 iemNativeMarkCurCondBranchAsExiting(pReNative);
8459
8460#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8461 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8462 off = iemNativeDoPostponedEFlagsAtTbExit<IEMNATIVELABELTYPE_GET_INPUT_REG_MASK(a_enmExitReason)>(pReNative, off);
8463#endif
8464
8465#ifdef RT_ARCH_AMD64
8466 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8467
8468 /* jmp rel32 */
8469 pCodeBuf[off++] = 0xe9;
8470 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8471 pCodeBuf[off++] = 0xfe;
8472 pCodeBuf[off++] = 0xff;
8473 pCodeBuf[off++] = 0xff;
8474 pCodeBuf[off++] = 0xff;
8475
8476#elif defined(RT_ARCH_ARM64)
8477 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8478 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8479 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8480
8481#else
8482# error "Port me!"
8483#endif
8484 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8485 return off;
8486}
8487
8488
8489/**
8490 * Emits a Jcc rel32 / B.cc imm19 to the TB exit routine with the given reason (target ASSUMED to require a fixup).
8491 *
8492 * @note In case a delayed EFLAGS calculation is pending, this may emit an
8493 * additional IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS instructions.
8494 */
8495template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8496DECL_FORCE_INLINE_THROW(uint32_t)
8497iemNativeEmitTbExitJccEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, IEMNATIVEINSTRCOND enmCond)
8498{
8499 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8500 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8501
8502#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8503 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8504 if (pReNative->PostponedEfl.fEFlags)
8505 {
8506 /* Jcc l_NonPrimaryCodeStreamTarget */
8507 uint32_t const offFixup1 = off;
8508 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, enmCond);
8509
8510 /* JMP l_PrimaryCodeStreamResume */
8511 uint32_t const offFixup2 = off;
8512 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, off + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8513
8514 /* l_NonPrimaryCodeStreamTarget: */
8515 iemNativeFixupFixedJump(pReNative, offFixup1, off);
8516 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8517
8518 /* l_PrimaryCodeStreamResume: */
8519 iemNativeFixupFixedJump(pReNative, offFixup2, off);
8520 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8521 return off;
8522 }
8523#endif
8524
8525#if defined(RT_ARCH_AMD64)
8526 /* jcc rel32 */
8527 pCodeBuf[off++] = 0x0f;
8528 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
8529 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8530 pCodeBuf[off++] = 0x00;
8531 pCodeBuf[off++] = 0x00;
8532 pCodeBuf[off++] = 0x00;
8533 pCodeBuf[off++] = 0x00;
8534
8535#else
8536 /* ARM64 doesn't have the necessary jump range, so we jump via a local label
8537 just like when we keep everything local. */
8538 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8539 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel, enmCond);
8540#endif
8541 return off;
8542}
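/*
 * Added commentary on the postponed-EFLAGS path above: the flags
 * materialization is emitted on an exit-only side stream (Jcc to
 * l_NonPrimaryCodeStreamTarget, fall-through JMP back into the primary
 * stream), so the hot path pays only one extra jump.  Arch-neutral use via
 * the wrappers below, assuming kIemNativeLabelType_ReturnBreak is a defined
 * exit reason:
 *
 * @code
 *     off = iemNativeEmitTbExitJnz<kIemNativeLabelType_ReturnBreak>(pReNative, off);
 * @endcode
 */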
8543
8544
8545/**
8546 * Emits a Jcc rel32 / B.cc imm19 to the TB exit routine with the given reason.
8547 */
8548template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8549DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJcc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEINSTRCOND enmCond)
8550{
8551 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8552 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8553
8554#ifdef RT_ARCH_AMD64
8555 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS + 5);
8556#elif defined(RT_ARCH_ARM64)
8557 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS + 1);
8558#else
8559# error "Port me!"
8560#endif
8561 off = iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, enmCond);
8562 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8563 return off;
8564}
8565
8566
8567/**
8568 * Emits a JNZ/JNE rel32 / B.NE imm19 to the TB exit routine with the given reason.
8569 */
8570template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8571DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJnz(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8572{
8573#ifdef RT_ARCH_AMD64
8574 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_ne);
8575#elif defined(RT_ARCH_ARM64)
8576 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Ne);
8577#else
8578# error "Port me!"
8579#endif
8580}
8581
8582
8583/**
8584 * Emits a JZ/JE rel32 / B.EQ imm19 to the TB exit routine with the given reason.
8585 */
8586template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8587DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJz(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8588{
8589#ifdef RT_ARCH_AMD64
8590 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_e);
8591#elif defined(RT_ARCH_ARM64)
8592 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Eq);
8593#else
8594# error "Port me!"
8595#endif
8596}
8597
8598
8599/**
8600 * Emits a JA/JNBE rel32 / B.HI imm19 to the TB exit.
8601 */
8602template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8603DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJa(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8604{
8605#ifdef RT_ARCH_AMD64
8606 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_nbe);
8607#elif defined(RT_ARCH_ARM64)
8608 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Hi);
8609#else
8610# error "Port me!"
8611#endif
8612}
8613
8614
8615/**
8616 * Emits a JL/JNGE rel32 / B.LT imm19 to the TB exit with the given reason.
8617 */
8618template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8619DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJl(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8620{
8621#ifdef RT_ARCH_AMD64
8622 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_l);
8623#elif defined(RT_ARCH_ARM64)
8624 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Lt);
8625#else
8626# error "Port me!"
8627#endif
8628}
8629
8630
8631/**
8632 * Emits a jump to the TB exit with @a a_enmExitReason on the condition _any_ of
8633 * the bits in @a fBits are set in @a iGprSrc.
8634 */
8635template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8636DECL_INLINE_THROW(uint32_t)
8637iemNativeEmitTbExitIfAnyBitsSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
8638{
8639 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8640
8641 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8642 return iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8643}
8644
8645
8646#if 0 /* unused */
8647/**
8648 * Emits a jump to the TB exit with @a a_enmExitReason on the condition _none_
8649 * of the bits in @a fBits are set in @a iGprSrc.
8650 */
8651template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8652DECL_INLINE_THROW(uint32_t)
8653iemNativeEmitTbExitIfNoBitsSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
8654{
8655 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8656
8657 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8658 return iemNativeEmitTbExitJz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8659}
8660#endif
8661
8662
8663#if 0 /* unused */
8664/**
8665 * Emits code that exits the TB with the given reason if @a iGprLeft and @a iGprRight
8666 * differ.
8667 */
8668template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8669DECL_INLINE_THROW(uint32_t)
8670iemNativeEmitTbExitIfGprNotEqualGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
8671{
8672 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
8673 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8674 return off;
8675}
8676#endif
8677
8678
8679/**
8680 * Emits code that exits the current TB if 32-bit @a iGprSrc differs from
8681 * @a uImm.
8682 */
8683template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8684DECL_INLINE_THROW(uint32_t)
8685iemNativeEmitTbExitIfGpr32NotEqualImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm)
8686{
8687 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8688 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8689 return off;
8690}
8691
8692
8693/**
8694 * Emits code that exits the current TB if @a iGprSrc differs from @a uImm.
8695 */
8696template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8697DECL_INLINE_THROW(uint32_t)
8698iemNativeEmitTbExitIfGprNotEqualImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm)
8699{
8700 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8701 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8702 return off;
8703}
8704
8705
8706/**
8707 * Emits code that exits the current TB with the given reason if 32-bit @a iGprSrc equals @a uImm.
8708 */
8709template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8710DECL_INLINE_THROW(uint32_t)
8711iemNativeEmitTbExitIfGpr32EqualsImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm)
8712{
8713 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8714 off = iemNativeEmitTbExitJz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8715 return off;
8716}
8717
8718
8719/**
8720 * Emits code to exit the current TB with the reason @a a_enmExitReason on the
8721 * condition that bit @a iBitNo _is_ _set_ in @a iGprSrc.
8722 *
8723 * @note On ARM64 the range is only +/-8191 instructions.
8724 */
8725template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8726DECL_INLINE_THROW(uint32_t)
8727iemNativeEmitTbExitIfBitSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
8728{
8729 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8730
8731#if defined(RT_ARCH_AMD64)
8732 Assert(iBitNo < 64);
8733 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8734 if (iBitNo < 8)
8735 {
8736 /* test Eb, imm8 */
8737 if (iGprSrc >= 4)
8738 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
8739 pbCodeBuf[off++] = 0xf6;
8740 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
8741 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
8742 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8743 off = iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_ne);
8744 }
8745 else
8746 {
8747 /* bt Ev, imm8 */
8748 if (iBitNo >= 32)
8749 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8750 else if (iGprSrc >= 8)
8751 pbCodeBuf[off++] = X86_OP_REX_B;
8752 pbCodeBuf[off++] = 0x0f;
8753 pbCodeBuf[off++] = 0xba;
8754 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8755 pbCodeBuf[off++] = iBitNo;
8756 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8757 off = iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_c);
8758 }
8759 return off;
8760
8761#elif defined(RT_ARCH_ARM64)
8762 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8763 /** @todo Perhaps we should always apply the PostponedEfl code pattern here,
8764 * it's the same number of instructions as the TST + B.CC stuff? */
8765# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8766 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8767 if (pReNative->PostponedEfl.fEFlags)
8768 {
8769 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
8770 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8771 pCodeBuf[off++] = Armv8A64MkInstrTbnz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, iBitNo);
8772 uint32_t const offFixup = off;
8773 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8774 /* l_NonPrimaryCodeStreamTarget: */
8775 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8776 /* l_PrimaryCodeStreamResume: */
8777 iemNativeFixupFixedJump(pReNative, offFixup, off);
8778 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8779 return off;
8780 }
8781# endif
8782 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8783 we go via a local trampoline. */
8784 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8785 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
8786#else
8787# error "port me"
8788#endif
8789}
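/*
 * Usage sketch (added): exiting the TB when the trap flag is set in a
 * register holding the guest EFLAGS; X86_EFL_TF_BIT and the exit reason are
 * assumptions about the wider code base.
 *
 * @code
 *     off = iemNativeEmitTbExitIfBitSetInGpr<kIemNativeLabelType_ReturnBreak>(pReNative, off,
 *                                                                            idxRegEfl, X86_EFL_TF_BIT);
 * @endcode
 */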
8790
8791
8792/**
8793 * Emits code that exits the current TB with @a a_enmExitReason if @a iGprSrc is
8794 * not zero.
8795 *
8796 * The operand size is given by @a f64Bit.
8797 */
8798template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8799DECL_FORCE_INLINE_THROW(uint32_t)
8800iemNativeEmitTbExitIfGprIsNotZeroEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8801 uint8_t iGprSrc, bool f64Bit)
8802{
8803 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8804
8805#if defined(RT_ARCH_AMD64)
8806 /* test reg32,reg32 / test reg64,reg64 */
8807 if (f64Bit)
8808 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8809 else if (iGprSrc >= 8)
8810 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8811 pCodeBuf[off++] = 0x85;
8812 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8813
8814 /* jnz idxLabel */
8815 return iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
8816
8817#elif defined(RT_ARCH_ARM64)
8818 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8819# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8820 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8821 if (pReNative->PostponedEfl.fEFlags)
8822 {
8823 pCodeBuf[off++] = Armv8A64MkInstrCbnz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, f64Bit);
8824 uint32_t const offFixup = off;
8825 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8826 /* l_NonPrimaryCodeStreamTarget: */
8827 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8828 /* l_PrimaryCodeStreamResume: */
8829 iemNativeFixupFixedJump(pReNative, offFixup, off);
8830 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8831 return off;
8832 }
8833# endif
8834 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8835 we go via a local trampoline. */
8836 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8837 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8838 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8839#else
8840# error "port me"
8841#endif
8842}
8843
8844
8845/**
8846 * Emits code to exit the current TB with the given reason @a a_enmExitReason if
8847 * @a iGprSrc is not zero.
8848 *
8849 * The operand size is given by @a f64Bit.
8850 */
8851template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8852DECL_INLINE_THROW(uint32_t)
8853iemNativeEmitTbExitIfGprIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit)
8854{
8855#if defined(RT_ARCH_AMD64)
8856 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8857
8858#else
8859 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8860#endif
8861 off = iemNativeEmitTbExitIfGprIsNotZeroEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, iGprSrc, f64Bit);
8862 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8863 return off;
8864}
8865
8866
8867/**
8868 * Emits code that exits the current TB with @a a_enmExitReason if @a iGprSrc is
8869 * zero.
8870 *
8871 * The operand size is given by @a f64Bit.
8872 */
8873template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8874DECL_FORCE_INLINE_THROW(uint32_t)
8875iemNativeEmitTbExitIfGprIsZeroEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8876 uint8_t iGprSrc, bool f64Bit)
8877{
8878 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8879
8880#if defined(RT_ARCH_AMD64)
8881 /* test reg32,reg32 / test reg64,reg64 */
8882 if (f64Bit)
8883 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8884 else if (iGprSrc >= 8)
8885 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8886 pCodeBuf[off++] = 0x85;
8887 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8888
8889 /* jz idxLabel */
8890 return iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, kIemNativeInstrCond_e);
8891
8892#elif defined(RT_ARCH_ARM64)
8893 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8894# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8895 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8896 if (pReNative->PostponedEfl.fEFlags)
8897 {
8898 pCodeBuf[off++] = Armv8A64MkInstrCbz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, f64Bit);
8899 uint32_t const offFixup = off;
8900 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8901 /* l_NonPrimaryCodeStreamTarget: */
8902 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8903 /* l_PrimaryCodeStreamResume: */
8904 iemNativeFixupFixedJump(pReNative, offFixup, off);
8905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8906 return off;
8907 }
8908# endif
8909 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8910 we go via a local trampoline. */
8911 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8912 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8913 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
8914#else
8915# error "port me"
8916#endif
8917}
8918
8919
8920/**
8921 * Emits code to exit the current TB with the given reason @a a_enmExitReason if @a iGprSrc is zero.
8922 *
8923 * The operand size is given by @a f64Bit.
8924 */
8925template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8926DECL_INLINE_THROW(uint32_t)
8927iemNativeEmitTbExitIfGprIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit)
8928{
8929#if defined(RT_ARCH_AMD64)
8930 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8931
8932#else
8933 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8934#endif
8935 off = iemNativeEmitTbExitIfGprIsZeroEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, iGprSrc, f64Bit);
8936 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8937 return off;
8938}
8939
8940
8941#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8942/*********************************************************************************************************************************
8943* SIMD helpers. *
8944*********************************************************************************************************************************/
8945
8946
8947/**
8948 * Emits code to load the variable address into an argument GPR.
8949 *
8950 * This is a special variant intended for SIMD variables only; it is only
8951 * called by the TLB miss path in the memory fetch/store code, where the
8952 * value is passed by reference and both the register and the stack copy
8953 * are needed, depending on which path is taken (TLB hit vs. miss).
8954 */
8955DECL_FORCE_INLINE_THROW(uint32_t)
8956iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8957 bool fSyncRegWithStack = true)
8958{
8959 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8960 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8961 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8962 || pVar->enmKind == kIemNativeVarKind_Stack,
8963 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8964 AssertStmt(pVar->fSimdReg,
8965 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8966 Assert( pVar->idxStackSlot != UINT8_MAX
8967 && pVar->idxReg != UINT8_MAX);
8968
8969 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8970 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8971
8972 uint8_t const idxRegVar = pVar->idxReg;
8973 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8974 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
8975
8976 if (fSyncRegWithStack)
8977 {
8978 if (pVar->cbVar == sizeof(RTUINT128U))
8979 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
8980 else
8981 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
8982 }
8983
8984 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8985}
8986
8987
8988/**
8989 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
8990 *
8991 * This is a special helper and only called
8992 * by the TLB miss path in the memory fetch/store code because there we pass
8993 * the value by reference and need to sync the value on the stack with the assigned host register
8994 * after a TLB miss where the value ends up on the stack.
8995 */
8996DECL_FORCE_INLINE_THROW(uint32_t)
8997iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
8998{
8999 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9000 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9001 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
9002 || pVar->enmKind == kIemNativeVarKind_Stack,
9003 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9004 AssertStmt(pVar->fSimdReg,
9005 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9006 Assert( pVar->idxStackSlot != UINT8_MAX
9007 && pVar->idxReg != UINT8_MAX);
9008
9009 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9010 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
9011
9012 uint8_t const idxRegVar = pVar->idxReg;
9013 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9014 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
9015
9016 if (pVar->cbVar == sizeof(RTUINT128U))
9017 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
9018 else
9019 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
9020
9021 return off;
9022}
9023
9024
9025/**
9026 * Emits a gprdst = ~gprsrc operation.
9027 */
9028DECL_FORCE_INLINE_THROW(uint32_t)
9029iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
9030{
9031#ifdef RT_ARCH_AMD64
9032 if (iGprDst != iGprSrc)
9033 {
9034 /* mov gprdst, gprsrc. */
9035 if (f64Bit)
9036 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
9037 else
9038 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
9039 }
9040
9041 /* not gprdst */
9042 if (f64Bit || iGprDst >= 8)
9043 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
9044 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
9045 pCodeBuf[off++] = 0xf7;
9046 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
9047#elif defined(RT_ARCH_ARM64)
9048 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
9049#else
9050# error "port me"
9051#endif
9052 return off;
9053}
9054
9055
9056/**
9057 * Emits a gprdst = ~gprsrc operation.
9058 */
9059DECL_INLINE_THROW(uint32_t)
9060iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
9061{
9062#ifdef RT_ARCH_AMD64
9063 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
9064#elif defined(RT_ARCH_ARM64)
9065 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
9066#else
9067# error "port me"
9068#endif
9069 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9070 return off;
9071}
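/*
 * Design note (added): ARM64 has no plain NOT on GPRs; ORN with the zero
 * register (the MVN alias, dst = XZR | ~src) inverts and moves in one
 * instruction, which is why only the AMD64 path needs the extra MOV when
 * dst != src.  Sketch:
 *
 * @code
 *     off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegDst, idxRegSrc, false); // 32-bit; bits 32..63 end up zero
 * @endcode
 */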
9072
9073
9074/**
9075 * Emits a 128-bit vector register store to a VCpu value.
9076 */
9077DECL_FORCE_INLINE_THROW(uint32_t)
9078iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9079{
9080#ifdef RT_ARCH_AMD64
9081 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
9082 pCodeBuf[off++] = 0x66;
9083 if (iVecReg >= 8)
9084 pCodeBuf[off++] = X86_OP_REX_R;
9085 pCodeBuf[off++] = 0x0f;
9086 pCodeBuf[off++] = 0x7f;
9087 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9088#elif defined(RT_ARCH_ARM64)
9089 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
9090
9091#else
9092# error "port me"
9093#endif
9094 return off;
9095}
9096
9097
9098/**
9099 * Emits a 128-bit vector register store to a VCpu value.
9100 */
9101DECL_INLINE_THROW(uint32_t)
9102iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9103{
9104#ifdef RT_ARCH_AMD64
9105 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
9106#elif defined(RT_ARCH_ARM64)
9107 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
9108#else
9109# error "port me"
9110#endif
9111 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9112 return off;
9113}
9114
9115
9116/**
9117 * Emits a high 128-bit vector register store to a VCpu value.
9118 */
9119DECL_FORCE_INLINE_THROW(uint32_t)
9120iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9121{
9122#ifdef RT_ARCH_AMD64
9123 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
9124 pCodeBuf[off++] = X86_OP_VEX3;
9125 if (iVecReg >= 8)
9126 pCodeBuf[off++] = 0x63;
9127 else
9128 pCodeBuf[off++] = 0xe3;
9129 pCodeBuf[off++] = 0x7d;
9130 pCodeBuf[off++] = 0x39;
9131 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9132 pCodeBuf[off++] = 0x01; /* Immediate */
9133#elif defined(RT_ARCH_ARM64)
9134 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
9135#else
9136# error "port me"
9137#endif
9138 return off;
9139}
9140
9141
9142/**
9143 * Emits a high 128-bit vector register store to a VCpu value.
9144 */
9145DECL_INLINE_THROW(uint32_t)
9146iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9147{
9148#ifdef RT_ARCH_AMD64
9149 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
9150#elif defined(RT_ARCH_ARM64)
9151 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9152 Assert(!(iVecReg & 0x1));
9153 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
9154#else
9155# error "port me"
9156#endif
9157 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9158 return off;
9159}
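/*
 * Added commentary: on ARM64 a 256-bit guest value is modelled as two
 * adjacent 128-bit host registers (even register = low half, +1 = high
 * half), which is why the High variants assert an even iVecReg and operate
 * on iVecReg + 1.  Sketch for storing a full 256-bit value, with offVCpuLo
 * and offVCpuHi as placeholder VMCPU offsets:
 *
 * @code
 *     off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxSimdReg, offVCpuLo);
 *     off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxSimdReg, offVCpuHi);
 * @endcode
 */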
9160
9161
9162/**
9163 * Emits a 128-bit vector register load of a VCpu value.
9164 */
9165DECL_FORCE_INLINE_THROW(uint32_t)
9166iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9167{
9168#ifdef RT_ARCH_AMD64
9169 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
9170 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9171 if (iVecReg >= 8)
9172 pCodeBuf[off++] = X86_OP_REX_R;
9173 pCodeBuf[off++] = 0x0f;
9174 pCodeBuf[off++] = 0x6f;
9175 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9176#elif defined(RT_ARCH_ARM64)
9177 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
9178
9179#else
9180# error "port me"
9181#endif
9182 return off;
9183}
9184
9185
9186/**
9187 * Emits a 128-bit vector register load of a VCpu value.
9188 */
9189DECL_INLINE_THROW(uint32_t)
9190iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9191{
9192#ifdef RT_ARCH_AMD64
9193 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
9194#elif defined(RT_ARCH_ARM64)
9195 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
9196#else
9197# error "port me"
9198#endif
9199 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9200 return off;
9201}
9202
9203
9204/**
9205 * Emits a high 128-bit vector register load of a VCpu value.
9206 */
9207DECL_FORCE_INLINE_THROW(uint32_t)
9208iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9209{
9210#ifdef RT_ARCH_AMD64
9211 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
9212 pCodeBuf[off++] = X86_OP_VEX3;
9213 if (iVecReg >= 8)
9214 pCodeBuf[off++] = 0x63;
9215 else
9216 pCodeBuf[off++] = 0xe3;
9217 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9218 pCodeBuf[off++] = 0x38;
9219 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9220 pCodeBuf[off++] = 0x01; /* Immediate */
9221#elif defined(RT_ARCH_ARM64)
9222 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
9223#else
9224# error "port me"
9225#endif
9226 return off;
9227}
9228
9229
9230/**
9231 * Emits a high 128-bit vector register load of a VCpu value.
9232 */
9233DECL_INLINE_THROW(uint32_t)
9234iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9235{
9236#ifdef RT_ARCH_AMD64
9237 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
9238#elif defined(RT_ARCH_ARM64)
9239 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9240 Assert(!(iVecReg & 0x1));
9241 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
9242#else
9243# error "port me"
9244#endif
9245 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9246 return off;
9247}
9248
9249
9250/**
9251 * Emits a vecdst = vecsrc load.
9252 */
9253DECL_FORCE_INLINE(uint32_t)
9254iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9255{
9256#ifdef RT_ARCH_AMD64
9257 /* movdqu vecdst, vecsrc */
9258 pCodeBuf[off++] = 0xf3;
9259
9260 if ((iVecRegDst | iVecRegSrc) >= 8)
9261 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
9262 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
9263 : X86_OP_REX_R;
9264 pCodeBuf[off++] = 0x0f;
9265 pCodeBuf[off++] = 0x6f;
9266 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9267
9268#elif defined(RT_ARCH_ARM64)
9269 /* mov dst, src; alias for: orr dst, src, src */
9270 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9271
9272#else
9273# error "port me"
9274#endif
9275 return off;
9276}
9277
9278
9279/**
9280 * Emits a vecdst = vecsrc load, 128-bit.
9281 */
9282DECL_INLINE_THROW(uint32_t)
9283iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9284{
9285#ifdef RT_ARCH_AMD64
9286 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
9287#elif defined(RT_ARCH_ARM64)
9288 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
9289#else
9290# error "port me"
9291#endif
9292 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9293 return off;
9294}
9295
9296
9297/**
9298 * Emits a vecdst[128:255] = vecsrc[128:255] load.
9299 */
9300DECL_FORCE_INLINE_THROW(uint32_t)
9301iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9302{
9303#ifdef RT_ARCH_AMD64
9304 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
9305 pCodeBuf[off++] = X86_OP_VEX3;
9306 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9307 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9308 pCodeBuf[off++] = 0x46;
9309 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9310 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
9311
9312#elif defined(RT_ARCH_ARM64)
9313 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
9314
9315 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
9316# ifdef IEM_WITH_THROW_CATCH
9317 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
9318# else
9319 AssertReleaseFailedStmt(off = UINT32_MAX);
9320# endif
9321#else
9322# error "port me"
9323#endif
9324 return off;
9325}
9326
9327
9328/**
9329 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
9330 */
9331DECL_INLINE_THROW(uint32_t)
9332iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9333{
9334#ifdef RT_ARCH_AMD64
9335 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
9336#elif defined(RT_ARCH_ARM64)
9337 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9338 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
9339#else
9340# error "port me"
9341#endif
9342 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9343 return off;
9344}
9345
9346
9347/**
9348 * Emits a vecdst[0:127] = vecsrc[128:255] load.
9349 */
9350DECL_FORCE_INLINE_THROW(uint32_t)
9351iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9352{
9353#ifdef RT_ARCH_AMD64
9354 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
9355 pCodeBuf[off++] = X86_OP_VEX3;
9356 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
9357 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9358 pCodeBuf[off++] = 0x39;
9359 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
9360 pCodeBuf[off++] = 0x1;
9361
9362#elif defined(RT_ARCH_ARM64)
9363 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
9364
9365 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
9366# ifdef IEM_WITH_THROW_CATCH
9367 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
9368# else
9369 AssertReleaseFailedStmt(off = UINT32_MAX);
9370# endif
9371#else
9372# error "port me"
9373#endif
9374 return off;
9375}
9376
9377
/**
 * Emits a vecdst[0:127] = vecsrc[128:255] load, high 128-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
#elif defined(RT_ARCH_ARM64)
    Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = vecsrc load, 256-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
{
#ifdef RT_ARCH_AMD64
    /* vmovdqa ymm, ymm */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    if (iVecRegDst >= 8 && iVecRegSrc >= 8)
    {
        pbCodeBuf[off++] = X86_OP_VEX3;
        pbCodeBuf[off++] = 0x41;
        pbCodeBuf[off++] = 0x7d;
        pbCodeBuf[off++] = 0x6f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
    }
    else
    {
        pbCodeBuf[off++] = X86_OP_VEX2;
        pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
        pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
        pbCodeBuf[off++] = iVecRegSrc >= 8
                         ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
                         : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
    }
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst,     iVecRegSrc    );
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


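/*
 * Usage sketch (illustrative only, not part of the emitter API): copying a
 * full 256-bit guest register between two host SIMD registers.  The indices
 * below (idxSimdDst, idxSimdSrc) are hypothetical register-allocator results;
 * on ARM64 both must be even, since a 256-bit value occupies two adjacent
 * 128-bit host registers.
 *
 *     off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdDst, idxSimdSrc);
 */
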
/**
 * Emits a vecdst[128:255] = vecsrc[0:127] load.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
{
#ifdef RT_ARCH_AMD64
    /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x38;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
    pCodeBuf[off++] = 0x01; /* Immediate */

#elif defined(RT_ARCH_ARM64)
    Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
    /* mov dst+1, src; alias for: orr dst+1, src, src */
    pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 64-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
{
#ifdef RT_ARCH_AMD64
    if (iQWord >= 2)
    {
        /*
         * vpextrq doesn't work on the upper 128-bits.
         * So we use the following sequence:
         *     vextracti128 vectmp0, vecsrc, 1
         *     pextrq       gpr, vectmp0, #(iQWord - 2)
         */
        /* vextracti128 */
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
        pCodeBuf[off++] = 0x39;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
        pCodeBuf[off++] = 0x1;

        /* pextrq */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        pCodeBuf[off++] = X86_OP_REX_W
                        | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
                        | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x16;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
        pCodeBuf[off++] = iQWord - 2;
    }
    else
    {
        /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        pCodeBuf[off++] = X86_OP_REX_W
                        | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
                        | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x16;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
        pCodeBuf[off++] = iQWord;
    }
#elif defined(RT_ARCH_ARM64)
    /* umov gprdst, vecsrc[iQWord] */
    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 64-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
{
    Assert(iQWord <= 3);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iVecRegSrc, iQWord);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegSrc & 0x1));
    /* Need to access the "high" 128-bit vector register. */
    if (iQWord >= 2)
        off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
    else
        off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


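/*
 * Usage sketch (illustrative only, hypothetical indices): fetching the top
 * qword (iQWord=3) of a 256-bit value into a host GPR.  On AMD64 the
 * iQWord >= 2 case detours through IEMNATIVE_SIMD_REG_FIXED_TMP0 via
 * vextracti128, while ARM64 simply umov's from the odd register of the pair.
 *
 *     off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGprDst, idxSimdSrc, 3);
 */
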
/**
 * Emits a gprdst = vecsrc[x] load, 32-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
{
#ifdef RT_ARCH_AMD64
    if (iDWord >= 4)
    {
        /*
         * vpextrd doesn't work on the upper 128-bits.
         * So we use the following sequence:
         *     vextracti128 vectmp0, vecsrc, 1
         *     pextrd       gpr, vectmp0, #(iDWord - 4)
         */
        /* vextracti128 */
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
        pCodeBuf[off++] = 0x39;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
        pCodeBuf[off++] = 0x1;

        /* pextrd gpr, vectmp0, #(iDWord - 4) (ASSUMES SSE4.1). */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
            pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
                            | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x16;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
        pCodeBuf[off++] = iDWord - 4;
    }
    else
    {
        /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        if (iGprDst >= 8 || iVecRegSrc >= 8)
            pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
                            | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x16;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
        pCodeBuf[off++] = iDWord;
    }
#elif defined(RT_ARCH_ARM64)
    Assert(iDWord < 4);

    /* umov gprdst, vecsrc[iDWord] */
    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 32-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
{
    Assert(iDWord <= 7);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegSrc & 0x1));
    /* Need to access the "high" 128-bit vector register. */
    if (iDWord >= 4)
        off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
    else
        off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


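/*
 * The Ex variants assume the caller has already reserved buffer space, while
 * the plain variants wrap them in iemNativeInstrBufEnsure().  A caller
 * batching several emits into a single reservation might do something like
 * the following (illustrative sketch; the indices are hypothetical and 32 is
 * a generous upper bound - bytes on AMD64, instruction words on ARM64):
 *
 *     PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
 *     off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(pCodeBuf, off, idxGprLo, idxSimdSrc, 0);
 *     off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(pCodeBuf, off, idxGprHi, idxSimdSrc, 1);
 *     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 */
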
/**
 * Emits a gprdst = vecsrc[x] load, 16-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
{
#ifdef RT_ARCH_AMD64
    if (iWord >= 8)
    {
        /** @todo Currently not used. */
        AssertReleaseFailed();
    }
    else
    {
        /* pextrw gpr, vecsrc, #iWord */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        if (iGprDst >= 8 || iVecRegSrc >= 8)
            pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
                            | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0xc5;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
        pCodeBuf[off++] = iWord;
    }
#elif defined(RT_ARCH_ARM64)
    /* umov gprdst, vecsrc[iWord] */
    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 16-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
{
    Assert(iWord <= 15);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegSrc & 0x1));
    /* Need to access the "high" 128-bit vector register. */
    if (iWord >= 8)
        off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
    else
        off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 8-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
{
#ifdef RT_ARCH_AMD64
    if (iByte >= 16)
    {
        /** @todo Currently not used. */
        AssertReleaseFailed();
    }
    else
    {
        /* pextrb gpr, vecsrc, #iByte */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        if (iGprDst >= 8 || iVecRegSrc >= 8)
            pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
                            | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x14;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
        pCodeBuf[off++] = iByte;
    }
#elif defined(RT_ARCH_ARM64)
    /* umov gprdst, vecsrc[iByte] */
    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 8-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
{
    Assert(iByte <= 31);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegSrc & 0x1));
    /* Need to access the "high" 128-bit vector register. */
    if (iByte >= 16)
        off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
    else
        off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst[x] = gprsrc store, 64-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
{
#ifdef RT_ARCH_AMD64
    if (iQWord >= 2)
    {
        /*
         * vpinsrq doesn't work on the upper 128-bits.
         * So we use the following sequence:
         *     vextracti128 vectmp0, vecdst, 1
         *     pinsrq       vectmp0, gpr, #(iQWord - 2)
         *     vinserti128  vecdst, vectmp0, 1
         */
        /* vextracti128 */
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
        pCodeBuf[off++] = 0x39;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
        pCodeBuf[off++] = 0x1;

        /* pinsrq */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        pCodeBuf[off++] = X86_OP_REX_W
                        | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x22;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
        pCodeBuf[off++] = iQWord - 2;

        /* vinserti128 */
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
        pCodeBuf[off++] = 0x38;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
        pCodeBuf[off++] = 0x01; /* Immediate */
    }
    else
    {
        /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        pCodeBuf[off++] = X86_OP_REX_W
                        | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x22;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
        pCodeBuf[off++] = iQWord;
    }
#elif defined(RT_ARCH_ARM64)
    /* ins vecdst[iQWord], gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[x] = gprsrc store, 64-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
{
    Assert(iQWord <= 3);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
#elif defined(RT_ARCH_ARM64)
    Assert(!(iVecRegDst & 0x1));
    if (iQWord >= 2)
        off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
    else
        off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


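/*
 * Usage sketch (illustrative only, hypothetical indices): writing qword 2 of
 * a 256-bit register.  Note the asymmetric cost: ARM64 needs a single ins
 * into the odd register of the pair, whereas AMD64 has to do a full
 * extract/modify/insert round trip (19 bytes worst case) because vpinsrq
 * cannot reach the upper 128-bit lane.
 *
 *     off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdDst, idxGprSrc, 2);
 */
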
/**
 * Emits a vecdst[x] = gprsrc store, 32-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
{
#ifdef RT_ARCH_AMD64
    if (iDWord >= 4)
    {
        /*
         * vpinsrd doesn't work on the upper 128-bits.
         * So we use the following sequence:
         *     vextracti128 vectmp0, vecdst, 1
         *     pinsrd       vectmp0, gpr, #(iDWord - 4)
         *     vinserti128  vecdst, vectmp0, 1
         */
        /* vextracti128 */
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
        pCodeBuf[off++] = 0x39;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
        pCodeBuf[off++] = 0x1;

        /* pinsrd */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
            pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
                            | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x22;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
        pCodeBuf[off++] = iDWord - 4;

        /* vinserti128 */
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
        pCodeBuf[off++] = 0x38;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
        pCodeBuf[off++] = 0x01; /* Immediate */
    }
    else
    {
        /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        if (iVecRegDst >= 8 || iGprSrc >= 8)
            pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                            | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x22;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
        pCodeBuf[off++] = iDWord;
    }
#elif defined(RT_ARCH_ARM64)
    /* ins vecdst[iDWord], gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[x] = gprsrc store, 32-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
{
    Assert(iDWord <= 7);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
#elif defined(RT_ARCH_ARM64)
    Assert(!(iVecRegDst & 0x1));
    if (iDWord >= 4)
        off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
    else
        off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst[x] = gprsrc store, 16-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
{
#ifdef RT_ARCH_AMD64
    /* pinsrw vecdst, gpr, #iWord. */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xc4;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = iWord;
#elif defined(RT_ARCH_ARM64)
    /* ins vecdst[iWord], gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iWord, kArmv8InstrUmovInsSz_U16);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[x] = gprsrc store, 16-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
{
    Assert(iWord <= 7); /* Only the low 128 bits are reachable; pinsrw xmm and the ARM64 ins encoding both top out at word 7. */

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iGprSrc, iWord);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst[x] = gprsrc store, 8-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
{
#ifdef RT_ARCH_AMD64
    /* pinsrb vecdst, gpr, #iByte (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x20;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = iByte;
#elif defined(RT_ARCH_ARM64)
    /* ins vecdst[iByte], gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iByte, kArmv8InstrUmovInsSz_U8);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[x] = gprsrc store, 8-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
{
    Assert(iByte <= 15);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iByte);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iByte);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


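/*
 * Usage sketch (illustrative only, hypothetical indices): the 8-bit and
 * 16-bit stores only reach the low 128 bits, which is all that e.g. a
 * recompiled PINSRB needs:
 *
 *     off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdDst, idxGprSrc, 11);
 */
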
/**
 * Emits a vecdst.au32[iDWord] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
{
    Assert(iDWord <= 7);

#ifdef RT_ARCH_AMD64
    /*
     * xor    tmp0, tmp0
     * pinsrd xmm, tmp0, iDWord
     */
    if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
        pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pCodeBuf[off++] = 0x33;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* ins vecreg[iDWord], wzr */
    if (iDWord >= 4)
        pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
    else
        pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst.au32[iDWord] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 22), off, iVecReg, iDWord);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


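/*
 * Usage sketch (illustrative only, hypothetical index): zeroing dword 5 of a
 * 256-bit register.  There is no single zero-an-element instruction on AMD64,
 * so the emitter xors IEMNATIVE_REG_FIXED_TMP0 and stores that; ARM64 can ins
 * directly from wzr.
 *
 *     off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdReg, 5);
 */
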
/**
 * Emits a vecdst[0:127] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    /* pxor xmm, xmm */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecReg >= 8)
        pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xef;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* eor vecreg, vecreg, vecreg */
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[0:127] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst[128:255] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    /* vmovdqa xmm, xmm.  This will clear the upper half of ymm. */
    if (iVecReg < 8)
    {
        pCodeBuf[off++] = X86_OP_VEX2;
        pCodeBuf[off++] = 0xf9;
    }
    else
    {
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = 0x41;
        pCodeBuf[off++] = 0x79;
    }
    pCodeBuf[off++] = 0x6f;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* eor vecreg+1, vecreg+1, vecreg+1 */
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[128:255] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


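/*
 * Usage sketch (illustrative only, hypothetical index): VEX.128 encoded guest
 * instructions zero bits 255:128 of the destination register, so a recompiled
 * 128-bit AVX operation would typically be followed by:
 *
 *     off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdDst);
 */
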
/**
 * Emits a vecdst[0:255] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    /* vpxor ymm, ymm, ymm */
    if (iVecReg < 8)
    {
        pCodeBuf[off++] = X86_OP_VEX2;
        pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
    }
    else
    {
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
    }
    pCodeBuf[off++] = 0xef;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* eor vecreg, vecreg, vecreg */
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg,     iVecReg,     iVecReg    );
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[0:255] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


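/*
 * Usage sketch (illustrative only, hypothetical index): clearing a whole
 * shadowed 256-bit register, e.g. when recompiling something VZEROALL-like
 * one register at a time; on ARM64 this costs two eor instructions, one per
 * 128-bit half of the pair.
 *
 *     off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdReg);
 */
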
/**
 * Emits a vecdst = gprsrc broadcast, 8-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /* pinsrb vecdst, gpr, #0 (ASSUMES SSE4.1) */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x20;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (  iVecRegDst >= 8
                       ? 0
                       : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x78;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 8-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 16-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /* pinsrw vecdst, gpr, #0 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xc4;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (  iVecRegDst >= 8
                       ? 0
                       : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x79;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 16-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 32-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
     *        vbroadcast needs a memory operand or another xmm register to work... */

    /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (  iVecRegDst >= 8
                       ? 0
                       : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x58;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 32-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


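/*
 * Usage sketch (illustrative only, hypothetical indices): a VPBROADCASTD
 * whose source value already sits in a host GPR might be lowered like this,
 * the final 'true' selecting the 256-bit (f256Bit) form so all eight dwords
 * receive the value:
 *
 *     off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdDst, idxGprSrc, true);
 */
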
/**
 * Emits a vecdst = gprsrc broadcast, 64-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
     *        vbroadcast needs a memory operand or another xmm register to work... */

    /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = X86_OP_REX_W
                    | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                    | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (  iVecRegDst >= 8
                       ? 0
                       : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x59;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 64-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);

    /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x38;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
    pCodeBuf[off++] = 0x01; /* Immediate */
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1));

    /* mov dst, src; alias for: orr dst, src, src */
    pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst,     iVecRegSrc, iVecRegSrc);
    pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

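/*
 * Usage sketch (illustrative only, hypothetical indices): the natural
 * building block for VBROADCASTI128/VBROADCASTF128 style semantics once the
 * 128-bit source has been loaded into a host SIMD register:
 *
 *     off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdDst, idxSimdSrc);
 */
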
#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */

/** @} */

#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */