/* $Id: IEMN8veRecompilerEmit.h 106123 2024-09-23 22:04:30Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline    Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    if (uInfo == 0)
        pu32CodeBuf[off++] = ARMV8_A64_INSTR_NOP;
    else
        pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(ARMV8_A64_REG_XZR, (uint16_t)uInfo);

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
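
/* Editor's note: a minimal usage sketch, not part of the original source. The
   uInfo payload only aids locating emitted code in a disassembly; the value
   below is illustrative. On AMD64 a non-zero uInfo becomes the disp32 of a
   multi-byte NOP (0f 1f 05 imm32), while on ARM64 it is truncated to 16 bits
   and parked in a 'movz xzr, #imm16'.
   @code
        off = iemNativeEmitMarker(pReNative, off, 0x1234);
   @endcode */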


/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo);   /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */

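/* Editor's note: a worked encoding example, not from the original source. For
   iGprReg=RAX (0), iGprBase=RSP (4) and offDisp=8, the disp8 branch above
   emits ModRM 0x44 plus the mandatory SIB byte 0x24 and the byte 0x08, i.e.
   the [rsp+8] operand of an instruction such as 'mov rax, [rsp+8]'. */
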
/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *     - AMD64: 6 instruction bytes.
 *     - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32,                    0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16,             1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32,                   0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}

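/* Editor's note: illustrative inputs for the ARM64 selection logic above, not
   from the original source: 0x0000fff0 takes the plain movz path, 0xfff00000
   the 'movz ..., lsl #16' path, 0x1234ffff the 'movn ..., lsl #16' path, and
   0x12345678 needs the movz+movk pair. */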

/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *     - AMD64: 10 instruction bytes.
 *     - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * Quick simplification: Do 32-bit load if top half is zero.
     */
    if (uImm64 <= UINT32_MAX)
        return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);

    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = (uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

#else
# error "port me"
#endif
    return off;
}

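/* Editor's note: a worked example of the selection logic above, not from the
   original source. For uImm64=0xffffffffffff8000 three half-words are 0xffff,
   so the movn path wins and a single 'movn xN, #0x7fff' suffices; for
   uImm64=0x0000123400005678 the movz path emits 'movz xN, #0x5678' followed
   by 'movk xN, #0x1234, lsl #32'. */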

/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

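/* Editor's note: a hedged usage remark, not from the original source. Since
   the AMD64 variant above leaves bits 8..63 untouched, the 32-bit immediate
   loader is the safer pick when the whole register must be defined (idxReg
   and bValue are illustrative names):
   @code
        off = iemNativeEmitLoadGprImm32(pReNative, off, idxReg, bValue); // zero-extends on both hosts
   @endcode */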

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

/**
 * Special variant of iemNativeEmitGprByVCpuDisp for accessing the VM structure.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuSignedDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu)
{
    Assert(offVCpu < 0);
    if (offVCpu < 128 && offVCpu >= -128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a temporary
 *       register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}

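/* Editor's note: a worked example of the three addressing strategies above,
   not from the original source. With cbData=8, an aligned offVCpu below
   32 KiB (4096 slots of 8 bytes) encodes directly as 'ldr Xt, [<PVMCPU>, #off]';
   an offset outside that window but within 32 KiB of cpum.GstCtx switches to
   the fixed PCPUMCTX base; anything else costs a 'mov' of offVCpu into
   iGprTmp plus a register-indexed 'ldr Xt, [<PVMCPU>, xTmp]'. */
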
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Special variant of iemNativeEmitGprByVCpuLdStEx for accessing the VM
 * structure.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a temporary
 *       register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprBySignedVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu,
                                   ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    Assert(offVCpu < 0);
    Assert((uint32_t)-offVCpu < RT_BIT_32(28)); /* we should be way out of range for problematic sign extending issues. */
    Assert(!((uint32_t)-offVCpu & (cbData - 1)));

    /*
     * For negative offsets we need to put the displacement in a register
     * as the two variants with signed immediates will either post or pre
     * increment the base address register.
     */
    if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        uint8_t const idxIndexReg = !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) ? iGprReg : IEMNATIVE_REG_FIXED_TMP0;
        off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxIndexReg, offVCpu / (int32_t)cbData);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, idxIndexReg,
                                                    kArmv8A64InstrLdStExtend_Sxtw, cbData > 1 /*fShifted*/);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}

/**
 * Special variant of iemNativeEmitGprByVCpuLdSt for accessing the VM structure.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprBySignedVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                                 int32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    off = iemNativeEmitGprBySignedVCpuLdStEx(iemNativeInstrBufEnsure(pReNative, off, 2 + 1), off, iGprReg,
                                             offVCpu, enmOperation, cbData, IEMNATIVE_REG_FIXED_TMP0);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */
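
/* Editor's note: a brief rationale sketch, not from the original source. The
   signed variants exist because VM fields sit at negative offsets from pVCpu;
   e.g. offVCpu=-0x40 with cbData=8 becomes 'mov wTmp, #-8' (the scaled index)
   followed by 'ldr Xt, [<PVMCPU>, wTmp, sxtw #3]'. */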


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}

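/* Editor's note: a hedged usage sketch, not from the original source: loading
   the guest RIP into a host register, with idxReg standing in for an
   allocated host register index.
   @code
        off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
   @endcode */
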
/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    if (iGpr >= 8)
        pCodeBuf[off++] = X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 *
 * @note Limited range on ARM64.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    if (iGpr >= 8)
        pCodeBuf[off++] = X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 64-bit VCpu field.
 *
 * @note Will allocate temporary registers on both ARM64 and AMD64.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 (the immediate is loaded into a temporary register) */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxRegImm, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}



/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: @a idxTmp1 is always required!  Whether @a idxTmp2 is needed
 *       depends on whether the offset can be encoded as an immediate.  The
 *       @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same
 *       from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu,
                              uint8_t idxRegTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    RT_NOREF(idxRegTmp);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    if (idxRegTmp != UINT8_MAX)
    {
        Assert(idxRegTmp != IEMNATIVE_REG_FIXED_TMP0);
        off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegTmp, bImm);
        off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegTmp, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    }
    else
    {
        uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
        off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load effective address to a GPR of a VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else if (offVCpu <= 0xffffffU)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu >> 12,
                                                   true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
        if (offVCpu & 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, offVCpu & 0xfff);
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
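
/* Editor's note: a hedged usage sketch, not from the original source. The
   typesafe helpers above pair naturally with the counter emitters; the
   counter member name below is purely illustrative.
   @code
        off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
                                                iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomeCounter));
   @endcode */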


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to
           16-bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to
           16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

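/* Editor's note: an illustrative remark on the ARM64 path above, not from the
   original source. Masks that form a (rotated) contiguous run of set bits,
   e.g. 0x000ffff0, are encodable by Armv8A64ConvertMask32ToImmRImmS and need
   no temporary mask register, whereas a scattered constant such as 0x00500005
   forces the iemNativeRegAllocTmpImm fallback. */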
1430
1431/**
1432 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1433 *
1434 * @note May allocate temporary registers (not AMD64).
1435 */
1436DECL_FORCE_INLINE(uint32_t)
1437iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1438{
1439 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1440#ifdef RT_ARCH_AMD64
1441 /* and dword [pVCpu + offVCpu], imm8/32 */
1442 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1443 if (fMask < 0x80)
1444 {
1445 pCodeBuf[off++] = 0x83;
1446 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1447 pCodeBuf[off++] = (uint8_t)fMask;
1448 }
1449 else
1450 {
1451 pCodeBuf[off++] = 0x81;
1452 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1453 pCodeBuf[off++] = RT_BYTE1(fMask);
1454 pCodeBuf[off++] = RT_BYTE2(fMask);
1455 pCodeBuf[off++] = RT_BYTE3(fMask);
1456 pCodeBuf[off++] = RT_BYTE4(fMask);
1457 }
1458
1459#elif defined(RT_ARCH_ARM64)
1460 /* If the constant is unwieldy we'll need a register to hold it as well. */
1461 uint32_t uImmSizeLen, uImmRotate;
1462 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1463 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1464
1465 /* We need a temp register for holding the member value we're modifying. */
1466 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1467
1468 /* Determine how we're to access pVCpu first. */
1469 uint32_t const cbData = sizeof(uint32_t);
1470 if (offVCpu < (unsigned)(_4K * cbData))
1471 {
1472 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1473 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1474 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1475 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1476 if (idxTmpMask == UINT8_MAX)
1477 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1478 else
1479 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1480 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1481 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1482 }
1483 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1484 {
1485 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1486 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1487 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1488 if (idxTmpMask == UINT8_MAX)
1489 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1490 else
1491 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1492 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1493 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1494 }
1495 else
1496 {
1497 /* The offset is too large, so we must load it into a register and use
1498 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try use the 'LSL, #2' feature
1499 of the instruction if that'll reduce the constant to 16-bits. */
1500 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1501 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1502 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1503 if (fShifted)
1504 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1505 else
1506 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1507
1508 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1509 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1510
1511 if (idxTmpMask == UINT8_MAX)
1512 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1513 else
1514 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1515
1516 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1517 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1518 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1519 }
1520 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1521 if (idxTmpMask != UINT8_MAX)
1522 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1523
1524#else
1525# error "port me"
1526#endif
1527 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1528 return off;
1529}
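
/* Editor's note (sketch, not part of the original source): all three ARM64
   paths above implement the same read-modify-write sequence, e.g. for a
   32-bit field at offset 0x48 with an encodable mask:
       ldr w<tmp>, [x<pVCpu>, #0x48]
       and w<tmp>, w<tmp>, #<fMask>     (logical-immediate form)
       str w<tmp>, [x<pVCpu>, #0x48]
   while the AMD64 path folds it into a single "and dword [pVCpu+off], imm". */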
1530
1531
1532/**
1533 * Emits a gprdst = gprsrc load.
1534 */
1535DECL_FORCE_INLINE(uint32_t)
1536iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1537{
1538#ifdef RT_ARCH_AMD64
1539 /* mov gprdst, gprsrc */
1540 if ((iGprDst | iGprSrc) >= 8)
1541 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1542 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1543 : X86_OP_REX_W | X86_OP_REX_R;
1544 else
1545 pCodeBuf[off++] = X86_OP_REX_W;
1546 pCodeBuf[off++] = 0x8b;
1547 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1548
1549#elif defined(RT_ARCH_ARM64)
1550 /* mov dst, src; alias for: orr dst, xzr, src */
1551 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1552
1553#else
1554# error "port me"
1555#endif
1556 return off;
1557}
1558
1559
1560/**
1561 * Emits a gprdst = gprsrc load.
1562 */
1563DECL_INLINE_THROW(uint32_t)
1564iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1565{
1566#ifdef RT_ARCH_AMD64
1567 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1568#elif defined(RT_ARCH_ARM64)
1569 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1570#else
1571# error "port me"
1572#endif
1573 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1574 return off;
1575}
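
/* Editor's example (worked encoding, illustration only; the register choices
   are hypothetical): iemNativeEmitLoadGprFromGpr(pReNative, off, 0, 8), i.e.
   rax := r8 / x0 := x8, emits the bytes 49 8B C0 on AMD64 (REX.W|REX.B,
   0x8b, modrm=11|000|000) and the single word 0xAA0803E0 on ARM64
   ("orr x0, xzr, x8", the canonical "mov x0, x8" alias). */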
1576
1577
1578/**
1579 * Emits a gprdst = gprsrc[31:0] load.
1580 * @note Bits 63 thru 32 are cleared.
1581 */
1582DECL_FORCE_INLINE(uint32_t)
1583iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1584{
1585#ifdef RT_ARCH_AMD64
1586 /* mov gprdst, gprsrc */
1587 if ((iGprDst | iGprSrc) >= 8)
1588 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1589 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1590 : X86_OP_REX_R;
1591 pCodeBuf[off++] = 0x8b;
1592 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1593
1594#elif defined(RT_ARCH_ARM64)
1595 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1596 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1597
1598#else
1599# error "port me"
1600#endif
1601 return off;
1602}
1603
1604
1605/**
1606 * Emits a gprdst = gprsrc[31:0] load.
1607 * @note Bits 63 thru 32 are cleared.
1608 */
1609DECL_INLINE_THROW(uint32_t)
1610iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1611{
1612#ifdef RT_ARCH_AMD64
1613 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1614#elif defined(RT_ARCH_ARM64)
1615 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1616#else
1617# error "port me"
1618#endif
1619 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1620 return off;
1621}
1622
1623
1624/**
1625 * Emits a gprdst = gprsrc[15:0] load.
1626 * @note Bits 63 thru 16 are cleared.
1627 */
1628DECL_INLINE_THROW(uint32_t)
1629iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1630{
1631#ifdef RT_ARCH_AMD64
1632 /* movzx Gv,Ew */
1633 if ((iGprDst | iGprSrc) >= 8)
1634 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1635 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1636 : X86_OP_REX_R;
1637 pCodeBuf[off++] = 0x0f;
1638 pCodeBuf[off++] = 0xb7;
1639 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1640
1641#elif defined(RT_ARCH_ARM64)
1642 /* and gprdst, gprsrc, #0xffff */
1643# if 1
1644 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1645 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1646# else
1647 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1648 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1649# endif
1650
1651#else
1652# error "port me"
1653#endif
1654 return off;
1655}
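
/* Editor's note on the (0x0f, 0) immediate pair above (derivation, not from
   the original source): ARM64 logical immediates are (N,immr,imms) triplets.
   With N=0 and imms=0b001111 the element size works out to 32 bits with
   S+1 = 16 consecutive one-bits, and immr=0 applies no rotation, yielding
   the mask 0x0000ffff.  The 0x4f in the disabled variant apparently packs
   N=1 into bit 6, encoding the same 16-bit run in a 64-bit element. */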
1656
1657
1658/**
1659 * Emits a gprdst = gprsrc[15:0] load.
1660 * @note Bits 63 thru 16 are cleared.
1661 */
1662DECL_INLINE_THROW(uint32_t)
1663iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1664{
1665#ifdef RT_ARCH_AMD64
1666 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1667#elif defined(RT_ARCH_ARM64)
1668 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1669#else
1670# error "port me"
1671#endif
1672 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1673 return off;
1674}
1675
1676
1677/**
1678 * Emits a gprdst = gprsrc[7:0] load.
1679 * @note Bits 63 thru 8 are cleared.
1680 */
1681DECL_FORCE_INLINE(uint32_t)
1682iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1683{
1684#ifdef RT_ARCH_AMD64
1685 /* movzx Gv,Eb */
1686 if (iGprDst >= 8 || iGprSrc >= 8)
1687 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1688 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1689 : X86_OP_REX_R;
1690 else if (iGprSrc >= 4)
1691 pCodeBuf[off++] = X86_OP_REX;
1692 pCodeBuf[off++] = 0x0f;
1693 pCodeBuf[off++] = 0xb6;
1694 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1695
1696#elif defined(RT_ARCH_ARM64)
1697 /* and gprdst, gprsrc, #0xff */
1698 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1699 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1700
1701#else
1702# error "port me"
1703#endif
1704 return off;
1705}
1706
1707
1708/**
1709 * Emits a gprdst = gprsrc[7:0] load.
1710 * @note Bits 63 thru 8 are cleared.
1711 */
1712DECL_INLINE_THROW(uint32_t)
1713iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1714{
1715#ifdef RT_ARCH_AMD64
1716 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1717#elif defined(RT_ARCH_ARM64)
1718 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1719#else
1720# error "port me"
1721#endif
1722 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1723 return off;
1724}
1725
1726
1727/**
1728 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1729 * @note Bits 63 thru 8 are cleared.
1730 */
1731DECL_INLINE_THROW(uint32_t)
1732iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1733{
1734#ifdef RT_ARCH_AMD64
1735 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1736
1737 /* movzx Gv,Ew */
1738 if ((iGprDst | iGprSrc) >= 8)
1739 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1740 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1741 : X86_OP_REX_R;
1742 pbCodeBuf[off++] = 0x0f;
1743 pbCodeBuf[off++] = 0xb7;
1744 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1745
1746 /* shr Ev,8 */
1747 if (iGprDst >= 8)
1748 pbCodeBuf[off++] = X86_OP_REX_B;
1749 pbCodeBuf[off++] = 0xc1;
1750 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1751 pbCodeBuf[off++] = 8;
1752
1753#elif defined(RT_ARCH_ARM64)
1754 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1755 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1756 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1757
1758#else
1759# error "port me"
1760#endif
1761 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1762 return off;
1763}
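
/* Editor's example (worked encoding, illustration only): fetching BH into
   EAX, i.e. iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, 0, 3), emits
   "movzx eax, bx" (0F B7 C3) followed by "shr eax, 8" (C1 E8 08) on AMD64,
   whereas ARM64 needs only the single "ubfx w0, w3, #8, #8". */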
1764
1765
1766/**
1767 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1768 */
1769DECL_INLINE_THROW(uint32_t)
1770iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1771{
1772#ifdef RT_ARCH_AMD64
1773 /* movsxd r64, r/m32 */
1774 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1775 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1776 pbCodeBuf[off++] = 0x63;
1777 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1778
1779#elif defined(RT_ARCH_ARM64)
1780 /* sxtw dst, src */
1781 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1782 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1783
1784#else
1785# error "port me"
1786#endif
1787 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1788 return off;
1789}
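
/* Editor's example (illustration only): sign-extending ecx into rax, i.e.
   iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, 0, 1), emits
   "movsxd rax, ecx" (48 63 C1) on AMD64 and "sxtw x0, w1" on ARM64. */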
1790
1791
1792/**
1793 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1794 */
1795DECL_INLINE_THROW(uint32_t)
1796iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1797{
1798#ifdef RT_ARCH_AMD64
1799 /* movsx r64, r/m16 */
1800 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1801 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1802 pbCodeBuf[off++] = 0x0f;
1803 pbCodeBuf[off++] = 0xbf;
1804 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1805
1806#elif defined(RT_ARCH_ARM64)
1807 /* sxth dst, src */
1808 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1809 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1810
1811#else
1812# error "port me"
1813#endif
1814 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1815 return off;
1816}
1817
1818
1819/**
1820 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1821 */
1822DECL_INLINE_THROW(uint32_t)
1823iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1824{
1825#ifdef RT_ARCH_AMD64
1826 /* movsx r32, r/m16 */
1827 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1828 if (iGprDst >= 8 || iGprSrc >= 8)
1829 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1830 pbCodeBuf[off++] = 0x0f;
1831 pbCodeBuf[off++] = 0xbf;
1832 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1833
1834#elif defined(RT_ARCH_ARM64)
1835 /* sxth dst32, src */
1836 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1837 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1838
1839#else
1840# error "port me"
1841#endif
1842 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1843 return off;
1844}
1845
1846
1847/**
1848 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1852{
1853#ifdef RT_ARCH_AMD64
1854 /* movsx r64, r/m8 */
1855 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1856 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1857 pbCodeBuf[off++] = 0x0f;
1858 pbCodeBuf[off++] = 0xbe;
1859 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1860
1861#elif defined(RT_ARCH_ARM64)
1862 /* sxtb dst, src */
1863 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1864 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1865
1866#else
1867# error "port me"
1868#endif
1869 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1870 return off;
1871}
1872
1873
1874/**
1875 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1876 * @note Bits 63 thru 32 are cleared.
1877 */
1878DECL_INLINE_THROW(uint32_t)
1879iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1880{
1881#ifdef RT_ARCH_AMD64
1882 /* movsx r32, r/m8 */
1883 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1884 if (iGprDst >= 8 || iGprSrc >= 8)
1885 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1886 else if (iGprSrc >= 4)
1887 pbCodeBuf[off++] = X86_OP_REX;
1888 pbCodeBuf[off++] = 0x0f;
1889 pbCodeBuf[off++] = 0xbe;
1890 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1891
1892#elif defined(RT_ARCH_ARM64)
1893 /* sxtb dst32, src32 */
1894 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1895 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1896
1897#else
1898# error "port me"
1899#endif
1900 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1901 return off;
1902}
1903
1904
1905/**
1906 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1907 * @note Bits 63 thru 16 are cleared.
1908 */
1909DECL_INLINE_THROW(uint32_t)
1910iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1911{
1912#ifdef RT_ARCH_AMD64
1913 /* movsx r16, r/m8 */
1914 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1915 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1916 if (iGprDst >= 8 || iGprSrc >= 8)
1917 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1918 else if (iGprSrc >= 4)
1919 pbCodeBuf[off++] = X86_OP_REX;
1920 pbCodeBuf[off++] = 0x0f;
1921 pbCodeBuf[off++] = 0xbe;
1922 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1923
1924 /* movzx r32, r/m16 */
1925 if (iGprDst >= 8)
1926 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1927 pbCodeBuf[off++] = 0x0f;
1928 pbCodeBuf[off++] = 0xb7;
1929 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1930
1931#elif defined(RT_ARCH_ARM64)
1932 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1933 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1934 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1935 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1936 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1937
1938#else
1939# error "port me"
1940#endif
1941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1942 return off;
1943}
1944
1945
1946/**
1947 * Emits a gprdst = gprsrc + addend load.
1948 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1949 */
1950#ifdef RT_ARCH_AMD64
1951DECL_INLINE_THROW(uint32_t)
1952iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1953 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1954{
1955 Assert(iAddend != 0);
1956
1957 /* lea gprdst, [gprsrc + iAddend] */
1958 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1959 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1960 pbCodeBuf[off++] = 0x8d;
1961 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1962 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1963 return off;
1964}
1965
1966#elif defined(RT_ARCH_ARM64)
1967DECL_INLINE_THROW(uint32_t)
1968iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1969 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1970{
1971 if ((uint32_t)iAddend < 4096)
1972 {
1973 /* add dst, src, uimm12 */
1974 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1975 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1976 }
1977 else if ((uint32_t)-iAddend < 4096)
1978 {
1979 /* sub dst, src, uimm12 */
1980 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1981 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1982 }
1983 else
1984 {
1985 Assert(iGprSrc != iGprDst);
1986 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1987 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1988 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1989 }
1990 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1991 return off;
1992}
1993#else
1994# error "port me"
1995#endif
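
/* Editor's note (sketch of the ARM64 path selection above, not from the
   original source): an addend of +1 becomes "add dst, src, #1", of -1
   becomes "sub dst, src, #1", while e.g. 0x12345 (magnitude >= 4096) first
   loads the constant into the destination and then emits "add dst, src, dst"
   - which is why Assert(iGprSrc != iGprDst) guards that last path. */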
1996
1997/**
1998 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1999 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
2000 */
2001#ifdef RT_ARCH_AMD64
2002DECL_INLINE_THROW(uint32_t)
2003iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2004 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2005#else
2006DECL_INLINE_THROW(uint32_t)
2007iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2008 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
2009#endif
2010{
2011 if (iAddend != 0)
2012 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2013 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
2014}
2015
2016
2017/**
2018 * Emits a gprdst = gprsrc32 + addend load.
2019 * @note Bits 63 thru 32 are cleared.
2020 */
2021DECL_INLINE_THROW(uint32_t)
2022iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2023 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2024{
2025 Assert(iAddend != 0);
2026
2027#ifdef RT_ARCH_AMD64
2028 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
2029 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2030 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
2031 if ((iGprDst | iGprSrc) >= 8)
2032 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
2033 pbCodeBuf[off++] = 0x8d;
2034 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
2035
2036#elif defined(RT_ARCH_ARM64)
2037 if ((uint32_t)iAddend < 4096)
2038 {
2039 /* add dst, src, uimm12 */
2040 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2041 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
2042 }
2043 else if ((uint32_t)-iAddend < 4096)
2044 {
2045 /* sub dst, src, uimm12 */
2046 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2047 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
2048 }
2049 else
2050 {
2051 Assert(iGprSrc != iGprDst);
2052 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
2053 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2054 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
2055 }
2056
2057#else
2058# error "port me"
2059#endif
2060 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2061 return off;
2062}
2063
2064
2065/**
2066 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
2067 */
2068DECL_INLINE_THROW(uint32_t)
2069iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2070 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2071{
2072 if (iAddend != 0)
2073 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2074 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
2075}
2076
2077
2078/**
2079 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2080 * destination.
2081 */
2082DECL_FORCE_INLINE(uint32_t)
2083iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2084{
2085#ifdef RT_ARCH_AMD64
2086 /* mov reg16, r/m16 */
2087 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2088 if (idxDst >= 8 || idxSrc >= 8)
2089 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
2090 pCodeBuf[off++] = 0x8b;
2091 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
2092
2093#elif defined(RT_ARCH_ARM64)
2094 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
2095 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
2096
2097#else
2098# error "Port me!"
2099#endif
2100 return off;
2101}
2102
2103
2104/**
2105 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2106 * destination.
2107 */
2108DECL_INLINE_THROW(uint32_t)
2109iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2110{
2111#ifdef RT_ARCH_AMD64
2112 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
2113#elif defined(RT_ARCH_ARM64)
2114 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
2115#else
2116# error "Port me!"
2117#endif
2118 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2119 return off;
2120}
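
/* Editor's note (illustration only): the AMD64 16-bit "mov" leaves bits
   63:16 of the destination untouched by definition, so the prefixed 8B
   opcode alone performs the merge; ARM64 has no partial-register writes,
   hence the explicit bitfield insert, e.g. "bfi w0, w1, #0, #16" for
   idxDst=0, idxSrc=1. */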
2121
2122
2123#ifdef RT_ARCH_AMD64
2124/**
2125 * Common bit of iemNativeEmitLoadGprByBp and friends.
2126 */
2127DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
2128 PIEMRECOMPILERSTATE pReNativeAssert)
2129{
2130 if (offDisp < 128 && offDisp >= -128)
2131 {
2132 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
2133 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
2134 }
2135 else
2136 {
2137 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
2138 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2139 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2140 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2141 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2142 }
2143 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
2144 return off;
2145}
2146#elif defined(RT_ARCH_ARM64)
2147/**
2148 * Common bit of iemNativeEmitLoadGprByBp and friends.
2149 */
2150DECL_FORCE_INLINE_THROW(uint32_t)
2151iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2152 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2153{
2154 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
2155 {
2156 /* ldr/str w/ unsigned imm12 (scaled) */
2157 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2158 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
2159 }
2160 else if (offDisp >= -256 && offDisp <= 256)
2161 {
2162 /* ldur/stur w/ signed imm9 (unscaled) */
2163 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2164 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
2165 }
2166 else
2167 {
2168 /* Use temporary indexing register. */
2169 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2170 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2171 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2172 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2173 }
2174 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2175 return off;
2176}
2177#endif
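
/* Editor's note (sketch, not from the original source): the helpers above
   pick the smallest encoding that fits.  A typical stack slot at rbp-0x40
   takes the disp8 form on AMD64 (mod=01, rm=101, disp8=0xC0), while on ARM64
   it must use ldur/stur with the signed 9-bit offset, because the scaled
   unsigned imm12 form cannot express negative displacements at all. */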
2178
2179
2180/**
2181 * Emits a 64-bit GPR load instruction with a BP relative source address.
2182 */
2183DECL_INLINE_THROW(uint32_t)
2184iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2185{
2186#ifdef RT_ARCH_AMD64
2187 /* mov gprdst, qword [rbp + offDisp] */
2188 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2189 if (iGprDst < 8)
2190 pbCodeBuf[off++] = X86_OP_REX_W;
2191 else
2192 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2193 pbCodeBuf[off++] = 0x8b;
2194 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2195
2196#elif defined(RT_ARCH_ARM64)
2197 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2198
2199#else
2200# error "port me"
2201#endif
2202}
2203
2204
2205/**
2206 * Emits a 32-bit GPR load instruction with a BP relative source address.
2207 * @note Bits 63 thru 32 of the GPR will be cleared.
2208 */
2209DECL_INLINE_THROW(uint32_t)
2210iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2211{
2212#ifdef RT_ARCH_AMD64
2213 /* mov gprdst, dword [rbp + offDisp] */
2214 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2215 if (iGprDst >= 8)
2216 pbCodeBuf[off++] = X86_OP_REX_R;
2217 pbCodeBuf[off++] = 0x8b;
2218 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2219
2220#elif defined(RT_ARCH_ARM64)
2221 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2222
2223#else
2224# error "port me"
2225#endif
2226}
2227
2228
2229/**
2230 * Emits a 16-bit GPR load instruction with a BP relative source address.
2231 * @note Bits 63 thru 16 of the GPR will be cleared.
2232 */
2233DECL_INLINE_THROW(uint32_t)
2234iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2235{
2236#ifdef RT_ARCH_AMD64
2237 /* movzx gprdst, word [rbp + offDisp] */
2238 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2239 if (iGprDst >= 8)
2240 pbCodeBuf[off++] = X86_OP_REX_R;
2241 pbCodeBuf[off++] = 0x0f;
2242 pbCodeBuf[off++] = 0xb7;
2243 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2244
2245#elif defined(RT_ARCH_ARM64)
2246 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint32_t));
2247
2248#else
2249# error "port me"
2250#endif
2251}
2252
2253
2254/**
2255 * Emits an 8-bit GPR load instruction with a BP relative source address.
2256 * @note Bits 63 thru 8 of the GPR will be cleared.
2257 */
2258DECL_INLINE_THROW(uint32_t)
2259iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2260{
2261#ifdef RT_ARCH_AMD64
2262 /* movzx gprdst, byte [rbp + offDisp] */
2263 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2264 if (iGprDst >= 8)
2265 pbCodeBuf[off++] = X86_OP_REX_R;
2266 pbCodeBuf[off++] = 0x0f;
2267 pbCodeBuf[off++] = 0xb6;
2268 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2269
2270#elif defined(RT_ARCH_ARM64)
2271 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint32_t));
2272
2273#else
2274# error "port me"
2275#endif
2276}
2277
2278
2279#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2280/**
2281 * Emits a 128-bit vector register load instruction with a BP relative source address.
2282 */
2283DECL_FORCE_INLINE_THROW(uint32_t)
2284iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2285{
2286#ifdef RT_ARCH_AMD64
2287 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2288
2289 /* movdqu reg128, mem128 */
2290 pbCodeBuf[off++] = 0xf3;
2291 if (iVecRegDst >= 8)
2292 pbCodeBuf[off++] = X86_OP_REX_R;
2293 pbCodeBuf[off++] = 0x0f;
2294 pbCodeBuf[off++] = 0x6f;
2295 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2296#elif defined(RT_ARCH_ARM64)
2297 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2298#else
2299# error "port me"
2300#endif
2301}
2302
2303
2304/**
2305 * Emits a 256-bit vector register load instruction with a BP relative source address.
2306 */
2307DECL_FORCE_INLINE_THROW(uint32_t)
2308iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2309{
2310#ifdef RT_ARCH_AMD64
2311 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2312
2313 /* vmovdqu reg256, mem256 */
2314 pbCodeBuf[off++] = X86_OP_VEX2;
2315 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2316 pbCodeBuf[off++] = 0x6f;
2317 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2318#elif defined(RT_ARCH_ARM64)
2319 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2320 Assert(!(iVecRegDst & 0x1));
2321 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2322 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2323#else
2324# error "port me"
2325#endif
2326}
2327
2328#endif
2329
2330
2331/**
2332 * Emits a load effective address into a GPR with a BP relative source address.
2333 */
2334DECL_INLINE_THROW(uint32_t)
2335iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2336{
2337#ifdef RT_ARCH_AMD64
2338 /* lea gprdst, [rbp + offDisp] */
2339 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2340 if (iGprDst < 8)
2341 pbCodeBuf[off++] = X86_OP_REX_W;
2342 else
2343 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2344 pbCodeBuf[off++] = 0x8d;
2345 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2346
2347#elif defined(RT_ARCH_ARM64)
2348 bool const fSub = offDisp < 0;
2349 uint32_t const offAbsDisp = (uint32_t)RT_ABS(offDisp);
2350 if (offAbsDisp <= 0xffffffU)
2351 {
2352 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2353 if (offAbsDisp <= 0xfffU)
2354 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp);
2355 else
2356 {
2357 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp >> 12,
2358 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2359 if (offAbsDisp & 0xfffU)
2360 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, offAbsDisp & 0xfff);
2361 }
2362 }
2363 else
2364 {
2365 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2366 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offAbsDisp);
2367 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2368 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2369 }
2370
2371#else
2372# error "port me"
2373#endif
2374
2375 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2376 return off;
2377}
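
/* Editor's example (illustration only): taking the address of the stack slot
   at rbp-0x40 into rax, iemNativeEmitLeaGprByBp(pReNative, off, 0, -0x40),
   emits "lea rax, [rbp-0x40]" (48 8D 45 C0) on AMD64; on ARM64 the fSub path
   above produces the single "sub x0, <bp>, #0x40". */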
2378
2379
2380/**
2381 * Emits a 64-bit GPR store with a BP relative destination address.
2382 *
2383 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2384 */
2385DECL_INLINE_THROW(uint32_t)
2386iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2387{
2388#ifdef RT_ARCH_AMD64
2389 /* mov qword [rbp + offDisp], gprsrc */
2390 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2391 if (iGprSrc < 8)
2392 pbCodeBuf[off++] = X86_OP_REX_W;
2393 else
2394 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2395 pbCodeBuf[off++] = 0x89;
2396 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2397
2398#elif defined(RT_ARCH_ARM64)
2399 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2400 {
2401 /* str w/ unsigned imm12 (scaled) */
2402 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2403 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2404 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2405 }
2406 else if (offDisp >= -256 && offDisp <= 256)
2407 {
2408 /* stur w/ signed imm9 (unscaled) */
2409 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2410 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2411 }
2412 else if ((uint32_t)-offDisp < (unsigned)_4K)
2413 {
2414 /* Use temporary indexing register w/ sub uimm12. */
2415 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2416 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2417 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2418 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2419 }
2420 else
2421 {
2422 /* Use temporary indexing register. */
2423 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2424 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2425 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2426 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2427 }
2428 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2429 return off;
2430
2431#else
2432# error "Port me!"
2433#endif
2434}
2435
2436
2437/**
2438 * Emits a 64-bit immediate store with a BP relative destination address.
2439 *
2440 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2441 */
2442DECL_INLINE_THROW(uint32_t)
2443iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2444{
2445#ifdef RT_ARCH_AMD64
2446 if ((int64_t)uImm64 == (int32_t)uImm64)
2447 {
2448 /* mov qword [rbp + offDisp], imm32 - sign extended */
2449 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2450 pbCodeBuf[off++] = X86_OP_REX_W;
2451 pbCodeBuf[off++] = 0xc7;
2452 if (offDisp < 128 && offDisp >= -128)
2453 {
2454 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2455 pbCodeBuf[off++] = (uint8_t)offDisp;
2456 }
2457 else
2458 {
2459 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2460 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2461 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2462 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2463 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2464 }
2465 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2466 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2467 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2468 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2469 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2470 return off;
2471 }
2472#endif
2473
2474 /* Load tmp0, imm64; Store tmp to bp+disp. */
2475 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2476 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2477}
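
/* Editor's note (worked check, not from the original source): the
   (int64_t)uImm64 == (int32_t)uImm64 test accepts values whose bits 63:31
   are all identical; 0xffffffff80000000 passes (imm32 0x80000000 sign-extends
   back to it), but UINT64_C(0x80000000) fails and takes the tmp0 load+store
   fallback below. */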
2478
2479#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2480
2481/**
2482 * Emits a 128-bit vector register store with a BP relative destination address.
2483 *
2484 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2485 */
2486DECL_INLINE_THROW(uint32_t)
2487iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2488{
2489#ifdef RT_ARCH_AMD64
2490 /* movdqu [rbp + offDisp], vecsrc */
2491 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2492 pbCodeBuf[off++] = 0xf3;
2493 if (iVecRegSrc >= 8)
2494 pbCodeBuf[off++] = X86_OP_REX_R;
2495 pbCodeBuf[off++] = 0x0f;
2496 pbCodeBuf[off++] = 0x7f;
2497 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2498
2499#elif defined(RT_ARCH_ARM64)
2500 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2501 {
2502 /* str w/ unsigned imm12 (scaled) */
2503 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2504 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2505 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2506 }
2507 else if (offDisp >= -256 && offDisp <= 256)
2508 {
2509 /* stur w/ signed imm9 (unscaled) */
2510 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2511 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2512 }
2513 else if ((uint32_t)-offDisp < (unsigned)_4K)
2514 {
2515 /* Use temporary indexing register w/ sub uimm12. */
2516 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2517 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2518 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2519 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2520 }
2521 else
2522 {
2523 /* Use temporary indexing register. */
2524 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2525 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2526 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2527 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2528 }
2529 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2530 return off;
2531
2532#else
2533# error "Port me!"
2534#endif
2535}
2536
2537
2538/**
2539 * Emits a 256-bit vector register store with a BP relative destination address.
2540 *
2541 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2542 */
2543DECL_INLINE_THROW(uint32_t)
2544iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2545{
2546#ifdef RT_ARCH_AMD64
2547 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2548
2549 /* vmovdqu mem256, reg256 */
2550 pbCodeBuf[off++] = X86_OP_VEX2;
2551 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2552 pbCodeBuf[off++] = 0x7f;
2553 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2554#elif defined(RT_ARCH_ARM64)
2555 Assert(!(iVecRegSrc & 0x1));
2556 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2557 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2558#else
2559# error "Port me!"
2560#endif
2561}
2562
2563#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
2564#if defined(RT_ARCH_ARM64)
2565
2566/**
2567 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2568 *
2569 * @note Odd and large @a offDisp values require a temporary, unless it's a
2570 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2571 * caller does not heed this.
2572 *
2573 * @note DON'T try this with prefetch.
2574 */
2575DECL_FORCE_INLINE_THROW(uint32_t)
2576iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2577 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2578{
2579 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2580 {
2581 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2582 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2583 }
2584 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2585 && iGprReg != iGprBase)
2586 || iGprTmp != UINT8_MAX)
2587 {
2588 /* The offset is too large, so we must load it into a register and use
2589 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2590 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2591 if (iGprTmp == UINT8_MAX)
2592 iGprTmp = iGprReg;
2593 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2594 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2595 }
2596 else
2597# ifdef IEM_WITH_THROW_CATCH
2598 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2599# else
2600 AssertReleaseFailedStmt(off = UINT32_MAX);
2601# endif
2602 return off;
2603}
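
/* Editor's note (worked range math, illustration only): the scaled-imm12
   test above admits cbData-aligned offsets 0 .. 4096*cbData - cbData, i.e.
   0..0x7ff8 for 64-bit accesses.  Everything else - including every negative
   displacement - falls back to materializing offDisp in a register, which is
   why a temporary (or a reusable destination register) is required there. */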
2604
2605/**
2606 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2607 */
2608DECL_FORCE_INLINE_THROW(uint32_t)
2609iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2610 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2611{
2612 /*
2613 * There are a couple of ldr variants that take an immediate offset, so
2614 * we try to use those when we can; otherwise we have to use a temporary
2615 * register to help with the addressing.
2616 */
2617 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2618 {
2619 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2620 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2621 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2622 }
2623 else
2624 {
2625 /* The offset is too large, so we must load it into a register and use
2626 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2627 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2628 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2629
2630 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2631 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2632
2633 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2634 }
2635 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2636 return off;
2637}
2638
2639# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2640/**
2641 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2642 *
2643 * @note Odd and large @a offDisp values require a temporary, unless it's a
2644 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2645 * caller does not heed this.
2646 *
2647 * @note DON'T try this with prefetch.
2648 */
2649DECL_FORCE_INLINE_THROW(uint32_t)
2650iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2651 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2652{
2653 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2654 {
2655 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2656 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2657 }
2658 else if ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2659 || iGprTmp != UINT8_MAX)
2660 {
2661 /* The offset is too large, so we must load it into a register and use
2662 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2663 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2664 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2665 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2666 }
2667 else
2668# ifdef IEM_WITH_THROW_CATCH
2669 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2670# else
2671 AssertReleaseFailedStmt(off = UINT32_MAX);
2672# endif
2673 return off;
2674}
2675# endif
2676
2677
2678/**
2679 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2680 */
2681DECL_FORCE_INLINE_THROW(uint32_t)
2682iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2683 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2684{
2685 /*
2686 * There are a couple of ldr variants that take an immediate offset, so
2687 * we try to use those when we can; otherwise we have to use a temporary
2688 * register to help with the addressing.
2689 */
2690 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2691 {
2692 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2693 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2694 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2695 }
2696 else
2697 {
2698 /* The offset is too large, so we must load it into a register and use
2699 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2700 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2701 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2702
2703 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2704 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2705
2706 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2707 }
2708 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2709 return off;
2710}
2711#endif /* RT_ARCH_ARM64 */
2712
2713/**
2714 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2715 *
2716 * @note ARM64: Misaligned @a offDisp values and values not in the
2717 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2718 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2719 * does not heed this.
2720 */
2721DECL_FORCE_INLINE_THROW(uint32_t)
2722iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2723 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2724{
2725#ifdef RT_ARCH_AMD64
2726 /* mov reg64, mem64 */
2727 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2728 pCodeBuf[off++] = 0x8b;
2729 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2730 RT_NOREF(iGprTmp);
2731
2732#elif defined(RT_ARCH_ARM64)
2733 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2734 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2735
2736#else
2737# error "port me"
2738#endif
2739 return off;
2740}
2741
2742
2743/**
2744 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2745 */
2746DECL_INLINE_THROW(uint32_t)
2747iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2748{
2749#ifdef RT_ARCH_AMD64
2750 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2751 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2752
2753#elif defined(RT_ARCH_ARM64)
2754 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2755
2756#else
2757# error "port me"
2758#endif
2759 return off;
2760}
2761
2762
2763/**
2764 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2765 *
2766 * @note ARM64: Misaligned @a offDisp values and values not in the
2767 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2768 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2769 * caller does not heed this.
2770 *
2771 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2772 */
2773DECL_FORCE_INLINE_THROW(uint32_t)
2774iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2775 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2776{
2777#ifdef RT_ARCH_AMD64
2778 /* mov reg32, mem32 */
2779 if (iGprDst >= 8 || iGprBase >= 8)
2780 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2781 pCodeBuf[off++] = 0x8b;
2782 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2783 RT_NOREF(iGprTmp);
2784
2785#elif defined(RT_ARCH_ARM64)
2786 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2787 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2788
2789#else
2790# error "port me"
2791#endif
2792 return off;
2793}
2794
2795
2796/**
2797 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2798 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2799 */
2800DECL_INLINE_THROW(uint32_t)
2801iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2802{
2803#ifdef RT_ARCH_AMD64
2804 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2805 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2806
2807#elif defined(RT_ARCH_ARM64)
2808 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2809
2810#else
2811# error "port me"
2812#endif
2813 return off;
2814}
2815
2816
2817/**
2818 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2819 * sign-extending the value to 64 bits.
2820 *
2821 * @note ARM64: Misaligned @a offDisp values and values not in the
2822 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2823 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2824 * caller does not heed this.
2825 */
2826DECL_FORCE_INLINE_THROW(uint32_t)
2827iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2828 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2829{
2830#ifdef RT_ARCH_AMD64
2831 /* movsxd reg64, mem32 */
2832 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2833 pCodeBuf[off++] = 0x63;
2834 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2835 RT_NOREF(iGprTmp);
2836
2837#elif defined(RT_ARCH_ARM64)
2838 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2839 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2840
2841#else
2842# error "port me"
2843#endif
2844 return off;
2845}
2846
2847
2848/**
2849 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2850 *
2851 * @note ARM64: Misaligned @a offDisp values and values not in the
2852 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2853 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2854 * caller does not heed this.
2855 *
2856 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2857 */
2858DECL_FORCE_INLINE_THROW(uint32_t)
2859iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2860 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2861{
2862#ifdef RT_ARCH_AMD64
2863 /* movzx reg32, mem16 */
2864 if (iGprDst >= 8 || iGprBase >= 8)
2865 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2866 pCodeBuf[off++] = 0x0f;
2867 pCodeBuf[off++] = 0xb7;
2868 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2869 RT_NOREF(iGprTmp);
2870
2871#elif defined(RT_ARCH_ARM64)
2872 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2873 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2874
2875#else
2876# error "port me"
2877#endif
2878 return off;
2879}
2880
2881
2882/**
2883 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2884 * sign-extending the value to 64 bits.
2885 *
2886 * @note ARM64: Misaligned @a offDisp values and values not in the
2887 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2888 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2889 * caller does not heed this.
2890 */
2891DECL_FORCE_INLINE_THROW(uint32_t)
2892iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2893 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2894{
2895#ifdef RT_ARCH_AMD64
2896 /* movsx reg64, mem16 */
2897 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2898 pCodeBuf[off++] = 0x0f;
2899 pCodeBuf[off++] = 0xbf;
2900 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2901 RT_NOREF(iGprTmp);
2902
2903#elif defined(RT_ARCH_ARM64)
2904 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2905 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2906
2907#else
2908# error "port me"
2909#endif
2910 return off;
2911}
2912
2913
2914/**
2915 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2916 * sign-extending the value to 32 bits.
2917 *
2918 * @note ARM64: Misaligned @a offDisp values and values not in the
2919 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2920 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2921 * caller does not heed this.
2922 *
2923 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2924 */
2925DECL_FORCE_INLINE_THROW(uint32_t)
2926iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2927 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2928{
2929#ifdef RT_ARCH_AMD64
2930 /* movsx reg32, mem16 */
2931 if (iGprDst >= 8 || iGprBase >= 8)
2932 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2933 pCodeBuf[off++] = 0x0f;
2934 pCodeBuf[off++] = 0xbf;
2935 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2936 RT_NOREF(iGprTmp);
2937
2938#elif defined(RT_ARCH_ARM64)
2939 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2940 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2941
2942#else
2943# error "port me"
2944#endif
2945 return off;
2946}
2947
2948
2949/**
2950 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2951 *
2952 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2953 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2954 * same. Will assert / throw if caller does not heed this.
2955 *
2956 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2957 */
2958DECL_FORCE_INLINE_THROW(uint32_t)
2959iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2960 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2961{
2962#ifdef RT_ARCH_AMD64
2963 /* movzx reg32, mem8 */
2964 if (iGprDst >= 8 || iGprBase >= 8)
2965 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2966 pCodeBuf[off++] = 0x0f;
2967 pCodeBuf[off++] = 0xb6;
2968 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2969 RT_NOREF(iGprTmp);
2970
2971#elif defined(RT_ARCH_ARM64)
2972 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2973 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2974
2975#else
2976# error "port me"
2977#endif
2978 return off;
2979}
2980
2981
2982/**
2983 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2984 * sign-extending the value to 64 bits.
2985 *
2986 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2987 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2988 * same. Will assert / throw if caller does not heed this.
2989 */
2990DECL_FORCE_INLINE_THROW(uint32_t)
2991iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2992 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2993{
2994#ifdef RT_ARCH_AMD64
2995 /* movsx reg64, mem8 */
2996 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2997 pCodeBuf[off++] = 0x0f;
2998 pCodeBuf[off++] = 0xbe;
2999 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3000 RT_NOREF(iGprTmp);
3001
3002#elif defined(RT_ARCH_ARM64)
3003 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3004 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
3005
3006#else
3007# error "port me"
3008#endif
3009 return off;
3010}
3011
3012
3013/**
3014 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3015 * sign-extending the value to 32 bits.
3016 *
3017 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3018 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3019 * same. Will assert / throw if caller does not heed this.
3020 *
3021 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
3022 */
3023DECL_FORCE_INLINE_THROW(uint32_t)
3024iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3025 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3026{
3027#ifdef RT_ARCH_AMD64
3028 /* movsx reg32, mem8 */
3029 if (iGprDst >= 8 || iGprBase >= 8)
3030 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3031 pCodeBuf[off++] = 0x0f;
3032 pCodeBuf[off++] = 0xbe;
3033 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3034 RT_NOREF(iGprTmp);
3035
3036#elif defined(RT_ARCH_ARM64)
3037 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3038 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
3039
3040#else
3041# error "port me"
3042#endif
3043 return off;
3044}
3045
3046
3047/**
3048 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3049 * sign-extending the value to 16 bits.
3050 *
3051 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3052 * temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3053 * same. Will assert / throw if caller does not heed this.
3054 *
3055 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
3056 */
3057DECL_FORCE_INLINE_THROW(uint32_t)
3058iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3059 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3060{
3061#ifdef RT_ARCH_AMD64
3062 /* movsx reg32, mem8 */
3063 if (iGprDst >= 8 || iGprBase >= 8)
3064 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3065 pCodeBuf[off++] = 0x0f;
3066 pCodeBuf[off++] = 0xbe;
3067 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3068# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
3069 /* and reg32, 0xffff */
3070 if (iGprDst >= 8)
3071 pCodeBuf[off++] = X86_OP_REX_B;
3072 pCodeBuf[off++] = 0x81;
3073 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
3074 pCodeBuf[off++] = 0xff;
3075 pCodeBuf[off++] = 0xff;
3076 pCodeBuf[off++] = 0;
3077 pCodeBuf[off++] = 0;
3078# else
3079 /* movzx reg32, reg16 */
3080 if (iGprDst >= 8)
3081 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
3082 pCodeBuf[off++] = 0x0f;
3083 pCodeBuf[off++] = 0xb7;
3084 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
3085# endif
3086 RT_NOREF(iGprTmp);
3087
3088#elif defined(RT_ARCH_ARM64)
3089 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3090 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
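    /* Re-clear bits 16..31 after the sign-extending byte load above; the
       (immS=15, immR=0) pair is the bitmask-immediate encoding of 0xffff. */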
3091 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
3092 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3093
3094#else
3095# error "port me"
3096#endif
3097 return off;
3098}
3099
3100
3101#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3102/**
3103 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3104 *
3105 * @note ARM64: Misaligned @a offDisp values and values not in the
3106 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp).
3107 * Will assert / throw if caller does not heed this.
3109 */
3110DECL_FORCE_INLINE_THROW(uint32_t)
3111iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3112 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3113{
3114#ifdef RT_ARCH_AMD64
3115 /* movdqu reg128, mem128 */
3116 pCodeBuf[off++] = 0xf3;
3117 if (iVecRegDst >= 8 || iGprBase >= 8)
3118 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3119 pCodeBuf[off++] = 0x0f;
3120 pCodeBuf[off++] = 0x6f;
3121 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3122 RT_NOREF(iGprTmp);
3123
3124#elif defined(RT_ARCH_ARM64)
3125 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3126 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3127
3128#else
3129# error "port me"
3130#endif
3131 return off;
3132}
3133
3134
3135/**
3136 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3137 */
3138DECL_INLINE_THROW(uint32_t)
3139iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3140{
3141#ifdef RT_ARCH_AMD64
3142 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3143 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3144
3145#elif defined(RT_ARCH_ARM64)
3146 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3147
3148#else
3149# error "port me"
3150#endif
3151 return off;
3152}
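
/* Illustrative use only (the VMCPU field path here is an assumption; check the
   real context layout before copying):
        off = iemNativeEmitLoadVecRegByGprU128(pReNative, off, idxSimdRegDst,
                                               IEMNATIVE_REG_FIXED_PVMCPU,
                                               RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[0]));
   This would load the guest XMM0 image from the CPU context into a host SIMD
   register, using the pVCpu register as the base. */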
3153
3154
3155/**
3156 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3157 *
3158 * @note ARM64: Misaligned @a offDisp values and values not in the
3159 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp).
3160 * Will assert / throw if caller does not heed this.
3162 */
3163DECL_FORCE_INLINE_THROW(uint32_t)
3164iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3165 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3166{
3167#ifdef RT_ARCH_AMD64
3168 /* vmovdqu reg256, mem256 */
3169 pCodeBuf[off++] = X86_OP_VEX3;
3170 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3171 | X86_OP_VEX3_BYTE1_X
3172 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3173 | UINT8_C(0x01);
3174 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3175 pCodeBuf[off++] = 0x6f;
3176 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3177 RT_NOREF(iGprTmp);
3178
3179#elif defined(RT_ARCH_ARM64)
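    /* No single 256-bit GPR-based load form is used here: the 256-bit value lives
       in an even/odd pair of 128-bit SIMD registers, so emit two 128-bit loads
       (hence the even-register assertion below). */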
3180 Assert(!(iVecRegDst & 0x1));
3181 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3182 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3183 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3184 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3185#else
3186# error "port me"
3187#endif
3188 return off;
3189}
3190
3191
3192/**
3193 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3194 */
3195DECL_INLINE_THROW(uint32_t)
3196iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3197{
3198#ifdef RT_ARCH_AMD64
3199 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3200 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3201
3202#elif defined(RT_ARCH_ARM64)
3203 Assert(!(iVecRegDst & 0x1));
3204 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3205 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3206 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3207 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3208
3209#else
3210# error "port me"
3211#endif
3212 return off;
3213}
3214#endif
3215
3216
3217/**
3218 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3219 *
3220 * @note ARM64: Misaligned @a offDisp values and values not in the
3221 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3222 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3223 * does not heed this.
3224 */
3225DECL_FORCE_INLINE_THROW(uint32_t)
3226iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3227 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3228{
3229#ifdef RT_ARCH_AMD64
3230 /* mov mem64, reg64 */
3231 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3232 pCodeBuf[off++] = 0x89;
3233 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3234 RT_NOREF(iGprTmp);
3235
3236#elif defined(RT_ARCH_ARM64)
3237 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3238 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3239
3240#else
3241# error "port me"
3242#endif
3243 return off;
3244}
3245
3246
3247/**
3248 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3249 *
3250 * @note ARM64: Misaligned @a offDisp values and values not in the
3251 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3252 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3253 * does not heed this.
3254 */
3255DECL_FORCE_INLINE_THROW(uint32_t)
3256iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3257 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3258{
3259#ifdef RT_ARCH_AMD64
3260 /* mov mem32, reg32 */
3261 if (iGprSrc >= 8 || iGprBase >= 8)
3262 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3263 pCodeBuf[off++] = 0x89;
3264 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3265 RT_NOREF(iGprTmp);
3266
3267#elif defined(RT_ARCH_ARM64)
3268 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3269 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3270
3271#else
3272# error "port me"
3273#endif
3274 return off;
3275}
3276
3277
3278/**
3279 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3280 *
3281 * @note ARM64: Misaligned @a offDisp values and values not in the
3282 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3283 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3284 * does not heed this.
3285 */
3286DECL_FORCE_INLINE_THROW(uint32_t)
3287iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3288 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3289{
3290#ifdef RT_ARCH_AMD64
3291 /* mov mem16, reg16 */
3292 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3293 if (iGprSrc >= 8 || iGprBase >= 8)
3294 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3295 pCodeBuf[off++] = 0x89;
3296 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3297 RT_NOREF(iGprTmp);
3298
3299#elif defined(RT_ARCH_ARM64)
3300 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3301 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3302
3303#else
3304# error "port me"
3305#endif
3306 return off;
3307}
3308
3309
3310/**
3311 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3312 *
3313 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3314 * temporary register (@a iGprTmp) if @a iGprSrc and @a iGprBase are the
3315 * same. Will assert / throw if caller does not heed this.
3316 */
3317DECL_FORCE_INLINE_THROW(uint32_t)
3318iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3319 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3320{
3321#ifdef RT_ARCH_AMD64
3322 /* mov mem8, reg8 */
3323 if (iGprSrc >= 8 || iGprBase >= 8)
3324 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3325 else if (iGprSrc >= 4)
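        /* A plain REX prefix makes SPL/BPL/SIL/DIL addressable instead of AH/CH/DH/BH. */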
3326 pCodeBuf[off++] = X86_OP_REX;
3327 pCodeBuf[off++] = 0x88;
3328 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3329 RT_NOREF(iGprTmp);
3330
3331#elif defined(RT_ARCH_ARM64)
3332 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3333 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3334
3335#else
3336# error "port me"
3337#endif
3338 return off;
3339}
3340
3341
3342/**
3343 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3344 *
3345 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0); on
3346 * AMD64 it depends on the immediate value.
3347 *
3348 * @note ARM64: Misaligned @a offDisp values and values not in the
3349 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3350 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if
3351 * caller does not heed this.
3352 */
3353DECL_FORCE_INLINE_THROW(uint32_t)
3354iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3355 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3356{
3357#ifdef RT_ARCH_AMD64
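    /* Immediates that sign-extend cleanly from 32 bits fit the imm32 form of MOV;
       anything else must go via a temporary register. */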
3358 if ((int32_t)uImm == (int64_t)uImm)
3359 {
3360 /* mov mem64, imm32 (sign-extended) */
3361 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3362 pCodeBuf[off++] = 0xc7;
3363 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3364 pCodeBuf[off++] = RT_BYTE1(uImm);
3365 pCodeBuf[off++] = RT_BYTE2(uImm);
3366 pCodeBuf[off++] = RT_BYTE3(uImm);
3367 pCodeBuf[off++] = RT_BYTE4(uImm);
3368 }
3369 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3370 {
3371 /* require temporary register. */
3372 if (iGprImmTmp == UINT8_MAX)
3373 iGprImmTmp = iGprTmp;
3374 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3375 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3376 }
3377 else
3378# ifdef IEM_WITH_THROW_CATCH
3379 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3380# else
3381 AssertReleaseFailedStmt(off = UINT32_MAX);
3382# endif
3383
3384#elif defined(RT_ARCH_ARM64)
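    /* Storing zero needs no immediate load; the zero register (XZR) is stored directly. */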
3385 if (uImm == 0)
3386 iGprImmTmp = ARMV8_A64_REG_XZR;
3387 else
3388 {
3389 Assert(iGprImmTmp < 31);
3390 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3391 }
3392 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3393
3394#else
3395# error "port me"
3396#endif
3397 return off;
3398}
3399
3400
3401/**
3402 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3403 *
3404 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3405 *
3406 * @note ARM64: Misaligned @a offDisp values and values not in the
3407 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3408 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if
3409 * caller does not heed this.
3410 */
3411DECL_FORCE_INLINE_THROW(uint32_t)
3412iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3413 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3414{
3415#ifdef RT_ARCH_AMD64
3416 /* mov mem32, imm32 */
3417 if (iGprBase >= 8)
3418 pCodeBuf[off++] = X86_OP_REX_B;
3419 pCodeBuf[off++] = 0xc7;
3420 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3421 pCodeBuf[off++] = RT_BYTE1(uImm);
3422 pCodeBuf[off++] = RT_BYTE2(uImm);
3423 pCodeBuf[off++] = RT_BYTE3(uImm);
3424 pCodeBuf[off++] = RT_BYTE4(uImm);
3425 RT_NOREF(iGprImmTmp, iGprTmp);
3426
3427#elif defined(RT_ARCH_ARM64)
3429 if (uImm == 0)
3430 iGprImmTmp = ARMV8_A64_REG_XZR;
3431 else
3432 {
3433 Assert(iGprImmTmp < 31);
3434 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3435 }
3436 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3437 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3438
3439#else
3440# error "port me"
3441#endif
3442 return off;
3443}
3444
3445
3446/**
3447 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3448 *
3449 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3450 *
3451 * @note ARM64: Misaligned @a offDisp values and values not in the
3452 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3453 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if
3454 * caller does not heed this.
3455 */
3456DECL_FORCE_INLINE_THROW(uint32_t)
3457iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3458 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3459{
3460#ifdef RT_ARCH_AMD64
3461 /* mov mem16, imm16 */
3462 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3463 if (iGprBase >= 8)
3464 pCodeBuf[off++] = X86_OP_REX_B;
3465 pCodeBuf[off++] = 0xc7;
3466 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3467 pCodeBuf[off++] = RT_BYTE1(uImm);
3468 pCodeBuf[off++] = RT_BYTE2(uImm);
3469 RT_NOREF(iGprImmTmp, iGprTmp);
3470
3471#elif defined(RT_ARCH_ARM64)
3472 if (uImm == 0)
3473 iGprImmTmp = ARMV8_A64_REG_XZR;
3474 else
3475 {
3476 Assert(iGprImmTmp < 31);
3477 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3478 }
3479 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3480 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3481
3482#else
3483# error "port me"
3484#endif
3485 return off;
3486}
3487
3488
3489/**
3490 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3491 *
3492 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3493 *
3494 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3495 * temporary register (@a iGprTmp) if @a iGprImmTmp and @a iGprBase are the
3496 * same. Will assert / throw if caller does not heed this.
3497 */
3498DECL_FORCE_INLINE_THROW(uint32_t)
3499iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3500 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3501{
3502#ifdef RT_ARCH_AMD64
3503 /* mov mem8, imm8 */
3505 if (iGprBase >= 8)
3506 pCodeBuf[off++] = X86_OP_REX_B;
3507 pCodeBuf[off++] = 0xc6;
3508 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3509 pCodeBuf[off++] = uImm;
3510 RT_NOREF(iGprImmTmp, iGprTmp);
3511
3512#elif defined(RT_ARCH_ARM64)
3513 if (uImm == 0)
3514 iGprImmTmp = ARMV8_A64_REG_XZR;
3515 else
3516 {
3517 Assert(iGprImmTmp < 31);
3518 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3519 }
3520 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3521 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3522
3523#else
3524# error "port me"
3525#endif
3526 return off;
3527}
3528
3529
3530#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3531/**
3532 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3533 *
3534 * @note ARM64: Misaligned @a offDisp values and values not in the
3535 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp).
3536 * Will assert / throw if caller does not heed this.
3538 */
3539DECL_FORCE_INLINE_THROW(uint32_t)
3540iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3541 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3542{
3543#ifdef RT_ARCH_AMD64
3544 /* movdqu mem128, reg128 */
3545 pCodeBuf[off++] = 0xf3;
3546 if (iVecRegDst >= 8 || iGprBase >= 8)
3547 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3548 pCodeBuf[off++] = 0x0f;
3549 pCodeBuf[off++] = 0x7f;
3550 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3551 RT_NOREF(iGprTmp);
3552
3553#elif defined(RT_ARCH_ARM64)
3554 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3555 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3556
3557#else
3558# error "port me"
3559#endif
3560 return off;
3561}
3562
3563
3564/**
3565 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3566 */
3567DECL_INLINE_THROW(uint32_t)
3568iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3569{
3570#ifdef RT_ARCH_AMD64
3571 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3572 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3573
3574#elif defined(RT_ARCH_ARM64)
3575 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3576
3577#else
3578# error "port me"
3579#endif
3580 return off;
3581}
3582
3583
3584/**
3585 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3586 *
3587 * @note ARM64: Misaligned @a offDisp values and values not in the
3588 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp).
3589 * Will assert / throw if caller does not heed this.
3591 */
3592DECL_FORCE_INLINE_THROW(uint32_t)
3593iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3594 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3595{
3596#ifdef RT_ARCH_AMD64
3597 /* vmovdqu mem256, reg256 */
3598 pCodeBuf[off++] = X86_OP_VEX3;
3599 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3600 | X86_OP_VEX3_BYTE1_X
3601 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3602 | UINT8_C(0x01);
3603 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3604 pCodeBuf[off++] = 0x7f;
3605 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3606 RT_NOREF(iGprTmp);
3607
3608#elif defined(RT_ARCH_ARM64)
3609 Assert(!(iVecRegDst & 0x1));
3610 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3611 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3612 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3613 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3614#else
3615# error "port me"
3616#endif
3617 return off;
3618}
3619
3620
3621/**
3622 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3623 */
3624DECL_INLINE_THROW(uint32_t)
3625iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3626{
3627#ifdef RT_ARCH_AMD64
3628 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3629 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3630
3631#elif defined(RT_ARCH_ARM64)
3632 Assert(!(iVecRegDst & 0x1));
3633 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3634 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3635 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3636 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3637
3638#else
3639# error "port me"
3640#endif
3641 return off;
3642}
3643#endif
3644
3645
3646
3647/*********************************************************************************************************************************
3648* Subtractions and Additions *
3649*********************************************************************************************************************************/
3650
3651/**
3652 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3653 * @note The AMD64 version sets flags.
3654 */
3655DECL_INLINE_THROW(uint32_t)
3656iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3657{
3658#if defined(RT_ARCH_AMD64)
3659 /* sub Gv,Ev */
3660 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3661 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3662 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3663 pbCodeBuf[off++] = 0x2b;
3664 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3665
3666#elif defined(RT_ARCH_ARM64)
3667 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3668 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3669
3670#else
3671# error "Port me"
3672#endif
3673 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3674 return off;
3675}
3676
3677
3678/**
3679 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3680 * @note The AMD64 version sets flags.
3681 */
3682DECL_FORCE_INLINE(uint32_t)
3683iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3684{
3685#if defined(RT_ARCH_AMD64)
3686 /* sub Gv,Ev */
3687 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3688 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3689 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3690 pCodeBuf[off++] = 0x2b;
3691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3692
3693#elif defined(RT_ARCH_ARM64)
3694 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3695
3696#else
3697# error "Port me"
3698#endif
3699 return off;
3700}
3701
3702
3703/**
3704 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3705 * @note The AMD64 version sets flags.
3706 */
3707DECL_INLINE_THROW(uint32_t)
3708iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3709{
3710#if defined(RT_ARCH_AMD64)
3711 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3712#elif defined(RT_ARCH_ARM64)
3713 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3714#else
3715# error "Port me"
3716#endif
3717 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3718 return off;
3719}
3720
3721
3722/**
3723 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3724 *
3725 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3726 *
3727 * @note Larger constants will require a temporary register. Failing to specify
3728 * one when needed will trigger fatal assertion / throw.
3729 */
3730DECL_FORCE_INLINE_THROW(uint32_t)
3731iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3732 uint8_t iGprTmp = UINT8_MAX)
3733{
3734#ifdef RT_ARCH_AMD64
3735 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3736 if (iSubtrahend == 1)
3737 {
3738 /* dec r/m64 */
3739 pCodeBuf[off++] = 0xff;
3740 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3741 }
3742 else if (iSubtrahend == -1)
3743 {
3744 /* inc r/m64 */
3745 pCodeBuf[off++] = 0xff;
3746 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3747 }
3748 else if ((int8_t)iSubtrahend == iSubtrahend)
3749 {
3750 /* sub r/m64, imm8 */
3751 pCodeBuf[off++] = 0x83;
3752 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3753 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3754 }
3755 else if ((int32_t)iSubtrahend == iSubtrahend)
3756 {
3757 /* sub r/m64, imm32 */
3758 pCodeBuf[off++] = 0x81;
3759 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3760 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3761 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3762 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3763 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3764 }
3765 else if (iGprTmp != UINT8_MAX)
3766 {
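        /* Note: off - 1 backs up over the REX.W prefix emitted at the top of the
           function, as the immediate load below starts a fresh instruction sequence. */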
3767 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3768 /* sub r/m64, r64 */
3769 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3770 pCodeBuf[off++] = 0x29;
3771 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3772 }
3773 else
3774# ifdef IEM_WITH_THROW_CATCH
3775 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3776# else
3777 AssertReleaseFailedStmt(off = UINT32_MAX);
3778# endif
3779
3780#elif defined(RT_ARCH_ARM64)
3781 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3782 if (uAbsSubtrahend < 4096)
3783 {
3784 if (iSubtrahend >= 0)
3785 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3786 else
3787 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3788 }
3789 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3790 {
3791 if (iSubtrahend >= 0)
3792 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3793 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3794 else
3795 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3796 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3797 }
3798 else if (iGprTmp != UINT8_MAX)
3799 {
3800 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3801 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3802 }
3803 else
3804# ifdef IEM_WITH_THROW_CATCH
3805 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3806# else
3807 AssertReleaseFailedStmt(off = UINT32_MAX);
3808# endif
3809
3810#else
3811# error "Port me"
3812#endif
3813 return off;
3814}
3815
3816
3817/**
3818 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3819 *
3820 * @note Larger constants will require a temporary register. Failing to specify
3821 * one when needed will trigger fatal assertion / throw.
3822 */
3823DECL_INLINE_THROW(uint32_t)
3824iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3825 uint8_t iGprTmp = UINT8_MAX)
3826
3827{
3828#ifdef RT_ARCH_AMD64
3829 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3830#elif defined(RT_ARCH_ARM64)
3831 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3832#else
3833# error "Port me"
3834#endif
3835 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3836 return off;
3837}
3838
3839
3840/**
3841 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3842 *
3843 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3844 *
3845 * @note ARM64: Larger constants will require a temporary register. Failing to
3846 * specify one when needed will trigger fatal assertion / throw.
3847 */
3848DECL_FORCE_INLINE_THROW(uint32_t)
3849iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3850 uint8_t iGprTmp = UINT8_MAX)
3851{
3852#ifdef RT_ARCH_AMD64
3853 if (iGprDst >= 8)
3854 pCodeBuf[off++] = X86_OP_REX_B;
3855 if (iSubtrahend == 1)
3856 {
3857 /* dec r/m32 */
3858 pCodeBuf[off++] = 0xff;
3859 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3860 }
3861 else if (iSubtrahend == -1)
3862 {
3863 /* inc r/m32 */
3864 pCodeBuf[off++] = 0xff;
3865 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3866 }
3867 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3868 {
3869 /* sub r/m32, imm8 */
3870 pCodeBuf[off++] = 0x83;
3871 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3872 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3873 }
3874 else
3875 {
3876 /* sub r/m32, imm32 */
3877 pCodeBuf[off++] = 0x81;
3878 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3879 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3880 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3881 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3882 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3883 }
3884 RT_NOREF(iGprTmp);
3885
3886#elif defined(RT_ARCH_ARM64)
3887 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3888 if (uAbsSubtrahend < 4096)
3889 {
3890 if (iSubtrahend >= 0)
3891 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3892 else
3893 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3894 }
3895 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3896 {
3897 if (iSubtrahend >= 0)
3898 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3899 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3900 else
3901 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3902 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3903 }
3904 else if (iGprTmp != UINT8_MAX)
3905 {
3906 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3907 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3908 }
3909 else
3910# ifdef IEM_WITH_THROW_CATCH
3911 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3912# else
3913 AssertReleaseFailedStmt(off = UINT32_MAX);
3914# endif
3915
3916#else
3917# error "Port me"
3918#endif
3919 return off;
3920}
3921
3922
3923/**
3924 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3925 *
3926 * @note ARM64: Larger constants will require a temporary register. Failing to
3927 * specify one when needed will trigger fatal assertion / throw.
3928 */
3929DECL_INLINE_THROW(uint32_t)
3930iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3931 uint8_t iGprTmp = UINT8_MAX)
3932
3933{
3934#ifdef RT_ARCH_AMD64
3935 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3936#elif defined(RT_ARCH_ARM64)
3937 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3938#else
3939# error "Port me"
3940#endif
3941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3942 return off;
3943}
3944
3945
3946/**
3947 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3948 *
3949 * This will optimize using DEC/INC/whatever and ARM64 will not set flags,
3950 * so it is not suitable as a base for conditional jumps.
3951 *
3952 * @note AMD64: Will only update the lower 16 bits of the register.
3953 * @note ARM64: Will update the entire register.
3954 * @note ARM64: Larger constants will require a temporary register. Failing to
3955 * specify one when needed will trigger fatal assertion / throw.
3956 */
3957DECL_FORCE_INLINE_THROW(uint32_t)
3958iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3959 uint8_t iGprTmp = UINT8_MAX)
3960{
3961#ifdef RT_ARCH_AMD64
3962 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3963 if (iGprDst >= 8)
3964 pCodeBuf[off++] = X86_OP_REX_B;
3965 if (iSubtrahend == 1)
3966 {
3967 /* dec r/m16 */
3968 pCodeBuf[off++] = 0xff;
3969 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3970 }
3971 else if (iSubtrahend == -1)
3972 {
3973 /* inc r/m16 */
3974 pCodeBuf[off++] = 0xff;
3975 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3976 }
3977 else if ((int8_t)iSubtrahend == iSubtrahend)
3978 {
3979 /* sub r/m16, imm8 */
3980 pCodeBuf[off++] = 0x83;
3981 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3982 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3983 }
3984 else
3985 {
3986 /* sub r/m16, imm16 */
3987 pCodeBuf[off++] = 0x81;
3988 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3989 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3990 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3991 }
3992 RT_NOREF(iGprTmp);
3993
3994#elif defined(RT_ARCH_ARM64)
3995 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3996 if (uAbsSubtrahend < 4096)
3997 {
3998 if (iSubtrahend >= 0)
3999 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4000 else
4001 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4002 }
4003 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
4004 {
4005 if (iSubtrahend >= 0)
4006 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4007 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4008 else
4009 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4010 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4011 }
4012 else if (iGprTmp != UINT8_MAX)
4013 {
4014 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
4015 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4016 }
4017 else
4018# ifdef IEM_WITH_THROW_CATCH
4019 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4020# else
4021 AssertReleaseFailedStmt(off = UINT32_MAX);
4022# endif
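    /* The 32-bit ADD/SUB above may have carried into bits 16..31, so mask the
       result back down to 16 bits. */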
4023 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4024
4025#else
4026# error "Port me"
4027#endif
4028 return off;
4029}
4030
4031
4032/**
4033 * Emits adding a 64-bit GPR to another, storing the result in the first.
4034 * @note The AMD64 version sets flags.
4035 */
4036DECL_FORCE_INLINE(uint32_t)
4037iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4038{
4039#if defined(RT_ARCH_AMD64)
4040 /* add Gv,Ev */
4041 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4042 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
4043 pCodeBuf[off++] = 0x03;
4044 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4045
4046#elif defined(RT_ARCH_ARM64)
4047 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
4048
4049#else
4050# error "Port me"
4051#endif
4052 return off;
4053}
4054
4055
4056/**
4057 * Emits adding a 64-bit GPR to another, storing the result in the first.
4058 * @note The AMD64 version sets flags.
4059 */
4060DECL_INLINE_THROW(uint32_t)
4061iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4062{
4063#if defined(RT_ARCH_AMD64)
4064 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4065#elif defined(RT_ARCH_ARM64)
4066 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4067#else
4068# error "Port me"
4069#endif
4070 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4071 return off;
4072}
4073
4074
4075/**
4076 * Emits adding a 32-bit GPR to another, storing the result in the first.
4077 * @note The AMD64 version sets flags.
4078 */
4079DECL_FORCE_INLINE(uint32_t)
4080iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4081{
4082#if defined(RT_ARCH_AMD64)
4083 /* add Gv,Ev */
4084 if (iGprDst >= 8 || iGprAddend >= 8)
4085 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
4086 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
4087 pCodeBuf[off++] = 0x03;
4088 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4089
4090#elif defined(RT_ARCH_ARM64)
4091 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
4092
4093#else
4094# error "Port me"
4095#endif
4096 return off;
4097}
4098
4099
4100/**
4101 * Emits adding a 32-bit GPR to another, storing the result in the first.
4102 * @note The AMD64 version sets flags.
4103 */
4104DECL_INLINE_THROW(uint32_t)
4105iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4106{
4107#if defined(RT_ARCH_AMD64)
4108 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4109#elif defined(RT_ARCH_ARM64)
4110 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4111#else
4112# error "Port me"
4113#endif
4114 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4115 return off;
4116}
4117
4118
4119/**
4120 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4121 */
4122DECL_INLINE_THROW(uint32_t)
4123iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4124{
4125#if defined(RT_ARCH_AMD64)
4126 /* add or inc */
4127 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4128 if (iImm8 != 1)
4129 {
4130 pCodeBuf[off++] = 0x83;
4131 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4132 pCodeBuf[off++] = (uint8_t)iImm8;
4133 }
4134 else
4135 {
4136 pCodeBuf[off++] = 0xff;
4137 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4138 }
4139
4140#elif defined(RT_ARCH_ARM64)
4141 if (iImm8 >= 0)
4142 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
4143 else
4144 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
4145
4146#else
4147# error "Port me"
4148#endif
4149 return off;
4150}
4151
4152
4153/**
4154 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4155 */
4156DECL_INLINE_THROW(uint32_t)
4157iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4158{
4159#if defined(RT_ARCH_AMD64)
4160 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4161#elif defined(RT_ARCH_ARM64)
4162 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4163#else
4164# error "Port me"
4165#endif
4166 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4167 return off;
4168}
4169
4170
4171/**
4172 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4173 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4174 */
4175DECL_FORCE_INLINE(uint32_t)
4176iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4177{
4178#if defined(RT_ARCH_AMD64)
4179 /* add or inc */
4180 if (iGprDst >= 8)
4181 pCodeBuf[off++] = X86_OP_REX_B;
4182 if (iImm8 != 1)
4183 {
4184 pCodeBuf[off++] = 0x83;
4185 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4186 pCodeBuf[off++] = (uint8_t)iImm8;
4187 }
4188 else
4189 {
4190 pCodeBuf[off++] = 0xff;
4191 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4192 }
4193
4194#elif defined(RT_ARCH_ARM64)
4195 if (iImm8 >= 0)
4196 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4197 else
4198 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4199
4200#else
4201# error "Port me"
4202#endif
4203 return off;
4204}
4205
4206
4207/**
4208 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4209 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4210 */
4211DECL_INLINE_THROW(uint32_t)
4212iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4213{
4214#if defined(RT_ARCH_AMD64)
4215 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4216#elif defined(RT_ARCH_ARM64)
4217 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4218#else
4219# error "Port me"
4220#endif
4221 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4222 return off;
4223}
4224
4225
4226/**
4227 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4228 *
4229 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4230 */
4231DECL_FORCE_INLINE_THROW(uint32_t)
4232iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4233{
4234#if defined(RT_ARCH_AMD64)
4235 if ((int8_t)iAddend == iAddend)
4236 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4237
4238 if ((int32_t)iAddend == iAddend)
4239 {
4240 /* add grp, imm32 */
4241 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4242 pCodeBuf[off++] = 0x81;
4243 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4244 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4245 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4246 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4247 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4248 }
4249 else if (iGprTmp != UINT8_MAX)
4250 {
4251 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4252
4253 /* add dst, tmpreg */
4254 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4255 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4256 pCodeBuf[off++] = 0x03;
4257 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4258 }
4259 else
4260# ifdef IEM_WITH_THROW_CATCH
4261 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4262# else
4263 AssertReleaseFailedStmt(off = UINT32_MAX);
4264# endif
4265
4266#elif defined(RT_ARCH_ARM64)
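    /* ADD/SUB (immediate) takes a 12-bit value, optionally shifted left by 12,
       so addends up to 24 bits are synthesized in at most two instructions. */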
4267 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4268 if (uAbsAddend <= 0xffffffU)
4269 {
4270 bool const fSub = iAddend < 0;
4271 if (uAbsAddend > 0xfffU)
4272 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4273 false /*fSetFlags*/, true /*fShift12*/);
4274 if (uAbsAddend & 0xfffU)
4275 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4276 }
4277 else if (iGprTmp != UINT8_MAX)
4278 {
4279 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4280 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4281 }
4282 else
4283# ifdef IEM_WITH_THROW_CATCH
4284 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4285# else
4286 AssertReleaseFailedStmt(off = UINT32_MAX);
4287# endif
4288
4289#else
4290# error "Port me"
4291#endif
4292 return off;
4293}
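
/* Worked example for the ARM64 path above: iAddend = 0x12345 is emitted as
        add iGprDst, iGprDst, #0x12, lsl #12
        add iGprDst, iGprDst, #0x345
   while an addend such as 0x1234567 exceeds 24 bits and requires @a iGprTmp. */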
4294
4295
4296/**
4297 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4298 */
4299DECL_INLINE_THROW(uint32_t)
4300iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4301{
4302#if defined(RT_ARCH_AMD64)
4303 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4304 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4305
4306 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4307 {
4308 /* add grp, imm32 */
4309 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4310 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4311 pbCodeBuf[off++] = 0x81;
4312 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4313 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4314 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4315 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4316 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4317 }
4318 else
4319 {
4320 /* Best to use a temporary register to deal with this in the simplest way: */
4321 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4322
4323 /* add dst, tmpreg */
4324 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4325 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4326 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4327 pbCodeBuf[off++] = 0x03;
4328 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4329
4330 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4331 }
4332
4333#elif defined(RT_ARCH_ARM64)
4334 bool const fSub = iAddend < 0;
4335 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4336 if (uAbsAddend <= 0xffffffU)
4337 {
4338 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4339 if (uAbsAddend > 0xfffU)
4340 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4341 false /*fSetFlags*/, true /*fShift12*/);
4342 if (uAbsAddend & 0xfffU)
4343 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4344 }
4345 else
4346 {
4347 /* Use temporary register for the immediate. */
4348 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4349
4350 /* add gprdst, gprdst, tmpreg */
4351 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4352 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg);
4353
4354 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4355 }
4356
4357#else
4358# error "Port me"
4359#endif
4360 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4361 return off;
4362}
4363
4364
4365/**
4366 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4367 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4368 * @note ARM64: Addend values outside the -0xffffff...0xffffff range require a
4369 * temporary register (@a iGprTmp). Will assert / throw if the caller
4370 * does not provide one when needed.
4371 */
4372DECL_FORCE_INLINE_THROW(uint32_t)
4373iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4374{
4375#if defined(RT_ARCH_AMD64)
4376 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4377 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4378
4379 /* add grp, imm32 */
4380 if (iGprDst >= 8)
4381 pCodeBuf[off++] = X86_OP_REX_B;
4382 pCodeBuf[off++] = 0x81;
4383 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4384 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4385 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4386 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4387 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4388 RT_NOREF(iGprTmp);
4389
4390#elif defined(RT_ARCH_ARM64)
4391 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4392 if (uAbsAddend <= 0xffffffU)
4393 {
4394 bool const fSub = iAddend < 0;
4395 if (uAbsAddend > 0xfffU)
4396 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4397 false /*fSetFlags*/, true /*fShift12*/);
4398 if (uAbsAddend & 0xfffU)
4399 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4400 }
4401 else if (iGprTmp != UINT8_MAX)
4402 {
4403 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, iAddend);
4404 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4405 }
4406 else
4407# ifdef IEM_WITH_THROW_CATCH
4408 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4409# else
4410 AssertReleaseFailedStmt(off = UINT32_MAX);
4411# endif
4412
4413#else
4414# error "Port me"
4415#endif
4416 return off;
4417}
4418
4419
4420/**
4421 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4422 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4423 */
4424DECL_INLINE_THROW(uint32_t)
4425iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4426{
4427#if defined(RT_ARCH_AMD64)
4428 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4429
4430#elif defined(RT_ARCH_ARM64)
4431 bool const fSub = iAddend < 0;
4432 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4433 if (uAbsAddend <= 0xffffffU)
4434 {
4435 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4436 if (uAbsAddend > 0xfffU)
4437 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4438 false /*fSetFlags*/, true /*fShift12*/);
4439 if (uAbsAddend & 0xfffU)
4440 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4441 }
4442 else
4443 {
4444 /* Use temporary register for the immediate. */
4445 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4446
4447 /* add gprdst, gprdst, tmpreg */
4448 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4449 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4450
4451 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4452 }
4453
4454#else
4455# error "Port me"
4456#endif
4457 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4458 return off;
4459}
4460
4461
4462/**
4463 * Emits a 16-bit GPR add with a signed immediate addend.
4464 *
4465 * This will optimize using INC/DEC/whatever and ARM64 will not set flags,
4466 * so it is not suitable as a base for conditional jumps.
4467 *
4468 * @note AMD64: Will only update the lower 16 bits of the register.
4469 * @note ARM64: Will update the entire register.
4470 * @sa iemNativeEmitSubGpr16ImmEx
4471 */
4472DECL_FORCE_INLINE(uint32_t)
4473iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend)
4474{
4475#ifdef RT_ARCH_AMD64
4476 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4477 if (iGprDst >= 8)
4478 pCodeBuf[off++] = X86_OP_REX_B;
4479 if (iAddend == 1)
4480 {
4481 /* inc r/m16 */
4482 pCodeBuf[off++] = 0xff;
4483 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4484 }
4485 else if (iAddend == -1)
4486 {
4487 /* dec r/m16 */
4488 pCodeBuf[off++] = 0xff;
4489 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4490 }
4491 else if ((int8_t)iAddend == iAddend)
4492 {
4493 /* add r/m16, imm8 */
4494 pCodeBuf[off++] = 0x83;
4495 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4496 pCodeBuf[off++] = (uint8_t)iAddend;
4497 }
4498 else
4499 {
4500 /* add r/m16, imm16 */
4501 pCodeBuf[off++] = 0x81;
4502 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4503 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4504 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4505 }
4506
4507#elif defined(RT_ARCH_ARM64)
4508 bool const fSub = iAddend < 0;
4509 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4510 if (uAbsAddend > 0xfffU)
4511 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4512 false /*fSetFlags*/, true /*fShift12*/);
4513 if (uAbsAddend & 0xfffU)
4514 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4515 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4516
4517#else
4518# error "Port me"
4519#endif
4520 return off;
4521}
4522
4523
4524
4525/**
4526 * Adds two 64-bit GPRs together, storing the result in a third register.
4527 */
4528DECL_FORCE_INLINE(uint32_t)
4529iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4530{
4531#ifdef RT_ARCH_AMD64
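    /* AMD64 has no three-operand ADD, so seed the destination with one addend
       first unless it already aliases one of them (ARM64 does this in one ADD). */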
4532 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4533 {
4534 /** @todo consider LEA */
4535 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4536 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4537 }
4538 else
4539 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4540
4541#elif defined(RT_ARCH_ARM64)
4542 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4543
4544#else
4545# error "Port me!"
4546#endif
4547 return off;
4548}
4549
4550
4551
4552/**
4553 * Adds two 32-bit GPRs together, storing the result in a third register.
4554 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4555 */
4556DECL_FORCE_INLINE(uint32_t)
4557iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4558{
4559#ifdef RT_ARCH_AMD64
4560 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4561 {
4562 /** @todo consider LEA */
4563 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4564 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4565 }
4566 else
4567 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4568
4569#elif defined(RT_ARCH_ARM64)
4570 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4571
4572#else
4573# error "Port me!"
4574#endif
4575 return off;
4576}
4577
4578
4579/**
4580 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4581 * third register.
4582 *
4583 * @note The ARM64 version does not work for non-trivial constants if the
4584 * two registers are the same. Will assert / throw exception.
4585 */
4586DECL_FORCE_INLINE_THROW(uint32_t)
4587iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4588{
4589#ifdef RT_ARCH_AMD64
4590 /** @todo consider LEA */
4591 if ((int8_t)iImmAddend == iImmAddend)
4592 {
4593 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4594 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4595 }
4596 else
4597 {
4598 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4599 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4600 }
4601
4602#elif defined(RT_ARCH_ARM64)
4603 bool const fSub = iImmAddend < 0;
4604 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4605 if (uAbsImmAddend <= 0xfffU)
4606 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend);
4607 else if (uAbsImmAddend <= 0xffffffU)
4608 {
4609 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4610 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4611 if (uAbsImmAddend & 0xfffU)
4612 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & UINT32_C(0xfff));
4613 }
4614 else if (iGprDst != iGprAddend)
4615 {
4616 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4617 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4618 }
4619 else
4620# ifdef IEM_WITH_THROW_CATCH
4621 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4622# else
4623 AssertReleaseFailedStmt(off = UINT32_MAX);
4624# endif
4625
4626#else
4627# error "Port me!"
4628#endif
4629 return off;
4630}
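/* Illustration (added commentary, not from the original source): for, say,
   iImmAddend = 0x12345 the ARM64 path above splits the constant into two
   shifted 12-bit chunks:
       add x<dst>, x<src>, #0x12, lsl #12   ; bits 23:12
       add x<dst>, x<dst>, #0x345           ; bits 11:0
   Constants that do not fit in 24 bits fall back to a full immediate load,
   which is why iGprDst must differ from iGprAddend in that case. */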
4631
4632
4633/**
4634 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4635 * third register.
4636 *
4637 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4638 *
4639 * @note The ARM64 version does not work for non-trivial constants if the
4640 * two registers are the same; it will assert / throw an exception.
4641 */
4642DECL_FORCE_INLINE_THROW(uint32_t)
4643iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4644{
4645#ifdef RT_ARCH_AMD64
4646 /** @todo consider LEA */
4647 if ((int8_t)iImmAddend == iImmAddend)
4648 {
4649 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4650 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4651 }
4652 else
4653 {
4654 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4655 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend); /* 32-bit add, keeping bits 63:32 of the destination zero */
4656 }
4657
4658#elif defined(RT_ARCH_ARM64)
4659 bool const fSub = iImmAddend < 0;
4660 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4661 if (uAbsImmAddend <= 0xfffU)
4662 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4663 else if (uAbsImmAddend <= 0xffffffU)
4664 {
4665 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4666 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4667 if (uAbsImmAddend & 0xfffU)
4668 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & 0xfff, false /*f64Bit*/);
4669 }
4670 else if (iGprDst != iGprAddend)
4671 {
4672 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4673 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4674 }
4675 else
4676# ifdef IEM_WITH_THROW_CATCH
4677 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4678# else
4679 AssertReleaseFailedStmt(off = UINT32_MAX);
4680# endif
4681
4682#else
4683# error "Port me!"
4684#endif
4685 return off;
4686}
4687
4688
4689/*********************************************************************************************************************************
4690* Unary Operations *
4691*********************************************************************************************************************************/
4692
4693/**
4694 * Emits code for two's complement negation of a 64-bit GPR.
4695 */
4696DECL_FORCE_INLINE_THROW(uint32_t)
4697iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4698{
4699#if defined(RT_ARCH_AMD64)
4700 /* neg Ev */
4701 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4702 pCodeBuf[off++] = 0xf7;
4703 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4704
4705#elif defined(RT_ARCH_ARM64)
4706 /* sub dst, xzr, dst */
4707 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4708
4709#else
4710# error "Port me"
4711#endif
4712 return off;
4713}
4714
4715
4716/**
4717 * Emits code for two's complement negation of a 64-bit GPR.
4718 */
4719DECL_INLINE_THROW(uint32_t)
4720iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4721{
4722#if defined(RT_ARCH_AMD64)
4723 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4724#elif defined(RT_ARCH_ARM64)
4725 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4726#else
4727# error "Port me"
4728#endif
4729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4730 return off;
4731}
4732
4733
4734/**
4735 * Emits code for two's complement negation of a 32-bit GPR.
4736 * @note Bits 32 thru 63 are set to zero.
4737 */
4738DECL_FORCE_INLINE_THROW(uint32_t)
4739iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4740{
4741#if defined(RT_ARCH_AMD64)
4742 /* neg Ev */
4743 if (iGprDst >= 8)
4744 pCodeBuf[off++] = X86_OP_REX_B;
4745 pCodeBuf[off++] = 0xf7;
4746 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4747
4748#elif defined(RT_ARCH_ARM64)
4749 /* sub dst, xzr, dst */
4750 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4751
4752#else
4753# error "Port me"
4754#endif
4755 return off;
4756}
4757
4758
4759/**
4760 * Emits code for two's complement negation of a 32-bit GPR.
4761 * @note Bits 32 thru 63 are set to zero.
4762 */
4763DECL_INLINE_THROW(uint32_t)
4764iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4765{
4766#if defined(RT_ARCH_AMD64)
4767 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4768#elif defined(RT_ARCH_ARM64)
4769 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4770#else
4771# error "Port me"
4772#endif
4773 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4774 return off;
4775}
4776
4777
4778
4779/*********************************************************************************************************************************
4780* Bit Operations *
4781*********************************************************************************************************************************/
4782
4783/**
4784 * Emits code for clearing bits 16 thru 63 in the GPR.
4785 */
4786DECL_INLINE_THROW(uint32_t)
4787iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4788{
4789#if defined(RT_ARCH_AMD64)
4790 /* movzx Gv,Ew */
4791 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4792 if (iGprDst >= 8)
4793 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4794 pbCodeBuf[off++] = 0x0f;
4795 pbCodeBuf[off++] = 0xb7;
4796 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4797
4798#elif defined(RT_ARCH_ARM64)
4799 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4800# if 1
4801 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4802# else
4803 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4804 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4805# endif
4806#else
4807# error "Port me"
4808#endif
4809 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4810 return off;
4811}
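/* Note (added commentary, not from the original source): UXTH Wd, Wn is an
   alias of UBFM Wd, Wn, #0, #15, i.e. it zero-extends the low 16 bits, which
   makes it the direct ARM64 counterpart of the MOVZX emitted on AMD64. */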
4812
4813
4814/**
4815 * Emits code for AND'ing two 64-bit GPRs.
4816 *
4817 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4818 * and ARM64 hosts.
4819 */
4820DECL_FORCE_INLINE(uint32_t)
4821iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4822{
4823#if defined(RT_ARCH_AMD64)
4824 /* and Gv, Ev */
4825 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4826 pCodeBuf[off++] = 0x23;
4827 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4828 RT_NOREF(fSetFlags);
4829
4830#elif defined(RT_ARCH_ARM64)
4831 if (!fSetFlags)
4832 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4833 else
4834 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4835
4836#else
4837# error "Port me"
4838#endif
4839 return off;
4840}
4841
4842
4843/**
4844 * Emits code for AND'ing two 64-bit GPRs.
4845 *
4846 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4847 * and ARM64 hosts.
4848 */
4849DECL_INLINE_THROW(uint32_t)
4850iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4851{
4852#if defined(RT_ARCH_AMD64)
4853 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4854#elif defined(RT_ARCH_ARM64)
4855 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4856#else
4857# error "Port me"
4858#endif
4859 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4860 return off;
4861}
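/* Usage sketch (added commentary; the register indexes and the jump helper
   name are hypothetical):
       off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegResult, idxRegMask,
                                      true /*fSetFlags*/);
       off = iemNativeEmitJzToLabel(pReNative, off, idxLabel); // hypothetical
   On AMD64 the AND instruction always updates ZF; on ARM64 fSetFlags selects
   ANDS over AND so that Z is set as well, making a JZ/JNZ style branch valid
   on both hosts. */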
4862
4863
4864/**
4865 * Emits code for AND'ing two 32-bit GPRs.
4866 */
4867DECL_FORCE_INLINE(uint32_t)
4868iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4869{
4870#if defined(RT_ARCH_AMD64)
4871 /* and Gv, Ev */
4872 if (iGprDst >= 8 || iGprSrc >= 8)
4873 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4874 pCodeBuf[off++] = 0x23;
4875 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4876 RT_NOREF(fSetFlags);
4877
4878#elif defined(RT_ARCH_ARM64)
4879 if (!fSetFlags)
4880 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4881 else
4882 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4883
4884#else
4885# error "Port me"
4886#endif
4887 return off;
4888}
4889
4890
4891/**
4892 * Emits code for AND'ing two 32-bit GPRs.
4893 */
4894DECL_INLINE_THROW(uint32_t)
4895iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4896{
4897#if defined(RT_ARCH_AMD64)
4898 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4899#elif defined(RT_ARCH_ARM64)
4900 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4901#else
4902# error "Port me"
4903#endif
4904 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4905 return off;
4906}
4907
4908
4909/**
4910 * Emits code for AND'ing a 64-bit GPR with a constant.
4911 *
4912 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4913 * and ARM64 hosts.
4914 */
4915DECL_INLINE_THROW(uint32_t)
4916iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4917{
4918#if defined(RT_ARCH_AMD64)
4919 if ((int64_t)uImm == (int8_t)uImm)
4920 {
4921 /* and Ev, imm8 */
4922 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4923 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4924 pbCodeBuf[off++] = 0x83;
4925 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4926 pbCodeBuf[off++] = (uint8_t)uImm;
4927 }
4928 else if ((int64_t)uImm == (int32_t)uImm)
4929 {
4930 /* and Ev, imm32 */
4931 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4932 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4933 pbCodeBuf[off++] = 0x81;
4934 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4935 pbCodeBuf[off++] = RT_BYTE1(uImm);
4936 pbCodeBuf[off++] = RT_BYTE2(uImm);
4937 pbCodeBuf[off++] = RT_BYTE3(uImm);
4938 pbCodeBuf[off++] = RT_BYTE4(uImm);
4939 }
4940 else
4941 {
4942 /* Use temporary register for the 64-bit immediate. */
4943 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4944 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4945 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4946 }
4947 RT_NOREF(fSetFlags);
4948
4949#elif defined(RT_ARCH_ARM64)
4950 uint32_t uImmR = 0;
4951 uint32_t uImmNandS = 0;
4952 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4953 {
4954 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4955 if (!fSetFlags)
4956 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4957 else
4958 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4959 }
4960 else
4961 {
4962 /* Use temporary register for the 64-bit immediate. */
4963 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4964 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4965 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4966 }
4967
4968#else
4969# error "Port me"
4970#endif
4971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4972 return off;
4973}
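/* Note (added commentary, not from the original source):
   Armv8A64ConvertMask64ToImmRImmS only succeeds for ARMv8 "logical
   immediate" patterns - a contiguous run of ones, possibly rotated and
   repeated across the register, e.g. 0x00ff00ff00ff00ff or
   0xfffffffffffffff0. A value like 0x1234 is not encodable and takes the
   temporary register path above. */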
4974
4975
4976/**
4977 * Emits code for AND'ing a 32-bit GPR with a constant.
4978 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4979 * @note For ARM64 this only supports @a uImm values that can be expressed using
4980 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4981 * make sure this is possible!
4982 */
4983DECL_FORCE_INLINE_THROW(uint32_t)
4984iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4985{
4986#if defined(RT_ARCH_AMD64)
4987 /* and Ev, imm */
4988 if (iGprDst >= 8)
4989 pCodeBuf[off++] = X86_OP_REX_B;
4990 if ((int32_t)uImm == (int8_t)uImm)
4991 {
4992 pCodeBuf[off++] = 0x83;
4993 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4994 pCodeBuf[off++] = (uint8_t)uImm;
4995 }
4996 else
4997 {
4998 pCodeBuf[off++] = 0x81;
4999 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5000 pCodeBuf[off++] = RT_BYTE1(uImm);
5001 pCodeBuf[off++] = RT_BYTE2(uImm);
5002 pCodeBuf[off++] = RT_BYTE3(uImm);
5003 pCodeBuf[off++] = RT_BYTE4(uImm);
5004 }
5005 RT_NOREF(fSetFlags);
5006
5007#elif defined(RT_ARCH_ARM64)
5008 uint32_t uImmR = 0;
5009 uint32_t uImmNandS = 0;
5010 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5011 {
5012 if (!fSetFlags)
5013 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5014 else
5015 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5016 }
5017 else
5018# ifdef IEM_WITH_THROW_CATCH
5019 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5020# else
5021 AssertReleaseFailedStmt(off = UINT32_MAX);
5022# endif
5023
5024#else
5025# error "Port me"
5026#endif
5027 return off;
5028}
5029
5030
5031/**
5032 * Emits code for AND'ing a 32-bit GPR with a constant.
5033 *
5034 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5035 */
5036DECL_INLINE_THROW(uint32_t)
5037iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
5038{
5039#if defined(RT_ARCH_AMD64)
5040 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
5041
5042#elif defined(RT_ARCH_ARM64)
5043 uint32_t uImmR = 0;
5044 uint32_t uImmNandS = 0;
5045 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5046 {
5047 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5048 if (!fSetFlags)
5049 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5050 else
5051 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5052 }
5053 else
5054 {
5055 /* Use temporary register for the 64-bit immediate. */
5056 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5057 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
5058 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5059 }
5060
5061#else
5062# error "Port me"
5063#endif
5064 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5065 return off;
5066}
5067
5068
5069/**
5070 * Emits code for AND'ing a 64-bit GPR with a constant.
5071 *
5072 * @note For ARM64 any complicated immediate w/o an AND/ANDS compatible
5073 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc are
5074 * the same (the AMD64 variant loads the constant into @a iGprDst first).
5075 */
5076DECL_FORCE_INLINE_THROW(uint32_t)
5077iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
5078 bool fSetFlags = false)
5079{
5080#if defined(RT_ARCH_AMD64)
5081 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
5082 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
5083 RT_NOREF(fSetFlags);
5084
5085#elif defined(RT_ARCH_ARM64)
5086 uint32_t uImmR = 0;
5087 uint32_t uImmNandS = 0;
5088 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5089 {
5090 if (!fSetFlags)
5091 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
5092 else
5093 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
5094 }
5095 else if (iGprDst != iGprSrc)
5096 {
5097 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
5098 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5099 }
5100 else
5101# ifdef IEM_WITH_THROW_CATCH
5102 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5103# else
5104 AssertReleaseFailedStmt(off = UINT32_MAX);
5105# endif
5106
5107#else
5108# error "Port me"
5109#endif
5110 return off;
5111}
5112
5113/**
5114 * Emits code for AND'ing a 32-bit GPR with a constant.
5115 *
5116 * @note For ARM64 any complicated immediate w/o an AND/ANDS compatible
5117 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc are
5118 * the same (the AMD64 variant loads the constant into @a iGprDst first).
5119 *
5120 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5121 */
5122DECL_FORCE_INLINE_THROW(uint32_t)
5123iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
5124 bool fSetFlags = false)
5125{
5126#if defined(RT_ARCH_AMD64)
5127 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5128 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5129 RT_NOREF(fSetFlags);
5130
5131#elif defined(RT_ARCH_ARM64)
5132 uint32_t uImmR = 0;
5133 uint32_t uImmNandS = 0;
5134 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5135 {
5136 if (!fSetFlags)
5137 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5138 else
5139 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5140 }
5141 else if (iGprDst != iGprSrc)
5142 {
5143 /* If a value of 64K or above has no more than 16 significant bits (i.e. it
5144 fits in 16 bits once its trailing zeros are shifted out), we can use a
5145 MOVZ + shifted-register AND to save an instruction. We prefer the builtin
5146 ctz here to our own, since the compiler can evaluate uImm at compile time
5147 if it is a constant (often the case). This is useful for the TLB lookup code. */
5148 if (uImm > 0xffffU)
5149 {
5150# if defined(__GNUC__)
5151 unsigned cTrailingZeros = __builtin_ctz(uImm);
5152# else
5153 unsigned cTrailingZeros = ASMBitFirstSetU32(uImm) - 1;
5154# endif
5155 if ((uImm >> cTrailingZeros) <= 0xffffU)
5156 {
5157 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprDst, uImm >> cTrailingZeros);
5158 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprSrc,
5159 iGprDst, true /*f64Bit*/, cTrailingZeros, kArmv8A64InstrShift_Lsl);
5160 return off;
5161 }
5162 }
5163 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5164 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5165 }
5166 else
5167# ifdef IEM_WITH_THROW_CATCH
5168 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5169# else
5170 AssertReleaseFailedStmt(off = UINT32_MAX);
5171# endif
5172
5173#else
5174# error "Port me"
5175#endif
5176 return off;
5177}
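/* Illustration (added commentary; the value is made up): for uImm = 0x123000,
   which is not a valid logical immediate, the shift trick above emits
       movz w<dst>, #0x123
       and  x<dst>, x<src>, x<dst>, lsl #12
   instead of a longer immediate load followed by a plain AND. */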
5178
5179
5180/**
5181 * Emits code for OR'ing two 64-bit GPRs.
5182 */
5183DECL_FORCE_INLINE(uint32_t)
5184iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5185{
5186#if defined(RT_ARCH_AMD64)
5187 /* or Gv, Ev */
5188 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5189 pCodeBuf[off++] = 0x0b;
5190 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5191
5192#elif defined(RT_ARCH_ARM64)
5193 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5194
5195#else
5196# error "Port me"
5197#endif
5198 return off;
5199}
5200
5201
5202/**
5203 * Emits code for OR'ing two 64-bit GPRs.
5204 */
5205DECL_INLINE_THROW(uint32_t)
5206iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5207{
5208#if defined(RT_ARCH_AMD64)
5209 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5210#elif defined(RT_ARCH_ARM64)
5211 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5212#else
5213# error "Port me"
5214#endif
5215 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5216 return off;
5217}
5218
5219
5220/**
5221 * Emits code for OR'ing two 32-bit GPRs.
5222 * @note Bits 63:32 of the destination GPR will be cleared.
5223 */
5224DECL_FORCE_INLINE(uint32_t)
5225iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5226{
5227#if defined(RT_ARCH_AMD64)
5228 /* or Gv, Ev */
5229 if (iGprDst >= 8 || iGprSrc >= 8)
5230 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5231 pCodeBuf[off++] = 0x0b;
5232 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5233
5234#elif defined(RT_ARCH_ARM64)
5235 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5236
5237#else
5238# error "Port me"
5239#endif
5240 return off;
5241}
5242
5243
5244/**
5245 * Emits code for OR'ing two 32-bit GPRs.
5246 * @note Bits 63:32 of the destination GPR will be cleared.
5247 */
5248DECL_INLINE_THROW(uint32_t)
5249iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5250{
5251#if defined(RT_ARCH_AMD64)
5252 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5253#elif defined(RT_ARCH_ARM64)
5254 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5255#else
5256# error "Port me"
5257#endif
5258 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5259 return off;
5260}
5261
5262
5263/**
5264 * Emits code for OR'ing a 64-bit GPR with a constant.
5265 */
5266DECL_INLINE_THROW(uint32_t)
5267iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5268{
5269#if defined(RT_ARCH_AMD64)
5270 if ((int64_t)uImm == (int8_t)uImm)
5271 {
5272 /* or Ev, imm8 */
5273 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5274 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5275 pbCodeBuf[off++] = 0x83;
5276 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5277 pbCodeBuf[off++] = (uint8_t)uImm;
5278 }
5279 else if ((int64_t)uImm == (int32_t)uImm)
5280 {
5281 /* or Ev, imm32 */
5282 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5283 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5284 pbCodeBuf[off++] = 0x81;
5285 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5286 pbCodeBuf[off++] = RT_BYTE1(uImm);
5287 pbCodeBuf[off++] = RT_BYTE2(uImm);
5288 pbCodeBuf[off++] = RT_BYTE3(uImm);
5289 pbCodeBuf[off++] = RT_BYTE4(uImm);
5290 }
5291 else
5292 {
5293 /* Use temporary register for the 64-bit immediate. */
5294 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5295 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5297 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5298 }
5299
5300#elif defined(RT_ARCH_ARM64)
5301 uint32_t uImmR = 0;
5302 uint32_t uImmNandS = 0;
5303 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5304 {
5305 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5306 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5307 }
5308 else
5309 {
5310 /* Use temporary register for the 64-bit immediate. */
5311 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5312 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5313 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5314 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5315 }
5316
5317#else
5318# error "Port me"
5319#endif
5320 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5321 return off;
5322}
5323
5324
5325/**
5326 * Emits code for OR'ing a 32-bit GPR with a constant.
5327 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5328 * @note For ARM64 this only supports @a uImm values that can be expressed using
5329 * the two 6-bit immediates of the ORR instruction. The caller must make
5330 * sure this is possible!
5331 */
5332DECL_FORCE_INLINE_THROW(uint32_t)
5333iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5334{
5335#if defined(RT_ARCH_AMD64)
5336 /* or Ev, imm */
5337 if (iGprDst >= 8)
5338 pCodeBuf[off++] = X86_OP_REX_B;
5339 if ((int32_t)uImm == (int8_t)uImm)
5340 {
5341 pCodeBuf[off++] = 0x83;
5342 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5343 pCodeBuf[off++] = (uint8_t)uImm;
5344 }
5345 else
5346 {
5347 pCodeBuf[off++] = 0x81;
5348 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5349 pCodeBuf[off++] = RT_BYTE1(uImm);
5350 pCodeBuf[off++] = RT_BYTE2(uImm);
5351 pCodeBuf[off++] = RT_BYTE3(uImm);
5352 pCodeBuf[off++] = RT_BYTE4(uImm);
5353 }
5354
5355#elif defined(RT_ARCH_ARM64)
5356 uint32_t uImmR = 0;
5357 uint32_t uImmNandS = 0;
5358 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5359 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5360 else
5361# ifdef IEM_WITH_THROW_CATCH
5362 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5363# else
5364 AssertReleaseFailedStmt(off = UINT32_MAX);
5365# endif
5366
5367#else
5368# error "Port me"
5369#endif
5370 return off;
5371}
5372
5373
5374/**
5375 * Emits code for OR'ing a 32-bit GPR with a constant.
5376 *
5377 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5378 */
5379DECL_INLINE_THROW(uint32_t)
5380iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5381{
5382#if defined(RT_ARCH_AMD64)
5383 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5384
5385#elif defined(RT_ARCH_ARM64)
5386 uint32_t uImmR = 0;
5387 uint32_t uImmNandS = 0;
5388 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5389 {
5390 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5391 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5392 }
5393 else
5394 {
5395 /* Use temporary register for the 64-bit immediate. */
5396 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5397 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5398 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5399 }
5400
5401#else
5402# error "Port me"
5403#endif
5404 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5405 return off;
5406}
5407
5408
5409
5410/**
5411 * ORs two 64-bit GPRs together, storing the result in a third register.
5412 */
5413DECL_FORCE_INLINE(uint32_t)
5414iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5415{
5416#ifdef RT_ARCH_AMD64
5417 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5418 {
5419 /** @todo consider LEA */
5420 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5421 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5422 }
5423 else
5424 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5425
5426#elif defined(RT_ARCH_ARM64)
5427 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5428
5429#else
5430# error "Port me!"
5431#endif
5432 return off;
5433}
5434
5435
5436
5437/**
5438 * ORs two 32-bit GPRs together, storing the result in a third register.
5439 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5440 */
5441DECL_FORCE_INLINE(uint32_t)
5442iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5443{
5444#ifdef RT_ARCH_AMD64
5445 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5446 {
5447 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5448 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5449 }
5450 else
5451 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5452
5453#elif defined(RT_ARCH_ARM64)
5454 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5455
5456#else
5457# error "Port me!"
5458#endif
5459 return off;
5460}
5461
5462
5463/**
5464 * Emits code for XOR'ing two 64-bit GPRs.
5465 */
5466DECL_INLINE_THROW(uint32_t)
5467iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5468{
5469#if defined(RT_ARCH_AMD64)
5470 /* xor Gv, Ev */
5471 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5472 pCodeBuf[off++] = 0x33;
5473 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5474
5475#elif defined(RT_ARCH_ARM64)
5476 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5477
5478#else
5479# error "Port me"
5480#endif
5481 return off;
5482}
5483
5484
5485/**
5486 * Emits code for XOR'ing two 64-bit GPRs.
5487 */
5488DECL_INLINE_THROW(uint32_t)
5489iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5490{
5491#if defined(RT_ARCH_AMD64)
5492 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5493#elif defined(RT_ARCH_ARM64)
5494 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5495#else
5496# error "Port me"
5497#endif
5498 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5499 return off;
5500}
5501
5502
5503/**
5504 * Emits code for XOR'ing two 32-bit GPRs.
5505 */
5506DECL_INLINE_THROW(uint32_t)
5507iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5508{
5509#if defined(RT_ARCH_AMD64)
5510 /* xor Gv, Ev */
5511 if (iGprDst >= 8 || iGprSrc >= 8)
5512 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5513 pCodeBuf[off++] = 0x33;
5514 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5515
5516#elif defined(RT_ARCH_ARM64)
5517 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5518
5519#else
5520# error "Port me"
5521#endif
5522 return off;
5523}
5524
5525
5526/**
5527 * Emits code for XOR'ing two 32-bit GPRs.
5528 */
5529DECL_INLINE_THROW(uint32_t)
5530iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5531{
5532#if defined(RT_ARCH_AMD64)
5533 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5534#elif defined(RT_ARCH_ARM64)
5535 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5536#else
5537# error "Port me"
5538#endif
5539 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5540 return off;
5541}
5542
5543
5544/**
5545 * Emits code for XOR'ing a 32-bit GPR with a constant.
5546 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5547 * @note For ARM64 this only supports @a uImm values that can be expressed using
5548 * the two 6-bit immediates of the EOR instruction. The caller must make
5549 * sure this is possible!
5550 */
5551DECL_FORCE_INLINE_THROW(uint32_t)
5552iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5553{
5554#if defined(RT_ARCH_AMD64)
5555 /* xor Ev, imm */
5556 if (iGprDst >= 8)
5557 pCodeBuf[off++] = X86_OP_REX_B;
5558 if ((int32_t)uImm == (int8_t)uImm)
5559 {
5560 pCodeBuf[off++] = 0x83;
5561 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5562 pCodeBuf[off++] = (uint8_t)uImm;
5563 }
5564 else
5565 {
5566 pCodeBuf[off++] = 0x81;
5567 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5568 pCodeBuf[off++] = RT_BYTE1(uImm);
5569 pCodeBuf[off++] = RT_BYTE2(uImm);
5570 pCodeBuf[off++] = RT_BYTE3(uImm);
5571 pCodeBuf[off++] = RT_BYTE4(uImm);
5572 }
5573
5574#elif defined(RT_ARCH_ARM64)
5575 uint32_t uImmR = 0;
5576 uint32_t uImmNandS = 0;
5577 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5578 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5579 else
5580# ifdef IEM_WITH_THROW_CATCH
5581 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5582# else
5583 AssertReleaseFailedStmt(off = UINT32_MAX);
5584# endif
5585
5586#else
5587# error "Port me"
5588#endif
5589 return off;
5590}
5591
5592
5593/**
5594 * Emits code for XOR'ing a 32-bit GPR with a constant.
5595 */
5596DECL_INLINE_THROW(uint32_t)
5597iemNativeEmitXorGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5598{
5599#if defined(RT_ARCH_AMD64)
5600 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5601#elif defined(RT_ARCH_ARM64)
5602 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, uImm);
5603#else
5604# error "Port me"
5605#endif
5606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5607 return off;
5608}
5609
5610
5611/*********************************************************************************************************************************
5612* Shifting *
5613*********************************************************************************************************************************/
5614
5615/**
5616 * Emits code for shifting a GPR a fixed number of bits to the left.
5617 */
5618DECL_FORCE_INLINE(uint32_t)
5619iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5620{
5621 Assert(cShift > 0 && cShift < 64);
5622
5623#if defined(RT_ARCH_AMD64)
5624 /* shl dst, cShift */
5625 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5626 if (cShift != 1)
5627 {
5628 pCodeBuf[off++] = 0xc1;
5629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5630 pCodeBuf[off++] = cShift;
5631 }
5632 else
5633 {
5634 pCodeBuf[off++] = 0xd1;
5635 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5636 }
5637
5638#elif defined(RT_ARCH_ARM64)
5639 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5640
5641#else
5642# error "Port me"
5643#endif
5644 return off;
5645}
5646
5647
5648/**
5649 * Emits code for shifting a GPR a fixed number of bits to the left.
5650 */
5651DECL_INLINE_THROW(uint32_t)
5652iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5653{
5654#if defined(RT_ARCH_AMD64)
5655 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5656#elif defined(RT_ARCH_ARM64)
5657 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5658#else
5659# error "Port me"
5660#endif
5661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5662 return off;
5663}
5664
5665
5666/**
5667 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5668 */
5669DECL_FORCE_INLINE(uint32_t)
5670iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5671{
5672 Assert(cShift > 0 && cShift < 32);
5673
5674#if defined(RT_ARCH_AMD64)
5675 /* shl dst, cShift */
5676 if (iGprDst >= 8)
5677 pCodeBuf[off++] = X86_OP_REX_B;
5678 if (cShift != 1)
5679 {
5680 pCodeBuf[off++] = 0xc1;
5681 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5682 pCodeBuf[off++] = cShift;
5683 }
5684 else
5685 {
5686 pCodeBuf[off++] = 0xd1;
5687 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5688 }
5689
5690#elif defined(RT_ARCH_ARM64)
5691 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5692
5693#else
5694# error "Port me"
5695#endif
5696 return off;
5697}
5698
5699
5700/**
5701 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5702 */
5703DECL_INLINE_THROW(uint32_t)
5704iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5705{
5706#if defined(RT_ARCH_AMD64)
5707 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5708#elif defined(RT_ARCH_ARM64)
5709 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5710#else
5711# error "Port me"
5712#endif
5713 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5714 return off;
5715}
5716
5717
5718/**
5719 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5720 */
5721DECL_FORCE_INLINE(uint32_t)
5722iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5723{
5724 Assert(cShift > 0 && cShift < 64);
5725
5726#if defined(RT_ARCH_AMD64)
5727 /* shr dst, cShift */
5728 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5729 if (cShift != 1)
5730 {
5731 pCodeBuf[off++] = 0xc1;
5732 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5733 pCodeBuf[off++] = cShift;
5734 }
5735 else
5736 {
5737 pCodeBuf[off++] = 0xd1;
5738 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5739 }
5740
5741#elif defined(RT_ARCH_ARM64)
5742 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5743
5744#else
5745# error "Port me"
5746#endif
5747 return off;
5748}
5749
5750
5751/**
5752 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5753 */
5754DECL_INLINE_THROW(uint32_t)
5755iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5756{
5757#if defined(RT_ARCH_AMD64)
5758 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5759#elif defined(RT_ARCH_ARM64)
5760 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5761#else
5762# error "Port me"
5763#endif
5764 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5765 return off;
5766}
5767
5768
5769/**
5770 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5771 * right.
5772 */
5773DECL_FORCE_INLINE(uint32_t)
5774iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5775{
5776 Assert(cShift > 0 && cShift < 32);
5777
5778#if defined(RT_ARCH_AMD64)
5779 /* shr dst, cShift */
5780 if (iGprDst >= 8)
5781 pCodeBuf[off++] = X86_OP_REX_B;
5782 if (cShift != 1)
5783 {
5784 pCodeBuf[off++] = 0xc1;
5785 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5786 pCodeBuf[off++] = cShift;
5787 }
5788 else
5789 {
5790 pCodeBuf[off++] = 0xd1;
5791 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5792 }
5793
5794#elif defined(RT_ARCH_ARM64)
5795 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5796
5797#else
5798# error "Port me"
5799#endif
5800 return off;
5801}
5802
5803
5804/**
5805 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5806 * right.
5807 */
5808DECL_INLINE_THROW(uint32_t)
5809iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5810{
5811#if defined(RT_ARCH_AMD64)
5812 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5813#elif defined(RT_ARCH_ARM64)
5814 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5815#else
5816# error "Port me"
5817#endif
5818 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5819 return off;
5820}
5821
5822
5823/**
5824 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5825 * right and assigning it to a different GPR.
5826 */
5827DECL_INLINE_THROW(uint32_t)
5828iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5829{
5830 Assert(cShift > 0); Assert(cShift < 32);
5831#if defined(RT_ARCH_AMD64)
5832 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5833 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5834
5835#elif defined(RT_ARCH_ARM64)
5836 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5837
5838#else
5839# error "Port me"
5840#endif
5841 return off;
5842}
5843
5844
5845/**
5846 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5847 */
5848DECL_FORCE_INLINE(uint32_t)
5849iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5850{
5851 Assert(cShift > 0 && cShift < 64);
5852
5853#if defined(RT_ARCH_AMD64)
5854 /* sar dst, cShift */
5855 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5856 if (cShift != 1)
5857 {
5858 pCodeBuf[off++] = 0xc1;
5859 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5860 pCodeBuf[off++] = cShift;
5861 }
5862 else
5863 {
5864 pCodeBuf[off++] = 0xd1;
5865 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5866 }
5867
5868#elif defined(RT_ARCH_ARM64)
5869 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);
5870
5871#else
5872# error "Port me"
5873#endif
5874 return off;
5875}
5876
5877
5878/**
5879 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5880 */
5881DECL_INLINE_THROW(uint32_t)
5882iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5883{
5884#if defined(RT_ARCH_AMD64)
5885 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5886#elif defined(RT_ARCH_ARM64)
5887 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5888#else
5889# error "Port me"
5890#endif
5891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5892 return off;
5893}
5894
5895
5896/**
5897 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5898 */
5899DECL_FORCE_INLINE(uint32_t)
5900iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5901{
5902 Assert(cShift > 0 && cShift < 32);
5903
5904#if defined(RT_ARCH_AMD64)
5905 /* sar dst, cShift */
5906 if (iGprDst >= 8)
5907 pCodeBuf[off++] = X86_OP_REX_B;
5908 if (cShift != 1)
5909 {
5910 pCodeBuf[off++] = 0xc1;
5911 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5912 pCodeBuf[off++] = cShift;
5913 }
5914 else
5915 {
5916 pCodeBuf[off++] = 0xd1;
5917 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5918 }
5919
5920#elif defined(RT_ARCH_ARM64)
5921 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5922
5923#else
5924# error "Port me"
5925#endif
5926 return off;
5927}
5928
5929
5930/**
5931 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5932 */
5933DECL_INLINE_THROW(uint32_t)
5934iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5935{
5936#if defined(RT_ARCH_AMD64)
5937 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5938#elif defined(RT_ARCH_ARM64)
5939 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5940#else
5941# error "Port me"
5942#endif
5943 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5944 return off;
5945}
5946
5947
5948/**
5949 * Emits code for rotating a GPR a fixed number of bits to the left.
5950 */
5951DECL_FORCE_INLINE(uint32_t)
5952iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5953{
5954 Assert(cShift > 0 && cShift < 64);
5955
5956#if defined(RT_ARCH_AMD64)
5957 /* rol dst, cShift */
5958 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5959 if (cShift != 1)
5960 {
5961 pCodeBuf[off++] = 0xc1;
5962 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5963 pCodeBuf[off++] = cShift;
5964 }
5965 else
5966 {
5967 pCodeBuf[off++] = 0xd1;
5968 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5969 }
5970
5971#elif defined(RT_ARCH_ARM64)
5972 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5973
5974#else
5975# error "Port me"
5976#endif
5977 return off;
5978}
5979
5980
5981#if defined(RT_ARCH_AMD64)
5982/**
5983 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5984 */
5985DECL_FORCE_INLINE(uint32_t)
5986iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5987{
5988 Assert(cShift > 0 && cShift < 32);
5989
5990 /* rcl dst, cShift */
5991 if (iGprDst >= 8)
5992 pCodeBuf[off++] = X86_OP_REX_B;
5993 if (cShift != 1)
5994 {
5995 pCodeBuf[off++] = 0xc1;
5996 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5997 pCodeBuf[off++] = cShift;
5998 }
5999 else
6000 {
6001 pCodeBuf[off++] = 0xd1;
6002 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
6003 }
6004
6005 return off;
6006}
6007#endif /* RT_ARCH_AMD64 */
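/* Note (added commentary, not from the original source): RCL rotates through
   the carry flag, i.e. it performs a 33-bit rotation of CF:reg32. That is
   also why this helper is AMD64 only - ARM64 has no single-instruction
   rotate-through-carry counterpart. */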
6008
6009
6010
6011/**
6012 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
6013 * @note ARM64 clears bits 63:32 of the destination GPR; the AMD64 variant leaves bits 63:16 unchanged.
6014 */
6015DECL_FORCE_INLINE(uint32_t)
6016iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6017{
6018#if defined(RT_ARCH_AMD64)
6019 /*
6020 * There is no bswap r16 on x86 (the encoding exists but does not work).
6021 * So just use a rol (gcc -O2 is doing that).
6022 *
6023 * rol r16, 0x8
6024 */
6025 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6026 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6027 if (iGpr >= 8)
6028 pbCodeBuf[off++] = X86_OP_REX_B;
6029 pbCodeBuf[off++] = 0xc1;
6030 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
6031 pbCodeBuf[off++] = 0x08;
6032#elif defined(RT_ARCH_ARM64)
6033 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6034
6035 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
6036#else
6037# error "Port me"
6038#endif
6039
6040 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6041 return off;
6042}
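/* Illustration (added commentary): rotating the low 16 bits left by 8 swaps
   the two bytes, e.g. 0x1234 becomes 0x3412, which is exactly what a working
   16-bit BSWAP would have produced. */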
6043
6044
6045/**
6046 * Emits code for reversing the byte order in a 32-bit GPR.
6047 * @note Bits 63:32 of the destination GPR will be cleared.
6048 */
6049DECL_FORCE_INLINE(uint32_t)
6050iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6051{
6052#if defined(RT_ARCH_AMD64)
6053 /* bswap r32 */
6054 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6055
6056 if (iGpr >= 8)
6057 pbCodeBuf[off++] = X86_OP_REX_B;
6058 pbCodeBuf[off++] = 0x0f;
6059 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
6060#elif defined(RT_ARCH_ARM64)
6061 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6062
6063 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
6064#else
6065# error "Port me"
6066#endif
6067
6068 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6069 return off;
6070}
6071
6072
6073/**
6074 * Emits code for reversing the byte order in a 64-bit GPR.
6075 */
6076DECL_FORCE_INLINE(uint32_t)
6077iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6078{
6079#if defined(RT_ARCH_AMD64)
6080 /* bswap r64 */
6081 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6082
6083 if (iGpr >= 8)
6084 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
6085 else
6086 pbCodeBuf[off++] = X86_OP_REX_W;
6087 pbCodeBuf[off++] = 0x0f;
6088 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
6089#elif defined(RT_ARCH_ARM64)
6090 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6091
6092 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
6093#else
6094# error "Port me"
6095#endif
6096
6097 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6098 return off;
6099}
6100
6101
6102/*********************************************************************************************************************************
6103* Bitfield manipulation *
6104*********************************************************************************************************************************/
6105
6106/**
6107 * Emits code for clearing bit @a iBit in a 32-bit GPR.
6108 */
6109DECL_FORCE_INLINE(uint32_t)
6110iemNativeEmitBitClearInGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const iGpr, uint8_t iBit)
6111{
6112 Assert(iBit < 32);
6113
6114#if defined(RT_ARCH_AMD64)
6115 /* btr r32, imm8 */
6116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6117
6118 if (iGpr >= 8)
6119 pbCodeBuf[off++] = X86_OP_REX_B;
6120 pbCodeBuf[off++] = 0x0f;
6121 pbCodeBuf[off++] = 0xba;
6122 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGpr & 7);
6123 pbCodeBuf[off++] = iBit;
6124#elif defined(RT_ARCH_ARM64)
6125 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6126
6127 pu32CodeBuf[off++] = Armv8A64MkInstrBfc(iGpr, iBit /*offFirstBit*/, 1 /*cBits*/, true /*f64Bit*/);
6128#else
6129# error "Port me"
6130#endif
6131
6132 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6133 return off;
6134}
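/* Note (added commentary, not from the original source): AMD64 BTR also
   copies the old bit value into CF, but that side effect is not relied upon
   here; the ARM64 BFC simply zeroes a one-bit wide bitfield at the same
   position. */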
6135
6136
6137/*********************************************************************************************************************************
6138* Compare and Testing *
6139*********************************************************************************************************************************/
6140
6141
6142#ifdef RT_ARCH_ARM64
6143/**
6144 * Emits an ARM64 compare instruction.
6145 */
6146DECL_INLINE_THROW(uint32_t)
6147iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
6148 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
6149{
6150 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6151 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
6152 f64Bit, true /*fSetFlags*/, cShift, enmShift);
6153 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6154 return off;
6155}
6156#endif
6157
6158
6159/**
6160 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6161 * with conditional instructions.
6162 */
6163DECL_FORCE_INLINE(uint32_t)
6164iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6165{
6166#ifdef RT_ARCH_AMD64
6167 /* cmp Gv, Ev */
6168 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6169 pCodeBuf[off++] = 0x3b;
6170 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6171
6172#elif defined(RT_ARCH_ARM64)
6173 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
6174
6175#else
6176# error "Port me!"
6177#endif
6178 return off;
6179}
6180
6181
6182/**
6183 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6184 * with conditional instructions.
6185 */
6186DECL_INLINE_THROW(uint32_t)
6187iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6188{
6189#ifdef RT_ARCH_AMD64
6190 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6191#elif defined(RT_ARCH_ARM64)
6192 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6193#else
6194# error "Port me!"
6195#endif
6196 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6197 return off;
6198}
6199
6200
6201/**
6202 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6203 * with conditional instructions.
6204 */
6205DECL_FORCE_INLINE(uint32_t)
6206iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6207{
6208#ifdef RT_ARCH_AMD64
6209 /* cmp Gv, Ev */
6210 if (iGprLeft >= 8 || iGprRight >= 8)
6211 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6212 pCodeBuf[off++] = 0x3b;
6213 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6214
6215#elif defined(RT_ARCH_ARM64)
6216 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
6217
6218#else
6219# error "Port me!"
6220#endif
6221 return off;
6222}
6223
6224
6225/**
6226 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6227 * with conditional instructions.
6228 */
6229DECL_INLINE_THROW(uint32_t)
6230iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6231{
6232#ifdef RT_ARCH_AMD64
6233 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6234#elif defined(RT_ARCH_ARM64)
6235 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6236#else
6237# error "Port me!"
6238#endif
6239 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6240 return off;
6241}
6242
6243
6244/**
6245 * Emits a compare of a 64-bit GPR with a constant value, setting status
6246 * flags/whatever for use with conditional instructions.
6247 */
6248DECL_INLINE_THROW(uint32_t)
6249iemNativeEmitCmpGprWithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft,
6250 uint64_t uImm, uint8_t idxTmpReg = UINT8_MAX)
6251{
6252#ifdef RT_ARCH_AMD64
6253 if ((int8_t)uImm == (int64_t)uImm)
6254 {
6255 /* cmp Ev, Ib */
6256 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6257 pCodeBuf[off++] = 0x83;
6258 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6259 pCodeBuf[off++] = (uint8_t)uImm;
6260 return off;
6261 }
6262 if ((int32_t)uImm == (int64_t)uImm)
6263 {
6264 /* cmp Ev, imm */
6265 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6266 pCodeBuf[off++] = 0x81;
6267 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6268 pCodeBuf[off++] = RT_BYTE1(uImm);
6269 pCodeBuf[off++] = RT_BYTE2(uImm);
6270 pCodeBuf[off++] = RT_BYTE3(uImm);
6271 pCodeBuf[off++] = RT_BYTE4(uImm);
6272 return off;
6273 }
6274
6275#elif defined(RT_ARCH_ARM64)
6276 if (uImm < _4K)
6277 {
6278 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6279 true /*64Bit*/, true /*fSetFlags*/);
6280 return off;
6281 }
6282 if ((uImm & ~(uint64_t)0xfff000) == 0)
6283 {
6284 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6285 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6286 return off;
6287 }
6288
6289#else
6290# error "Port me!"
6291#endif
6292
6293 if (idxTmpReg != UINT8_MAX)
6294 {
6295 /* Use temporary register for the immediate. */
6296 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpReg, uImm);
6297 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, iGprLeft, idxTmpReg);
6298 }
6299 else
6300# ifdef IEM_WITH_THROW_CATCH
6301 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6302# else
6303 AssertReleaseFailedStmt(off = UINT32_MAX);
6304# endif
6305
6306 return off;
6307}
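/* Illustration (added commentary; register and values are made up): ARM64
   has no separate CMP instruction, it is SUBS with XZR as the destination.
   The two immediate forms handled above cover 0x000..0xfff and
   0x1000..0xfff000 (low 12 bits zero):
       cmp x1, #0x123           ; uImm = 0x123
       cmp x1, #0x123, lsl #12  ; uImm = 0x123000
   Anything else requires the caller to supply idxTmpReg. */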
6308
6309
6310/**
6311 * Emits a compare of a 64-bit GPR with a constant value, setting status
6312 * flags/whatever for use with conditional instructions.
6313 */
6314DECL_INLINE_THROW(uint32_t)
6315iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
6316{
6317#ifdef RT_ARCH_AMD64
6318 if ((int8_t)uImm == (int64_t)uImm)
6319 {
6320 /* cmp Ev, Ib */
6321 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6322 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6323 pbCodeBuf[off++] = 0x83;
6324 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6325 pbCodeBuf[off++] = (uint8_t)uImm;
6326 }
6327 else if ((int32_t)uImm == (int64_t)uImm)
6328 {
6329 /* cmp Ev, imm */
6330 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6331 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6332 pbCodeBuf[off++] = 0x81;
6333 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6334 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6335 pbCodeBuf[off++] = RT_BYTE1(uImm);
6336 pbCodeBuf[off++] = RT_BYTE2(uImm);
6337 pbCodeBuf[off++] = RT_BYTE3(uImm);
6338 pbCodeBuf[off++] = RT_BYTE4(uImm);
6339 }
6340 else
6341 {
6342 /* Use temporary register for the immediate. */
6343 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6344 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6345 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6346 }
6347
6348#elif defined(RT_ARCH_ARM64)
6349 /** @todo guess there are cleverer things we can do here... */
6350 if (uImm < _4K)
6351 {
6352 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6353 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6354 true /*64Bit*/, true /*fSetFlags*/);
6355 }
6356 else if ((uImm & ~(uint64_t)0xfff000) == 0)
6357 {
6358 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6359 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6360 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6361 }
6362 else
6363 {
6364 /* Use temporary register for the immediate. */
6365 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6366 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6367 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6368 }
6369
6370#else
6371# error "Port me!"
6372#endif
6373
6374 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6375 return off;
6376}
6377
6378
6379/**
6380 * Emits a compare of a 32-bit GPR with a constant value, setting status
6381 * flags/whatever for use with a conditional instruction.
6382 *
6383 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6384 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6385 * bits all zero). Will release assert or throw exception if the caller
6386 * violates this restriction.
6387 */
6388DECL_FORCE_INLINE_THROW(uint32_t)
6389iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6390{
6391#ifdef RT_ARCH_AMD64
6392 if (iGprLeft >= 8)
6393 pCodeBuf[off++] = X86_OP_REX_B;
6394 if (uImm <= UINT32_C(0x7f))
6395 {
6396 /* cmp Ev, Ib */
6397 pCodeBuf[off++] = 0x83;
6398 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6399 pCodeBuf[off++] = (uint8_t)uImm;
6400 }
6401 else
6402 {
6403 /* cmp Ev, imm */
6404 pCodeBuf[off++] = 0x81;
6405 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6406 pCodeBuf[off++] = RT_BYTE1(uImm);
6407 pCodeBuf[off++] = RT_BYTE2(uImm);
6408 pCodeBuf[off++] = RT_BYTE3(uImm);
6409 pCodeBuf[off++] = RT_BYTE4(uImm);
6410 }
6411
6412#elif defined(RT_ARCH_ARM64)
6413 /** @todo guess there are cleverer things we can do here... */
6414 if (uImm < _4K)
6415 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6416 false /*64Bit*/, true /*fSetFlags*/);
6417 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6418 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6419 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6420 else
6421# ifdef IEM_WITH_THROW_CATCH
6422 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6423# else
6424 AssertReleaseFailedStmt(off = UINT32_MAX);
6425# endif
6426
6427#else
6428# error "Port me!"
6429#endif
6430 return off;
6431}
6432
6433
6434/**
6435 * Emits a compare of a 32-bit GPR with a constant value, setting status
6436 * flags/whatever for use with a conditional instruction.
6437 */
6438DECL_INLINE_THROW(uint32_t)
6439iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6440{
6441#ifdef RT_ARCH_AMD64
6442 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6443
6444#elif defined(RT_ARCH_ARM64)
6445 /** @todo guess there are cleverer things we can do here... */
6446 if (uImm < _4K)
6447 {
6448 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6449 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6450 false /*64Bit*/, true /*fSetFlags*/);
6451 }
6452 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6453 {
6454 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6455 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6456 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6457 }
6458 else
6459 {
6460 /* Use temporary register for the immediate. */
6461 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6462 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6463 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6464 }
6465
6466#else
6467# error "Port me!"
6468#endif
6469
6470 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6471 return off;
6472}
6473
6474
6475/**
6476 * Emits a compare of a 16-bit GPR with a constant value, setting status
6477 * flags/whatever for use with a conditional instruction.
6478 *
6479 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
6480 * 16-bit value from @a iGprLeft.
6481 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6482 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6483 * bits all zero). Will release assert or throw exception if the caller
6484 * violates this restriction.
6485 */
6486DECL_FORCE_INLINE_THROW(uint32_t)
6487iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6488 uint8_t idxTmpReg = UINT8_MAX)
6489{
6490#ifdef RT_ARCH_AMD64
6491 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6492 if (iGprLeft >= 8)
6493 pCodeBuf[off++] = X86_OP_REX_B;
6494 if (uImm <= UINT32_C(0x7f))
6495 {
6496 /* cmp Ev, Ib */
6497 pCodeBuf[off++] = 0x83;
6498 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6499 pCodeBuf[off++] = (uint8_t)uImm;
6500 }
6501 else
6502 {
6503 /* cmp Ev, imm */
6504 pCodeBuf[off++] = 0x81;
6505 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6506 pCodeBuf[off++] = RT_BYTE1(uImm);
6507 pCodeBuf[off++] = RT_BYTE2(uImm);
6508 }
6509 RT_NOREF(idxTmpReg);
6510
6511#elif defined(RT_ARCH_ARM64)
6512# ifdef IEM_WITH_THROW_CATCH
6513 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6514# else
6515 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6516# endif
6517 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6518 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6519 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6520
6521#else
6522# error "Port me!"
6523#endif
6524 return off;
6525}
6526
6527
6528/**
6529 * Emits a compare of a 16-bit GPR with a constant value, setting status
6530 * flags/whatever for use with a conditional instruction.
6531 *
6532 * @note ARM64: Helper register is required (idxTmpReg).
6533 */
6534DECL_INLINE_THROW(uint32_t)
6535iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6536 uint8_t idxTmpReg = UINT8_MAX)
6537{
6538#ifdef RT_ARCH_AMD64
6539 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6540#elif defined(RT_ARCH_ARM64)
6541 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6542#else
6543# error "Port me!"
6544#endif
6545 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6546 return off;
6547}
6548
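/*
 * Usage sketch (editorial addition): the compare emitters above pair with the
 * conditional branch emitters in the section below; the register index and
 * the label are illustrative.
 *
 *     off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxReg, 42);
 *     off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel); // taken when idxReg != 42
 */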
6549
6550
6551/*********************************************************************************************************************************
6552* Branching *
6553*********************************************************************************************************************************/
6554
6555/**
6556 * Emits a JMP rel32 / B imm19 to the given label.
6557 */
6558DECL_FORCE_INLINE_THROW(uint32_t)
6559iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6560{
6561 Assert(idxLabel < pReNative->cLabels);
6562
6563#ifdef RT_ARCH_AMD64
6564 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6565 {
6566 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6567 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6568 {
6569 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6570 pCodeBuf[off++] = (uint8_t)offRel;
6571 }
6572 else
6573 {
6574 offRel -= 3;
6575 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6576 pCodeBuf[off++] = RT_BYTE1(offRel);
6577 pCodeBuf[off++] = RT_BYTE2(offRel);
6578 pCodeBuf[off++] = RT_BYTE3(offRel);
6579 pCodeBuf[off++] = RT_BYTE4(offRel);
6580 }
6581 }
6582 else
6583 {
6584 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6585 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6586 pCodeBuf[off++] = 0xfe;
6587 pCodeBuf[off++] = 0xff;
6588 pCodeBuf[off++] = 0xff;
6589 pCodeBuf[off++] = 0xff;
6590 }
6591 pCodeBuf[off++] = 0xcc; /* int3 poison */
6592
6593#elif defined(RT_ARCH_ARM64)
6594 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6595 {
6596 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6597 off++;
6598 }
6599 else
6600 {
6601 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6602 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6603 }
6604
6605#else
6606# error "Port me!"
6607#endif
6608 return off;
6609}
6610
6611
6612/**
6613 * Emits a JMP rel32 / B imm19 to the given label.
6614 */
6615DECL_INLINE_THROW(uint32_t)
6616iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6617{
6618#ifdef RT_ARCH_AMD64
6619 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6620#elif defined(RT_ARCH_ARM64)
6621 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6622#else
6623# error "Port me!"
6624#endif
6625 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6626 return off;
6627}
6628
6629
6630/**
6631 * Emits a JMP rel32 / B imm19 to a new undefined label.
6632 */
6633DECL_INLINE_THROW(uint32_t)
6634iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6635{
6636 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6637 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6638}
6639
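/*
 * Usage sketch (editorial addition): a backward jump to a label bound at the
 * current offset; since the label offset is known, iemNativeEmitJmpToLabel()
 * encodes the displacement directly instead of recording a fixup.  The label
 * type is whatever the caller uses (hypothetical here).
 *
 *     uint32_t const idxLoop = iemNativeLabelCreate(pReNative, enmLabelType, off /*offWhere*/, 0 /*uData*/);
 *     ... // loop body
 *     off = iemNativeEmitJmpToLabel(pReNative, off, idxLoop);
 */
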
6640/** Condition type. */
6641#ifdef RT_ARCH_AMD64
6642typedef enum IEMNATIVEINSTRCOND : uint8_t
6643{
6644 kIemNativeInstrCond_o = 0,
6645 kIemNativeInstrCond_no,
6646 kIemNativeInstrCond_c,
6647 kIemNativeInstrCond_nc,
6648 kIemNativeInstrCond_e,
6649 kIemNativeInstrCond_z = kIemNativeInstrCond_e,
6650 kIemNativeInstrCond_ne,
6651 kIemNativeInstrCond_nz = kIemNativeInstrCond_ne,
6652 kIemNativeInstrCond_be,
6653 kIemNativeInstrCond_nbe,
6654 kIemNativeInstrCond_s,
6655 kIemNativeInstrCond_ns,
6656 kIemNativeInstrCond_p,
6657 kIemNativeInstrCond_np,
6658 kIemNativeInstrCond_l,
6659 kIemNativeInstrCond_nl,
6660 kIemNativeInstrCond_le,
6661 kIemNativeInstrCond_nle
6662} IEMNATIVEINSTRCOND;
6663#elif defined(RT_ARCH_ARM64)
6664typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6665# define kIemNativeInstrCond_o todo_conditional_codes
6666# define kIemNativeInstrCond_no todo_conditional_codes
6667# define kIemNativeInstrCond_c todo_conditional_codes
6668# define kIemNativeInstrCond_nc todo_conditional_codes
6669# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6670# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6671# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6672# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6673# define kIemNativeInstrCond_s todo_conditional_codes
6674# define kIemNativeInstrCond_ns todo_conditional_codes
6675# define kIemNativeInstrCond_p todo_conditional_codes
6676# define kIemNativeInstrCond_np todo_conditional_codes
6677# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6678# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6679# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6680# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6681#else
6682# error "Port me!"
6683#endif
6684
6685
6686/**
6687 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6688 */
6689DECL_FORCE_INLINE_THROW(uint32_t)
6690iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6691 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6692{
6693 Assert(idxLabel < pReNative->cLabels);
6694
6695 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6696#ifdef RT_ARCH_AMD64
6697 if (offLabel >= off)
6698 {
6699 /* jcc rel32 */
6700 pCodeBuf[off++] = 0x0f;
6701 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6702 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6703 pCodeBuf[off++] = 0x00;
6704 pCodeBuf[off++] = 0x00;
6705 pCodeBuf[off++] = 0x00;
6706 pCodeBuf[off++] = 0x00;
6707 }
6708 else
6709 {
6710 int32_t offDisp = offLabel - (off + 2);
6711 if ((int8_t)offDisp == offDisp)
6712 {
6713 /* jcc rel8 */
6714 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6715 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6716 }
6717 else
6718 {
6719 /* jcc rel32 */
6720 offDisp -= 4;
6721 pCodeBuf[off++] = 0x0f;
6722 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6723 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6724 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6725 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6726 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6727 }
6728 }
6729
6730#elif defined(RT_ARCH_ARM64)
6731 if (offLabel >= off)
6732 {
6733 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6734 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6735 }
6736 else
6737 {
6738 Assert(off - offLabel <= 0x3ffffU);
6739 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6740 off++;
6741 }
6742
6743#else
6744# error "Port me!"
6745#endif
6746 return off;
6747}
6748
6749
6750/**
6751 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6752 */
6753DECL_INLINE_THROW(uint32_t)
6754iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6755{
6756#ifdef RT_ARCH_AMD64
6757 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6758#elif defined(RT_ARCH_ARM64)
6759 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6760#else
6761# error "Port me!"
6762#endif
6763 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6764 return off;
6765}
6766
6767
6768/**
6769 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6770 */
6771DECL_INLINE_THROW(uint32_t)
6772iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6773 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6774{
6775 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6776 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6777}
6778
6779
6780/**
6781 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6782 */
6783DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6784{
6785#ifdef RT_ARCH_AMD64
6786 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6787#elif defined(RT_ARCH_ARM64)
6788 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6789#else
6790# error "Port me!"
6791#endif
6792}
6793
6794/**
6795 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6796 */
6797DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6798 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6799{
6800#ifdef RT_ARCH_AMD64
6801 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6802#elif defined(RT_ARCH_ARM64)
6803 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6804#else
6805# error "Port me!"
6806#endif
6807}
6808
6809
6810/**
6811 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6812 */
6813DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6814{
6815#ifdef RT_ARCH_AMD64
6816 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6817#elif defined(RT_ARCH_ARM64)
6818 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6819#else
6820# error "Port me!"
6821#endif
6822}
6823
6824/**
6825 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6826 */
6827DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6828 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6829{
6830#ifdef RT_ARCH_AMD64
6831 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6832#elif defined(RT_ARCH_ARM64)
6833 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6834#else
6835# error "Port me!"
6836#endif
6837}
6838
6839
6840/**
6841 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6842 */
6843DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6844{
6845#ifdef RT_ARCH_AMD64
6846 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6847#elif defined(RT_ARCH_ARM64)
6848 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6849#else
6850# error "Port me!"
6851#endif
6852}
6853
6854/**
6855 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6856 */
6857DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6858 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6859{
6860#ifdef RT_ARCH_AMD64
6861 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6862#elif defined(RT_ARCH_ARM64)
6863 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6864#else
6865# error "Port me!"
6866#endif
6867}
6868
6869
6870/**
6871 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6872 */
6873DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6874{
6875#ifdef RT_ARCH_AMD64
6876 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6877#elif defined(RT_ARCH_ARM64)
6878 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6879#else
6880# error "Port me!"
6881#endif
6882}
6883
6884/**
6885 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6886 */
6887DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6888 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6889{
6890#ifdef RT_ARCH_AMD64
6891 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6892#elif defined(RT_ARCH_ARM64)
6893 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6894#else
6895# error "Port me!"
6896#endif
6897}
6898
6899
6900/**
6901 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6902 */
6903DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6904{
6905#ifdef RT_ARCH_AMD64
6906 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6907#elif defined(RT_ARCH_ARM64)
6908 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6909#else
6910# error "Port me!"
6911#endif
6912}
6913
6914/**
6915 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6916 */
6917DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6918 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6919{
6920#ifdef RT_ARCH_AMD64
6921 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6922#elif defined(RT_ARCH_ARM64)
6923 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6924#else
6925# error "Port me!"
6926#endif
6927}
6928
6929
6930/**
6931 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6932 *
6933 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6934 *
6935 * Only use hardcoded jumps forward when emitting for exactly one
6936 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6937 * the right target address on all platforms!
6938 *
6939 * Please also note that on x86 it is necessary to pass off + 256 or higher
6940 * for @a offTarget if one believes the intervening code is more than 127
6941 * bytes long.
6942 */
6943DECL_FORCE_INLINE(uint32_t)
6944iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6945{
6946#ifdef RT_ARCH_AMD64
6947 /* jcc rel8 / rel32 */
6948 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6949 if (offDisp < 128 && offDisp >= -128)
6950 {
6951 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6952 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6953 }
6954 else
6955 {
6956 offDisp -= 4;
6957 pCodeBuf[off++] = 0x0f;
6958 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6959 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6960 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6961 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6962 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6963 }
6964
6965#elif defined(RT_ARCH_ARM64)
6966 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6967 off++;
6968#else
6969# error "Port me!"
6970#endif
6971 return off;
6972}
6973
6974
6975/**
6976 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6977 *
6978 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6979 *
6980 * Only use hardcoded jumps forward when emitting for exactly one
6981 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6982 * the right target address on all platforms!
6983 *
6984 * Please also note that on x86 it is necessary to pass off + 256 or higher
6985 * for @a offTarget if one believes the intervening code is more than 127
6986 * bytes long.
6987 */
6988DECL_INLINE_THROW(uint32_t)
6989iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6990{
6991#ifdef RT_ARCH_AMD64
6992 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6993#elif defined(RT_ARCH_ARM64)
6994 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6995#else
6996# error "Port me!"
6997#endif
6998 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6999 return off;
7000}
7001
7002
7003/**
7004 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
7005 *
7006 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7007 */
7008DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7009{
7010#ifdef RT_ARCH_AMD64
7011 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
7012#elif defined(RT_ARCH_ARM64)
7013 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
7014#else
7015# error "Port me!"
7016#endif
7017}
7018
7019
7020/**
7021 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
7022 *
7023 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7024 */
7025DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7026{
7027#ifdef RT_ARCH_AMD64
7028 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
7029#elif defined(RT_ARCH_ARM64)
7030 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
7031#else
7032# error "Port me!"
7033#endif
7034}
7035
7036
7037/**
7038 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
7039 *
7040 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7041 */
7042DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7043{
7044#ifdef RT_ARCH_AMD64
7045 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
7046#elif defined(RT_ARCH_ARM64)
7047 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
7048#else
7049# error "Port me!"
7050#endif
7051}
7052
7053
7054/**
7055 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
7056 *
7057 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7058 */
7059DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7060{
7061#ifdef RT_ARCH_AMD64
7062 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
7063#elif defined(RT_ARCH_ARM64)
7064 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
7065#else
7066# error "Port me!"
7067#endif
7068}
7069
7070
7071/**
7072 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
7073 *
7074 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7075 */
7076DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
7077{
7078#ifdef RT_ARCH_AMD64
7079 /* jmp rel8 or rel32 */
7080 int32_t offDisp = offTarget - (off + 2);
7081 if (offDisp < 128 && offDisp >= -128)
7082 {
7083 pCodeBuf[off++] = 0xeb;
7084 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7085 }
7086 else
7087 {
7088 offDisp -= 3;
7089 pCodeBuf[off++] = 0xe9;
7090 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7091 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
7092 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
7093 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
7094 }
7095
7096#elif defined(RT_ARCH_ARM64)
7097 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
7098 off++;
7099
7100#else
7101# error "Port me!"
7102#endif
7103 return off;
7104}
7105
7106
7107/**
7108 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
7109 *
7110 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7111 */
7112DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7113{
7114#ifdef RT_ARCH_AMD64
7115 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
7116#elif defined(RT_ARCH_ARM64)
7117 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
7118#else
7119# error "Port me!"
7120#endif
7121 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7122 return off;
7123}
7124
7125
7126/**
7127 * Fixes up a conditional jump to a fixed label.
7128 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
7129 * iemNativeEmitJzToFixed, ...
7130 */
7131DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
7132{
7133#ifdef RT_ARCH_AMD64
7134 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
7135 uint8_t const bOpcode = pbCodeBuf[offFixup];
7136 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
7137 {
7138 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
7139 AssertStmt((int8_t)pbCodeBuf[offFixup + 1] == (int32_t)(offTarget - (offFixup + 2)),
7140 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
7141 }
7142 else
7143 {
7144 if (bOpcode != 0x0f)
7145 Assert(bOpcode == 0xe9);
7146 else
7147 {
7148 offFixup += 1;
7149 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
7150 }
7151 uint32_t const offRel32 = offTarget - (offFixup + 5);
7152 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
7153 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
7154 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
7155 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
7156 }
7157
7158#elif defined(RT_ARCH_ARM64)
7159 int32_t const offDisp = offTarget - offFixup;
7160 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
7161 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
7162 {
7163 /* B.COND + BC.COND */
7164 Assert(offDisp >= -262144 && offDisp < 262144);
7165 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
7166 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
7167 }
7168 else if ((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000))
7169 {
7170 /* B imm26 */
7171 Assert(offDisp >= -33554432 && offDisp < 33554432);
7172 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
7173 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
7174 }
7175 else
7176 {
7177 /* CBZ / CBNZ reg, imm19 */
7178 Assert((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x34000000));
7179 Assert(offDisp >= -1048576 && offDisp < 1048576);
7180 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
7181 | (((uint32_t)offDisp << 5) & UINT32_C(0x00ffffe0));
7182
7183 }
7184
7185#else
7186# error "Port me!"
7187#endif
7188}
7189
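/*
 * Usage sketch (editorial addition): the usual pattern for a forward branch
 * whose target is not known yet is to emit against a dummy target, remember
 * the instruction offset, and patch it when the target is reached.  The
 * off + 256 dummy forces the rel32 encoding on x86, as explained in the
 * iemNativeEmitJccToFixed() notes above.
 *
 *     uint32_t const offFixup = off;
 *     off = iemNativeEmitJccToFixed(pReNative, off, off + 256 /*dummy*/, kIemNativeInstrCond_e);
 *     ... // code skipped when the condition holds
 *     iemNativeFixupFixedJump(pReNative, offFixup, off);
 */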
7190
7191#ifdef RT_ARCH_AMD64
7192/**
7193 * For doing bt on a register.
7194 */
7195DECL_INLINE_THROW(uint32_t)
7196iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
7197{
7198 Assert(iBitNo < 64);
7199 /* bt Ev, imm8 */
7200 if (iBitNo >= 32)
7201 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7202 else if (iGprSrc >= 8)
7203 pCodeBuf[off++] = X86_OP_REX_B;
7204 pCodeBuf[off++] = 0x0f;
7205 pCodeBuf[off++] = 0xba;
7206 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7207 pCodeBuf[off++] = iBitNo;
7208 return off;
7209}
7210#endif /* RT_ARCH_AMD64 */
7211
7212
7213/**
7214 * Internal helper, don't call directly.
7215 */
7216DECL_INLINE_THROW(uint32_t)
7217iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7218 uint32_t offTarget, uint32_t *poffFixup, bool fJmpIfSet)
7219{
7220 Assert(iBitNo < 64);
7221#ifdef RT_ARCH_AMD64
7222 if (iBitNo < 8)
7223 {
7224 /* test Eb, imm8 */
7225 if (iGprSrc >= 4)
7226 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7227 pCodeBuf[off++] = 0xf6;
7228 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7229 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7230 if (poffFixup)
7231 *poffFixup = off;
7232 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7233 }
7234 else
7235 {
7236 /* bt Ev, imm8 */
7237 if (iBitNo >= 32)
7238 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7239 else if (iGprSrc >= 8)
7240 pCodeBuf[off++] = X86_OP_REX_B;
7241 pCodeBuf[off++] = 0x0f;
7242 pCodeBuf[off++] = 0xba;
7243 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7244 pCodeBuf[off++] = iBitNo;
7245 if (poffFixup)
7246 *poffFixup = off;
7247 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7248 }
7249
7250#elif defined(RT_ARCH_ARM64)
7251 /* Just use the TBNZ instruction here. */
7252 if (poffFixup)
7253 *poffFixup = off;
7254 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, (int32_t)(offTarget - off), iGprSrc, iBitNo);
7255
7256#else
7257# error "Port me!"
7258#endif
7259 return off;
7260}
7261
7262
7263/**
7264 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _set_
7265 * in @a iGprSrc.
7266 */
7267DECL_INLINE_THROW(uint32_t)
7268iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7269 uint32_t offTarget, uint32_t *poffFixup)
7270{
7271 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, true /*fJmpIfSet*/);
7272}
7273
7274
7275/**
7276 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _not_
7277 * _set_ in @a iGprSrc.
7278 */
7279DECL_INLINE_THROW(uint32_t)
7280iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7281 uint32_t offTarget, uint32_t *poffFixup)
7282{
7283 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, false /*fJmpIfSet*/);
7284}
7285
7286
7287
7288/**
7289 * Internal helper, don't call directly.
7290 */
7291DECL_INLINE_THROW(uint32_t)
7292iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7293 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7294{
7295 Assert(iBitNo < 64);
7296#ifdef RT_ARCH_AMD64
7297 if (iBitNo < 8)
7298 {
7299 /* test Eb, imm8 */
7300 if (iGprSrc >= 4)
7301 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7302 pCodeBuf[off++] = 0xf6;
7303 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7304 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7305 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7306 fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7307 }
7308 else
7309 {
7310 /* bt Ev, imm8 */
7311 if (iBitNo >= 32)
7312 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7313 else if (iGprSrc >= 8)
7314 pCodeBuf[off++] = X86_OP_REX_B;
7315 pCodeBuf[off++] = 0x0f;
7316 pCodeBuf[off++] = 0xba;
7317 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7318 pCodeBuf[off++] = iBitNo;
7319 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7320 fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7321 }
7322
7323#elif defined(RT_ARCH_ARM64)
7324 /* Use the TBNZ instruction here. */
7325 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
7326 {
7327 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
7328 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
7329 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
7330 //if (offLabel == UINT32_MAX)
7331 {
7332 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
7333 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
7334 }
7335 //else
7336 //{
7337 // RT_BREAKPOINT();
7338 // Assert(off - offLabel <= 0x1fffU);
7339 // pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
7340 //
7341 //}
7342 }
7343 else
7344 {
7345 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
7346 pCodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
7347 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7348 pCodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
7349 }
7350
7351#else
7352# error "Port me!"
7353#endif
7354 return off;
7355}
7356
7357
7358/**
7359 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7360 * @a iGprSrc.
7361 */
7362DECL_INLINE_THROW(uint32_t)
7363iemNativeEmitTestBitInGprAndJmpToLabelIfSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7364 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7365{
7366 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7367}
7368
7369
7370/**
7371 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7372 * _set_ in @a iGprSrc.
7373 */
7374DECL_INLINE_THROW(uint32_t)
7375iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7376 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7377{
7378 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7379}
7380
7381
7382/**
7383 * Internal helper, don't call directly.
7384 */
7385DECL_INLINE_THROW(uint32_t)
7386iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7387 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7388{
7389#ifdef RT_ARCH_AMD64
7390 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 5+6), off,
7391 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7392#elif defined(RT_ARCH_ARM64)
7393 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 2), off,
7394 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7395#else
7396# error "Port me!"
7397#endif
7398 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7399 return off;
7400}
7401
7402
7403/**
7404 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7405 * @a iGprSrc.
7406 */
7407DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7408 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7409{
7410 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7411}
7412
7413
7414/**
7415 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7416 * _set_ in @a iGprSrc.
7417 */
7418DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7419 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7420{
7421 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7422}
7423
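/*
 * Usage sketch (editorial addition): testing a single flag bit and branching
 * on it.  The register holding the EFLAGS value and the label are
 * illustrative; X86_EFL_ZF_BIT is assumed to come from iprt/x86.h.
 *
 *     off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg,
 *                                                       X86_EFL_ZF_BIT, idxLabel);
 */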
7424
7425/**
7426 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7427 * flags accordingly.
7428 */
7429DECL_INLINE_THROW(uint32_t)
7430iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7431{
7432 Assert(fBits != 0);
7433#ifdef RT_ARCH_AMD64
7434
7435 if (fBits >= UINT32_MAX)
7436 {
7437 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7438
7439 /* test Ev,Gv */
7440 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7441 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7442 pbCodeBuf[off++] = 0x85;
7443 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7444
7445 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7446 }
7447 else if (fBits <= UINT32_MAX)
7448 {
7449 /* test Eb, imm8 or test Ev, imm32 */
7450 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7451 if (fBits <= UINT8_MAX)
7452 {
7453 if (iGprSrc >= 4)
7454 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7455 pbCodeBuf[off++] = 0xf6;
7456 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7457 pbCodeBuf[off++] = (uint8_t)fBits;
7458 }
7459 else
7460 {
7461 if (iGprSrc >= 8)
7462 pbCodeBuf[off++] = X86_OP_REX_B;
7463 pbCodeBuf[off++] = 0xf7;
7464 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7465 pbCodeBuf[off++] = RT_BYTE1(fBits);
7466 pbCodeBuf[off++] = RT_BYTE2(fBits);
7467 pbCodeBuf[off++] = RT_BYTE3(fBits);
7468 pbCodeBuf[off++] = RT_BYTE4(fBits);
7469 }
7470 }
7471 /** @todo implement me. */
7472 else
7473 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7474
7475#elif defined(RT_ARCH_ARM64)
7476 uint32_t uImmR = 0;
7477 uint32_t uImmNandS = 0;
7478 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7479 {
7480 /* ands xzr, iGprSrc, #fBits */
7481 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7482 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7483 }
7484 else
7485 {
7486 /* ands xzr, iGprSrc, iTmpReg */
7487 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7488 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7489 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7490 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7491 }
7492
7493#else
7494# error "Port me!"
7495#endif
7496 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7497 return off;
7498}
7499
7500
7501/**
7502 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7503 * @a iGprSrc, setting CPU flags accordingly.
7504 *
7505 * @note For ARM64 this only supports @a fBits values that can be expressed
7506 * using the two 6-bit immediates of the ANDS instruction. The caller
7507 * must make sure this is possible!
7508 */
7509DECL_FORCE_INLINE_THROW(uint32_t)
7510iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
7511{
7512 Assert(fBits != 0);
7513
7514#ifdef RT_ARCH_AMD64
7515 if (fBits <= UINT8_MAX)
7516 {
7517 /* test Eb, imm8 */
7518 if (iGprSrc >= 4)
7519 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7520 pCodeBuf[off++] = 0xf6;
7521 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7522 pCodeBuf[off++] = (uint8_t)fBits;
7523 }
7524 else
7525 {
7526 /* test Ev, imm32 */
7527 if (iGprSrc >= 8)
7528 pCodeBuf[off++] = X86_OP_REX_B;
7529 pCodeBuf[off++] = 0xf7;
7530 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7531 pCodeBuf[off++] = RT_BYTE1(fBits);
7532 pCodeBuf[off++] = RT_BYTE2(fBits);
7533 pCodeBuf[off++] = RT_BYTE3(fBits);
7534 pCodeBuf[off++] = RT_BYTE4(fBits);
7535 }
7536
7537#elif defined(RT_ARCH_ARM64)
7538 /* ands xzr, src, #fBits */
7539 uint32_t uImmR = 0;
7540 uint32_t uImmNandS = 0;
7541 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7542 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7543 else
7544# ifdef IEM_WITH_THROW_CATCH
7545 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7546# else
7547 AssertReleaseFailedStmt(off = UINT32_MAX);
7548# endif
7549
7550#else
7551# error "Port me!"
7552#endif
7553 return off;
7554}
7555
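/*
 * Editorial note on the ARM64 restriction above: Armv8A64ConvertMask32ToImmRImmS()
 * only succeeds for the ARM64 logical-immediate patterns, i.e. a single
 * (possibly rotated) run of set bits in a repeating 2/4/8/16/32-bit element.
 * For example, 0x0000ffff and 0xff00ff00 are representable, while 0x12345678
 * is not; for such masks fall back to a temporary register as done in
 * iemNativeEmitTestAnyBitsInGpr() above.
 */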
7556
7557
7558/**
7559 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7560 * @a iGprSrc, setting CPU flags accordingly.
7561 *
7562 * @note For ARM64 this only supports @a fBits values that can be expressed
7563 * using the two 6-bit immediates of the ANDS instruction. The caller
7564 * must make sure this is possible!
7565 */
7566DECL_FORCE_INLINE_THROW(uint32_t)
7567iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7568{
7569 Assert(fBits != 0);
7570
7571#ifdef RT_ARCH_AMD64
7572 /* test Eb, imm8 */
7573 if (iGprSrc >= 4)
7574 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7575 pCodeBuf[off++] = 0xf6;
7576 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7577 pCodeBuf[off++] = fBits;
7578
7579#elif defined(RT_ARCH_ARM64)
7580 /* ands xzr, src, #fBits */
7581 uint32_t uImmR = 0;
7582 uint32_t uImmNandS = 0;
7583 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7584 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7585 else
7586# ifdef IEM_WITH_THROW_CATCH
7587 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7588# else
7589 AssertReleaseFailedStmt(off = UINT32_MAX);
7590# endif
7591
7592#else
7593# error "Port me!"
7594#endif
7595 return off;
7596}
7597
7598
7599/**
7600 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7601 * @a iGprSrc, setting CPU flags accordingly.
7602 */
7603DECL_INLINE_THROW(uint32_t)
7604iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7605{
7606 Assert(fBits != 0);
7607
7608#ifdef RT_ARCH_AMD64
7609 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7610
7611#elif defined(RT_ARCH_ARM64)
7612 /* ands xzr, src, [tmp|#imm] */
7613 uint32_t uImmR = 0;
7614 uint32_t uImmNandS = 0;
7615 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7616 {
7617 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7618 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7619 }
7620 else
7621 {
7622 /* Use temporary register for the immediate. */
7623 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7624 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7625 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7626 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7627 }
7628
7629#else
7630# error "Port me!"
7631#endif
7632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7633 return off;
7634}
7635
7636
7637/**
7638 * Emits a jump to @a idxLabel on the condition that _any_ of the bits in
7639 * @a fBits are set in @a iGprSrc.
7640 */
7641DECL_INLINE_THROW(uint32_t)
7642iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7643 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7644{
7645 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7646
7647 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7648 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7649
7650 return off;
7651}
7652
7653
7654/**
7655 * Emits a jump to @a idxLabel on the condition that _none_ of the bits in
7656 * @a fBits are set in @a iGprSrc.
7657 */
7658DECL_INLINE_THROW(uint32_t)
7659iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7660 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7661{
7662 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7663
7664 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7665 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7666
7667 return off;
7668}
7669
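/*
 * Usage sketch (editorial addition): branch when any bit of a multi-bit mask
 * is set.  Note the asserts above: the mask must not be a single bit (use the
 * bit-test emitters for that).  The register index is illustrative and
 * X86_EFL_IF / X86_EFL_TF are assumed to come from iprt/x86.h.
 *
 *     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
 *                                                              X86_EFL_IF | X86_EFL_TF, idxLabel);
 */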
7670
7671/**
7672 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7673 *
7674 * The operand size is given by @a f64Bit.
7675 */
7676DECL_FORCE_INLINE_THROW(uint32_t)
7677iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7678 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7679{
7680 Assert(idxLabel < pReNative->cLabels);
7681
7682#ifdef RT_ARCH_AMD64
7683 /* test reg32,reg32 / test reg64,reg64 */
7684 if (f64Bit)
7685 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7686 else if (iGprSrc >= 8)
7687 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7688 pCodeBuf[off++] = 0x85;
7689 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7690
7691 /* jnz idxLabel */
7692 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7693 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7694
7695#elif defined(RT_ARCH_ARM64)
7696 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7697 {
7698 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7699 iGprSrc, f64Bit);
7700 off++;
7701 }
7702 else
7703 {
7704 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7705 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7706 }
7707
7708#else
7709# error "Port me!"
7710#endif
7711 return off;
7712}
7713
7714
7715/**
7716 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7717 *
7718 * The operand size is given by @a f64Bit.
7719 */
7720DECL_FORCE_INLINE_THROW(uint32_t)
7721iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7722 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7723{
7724#ifdef RT_ARCH_AMD64
7725 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7726 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7727#elif defined(RT_ARCH_ARM64)
7728 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7729 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7730#else
7731# error "Port me!"
7732#endif
7733 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7734 return off;
7735}
7736
7737
7738/**
7739 * Emits code that jumps to @a offTarget if @a iGprSrc is not zero.
7740 *
7741 * The operand size is given by @a f64Bit.
7742 */
7743DECL_FORCE_INLINE_THROW(uint32_t)
7744iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7745 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7746{
7747#ifdef RT_ARCH_AMD64
7748 /* test reg32,reg32 / test reg64,reg64 */
7749 if (f64Bit)
7750 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7751 else if (iGprSrc >= 8)
7752 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7753 pCodeBuf[off++] = 0x85;
7754 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7755
7756 /* jnz idxLabel */
7757 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget,
7758 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7759
7760#elif defined(RT_ARCH_ARM64)
7761 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(offTarget - off), iGprSrc, f64Bit);
7762 off++;
7763
7764#else
7765# error "Port me!"
7766#endif
7767 return off;
7768}
7769
7770
7771/**
7772 * Emits code that jumps to @a offTarget if @a iGprSrc is not zero.
7773 *
7774 * The operand size is given by @a f64Bit.
7775 */
7776DECL_FORCE_INLINE_THROW(uint32_t)
7777iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7778 bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7779{
7780#ifdef RT_ARCH_AMD64
7781 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7782 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7783#elif defined(RT_ARCH_ARM64)
7784 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1),
7785 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7786#else
7787# error "Port me!"
7788#endif
7789 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7790 return off;
7791}
7792
7793
7794/* if (Gpr1 == 0) Jmp idxLabel; */
7795
7796/**
7797 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7798 *
7799 * The operand size is given by @a f64Bit.
7800 */
7801DECL_FORCE_INLINE_THROW(uint32_t)
7802iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7803 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7804{
7805 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7806 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7807}
7808
7809
7810/**
7811 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7812 *
7813 * The operand size is given by @a f64Bit.
7814 */
7815DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7816 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7817{
7818 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7819}
7820
7821
7822/**
7823 * Emits code that jumps to a new label if @a iGprSrc is zero.
7824 *
7825 * The operand size is given by @a f64Bit.
7826 */
7827DECL_INLINE_THROW(uint32_t)
7828iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7829 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7830{
7831 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7832 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7833}
7834
7835
7836/**
7837 * Emits code that jumps to @a offTarget if @a iGprSrc is zero.
7838 *
7839 * The operand size is given by @a f64Bit.
7840 */
7841DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7842 uint8_t iGprSrc, bool f64Bit, uint32_t offTarget)
7843{
7844 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, offTarget);
7845}
7846
7847
7848/* if (Gpr1 != 0) Jmp idxLabel; */
7849
7850/**
7851 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7852 *
7853 * The operand size is given by @a f64Bit.
7854 */
7855DECL_FORCE_INLINE_THROW(uint32_t)
7856iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7857 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7858{
7859 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7860 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7861}
7862
7863
7864/**
7865 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7866 *
7867 * The operand size is given by @a f64Bit.
7868 */
7869DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7870 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7871{
7872 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7873}
7874
7875
7876/**
7877 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7878 *
7879 * The operand size is given by @a f64Bit.
7880 */
7881DECL_INLINE_THROW(uint32_t)
7882iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7883 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7884{
7885 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7886 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7887}
7888
7889
7890/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7891
7892/**
7893 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7894 * differ.
7895 */
7896DECL_INLINE_THROW(uint32_t)
7897iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7898 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7899{
7900 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7901 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7902 return off;
7903}
7904
7905
7906/**
7907 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
7908 */
7909DECL_INLINE_THROW(uint32_t)
7910iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7911 uint8_t iGprLeft, uint8_t iGprRight,
7912 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7913{
7914 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7915 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7916}
7917
7918
7919/* if (Gpr != Imm) Jmp idxLabel; */
7920
7921/**
7922 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7923 */
7924DECL_INLINE_THROW(uint32_t)
7925iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7926 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7927{
7928 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7929 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7930 return off;
7931}
7932
7933
7934/**
7935 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7936 */
7937DECL_INLINE_THROW(uint32_t)
7938iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7939 uint8_t iGprSrc, uint64_t uImm,
7940 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7941{
7942 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7943 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7944}
7945
7946
7947/**
7948 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7949 * @a uImm.
7950 */
7951DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7952 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7953{
7954 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7955 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7956 return off;
7957}
7958
7959
7960/**
7961 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7962 * @a uImm.
7963 */
7964DECL_INLINE_THROW(uint32_t)
7965iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7966 uint8_t iGprSrc, uint32_t uImm,
7967 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7968{
7969 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7970 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7971}
7972
7973
7974/**
7975 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7976 * @a uImm.
7977 */
7978DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7979 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7980{
7981 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7982 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7983 return off;
7984}
7985
7986
7987/**
7988 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7989 * @a uImm.
7990 */
7991DECL_INLINE_THROW(uint32_t)
7992iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7993 uint8_t iGprSrc, uint16_t uImm,
7994 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7995{
7996 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7997 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7998}
7999
8000
8001/* if (Gpr == Imm) Jmp idxLabel; */
8002
8003/**
8004 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
8005 */
8006DECL_INLINE_THROW(uint32_t)
8007iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8008 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
8009{
8010 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8011 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8012 return off;
8013}
8014
8015
8016/**
8017 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
8018 */
8019DECL_INLINE_THROW(uint32_t)
8020iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
8021 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8022{
8023 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8024 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8025}
8026
8027
8028/**
8029 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
8030 */
8031DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8032 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
8033{
8034 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8035 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8036 return off;
8037}
8038
8039
8040/**
8041 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
8042 */
8043DECL_INLINE_THROW(uint32_t)
8044iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
8045 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8046{
8047 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8048 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8049}
8050
8051
8052/**
8053 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
8054 *
8055 * @note ARM64: Helper register is required (idxTmpReg).
8056 */
8057DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8058 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
8059 uint8_t idxTmpReg = UINT8_MAX)
8060{
8061 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
8062 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8063 return off;
8064}
8065
8066
8067/**
8068 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
8069 *
8070 * @note ARM64: Helper register is required (idxTmpReg).
8071 */
8072DECL_INLINE_THROW(uint32_t)
8073iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
8074 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
8075 uint8_t idxTmpReg = UINT8_MAX)
8076{
8077 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8078 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
8079}
8080
8081
8082
8083/*********************************************************************************************************************************
8084* Indirect Jumps. *
8085*********************************************************************************************************************************/
8086
8087/**
8088 * Emits an indirect jump to a 64-bit address in a GPR.
8089 */
8090DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpViaGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc)
8091{
8092#ifdef RT_ARCH_AMD64
8093 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
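    /* jmp reg (FF /4); REX.B extends the ModRM r/m field for r8..r15. */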
8094 if (iGprSrc >= 8)
8095 pCodeBuf[off++] = X86_OP_REX_B;
8096 pCodeBuf[off++] = 0xff;
8097 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8098
8099#elif defined(RT_ARCH_ARM64)
8100 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8101 pCodeBuf[off++] = Armv8A64MkInstrBr(iGprSrc);
8102
8103#else
8104# error "port me"
8105#endif
8106 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8107 return off;
8108}
8109
8110
8111/**
8112 * Emits an indirect jump to an immediate 64-bit address (uses the temporary GPR).
8113 */
8114DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
8115{
8116 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
8117 return iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP0);
8118}
8119
8120
8121/*********************************************************************************************************************************
8122* Calls. *
8123*********************************************************************************************************************************/
8124
8125/**
8126 * Emits a call to a 64-bit address.
8127 */
8128DECL_FORCE_INLINE(uint32_t) iemNativeEmitCallImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uintptr_t uPfn,
8129#ifdef RT_ARCH_AMD64
8130 uint8_t idxRegTmp = X86_GREG_xAX
8131#elif defined(RT_ARCH_ARM64)
8132 uint8_t idxRegTmp = IEMNATIVE_REG_FIXED_TMP0
8133#else
8134# error "Port me"
8135#endif
8136 )
8137{
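    /* Neither architecture has a call-immediate encoding that reaches an arbitrary
       64-bit target (call rel32 is +/-2GB, BL is +/-128MB), so load the address
       into a scratch register and call through it. */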
8138 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegTmp, uPfn);
8139
8140#ifdef RT_ARCH_AMD64
8141 /* call idxRegTmp */
8142 if (idxRegTmp >= 8)
8143 pCodeBuf[off++] = X86_OP_REX_B;
8144 pCodeBuf[off++] = 0xff;
8145 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, idxRegTmp & 7);
8146
8147#elif defined(RT_ARCH_ARM64)
8148 pCodeBuf[off++] = Armv8A64MkInstrBlr(idxRegTmp);
8149
8150#else
8151# error "port me"
8152#endif
8153 return off;
8154}
8155
8156
8157/**
8158 * Emits a call to a 64-bit address.
8159 */
8160DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
8161{
8162#ifdef RT_ARCH_AMD64
8163 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
8164
8165 /* call rax */
8166 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8167 pbCodeBuf[off++] = 0xff;
8168 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
8169
8170#elif defined(RT_ARCH_ARM64)
8171 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
8172
8173 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8174 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
8175
8176#else
8177# error "port me"
8178#endif
8179 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8180 return off;
8181}
8182
8183
8184/**
8185 * Emits code to load a stack variable into an argument GPR.
8186 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
8187 */
8188DECL_FORCE_INLINE_THROW(uint32_t)
8189iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8190 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
8191 bool fSpilledVarsInVolatileRegs = false)
8192{
8193 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8194 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8195 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8196
8197 uint8_t const idxRegVar = pVar->idxReg;
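    /* Copy straight from the host register when the variable lives in one that
       won't be clobbered (non-volatile or explicitly allowed volatile); otherwise
       reload the value from its stack slot. */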
8198 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
8199 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
8200 || !fSpilledVarsInVolatileRegs ))
8201 {
8202 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
8203 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
8204 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
8205 if (!offAddend)
8206 {
8207 if (idxRegArg != idxRegVar)
8208 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
8209 }
8210 else
8211 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
8212 }
8213 else
8214 {
8215 uint8_t const idxStackSlot = pVar->idxStackSlot;
8216 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8217 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
8218 if (offAddend)
8219 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
8220 }
8221 return off;
8222}
8223
8224
8225/**
8226 * Emits code to load a stack or immediate variable value into an argument GPR,
8227 * optionally with an addend.
8228 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
8229 */
8230DECL_FORCE_INLINE_THROW(uint32_t)
8231iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8232 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
8233 bool fSpilledVarsInVolatileRegs = false)
8234{
8235 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8236 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8237 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8238 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
8239 else
8240 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
8241 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
8242 return off;
8243}
8244
8245
8246/**
8247 * Emits code to load the variable address into an argument GPR.
8248 *
8249 * This only works for uninitialized and stack variables.
8250 */
8251DECL_FORCE_INLINE_THROW(uint32_t)
8252iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8253 bool fFlushShadows)
8254{
8255 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8256 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8257 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8258 || pVar->enmKind == kIemNativeVarKind_Stack,
8259 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8260 AssertStmt(!pVar->fSimdReg,
8261 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8262
8263 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8264 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8265
8266 uint8_t const idxRegVar = pVar->idxReg;
8267 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
8268 {
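        /* The value is currently in a register; spill it to the stack slot and free
           the register so the by-reference access sees the up-to-date memory copy. */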
8269 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
8270 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
8271 Assert(pVar->idxReg == UINT8_MAX);
8272 }
8273 Assert( pVar->idxStackSlot != UINT8_MAX
8274 && pVar->idxReg == UINT8_MAX);
8275
8276 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8277}
8278
8279
8280
8281/*********************************************************************************************************************************
8282* TB exiting helpers. *
8283*********************************************************************************************************************************/
8284
8285/**
8286 * Helper for marking the current conditional branch as exiting the TB.
8287 *
8288 * This simplifies the state consolidation later when we reach the IEM_MC_ENDIF.
8289 */
8290DECL_FORCE_INLINE(void) iemNativeMarkCurCondBranchAsExiting(PIEMRECOMPILERSTATE pReNative)
8291{
8292 uint8_t idxCondDepth = pReNative->cCondDepth;
8293 if (idxCondDepth)
8294 {
8295 idxCondDepth--;
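        /* Flag the leg (IF or ELSE, selected by fInElse) we are currently emitting. */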
8296 pReNative->aCondStack[idxCondDepth].afExitTb[pReNative->aCondStack[idxCondDepth].fInElse] = true;
8297 }
8298}
8299
8300
8301/**
8302 * Emits a Jcc rel32 / B.cc imm19 to the TB exit with the given reason (ASSUMED to require a fixup).
8303 */
8304DECL_FORCE_INLINE_THROW(uint32_t)
8305iemNativeEmitJccTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8306 IEMNATIVELABELTYPE enmExitReason, IEMNATIVEINSTRCOND enmCond)
8307{
8308 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8309 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS); /** @todo emit postponed stuff here and invert the condition. */
8310 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8311
8312#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8313 /* jcc rel32 */
8314 pCodeBuf[off++] = 0x0f;
8315 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
8316 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
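    /* rel32 placeholder; the TB exit fixup pass fills in the real displacement. */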
8317 pCodeBuf[off++] = 0x00;
8318 pCodeBuf[off++] = 0x00;
8319 pCodeBuf[off++] = 0x00;
8320 pCodeBuf[off++] = 0x00;
8321
8322#else
8323 /* ARM64 doesn't have the necessary jump range, so we jump via local label
8324 just like when we keep everything local. */
8325 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8326 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel, enmCond);
8327#endif
8328 return off;
8329}
8330
8331
8332/**
8333 * Emits a Jcc rel32 / B.cc imm19 to the TB exit routine with the given reason.
8334 */
8335DECL_INLINE_THROW(uint32_t)
8336iemNativeEmitJccTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason, IEMNATIVEINSTRCOND enmCond)
8337{
8338 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8339 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS); /** @todo emit postponed stuff here and invert the condition. */
8340 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8341
8342#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8343# ifdef RT_ARCH_AMD64
8344 off = iemNativeEmitJccTbExitEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, enmExitReason, enmCond);
8345# elif defined(RT_ARCH_ARM64)
8346 off = iemNativeEmitJccTbExitEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 2), off, enmExitReason, enmCond);
8347# else
8348# error "Port me!"
8349# endif
8350 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8351 return off;
8352#else
8353 return iemNativeEmitJccToNewLabel(pReNative, off, enmExitReason, 0 /*uData*/, enmCond);
8354#endif
8355}
8356
8357
8358/**
8359 * Emits a JNZ/JNE rel32 / B.NE imm19 to the TB exit routine with the given reason.
8360 */
8361DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8362{
8363#ifdef RT_ARCH_AMD64
8364 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_ne);
8365#elif defined(RT_ARCH_ARM64)
8366 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Ne);
8367#else
8368# error "Port me!"
8369#endif
8370}
8371
8372
8373/**
8374 * Emits a JZ/JE rel32 / B.EQ imm19 to the TB exit routine with the given reason.
8375 */
8376DECL_INLINE_THROW(uint32_t) iemNativeEmitJzTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8377{
8378#ifdef RT_ARCH_AMD64
8379 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_e);
8380#elif defined(RT_ARCH_ARM64)
8381 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Eq);
8382#else
8383# error "Port me!"
8384#endif
8385}
8386
8387
8388/**
8389 * Emits a JA/JNBE rel32 / B.HI imm19 to the TB exit.
8390 */
8391DECL_INLINE_THROW(uint32_t) iemNativeEmitJaTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8392{
8393#ifdef RT_ARCH_AMD64
8394 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_nbe);
8395#elif defined(RT_ARCH_ARM64)
8396 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Hi);
8397#else
8398# error "Port me!"
8399#endif
8400}
8401
8402
8403/**
8404 * Emits a JL/JNGE rel32 / B.LT imm19 to the TB exit with the given reason.
8405 */
8406DECL_INLINE_THROW(uint32_t) iemNativeEmitJlTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8407{
8408#ifdef RT_ARCH_AMD64
8409 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_l);
8410#elif defined(RT_ARCH_ARM64)
8411 return iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kArmv8InstrCond_Lt);
8412#else
8413# error "Port me!"
8414#endif
8415}
8416
8417
8418DECL_INLINE_THROW(uint32_t)
8419iemNativeEmitTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, IEMNATIVELABELTYPE enmExitReason)
8420{
8421 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8422 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS); /** @todo emit postponed stuff here. */
8423 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8424
8425 iemNativeMarkCurCondBranchAsExiting(pReNative);
8426
8427#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8428# ifdef RT_ARCH_AMD64
8429 /* jmp rel32 */
8430 pCodeBuf[off++] = 0xe9;
8431 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
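    /* rel32 placeholder (0xfffffffe); patched by the TB exit fixup pass. */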
8432 pCodeBuf[off++] = 0xfe;
8433 pCodeBuf[off++] = 0xff;
8434 pCodeBuf[off++] = 0xff;
8435 pCodeBuf[off++] = 0xff;
8436
8437# elif defined(RT_ARCH_ARM64)
8438 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
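    /* Branch-to-self placeholder (imm26 = -1); patched by the TB exit fixup pass. */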
8439 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8440
8441# else
8442# error "Port me!"
8443# endif
8444 return off;
8445
8446#else
8447 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8448 return iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabel);
8449#endif
8450}
8451
8452
8453DECL_INLINE_THROW(uint32_t)
8454iemNativeEmitTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmExitReason,
8455 bool fActuallyExitingTb = true)
8456{
8457 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8458 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS); /** @todo emit postponed stuff here. */
8459 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8460
8461 if (fActuallyExitingTb)
8462 iemNativeMarkCurCondBranchAsExiting(pReNative);
8463
8464#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8465# ifdef RT_ARCH_AMD64
8466 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8467
8468 /* jmp rel32 */
8469 pCodeBuf[off++] = 0xe9;
8470 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8471 pCodeBuf[off++] = 0xfe;
8472 pCodeBuf[off++] = 0xff;
8473 pCodeBuf[off++] = 0xff;
8474 pCodeBuf[off++] = 0xff;
8475
8476# elif defined(RT_ARCH_ARM64)
8477 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8478 iemNativeAddTbExitFixup(pReNative, off, enmExitReason);
8479 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8480
8481# else
8482# error "Port me!"
8483# endif
8484 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8485 return off;
8486
8487#else
8488 return iemNativeEmitJmpToNewLabel(pReNative, off, enmExitReason);
8489#endif
8490}
8491
8492
8493/**
8494 * Emits a jump to the TB exit with @a enmExitReason on the condition _any_ of the bits in @a fBits
8495 * are set in @a iGprSrc.
8496 */
8497DECL_INLINE_THROW(uint32_t)
8498iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8499 uint8_t iGprSrc, uint64_t fBits, IEMNATIVELABELTYPE enmExitReason)
8500{
8501 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8502
8503 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8504 return iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8505}
8506
8507
8508/**
8509 * Emits a jump to the TB exit with @a enmExitReason on the condition _none_ of
8510 * the bits in @a fBits are set in @a iGprSrc.
8511 */
8512DECL_INLINE_THROW(uint32_t)
8513iemNativeEmitTestAnyBitsInGprAndTbExitIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8514 uint8_t iGprSrc, uint64_t fBits, IEMNATIVELABELTYPE enmExitReason)
8515{
8516 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8517
8518 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8519 return iemNativeEmitJzTbExit(pReNative, off, enmExitReason);
8520}
8521
8522
8523/**
8524 * Emits code that exits the TB with the given reason if @a iGprLeft and @a iGprRight
8525 * differ.
8526 */
8527DECL_INLINE_THROW(uint32_t)
8528iemNativeEmitTestIfGprNotEqualGprAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8529 uint8_t iGprLeft, uint8_t iGprRight, IEMNATIVELABELTYPE enmExitReason)
8530{
8531 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
8532 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8533 return off;
8534}
8535
8536
8537/**
8538 * Emits code that exits the current TB with the given reason if 32-bit
8539 * @a iGprSrc differs from @a uImm.
8540 */
8541DECL_INLINE_THROW(uint32_t)
8542iemNativeEmitTestIfGpr32NotEqualImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8543 uint8_t iGprSrc, uint32_t uImm, IEMNATIVELABELTYPE enmExitReason)
8544{
8545 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8546 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8547 return off;
8548}
8549
8550
8551/**
8552 * Emits code that exits the current TB if @a iGprSrc differs from @a uImm.
8553 */
8554DECL_INLINE_THROW(uint32_t)
8555iemNativeEmitTestIfGprNotEqualImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8556 uint8_t iGprSrc, uint64_t uImm, IEMNATIVELABELTYPE enmExitReason)
8557{
8558 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8559 off = iemNativeEmitJnzTbExit(pReNative, off, enmExitReason);
8560 return off;
8561}
8562
8563
8564/**
8565 * Emits code that exits the current TB with the given reason if 32-bit @a iGprSrc equals @a uImm.
8566 */
8567DECL_INLINE_THROW(uint32_t)
8568iemNativeEmitTestIfGpr32EqualsImmAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8569 uint8_t iGprSrc, uint32_t uImm, IEMNATIVELABELTYPE enmExitReason)
8570{
8571 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8572 off = iemNativeEmitJzTbExit(pReNative, off, enmExitReason);
8573 return off;
8574}
8575
8576
8577/**
8578 * Emits code to exit the current TB with the reason @a enmExitReason on the condition that bit @a iBitNo _is_ _set_ in
8579 * @a iGprSrc.
8580 *
8581 * @note On ARM64 the range is only +/-8191 instructions.
8582 */
8583DECL_INLINE_THROW(uint32_t)
8584iemNativeEmitTestBitInGprAndTbExitIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8585 uint8_t iGprSrc, uint8_t iBitNo, IEMNATIVELABELTYPE enmExitReason)
8586{
8587 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8588
8589#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8590 Assert(iBitNo < 64);
8591 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8592 if (iBitNo < 8)
8593 {
8594 /* test Eb, imm8 */
8595 if (iGprSrc >= 4)
8596 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
8597 pbCodeBuf[off++] = 0xf6;
8598 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
8599 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
8600 off = iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_ne);
8601 }
8602 else
8603 {
8604 /* bt Ev, imm8 */
8605 if (iBitNo >= 32)
8606 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8607 else if (iGprSrc >= 8)
8608 pbCodeBuf[off++] = X86_OP_REX_B;
8609 pbCodeBuf[off++] = 0x0f;
8610 pbCodeBuf[off++] = 0xba;
8611 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8612 pbCodeBuf[off++] = iBitNo;
8613 off = iemNativeEmitJccTbExit(pReNative, off, enmExitReason, kIemNativeInstrCond_c);
8614 }
8615 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8616 return off;
8617
8618#else
8619 /* ARM64 doesn't have the necessary jump range, so we jump via local label
8620 just like when we keep everything local. */
8621 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8622 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS); /** @todo emit postponed stuff here and invert the condition. */
8623 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8624 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
8625#endif
8626}
8627
8628
8629/**
8630 * Emits code that exits the current TB with @a enmExitReason if @a iGprSrc is not zero.
8631 *
8632 * The operand size is given by @a f64Bit.
8633 */
8634DECL_FORCE_INLINE_THROW(uint32_t)
8635iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8636 uint8_t iGprSrc, bool f64Bit, IEMNATIVELABELTYPE enmExitReason)
8637{
8638 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8639
8640#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8641 /* test reg32,reg32 / test reg64,reg64 */
8642 if (f64Bit)
8643 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8644 else if (iGprSrc >= 8)
8645 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8646 pCodeBuf[off++] = 0x85;
8647 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8648
8649 /* jnz idxLabel */
8650 return iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, enmExitReason, kIemNativeInstrCond_ne);
8651
8652#else
8653 /* ARM64 doesn't have the necessary jump range, so we jump via local label
8654 just like when we keep everything local. */
8655 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8656 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS); /** @todo emit postponed stuff here and invert the condition. */
8657 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8658 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8659 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8660#endif
8661}
8662
8663
8664/**
8665 * Emits code to exit the current TB with the given reason @a enmExitReason if @a iGprSrc is not zero.
8666 *
8667 * The operand size is given by @a f64Bit.
8668 */
8669DECL_INLINE_THROW(uint32_t)
8670iemNativeEmitTestIfGprIsNotZeroAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8671 uint8_t iGprSrc, bool f64Bit, IEMNATIVELABELTYPE enmExitReason)
8672{
8673#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8674 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
8675 off, iGprSrc, f64Bit, enmExitReason);
8676 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8677 return off;
8678#else
8679 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8680 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS); /** @todo emit postponed stuff here and invert the condition. */
8681 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8682 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8683#endif
8684}
8685
8686
8687/**
8688 * Emits code that exits the current TB with @a enmExitReason if @a iGprSrc is zero.
8689 *
8690 * The operand size is given by @a f64Bit.
8691 */
8692DECL_FORCE_INLINE_THROW(uint32_t)
8693iemNativeEmitTestIfGprIsZeroAndTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8694 uint8_t iGprSrc, bool f64Bit, IEMNATIVELABELTYPE enmExitReason)
8695{
8696 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
8697#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8698 /* test reg32,reg32 / test reg64,reg64 */
8699 if (f64Bit)
8700 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8701 else if (iGprSrc >= 8)
8702 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8703 pCodeBuf[off++] = 0x85;
8704 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8705
8706    /* jz idxLabel */
8707 return iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, enmExitReason, kIemNativeInstrCond_e);
8708
8709#else
8710 /* ARM64 doesn't have the necessary jump range, so we jump via local label
8711 just like when we keep everything local. */
8712 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8713 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS); /** @todo emit postponed stuff here and invert the condition. */
8714 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8715 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8716 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
8717#endif
8718}
8719
8720
8721/**
8722 * Emits code to exit the current TB with the given reason @a enmExitReason if @a iGprSrc is zero.
8723 *
8724 * The operand size is given by @a f64Bit.
8725 */
8726DECL_INLINE_THROW(uint32_t)
8727iemNativeEmitTestIfGprIsZeroAndTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8728 uint8_t iGprSrc, bool f64Bit, IEMNATIVELABELTYPE enmExitReason)
8729{
8730#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
8731 off = iemNativeEmitTestIfGprIsZeroAndTbExitEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
8732 off, iGprSrc, f64Bit, enmExitReason);
8733 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8734 return off;
8735#else
8736 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8737 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS); /** @todo emit postponed stuff here and invert the condition. */
8738 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8739 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
8740#endif
8741}
8742
8743
8744#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8745/*********************************************************************************************************************************
8746* SIMD helpers. *
8747*********************************************************************************************************************************/
8748
8749
8750/**
8751 * Emits code to load the variable address into an argument GPR.
8752 *
8753 * This is a special variant intended for SIMD variables only.  It is only
8754 * called by the TLB miss path of the memory fetch/store code, where the value
8755 * is passed by reference and both the register and the stack copy are needed,
8756 * depending on which path was taken (TLB hit vs. miss).
8757 */
8758DECL_FORCE_INLINE_THROW(uint32_t)
8759iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8760 bool fSyncRegWithStack = true)
8761{
8762 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8763 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8764 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8765 || pVar->enmKind == kIemNativeVarKind_Stack,
8766 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8767 AssertStmt(pVar->fSimdReg,
8768 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8769 Assert( pVar->idxStackSlot != UINT8_MAX
8770 && pVar->idxReg != UINT8_MAX);
8771
8772 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8773 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8774
8775 uint8_t const idxRegVar = pVar->idxReg;
8776 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8777 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
8778
8779 if (fSyncRegWithStack)
8780 {
8781 if (pVar->cbVar == sizeof(RTUINT128U))
8782 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
8783 else
8784 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
8785 }
8786
8787 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8788}
8789
8790
8791/**
8792 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
8793 *
8794 * This is a special helper that is only called by the TLB miss path of the
8795 * memory fetch/store code, where the value is passed by reference and ends up
8796 * on the stack.  It re-syncs the assigned host register from the stack copy
8797 * afterwards.
8798 */
8799DECL_FORCE_INLINE_THROW(uint32_t)
8800iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
8801{
8802 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8803 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8804 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8805 || pVar->enmKind == kIemNativeVarKind_Stack,
8806 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8807 AssertStmt(pVar->fSimdReg,
8808 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8809 Assert( pVar->idxStackSlot != UINT8_MAX
8810 && pVar->idxReg != UINT8_MAX);
8811
8812 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8813 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8814
8815 uint8_t const idxRegVar = pVar->idxReg;
8816 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8817 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
8818
8819 if (pVar->cbVar == sizeof(RTUINT128U))
8820 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
8821 else
8822 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
8823
8824 return off;
8825}
8826
8827
8828/**
8829 * Emits a gprdst = ~gprsrc operation.
8830 */
8831DECL_FORCE_INLINE_THROW(uint32_t)
8832iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
8833{
8834#ifdef RT_ARCH_AMD64
8835 if (iGprDst != iGprSrc)
8836 {
8837 /* mov gprdst, gprsrc. */
8838 if (f64Bit)
8839 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
8840 else
8841 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
8842 }
8843
8844 /* not gprdst */
8845 if (f64Bit || iGprDst >= 8)
8846 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
8847 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
8848 pCodeBuf[off++] = 0xf7;
8849 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
8850#elif defined(RT_ARCH_ARM64)
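    /* orn dst, xzr, src  ->  dst = 0 | ~src = ~src */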
8851 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
8852#else
8853# error "port me"
8854#endif
8855 return off;
8856}
8857
8858
8859/**
8860 * Emits a gprdst = ~gprsrc operation.
8861 */
8862DECL_INLINE_THROW(uint32_t)
8863iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
8864{
8865#ifdef RT_ARCH_AMD64
8866 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
8867#elif defined(RT_ARCH_ARM64)
8868 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
8869#else
8870# error "port me"
8871#endif
8872 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8873 return off;
8874}
8875
8876
8877/**
8878 * Emits a 128-bit vector register store to a VCpu value.
8879 */
8880DECL_FORCE_INLINE_THROW(uint32_t)
8881iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8882{
8883#ifdef RT_ARCH_AMD64
8884 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
8885    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8886 if (iVecReg >= 8)
8887 pCodeBuf[off++] = X86_OP_REX_R;
8888 pCodeBuf[off++] = 0x0f;
8889 pCodeBuf[off++] = 0x7f;
8890 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8891#elif defined(RT_ARCH_ARM64)
8892 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
8893
8894#else
8895# error "port me"
8896#endif
8897 return off;
8898}
8899
8900
8901/**
8902 * Emits a 128-bit vector register store to a VCpu value.
8903 */
8904DECL_INLINE_THROW(uint32_t)
8905iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8906{
8907#ifdef RT_ARCH_AMD64
8908 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
8909#elif defined(RT_ARCH_ARM64)
8910 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
8911#else
8912# error "port me"
8913#endif
8914 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8915 return off;
8916}
8917
8918
8919/**
8920 * Emits a high 128-bit vector register store to a VCpu value.
8921 */
8922DECL_FORCE_INLINE_THROW(uint32_t)
8923iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8924{
8925#ifdef RT_ARCH_AMD64
8926 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
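    /* VEX3 byte 1 is 0x63 (~R clear) when the register is YMM8+ and 0xE3 (~R set)
       otherwise, both selecting the 0F3A opcode map; byte 2 (0x7D) encodes W=0,
       no VVVV, L=1 (256-bit) and the 66h prefix. */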
8927 pCodeBuf[off++] = X86_OP_VEX3;
8928 if (iVecReg >= 8)
8929 pCodeBuf[off++] = 0x63;
8930 else
8931 pCodeBuf[off++] = 0xe3;
8932 pCodeBuf[off++] = 0x7d;
8933 pCodeBuf[off++] = 0x39;
8934 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8935 pCodeBuf[off++] = 0x01; /* Immediate */
8936#elif defined(RT_ARCH_ARM64)
8937 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
8938#else
8939# error "port me"
8940#endif
8941 return off;
8942}
8943
8944
8945/**
8946 * Emits a high 128-bit vector register store to a VCpu value.
8947 */
8948DECL_INLINE_THROW(uint32_t)
8949iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8950{
8951#ifdef RT_ARCH_AMD64
8952 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8953#elif defined(RT_ARCH_ARM64)
8954 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8955 Assert(!(iVecReg & 0x1));
8956 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8957#else
8958# error "port me"
8959#endif
8960 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8961 return off;
8962}
8963
8964
8965/**
8966 * Emits a 128-bit vector register load of a VCpu value.
8967 */
8968DECL_FORCE_INLINE_THROW(uint32_t)
8969iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8970{
8971#ifdef RT_ARCH_AMD64
8972 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
8973 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8974 if (iVecReg >= 8)
8975 pCodeBuf[off++] = X86_OP_REX_R;
8976 pCodeBuf[off++] = 0x0f;
8977 pCodeBuf[off++] = 0x6f;
8978 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8979#elif defined(RT_ARCH_ARM64)
8980 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8981
8982#else
8983# error "port me"
8984#endif
8985 return off;
8986}
8987
8988
8989/**
8990 * Emits a 128-bit vector register load of a VCpu value.
8991 */
8992DECL_INLINE_THROW(uint32_t)
8993iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8994{
8995#ifdef RT_ARCH_AMD64
8996 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
8997#elif defined(RT_ARCH_ARM64)
8998 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
8999#else
9000# error "port me"
9001#endif
9002 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9003 return off;
9004}
9005
9006
9007/**
9008 * Emits a high 128-bit vector register load of a VCpu value.
9009 */
9010DECL_FORCE_INLINE_THROW(uint32_t)
9011iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9012{
9013#ifdef RT_ARCH_AMD64
9014 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
9015 pCodeBuf[off++] = X86_OP_VEX3;
9016 if (iVecReg >= 8)
9017 pCodeBuf[off++] = 0x63;
9018 else
9019 pCodeBuf[off++] = 0xe3;
9020 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9021 pCodeBuf[off++] = 0x38;
9022 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9023 pCodeBuf[off++] = 0x01; /* Immediate */
9024#elif defined(RT_ARCH_ARM64)
9025 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
9026#else
9027# error "port me"
9028#endif
9029 return off;
9030}
9031
9032
9033/**
9034 * Emits a high 128-bit vector register load of a VCpu value.
9035 */
9036DECL_INLINE_THROW(uint32_t)
9037iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9038{
9039#ifdef RT_ARCH_AMD64
9040 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
9041#elif defined(RT_ARCH_ARM64)
9042 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9043 Assert(!(iVecReg & 0x1));
9044 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
9045#else
9046# error "port me"
9047#endif
9048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9049 return off;
9050}
9051
9052
9053/**
9054 * Emits a vecdst = vecsrc load.
9055 */
9056DECL_FORCE_INLINE(uint32_t)
9057iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9058{
9059#ifdef RT_ARCH_AMD64
9060 /* movdqu vecdst, vecsrc */
9061 pCodeBuf[off++] = 0xf3;
9062
9063 if ((iVecRegDst | iVecRegSrc) >= 8)
9064 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
9065 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
9066 : X86_OP_REX_R;
9067 pCodeBuf[off++] = 0x0f;
9068 pCodeBuf[off++] = 0x6f;
9069 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9070
9071#elif defined(RT_ARCH_ARM64)
9072 /* mov dst, src; alias for: orr dst, src, src */
9073 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9074
9075#else
9076# error "port me"
9077#endif
9078 return off;
9079}
9080
9081
9082/**
9083 * Emits a vecdst = vecsrc load, 128-bit.
9084 */
9085DECL_INLINE_THROW(uint32_t)
9086iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9087{
9088#ifdef RT_ARCH_AMD64
9089 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
9090#elif defined(RT_ARCH_ARM64)
9091 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
9092#else
9093# error "port me"
9094#endif
9095 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9096 return off;
9097}
9098
9099
9100/**
9101 * Emits a vecdst[128:255] = vecsrc[128:255] load.
9102 */
9103DECL_FORCE_INLINE_THROW(uint32_t)
9104iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9105{
9106#ifdef RT_ARCH_AMD64
9107 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
9108 pCodeBuf[off++] = X86_OP_VEX3;
9109 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9110 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9111 pCodeBuf[off++] = 0x46;
9112 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9113 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
9114
9115#elif defined(RT_ARCH_ARM64)
9116 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
9117
9118 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
9119# ifdef IEM_WITH_THROW_CATCH
9120 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
9121# else
9122 AssertReleaseFailedStmt(off = UINT32_MAX);
9123# endif
9124#else
9125# error "port me"
9126#endif
9127 return off;
9128}
9129
9130
9131/**
9132 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
9133 */
9134DECL_INLINE_THROW(uint32_t)
9135iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9136{
9137#ifdef RT_ARCH_AMD64
9138 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
9139#elif defined(RT_ARCH_ARM64)
9140 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9141 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
9142#else
9143# error "port me"
9144#endif
9145 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9146 return off;
9147}
9148
9149
9150/**
9151 * Emits a vecdst[0:127] = vecsrc[128:255] load.
9152 */
9153DECL_FORCE_INLINE_THROW(uint32_t)
9154iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9155{
9156#ifdef RT_ARCH_AMD64
9157 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
9158 pCodeBuf[off++] = X86_OP_VEX3;
9159 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
9160 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9161 pCodeBuf[off++] = 0x39;
9162 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
9163 pCodeBuf[off++] = 0x1;
9164
9165#elif defined(RT_ARCH_ARM64)
9166 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
9167
9168 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
9169# ifdef IEM_WITH_THROW_CATCH
9170 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
9171# else
9172 AssertReleaseFailedStmt(off = UINT32_MAX);
9173# endif
9174#else
9175# error "port me"
9176#endif
9177 return off;
9178}
9179
9180
9181/**
9182 * Emits a vecdst[0:127] = vecsrc[128:255] load, high 128-bit.
9183 */
9184DECL_INLINE_THROW(uint32_t)
9185iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9186{
9187#ifdef RT_ARCH_AMD64
9188 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
9189#elif defined(RT_ARCH_ARM64)
9190 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9191 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
9192#else
9193# error "port me"
9194#endif
9195 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9196 return off;
9197}
9198
9199
9200/**
9201 * Emits a vecdst = vecsrc load, 256-bit.
9202 */
9203DECL_INLINE_THROW(uint32_t)
9204iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9205{
9206#ifdef RT_ARCH_AMD64
9207 /* vmovdqa ymm, ymm */
9208 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9209 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
9210 {
9211 pbCodeBuf[off++] = X86_OP_VEX3;
9212 pbCodeBuf[off++] = 0x41;
9213 pbCodeBuf[off++] = 0x7d;
9214 pbCodeBuf[off++] = 0x6f;
9215 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9216 }
9217 else
9218 {
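        /* The 2-byte VEX prefix can only extend the ModRM reg field (~R), so when
           the source is YMM8+ we use the store form (vmovdqa 7Fh) and swap the
           operands so that the source lands in the reg field. */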
9219 pbCodeBuf[off++] = X86_OP_VEX2;
9220 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
9221 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
9222 pbCodeBuf[off++] = iVecRegSrc >= 8
9223 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
9224 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9225 }
9226#elif defined(RT_ARCH_ARM64)
9227 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9228 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
9229 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
9230 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
9231#else
9232# error "port me"
9233#endif
9234 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9235 return off;
9236}
9237
9238
9239/**
9240 * Emits a vecdst[128:255] = vecsrc[0:127] load.
9241 */
9242DECL_FORCE_INLINE(uint32_t)
9243iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9244{
9245#ifdef RT_ARCH_AMD64
9246 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
9247 pCodeBuf[off++] = X86_OP_VEX3;
9248 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9249 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9250 pCodeBuf[off++] = 0x38;
9251 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9252 pCodeBuf[off++] = 0x01; /* Immediate */
9253
9254#elif defined(RT_ARCH_ARM64)
9255 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9256 /* mov dst, src; alias for: orr dst, src, src */
9257 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
9258
9259#else
9260# error "port me"
9261#endif
9262 return off;
9263}
9264
9265
9266/**
9267 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
9268 */
9269DECL_INLINE_THROW(uint32_t)
9270iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9271{
9272#ifdef RT_ARCH_AMD64
9273 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
9274#elif defined(RT_ARCH_ARM64)
9275 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
9276#else
9277# error "port me"
9278#endif
9279 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9280 return off;
9281}
9282
9283
9284/**
9285 * Emits a gprdst = vecsrc[x] load, 64-bit.
9286 */
9287DECL_FORCE_INLINE(uint32_t)
9288iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
9289{
9290#ifdef RT_ARCH_AMD64
9291 if (iQWord >= 2)
9292 {
9293 /*
9294 * vpextrq doesn't work on the upper 128-bits.
9295 * So we use the following sequence:
9296 * vextracti128 vectmp0, vecsrc, 1
9297 * pextrq gpr, vectmp0, #(iQWord - 2)
9298 */
9299 /* vextracti128 */
9300 pCodeBuf[off++] = X86_OP_VEX3;
9301 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
9302 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9303 pCodeBuf[off++] = 0x39;
9304 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9305 pCodeBuf[off++] = 0x1;
9306
9307 /* pextrq */
9308 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9309 pCodeBuf[off++] = X86_OP_REX_W
9310 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9311 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9312 pCodeBuf[off++] = 0x0f;
9313 pCodeBuf[off++] = 0x3a;
9314 pCodeBuf[off++] = 0x16;
9315 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
9316 pCodeBuf[off++] = iQWord - 2;
9317 }
9318 else
9319 {
9320 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
9321 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9322 pCodeBuf[off++] = X86_OP_REX_W
9323 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9324 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9325 pCodeBuf[off++] = 0x0f;
9326 pCodeBuf[off++] = 0x3a;
9327 pCodeBuf[off++] = 0x16;
9328 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9329 pCodeBuf[off++] = iQWord;
9330 }
9331#elif defined(RT_ARCH_ARM64)
9332 /* umov gprdst, vecsrc[iQWord] */
9333 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9334#else
9335# error "port me"
9336#endif
9337 return off;
9338}
9339
9340
9341/**
9342 * Emits a gprdst = vecsrc[x] load, 64-bit.
9343 */
9344DECL_INLINE_THROW(uint32_t)
9345iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
9346{
9347 Assert(iQWord <= 3);
9348
9349#ifdef RT_ARCH_AMD64
9350 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iVecRegSrc, iQWord);
9351#elif defined(RT_ARCH_ARM64)
9352 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9353 Assert(!(iVecRegSrc & 0x1));
9354 /* Need to access the "high" 128-bit vector register. */
9355 if (iQWord >= 2)
9356 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
9357 else
9358 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
9359#else
9360# error "port me"
9361#endif
9362 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9363 return off;
9364}
9365
9366
9367/**
9368 * Emits a gprdst = vecsrc[x] load, 32-bit.
9369 */
9370DECL_FORCE_INLINE(uint32_t)
9371iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
9372{
9373#ifdef RT_ARCH_AMD64
9374 if (iDWord >= 4)
9375 {
9376 /*
9377 * vpextrd doesn't work on the upper 128-bits.
9378 * So we use the following sequence:
9379 * vextracti128 vectmp0, vecsrc, 1
9380 * pextrd gpr, vectmp0, #(iDWord - 4)
9381 */
9382 /* vextracti128 */
9383 pCodeBuf[off++] = X86_OP_VEX3;
9384 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
9385 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9386 pCodeBuf[off++] = 0x39;
9387 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9388 pCodeBuf[off++] = 0x1;
9389
9390 /* pextrd gpr, vectmp0, #(iDWord - 4) (ASSUMES SSE4.1). */
9391 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9392 if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
9393 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9394 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9395 pCodeBuf[off++] = 0x0f;
9396 pCodeBuf[off++] = 0x3a;
9397 pCodeBuf[off++] = 0x16;
9398 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
9399 pCodeBuf[off++] = iDWord - 4;
9400 }
9401 else
9402 {
9403 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
9404 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9405 if (iGprDst >= 8 || iVecRegSrc >= 8)
9406 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9407 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9408 pCodeBuf[off++] = 0x0f;
9409 pCodeBuf[off++] = 0x3a;
9410 pCodeBuf[off++] = 0x16;
9411 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9412 pCodeBuf[off++] = iDWord;
9413 }
9414#elif defined(RT_ARCH_ARM64)
9415 Assert(iDWord < 4);
9416
9417 /* umov gprdst, vecsrc[iDWord] */
9418 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
9419#else
9420# error "port me"
9421#endif
9422 return off;
9423}
9424
9425
9426/**
9427 * Emits a gprdst = vecsrc[x] load, 32-bit.
9428 */
9429DECL_INLINE_THROW(uint32_t)
9430iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
9431{
9432 Assert(iDWord <= 7);
9433
9434#ifdef RT_ARCH_AMD64
9435 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
9436#elif defined(RT_ARCH_ARM64)
9437 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9438 Assert(!(iVecRegSrc & 0x1));
9439 /* Need to access the "high" 128-bit vector register. */
9440 if (iDWord >= 4)
9441 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
9442 else
9443 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
9444#else
9445# error "port me"
9446#endif
9447 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9448 return off;
9449}
9450
9451
9452/**
9453 * Emits a gprdst = vecsrc[x] load, 16-bit.
9454 */
9455DECL_FORCE_INLINE(uint32_t)
9456iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9457{
9458#ifdef RT_ARCH_AMD64
9459 if (iWord >= 8)
9460 {
9461 /** @todo Currently not used. */
9462 AssertReleaseFailed();
9463 }
9464 else
9465 {
9466 /* pextrw gpr, vecsrc, #iWord */
9467 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9468 if (iGprDst >= 8 || iVecRegSrc >= 8)
9469 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
9470 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
9471 pCodeBuf[off++] = 0x0f;
9472 pCodeBuf[off++] = 0xc5;
9473 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
9474 pCodeBuf[off++] = iWord;
9475 }
9476#elif defined(RT_ARCH_ARM64)
9477 /* umov gprdst, vecsrc[iWord] */
9478 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
9479#else
9480# error "port me"
9481#endif
9482 return off;
9483}
9484
9485
9486/**
9487 * Emits a gprdst = vecsrc[x] load, 16-bit.
9488 */
9489DECL_INLINE_THROW(uint32_t)
9490iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9491{
9492 Assert(iWord <= 15);
9493
9494#ifdef RT_ARCH_AMD64
9495 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
9496#elif defined(RT_ARCH_ARM64)
9497 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9498 Assert(!(iVecRegSrc & 0x1));
9499 /* Need to access the "high" 128-bit vector register. */
9500 if (iWord >= 8)
9501 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
9502 else
9503 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
9504#else
9505# error "port me"
9506#endif
9507 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9508 return off;
9509}
9510
9511
9512/**
9513 * Emits a gprdst = vecsrc[x] load, 8-bit.
9514 */
9515DECL_FORCE_INLINE(uint32_t)
9516iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9517{
9518#ifdef RT_ARCH_AMD64
9519 if (iByte >= 16)
9520 {
9521 /** @todo Currently not used. */
9522 AssertReleaseFailed();
9523 }
9524 else
9525 {
9526 /* pextrb gpr, vecsrc, #iByte */
9527 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9528 if (iGprDst >= 8 || iVecRegSrc >= 8)
9529 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9530 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9531 pCodeBuf[off++] = 0x0f;
9532 pCodeBuf[off++] = 0x3a;
9533 pCodeBuf[off++] = 0x14;
9534 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9535 pCodeBuf[off++] = iByte;
9536 }
9537#elif defined(RT_ARCH_ARM64)
9538 /* umov gprdst, vecsrc[iByte] */
9539 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
9540#else
9541# error "port me"
9542#endif
9543 return off;
9544}
9545
9546
9547/**
9548 * Emits a gprdst = vecsrc[x] load, 8-bit.
9549 */
9550DECL_INLINE_THROW(uint32_t)
9551iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9552{
9553 Assert(iByte <= 31);
9554
9555#ifdef RT_ARCH_AMD64
9556 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
9557#elif defined(RT_ARCH_ARM64)
9558 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9559 Assert(!(iVecRegSrc & 0x1));
9560 /* Need to access the "high" 128-bit vector register. */
9561 if (iByte >= 16)
9562 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
9563 else
9564 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
9565#else
9566# error "port me"
9567#endif
9568 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9569 return off;
9570}
9571
9572
9573/**
9574 * Emits a vecdst[x] = gprsrc store, 64-bit.
9575 */
9576DECL_FORCE_INLINE(uint32_t)
9577iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9578{
9579#ifdef RT_ARCH_AMD64
9580 if (iQWord >= 2)
9581 {
9582 /*
9583 * vpinsrq doesn't work on the upper 128-bits.
9584 * So we use the following sequence:
9585 * vextracti128 vectmp0, vecdst, 1
9586 * pinsrq vectmp0, gpr, #(iQWord - 2)
9587 * vinserti128 vecdst, vectmp0, 1
9588 */
9589 /* vextracti128 */
9590 pCodeBuf[off++] = X86_OP_VEX3;
9591 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9592 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9593 pCodeBuf[off++] = 0x39;
9594 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9595 pCodeBuf[off++] = 0x1;
9596
9597 /* pinsrq */
9598 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9599 pCodeBuf[off++] = X86_OP_REX_W
9600 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9601 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9602 pCodeBuf[off++] = 0x0f;
9603 pCodeBuf[off++] = 0x3a;
9604 pCodeBuf[off++] = 0x22;
9605 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9606 pCodeBuf[off++] = iQWord - 2;
9607
9608 /* vinserti128 */
9609 pCodeBuf[off++] = X86_OP_VEX3;
9610 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9611 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9612 pCodeBuf[off++] = 0x38;
9613 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9614 pCodeBuf[off++] = 0x01; /* Immediate */
9615 }
9616 else
9617 {
9618 /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
9619 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9620 pCodeBuf[off++] = X86_OP_REX_W
9621 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9622 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9623 pCodeBuf[off++] = 0x0f;
9624 pCodeBuf[off++] = 0x3a;
9625 pCodeBuf[off++] = 0x22;
9626 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9627 pCodeBuf[off++] = iQWord;
9628 }
9629#elif defined(RT_ARCH_ARM64)
9630 /* ins vecdst[iQWord], gpr */
9631 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9632#else
9633# error "port me"
9634#endif
9635 return off;
9636}
9637
9638
9639/**
9640 * Emits a vecdst[x] = gprsrc store, 64-bit.
9641 */
9642DECL_INLINE_THROW(uint32_t)
9643iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9644{
9645 Assert(iQWord <= 3);
9646
9647#ifdef RT_ARCH_AMD64
9648 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
9649#elif defined(RT_ARCH_ARM64)
9650 Assert(!(iVecRegDst & 0x1));
9651 if (iQWord >= 2)
9652 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
9653 else
9654 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
9655#else
9656# error "port me"
9657#endif
9658 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9659 return off;
9660}
9661
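/*
 * Editorial usage sketch, not from the original header, reusing the
 * hypothetical idxVecRegValue / idxRegSrc register indexes: storing a GPR
 * into qword #3 (bits 255:192) of a 256-bit value:
 *
 *      off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off,
 *                                                 idxVecRegValue, idxRegSrc, 3);
 *
 * Since pinsrq cannot reach the upper lane, the AMD64 path above is a
 * read-modify-write of that lane: vextracti128 into the fixed SIMD temporary,
 * pinsrq into the temporary, vinserti128 back; ARM64 simply writes the odd
 * register of the pair.
 */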
9662
9663/**
9664 * Emits a vecdst[x] = gprsrc store, 32-bit.
9665 */
9666DECL_FORCE_INLINE(uint32_t)
9667iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9668{
9669#ifdef RT_ARCH_AMD64
9670 if (iDWord >= 4)
9671 {
9672 /*
9673 * vpinsrd doesn't work on the upper 128-bits.
9674 * So we use the following sequence:
9675 * vextracti128 vectmp0, vecdst, 1
9676 * pinsrd vectmp0, gpr, #(iDWord - 4)
9677 * vinserti128 vecdst, vectmp0, 1
9678 */
9679 /* vextracti128 */
9680 pCodeBuf[off++] = X86_OP_VEX3;
9681 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9682 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9683 pCodeBuf[off++] = 0x39;
9684 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9685 pCodeBuf[off++] = 0x1;
9686
9687 /* pinsrd */
9688 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9689 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
9690 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9691 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9692 pCodeBuf[off++] = 0x0f;
9693 pCodeBuf[off++] = 0x3a;
9694 pCodeBuf[off++] = 0x22;
9695 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9696 pCodeBuf[off++] = iDWord - 4;
9697
9698 /* vinserti128 */
9699 pCodeBuf[off++] = X86_OP_VEX3;
9700 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9701 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9702 pCodeBuf[off++] = 0x38;
9703 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9704 pCodeBuf[off++] = 0x01; /* Immediate */
9705 }
9706 else
9707 {
9708 /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
9709 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9710 if (iVecRegDst >= 8 || iGprSrc >= 8)
9711 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9712 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9713 pCodeBuf[off++] = 0x0f;
9714 pCodeBuf[off++] = 0x3a;
9715 pCodeBuf[off++] = 0x22;
9716 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9717 pCodeBuf[off++] = iDWord;
9718 }
9719#elif defined(RT_ARCH_ARM64)
9720 /* ins vecdst[iDWord], gpr */
9721 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
9722#else
9723# error "port me"
9724#endif
9725 return off;
9726}
9727
9728
9729/**
9730 * Emits a vecdst[x] = gprsrc store, 32-bit.
9731 */
9732DECL_INLINE_THROW(uint32_t)
9733iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9734{
9735 Assert(iDWord <= 7);
9736
9737#ifdef RT_ARCH_AMD64
9738 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
9739#elif defined(RT_ARCH_ARM64)
9740 Assert(!(iVecRegDst & 0x1));
9741 if (iDWord >= 4)
9742 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
9743 else
9744 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
9745#else
9746# error "port me"
9747#endif
9748 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9749 return off;
9750}
9751
9752
9753/**
9754 * Emits a vecdst[x] = gprsrc store, 16-bit.
9755 */
9756DECL_FORCE_INLINE(uint32_t)
9757iemNativeEmitSimdStoreGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9758{
9759#ifdef RT_ARCH_AMD64
9760 /* pinsrw vecdst, gpr, #iWord. */
9761 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9762 if (iVecRegDst >= 8 || iGprSrc >= 8)
9763 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9764 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9765 pCodeBuf[off++] = 0x0f;
9766 pCodeBuf[off++] = 0xc4;
9767 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9768 pCodeBuf[off++] = iWord;
9769#elif defined(RT_ARCH_ARM64)
9770 /* ins vecdst[iWord], gpr */
9771 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iWord, kArmv8InstrUmovInsSz_U16);
9772#else
9773# error "port me"
9774#endif
9775 return off;
9776}
9777
9778
9779/**
9780 * Emits a vecdst[x] = gprsrc store, 16-bit.
9781 */
9782DECL_INLINE_THROW(uint32_t)
9783iemNativeEmitSimdStoreGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9784{
9785 Assert(iWord <= 7);
9786
9787#ifdef RT_ARCH_AMD64
9788 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iGprSrc, iWord);
9789#elif defined(RT_ARCH_ARM64)
9790 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iWord);
9791#else
9792# error "port me"
9793#endif
9794 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9795 return off;
9796}
9797
9798
9799/**
9800 * Emits a vecdst[x] = gprsrc store, 8-bit.
9801 */
9802DECL_FORCE_INLINE(uint32_t)
9803iemNativeEmitSimdStoreGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
9804{
9805#ifdef RT_ARCH_AMD64
9806 /* pinsrb vecdst, gpr, #iByte (ASSUMES SSE4.1). */
9807 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9808 if (iVecRegDst >= 8 || iGprSrc >= 8)
9809 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9810 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9811 pCodeBuf[off++] = 0x0f;
9812 pCodeBuf[off++] = 0x3a;
9813 pCodeBuf[off++] = 0x20;
9814 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9815 pCodeBuf[off++] = iByte;
9816#elif defined(RT_ARCH_ARM64)
9817 /* ins vecdst[iByte], gpr */
9818 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iByte, kArmv8InstrUmovInsSz_U8);
9819#else
9820# error "port me"
9821#endif
9822 return off;
9823}
9824
9825
9826/**
9827 * Emits a vecdst[x] = gprsrc store, 8-bit.
9828 */
9829DECL_INLINE_THROW(uint32_t)
9830iemNativeEmitSimdStoreGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
9831{
9832 Assert(iByte <= 15);
9833
9834#ifdef RT_ARCH_AMD64
9835 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iByte);
9836#elif defined(RT_ARCH_ARM64)
9837 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iByte);
9838#else
9839# error "port me"
9840#endif
9841 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9842 return off;
9843}
9844
9845
9846/**
9847 * Emits a vecdst.au32[iDWord] = 0 store.
9848 */
9849DECL_FORCE_INLINE(uint32_t)
9850iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
9851{
9852 Assert(iDWord <= 7);
9853
9854#ifdef RT_ARCH_AMD64
9855 /*
9856 * xor tmp0, tmp0
9857 * pinsrd xmm, tmp0, iDWord
9858 */
9859 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
9860 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
9861 pCodeBuf[off++] = 0x33;
9862 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
9863 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
9864#elif defined(RT_ARCH_ARM64)
9865 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9866 Assert(!(iVecReg & 0x1));
9867 /* ins vecdst[iDWord], wzr */
9868 if (iDWord >= 4)
9869 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
9870 else
9871 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
9872#else
9873# error "port me"
9874#endif
9875 return off;
9876}
9877
9878
9879/**
9880 * Emits a vecdst.au32[iDWord] = 0 store.
9881 */
9882DECL_INLINE_THROW(uint32_t)
9883iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
9884{
9886#ifdef RT_ARCH_AMD64
9887 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 22), off, iVecReg, iDWord);
9888#elif defined(RT_ARCH_ARM64)
9889 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
9890#else
9891# error "port me"
9892#endif
9893 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9894 return off;
9895}
9896
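/*
 * Editorial note with a usage sketch, not from the original header: zeroing
 * an element is just an element store with a zero source, so the AMD64 path
 * above clears the fixed GPR temporary and reuses the element store emitter,
 * while ARM64 inserts directly from WZR. Clearing dword #5 (bits 191:160) of
 * a 256-bit value held in the hypothetical idxVecRegValue:
 *
 *      off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off,
 *                                               idxVecRegValue, 5);
 */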
9897
9898/**
9899 * Emits a vecdst[0:127] = 0 store.
9900 */
9901DECL_FORCE_INLINE(uint32_t)
9902iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9903{
9904#ifdef RT_ARCH_AMD64
9905 /* pxor xmm, xmm */
9906 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9907 if (iVecReg >= 8)
9908 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
9909 pCodeBuf[off++] = 0x0f;
9910 pCodeBuf[off++] = 0xef;
9911 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9912#elif defined(RT_ARCH_ARM64)
9913 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9914 Assert(!(iVecReg & 0x1));
9915 /* eor vecreg, vecreg, vecreg */
9916 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
9917#else
9918# error "port me"
9919#endif
9920 return off;
9921}
9922
9923
9924/**
9925 * Emits a vecdst[0:127] = 0 store.
9926 */
9927DECL_INLINE_THROW(uint32_t)
9928iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9929{
9930#ifdef RT_ARCH_AMD64
9931 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
9932#elif defined(RT_ARCH_ARM64)
9933 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
9934#else
9935# error "port me"
9936#endif
9937 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9938 return off;
9939}
9940
9941
9942/**
9943 * Emits a vecdst[128:255] = 0 store.
9944 */
9945DECL_FORCE_INLINE(uint32_t)
9946iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9947{
9948#ifdef RT_ARCH_AMD64
9949 /* vmovdqa xmm, xmm; the VEX.128 encoding zeroes bits 255:128 of the ymm register. */
9950 if (iVecReg < 8)
9951 {
9952 pCodeBuf[off++] = X86_OP_VEX2;
9953 pCodeBuf[off++] = 0xf9;
9954 }
9955 else
9956 {
9957 pCodeBuf[off++] = X86_OP_VEX3;
9958 pCodeBuf[off++] = 0x41;
9959 pCodeBuf[off++] = 0x79;
9960 }
9961 pCodeBuf[off++] = 0x6f;
9962 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9963#elif defined(RT_ARCH_ARM64)
9964 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9965 Assert(!(iVecReg & 0x1));
9966 /* eor vecreg, vecreg, vecreg */
9967 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9968#else
9969# error "port me"
9970#endif
9971 return off;
9972}
9973
9974
9975/**
9976 * Emits a vecdst[128:255] = 0 store.
9977 */
9978DECL_INLINE_THROW(uint32_t)
9979iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9980{
9981#ifdef RT_ARCH_AMD64
9982 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
9983#elif defined(RT_ARCH_ARM64)
9984 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
9985#else
9986# error "port me"
9987#endif
9988 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9989 return off;
9990}
9991
9992
9993/**
9994 * Emits a vecdst[0:255] = 0 store.
9995 */
9996DECL_FORCE_INLINE(uint32_t)
9997iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9998{
9999#ifdef RT_ARCH_AMD64
10000 /* vpxor ymm, ymm, ymm */
10001 if (iVecReg < 8)
10002 {
10003 pCodeBuf[off++] = X86_OP_VEX2;
10004 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
10005 }
10006 else
10007 {
10008 pCodeBuf[off++] = X86_OP_VEX3;
10009 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
10010 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
10011 }
10012 pCodeBuf[off++] = 0xef;
10013 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
10014#elif defined(RT_ARCH_ARM64)
10015 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10016 Assert(!(iVecReg & 0x1));
10017 /* eor vecreg, vecreg, vecreg */
10018 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
10019 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
10020#else
10021# error "port me"
10022#endif
10023 return off;
10024}
10025
10026
10027/**
10028 * Emits a vecdst[0:255] = 0 store.
10029 */
10030DECL_INLINE_THROW(uint32_t)
10031iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
10032{
10033#ifdef RT_ARCH_AMD64
10034 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
10035#elif defined(RT_ARCH_ARM64)
10036 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
10037#else
10038# error "port me"
10039#endif
10040 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10041 return off;
10042}
10043
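/*
 * Editorial usage sketch, not from the original header: clearing a whole
 * 256-bit value held in the hypothetical idxVecRegValue in one call:
 *
 *      off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxVecRegValue);
 *
 * On AMD64 this is a single vpxor ymm,ymm,ymm; on ARM64 it is two eor
 * instructions, one per register of the even/odd pair. The U128 variants
 * above are for clearing only the low or only the high half.
 */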
10044
10045/**
10046 * Emits a vecdst = gprsrc broadcast, 8-bit.
10047 */
10048DECL_FORCE_INLINE(uint32_t)
10049iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10050{
10051#ifdef RT_ARCH_AMD64
10052 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE4.1). */
10053 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10054 if (iVecRegDst >= 8 || iGprSrc >= 8)
10055 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10056 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10057 pCodeBuf[off++] = 0x0f;
10058 pCodeBuf[off++] = 0x3a;
10059 pCodeBuf[off++] = 0x20;
10060 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10061 pCodeBuf[off++] = 0x00;
10062
10063 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
10064 pCodeBuf[off++] = X86_OP_VEX3;
10065 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10066 | 0x02 /* opcode map. */
10067 | ( iVecRegDst >= 8
10068 ? 0
10069 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10070 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10071 pCodeBuf[off++] = 0x78;
10072 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10073#elif defined(RT_ARCH_ARM64)
10074 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10075 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10076
10077 /* dup vecsrc, gpr */
10078 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
10079 if (f256Bit)
10080 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
10081#else
10082# error "port me"
10083#endif
10084 return off;
10085}
10086
10087
10088/**
10089 * Emits a vecdst = gprsrc broadcast, 8-bit.
10090 */
10091DECL_INLINE_THROW(uint32_t)
10092iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10093{
10094#ifdef RT_ARCH_AMD64
10095 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10096#elif defined(RT_ARCH_ARM64)
10097 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10098#else
10099# error "port me"
10100#endif
10101 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10102 return off;
10103}
10104
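/*
 * Editorial usage sketch, not from the original header: broadcasting the low
 * byte of a host GPR to all 32 byte lanes of a 256-bit value, with the last
 * argument being f256Bit:
 *
 *      off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off,
 *                                                    idxVecRegValue,
 *                                                    idxRegSrc, true);
 *
 * AMD64 has no direct GPR-to-vector broadcast, hence the pinsrb +
 * vpbroadcastb pair above; ARM64 uses dup, emitted once per register of the
 * pair when f256Bit is set.
 */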
10105
10106/**
10107 * Emits a vecdst = gprsrc broadcast, 16-bit.
10108 */
10109DECL_FORCE_INLINE(uint32_t)
10110iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10111{
10112#ifdef RT_ARCH_AMD64
10113 /* pinsrw vecdst, gpr, #0 */
10114 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10115 if (iVecRegDst >= 8 || iGprSrc >= 8)
10116 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10117 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10118 pCodeBuf[off++] = 0x0f;
10119 pCodeBuf[off++] = 0xc4;
10120 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10121 pCodeBuf[off++] = 0x00;
10122
10123 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
10124 pCodeBuf[off++] = X86_OP_VEX3;
10125 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10126 | 0x02 /* opcode map. */
10127 | ( iVecRegDst >= 8
10128 ? 0
10129 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10130 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10131 pCodeBuf[off++] = 0x79;
10132 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10133#elif defined(RT_ARCH_ARM64)
10134 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10135 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10136
10137 /* dup vecsrc, gpr */
10138 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
10139 if (f256Bit)
10140 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
10141#else
10142# error "port me"
10143#endif
10144 return off;
10145}
10146
10147
10148/**
10149 * Emits a vecdst = gprsrc broadcast, 16-bit.
10150 */
10151DECL_INLINE_THROW(uint32_t)
10152iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10153{
10154#ifdef RT_ARCH_AMD64
10155 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10156#elif defined(RT_ARCH_ARM64)
10157 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10158#else
10159# error "port me"
10160#endif
10161 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10162 return off;
10163}
10164
10165
10166/**
10167 * Emits a vecdst = gprsrc broadcast, 32-bit.
10168 */
10169DECL_FORCE_INLINE(uint32_t)
10170iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10171{
10172#ifdef RT_ARCH_AMD64
10173 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
10174 * vbroadcast needs a memory operand or another xmm register to work... */
10175
10176 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
10177 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10178 if (iVecRegDst >= 8 || iGprSrc >= 8)
10179 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10180 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10181 pCodeBuf[off++] = 0x0f;
10182 pCodeBuf[off++] = 0x3a;
10183 pCodeBuf[off++] = 0x22;
10184 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10185 pCodeBuf[off++] = 0x00;
10186
10187 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
10188 pCodeBuf[off++] = X86_OP_VEX3;
10189 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10190 | 0x02 /* opcode map. */
10191 | ( iVecRegDst >= 8
10192 ? 0
10193 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10194 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10195 pCodeBuf[off++] = 0x58;
10196 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10197#elif defined(RT_ARCH_ARM64)
10198 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10199 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10200
10201 /* dup vecsrc, gpr */
10202 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
10203 if (f256Bit)
10204 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
10205#else
10206# error "port me"
10207#endif
10208 return off;
10209}
10210
10211
10212/**
10213 * Emits a vecdst = gprsrc broadcast, 32-bit.
10214 */
10215DECL_INLINE_THROW(uint32_t)
10216iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10217{
10218#ifdef RT_ARCH_AMD64
10219 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10220#elif defined(RT_ARCH_ARM64)
10221 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10222#else
10223# error "port me"
10224#endif
10225 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10226 return off;
10227}
10228
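/*
 * Editorial worked example, not from the original header: for the
 * hypothetical assignment iVecRegDst=1, iGprSrc=2 (edx) and f256Bit=true,
 * the 32-bit broadcast emitter above produces:
 *
 *      66 0F 3A 22 CA 00       pinsrd       xmm1, edx, 0
 *      C4 E2 7D 58 C9          vpbroadcastd ymm1, xmm1
 *
 * Neither register needs a REX/VEX extension bit, VEX.L reflects f256Bit,
 * and vvvv stays unused (1111b) since vpbroadcastd has no second source.
 */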
10229
10230/**
10231 * Emits a vecdst = gprsrc broadcast, 64-bit.
10232 */
10233DECL_FORCE_INLINE(uint32_t)
10234iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10235{
10236#ifdef RT_ARCH_AMD64
10237 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
10238 * vbroadcast needs a memory operand or another xmm register to work... */
10239
10240 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
10241 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10242 pCodeBuf[off++] = X86_OP_REX_W
10243 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10244 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10245 pCodeBuf[off++] = 0x0f;
10246 pCodeBuf[off++] = 0x3a;
10247 pCodeBuf[off++] = 0x22;
10248 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10249 pCodeBuf[off++] = 0x00;
10250
10251 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
10252 pCodeBuf[off++] = X86_OP_VEX3;
10253 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10254 | 0x02 /* opcode map. */
10255 | ( iVecRegDst >= 8
10256 ? 0
10257 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10258 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10259 pCodeBuf[off++] = 0x59;
10260 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10261#elif defined(RT_ARCH_ARM64)
10262 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10263 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10264
10265 /* dup vecsrc, gpr */
10266 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
10267 if (f256Bit)
10268 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
10269#else
10270# error "port me"
10271#endif
10272 return off;
10273}
10274
10275
10276/**
10277 * Emits a vecdst = gprsrc broadcast, 64-bit.
10278 */
10279DECL_INLINE_THROW(uint32_t)
10280iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10281{
10282#ifdef RT_ARCH_AMD64
10283 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
10284#elif defined(RT_ARCH_ARM64)
10285 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10286#else
10287# error "port me"
10288#endif
10289 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10290 return off;
10291}
10292
10293
10294/**
10295 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
10296 */
10297DECL_FORCE_INLINE(uint32_t)
10298iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
10299{
10300#ifdef RT_ARCH_AMD64
10301 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
10302
10303 /* vinserti128 ymm, ymm, xmm, 1 (ASSUMES AVX2). */
10304 pCodeBuf[off++] = X86_OP_VEX3;
10305 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
10306 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
10307 pCodeBuf[off++] = 0x38;
10308 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
10309 pCodeBuf[off++] = 0x01; /* Immediate */
10310#elif defined(RT_ARCH_ARM64)
10311 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10312 Assert(!(iVecRegDst & 0x1));
10313
10314 /* mov dst, src; alias for: orr dst, src, src */
10315 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
10316 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
10317#else
10318# error "port me"
10319#endif
10320 return off;
10321}
10322
10323
10324/**
10325 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
10326 */
10327DECL_INLINE_THROW(uint32_t)
10328iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
10329{
10330#ifdef RT_ARCH_AMD64
10331 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
10332#elif defined(RT_ARCH_ARM64)
10333 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
10334#else
10335# error "port me"
10336#endif
10337 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10338 return off;
10339}
10340
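/*
 * Editorial usage sketch, not from the original header: duplicating the low
 * 128 bits of one vector register into both halves of another, i.e. a
 * register-source equivalent of vbroadcasti128, using the hypothetical
 * idxVecRegDst / idxVecRegSrc indexes:
 *
 *      off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off,
 *                                                         idxVecRegDst,
 *                                                         idxVecRegSrc);
 *
 * AMD64 copies the low half and vinserti128s it into the upper half; ARM64
 * emits two orr register moves, one per register of the even/odd pair.
 */
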
10341#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
10342
10343/** @} */
10344
10345#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
10346