VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@ 104030

Last change on this file since 104030 was 104030, checked in by vboxsync, 10 months ago

VMM/IEM: Build fix for linux.arm64 (gcc complains about unsequenced operation because off is read and/or written on both sides of the same statement), bugref:10391

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 310.9 KB
1/* $Id: IEMN8veRecompilerEmit.h 104030 2024-03-24 18:47:23Z vboxsync $ */
2/** @file
3 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34#include "IEMN8veRecompiler.h"
35
36
37/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
38 * @ingroup grp_iem_n8ve_re
39 * @{
40 */
41
42/**
43 * Emit a simple marker instruction to more easily tell where something starts
44 * in the disassembly.
45 */
46DECL_INLINE_THROW(uint32_t)
47iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
48{
49#ifdef RT_ARCH_AMD64
50 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
51 if (uInfo == 0)
52 {
53 /* nop */
54 pbCodeBuf[off++] = 0x90;
55 }
56 else
57 {
58 /* nop [disp32] */
59 pbCodeBuf[off++] = 0x0f;
60 pbCodeBuf[off++] = 0x1f;
61 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
62 pbCodeBuf[off++] = RT_BYTE1(uInfo);
63 pbCodeBuf[off++] = RT_BYTE2(uInfo);
64 pbCodeBuf[off++] = RT_BYTE3(uInfo);
65 pbCodeBuf[off++] = RT_BYTE4(uInfo);
66 }
67#elif defined(RT_ARCH_ARM64)
68 /* nop */
69 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
70 pu32CodeBuf[off++] = 0xd503201f;
71
72 RT_NOREF(uInfo);
73#else
74# error "port me"
75#endif
76 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
77 return off;
78}
79
80
81/**
82 * Emit a breakpoint instruction.
83 */
84DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
85{
86#ifdef RT_ARCH_AMD64
87 pCodeBuf[off++] = 0xcc;
88 RT_NOREF(uInfo); /** @todo use multibyte nop for info? */
89
90#elif defined(RT_ARCH_ARM64)
91 pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));
92
93#else
94# error "port me"
95#endif
96 return off;
97}
98
99
100/**
101 * Emit a breakpoint instruction.
102 */
103DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
104{
105#ifdef RT_ARCH_AMD64
106 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
107#elif defined(RT_ARCH_ARM64)
108 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
109#else
110# error "port me"
111#endif
112 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
113 return off;
114}
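#if 0 /* Editor's usage sketch (hypothetical call site, not part of the original header): shows the
         common pattern of threading 'off' through consecutive emitters; the uInfo values are
         arbitrary examples. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleMarkerAndBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeEmitMarker(pReNative, off, 0x1234); /* AMD64: multi-byte nop carrying 0x1234 as disp32; ARM64: plain nop */
    off = iemNativeEmitBrk(pReNative, off, 0x0001);    /* int3 (AMD64) / brk #0x0001 (ARM64) to trap here when debugging */
    return off;
}
#endif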
115
116
117/*********************************************************************************************************************************
118* Loads, Stores and Related Stuff. *
119*********************************************************************************************************************************/
120
121#ifdef RT_ARCH_AMD64
122/**
123 * Common bit of iemNativeEmitLoadGprByGpr and friends.
124 */
125DECL_FORCE_INLINE(uint32_t)
126iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
127{
128 if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
129 {
130 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
131 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
132 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
133 }
134 else if (offDisp == (int8_t)offDisp)
135 {
136 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
137 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
138 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
139 pbCodeBuf[off++] = (uint8_t)offDisp;
140 }
141 else
142 {
143 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
144 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
145 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
146 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
147 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
148 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
149 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
150 }
151 return off;
152}
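/* Editor's worked example (added for illustration, not in the original source): with iGprBase = RSP
   and offDisp = 0x40 the middle branch emits ModRM(mod=1, reg, rm=100b), a SIB byte (base=RSP, no
   index) and the disp8 0x40 -- i.e. even a plain [rsp+0x40] access needs the SIB byte.  A base of
   RBP/R13 never takes the first branch because mod=0 with rm=101b encodes a RIP-relative/disp32
   form rather than [rBP]. */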
153#endif /* RT_ARCH_AMD64 */
154
155/**
156 * Emits setting a GPR to zero.
157 */
158DECL_INLINE_THROW(uint32_t)
159iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
160{
161#ifdef RT_ARCH_AMD64
162 /* xor gpr32, gpr32 */
163 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
164 if (iGpr >= 8)
165 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
166 pbCodeBuf[off++] = 0x33;
167 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
168
169#elif defined(RT_ARCH_ARM64)
170 /* mov gpr, #0x0 */
171 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
172 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;
173
174#else
175# error "port me"
176#endif
177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
178 return off;
179}
180
181
182/**
183 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
184 * buffer space.
185 *
186 * Max buffer consumption:
187 * - AMD64: 10 instruction bytes.
188 * - ARM64: 4 instruction words (16 bytes).
189 */
190DECL_FORCE_INLINE(uint32_t)
191iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
192{
193#ifdef RT_ARCH_AMD64
194 if (uImm64 == 0)
195 {
196 /* xor gpr, gpr */
197 if (iGpr >= 8)
198 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
199 pCodeBuf[off++] = 0x33;
200 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
201 }
202 else if (uImm64 <= UINT32_MAX)
203 {
204 /* mov gpr, imm32 */
205 if (iGpr >= 8)
206 pCodeBuf[off++] = X86_OP_REX_B;
207 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
208 pCodeBuf[off++] = RT_BYTE1(uImm64);
209 pCodeBuf[off++] = RT_BYTE2(uImm64);
210 pCodeBuf[off++] = RT_BYTE3(uImm64);
211 pCodeBuf[off++] = RT_BYTE4(uImm64);
212 }
213 else if (uImm64 == (uint64_t)(int32_t)uImm64)
214 {
215 /* mov gpr, sx(imm32) */
216 if (iGpr < 8)
217 pCodeBuf[off++] = X86_OP_REX_W;
218 else
219 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
220 pCodeBuf[off++] = 0xc7;
221 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
222 pCodeBuf[off++] = RT_BYTE1(uImm64);
223 pCodeBuf[off++] = RT_BYTE2(uImm64);
224 pCodeBuf[off++] = RT_BYTE3(uImm64);
225 pCodeBuf[off++] = RT_BYTE4(uImm64);
226 }
227 else
228 {
229 /* mov gpr, imm64 */
230 if (iGpr < 8)
231 pCodeBuf[off++] = X86_OP_REX_W;
232 else
233 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
234 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
235 pCodeBuf[off++] = RT_BYTE1(uImm64);
236 pCodeBuf[off++] = RT_BYTE2(uImm64);
237 pCodeBuf[off++] = RT_BYTE3(uImm64);
238 pCodeBuf[off++] = RT_BYTE4(uImm64);
239 pCodeBuf[off++] = RT_BYTE5(uImm64);
240 pCodeBuf[off++] = RT_BYTE6(uImm64);
241 pCodeBuf[off++] = RT_BYTE7(uImm64);
242 pCodeBuf[off++] = RT_BYTE8(uImm64);
243 }
244
245#elif defined(RT_ARCH_ARM64)
246 /*
247 * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
248 * supply the remaining bits using 'movk gpr, imm16, lsl #x'.
249 *
250 * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
251 * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
252 * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
253 * after the first non-zero immediate component so we switch to movk for
254 * the remainder.
255 */
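    /* Editor's worked example (illustration only, not from the original source):
       uImm64 = 0x000000010000ffff has two zero half-words, so the movz path below wins and
       emits just two instructions: 'movz xN, #0xffff' followed by 'movk xN, #0x0001, lsl #32'.
       uImm64 = 0xffffffffffff1234 has three 0xffff half-words, so the movn path emits a single
       'movn xN, #0xedcb' (xN = ~0xedcb = 0xffffffffffff1234). */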
256 unsigned cZeroHalfWords = !( uImm64 & UINT16_MAX)
257 + !((uImm64 >> 16) & UINT16_MAX)
258 + !((uImm64 >> 32) & UINT16_MAX)
259 + !((uImm64 >> 48) & UINT16_MAX);
260 unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
261 : ( (uImm64 & UINT16_MAX) == UINT16_MAX)
262 + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
263 + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
264 + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
265 if (cFfffHalfWords <= cZeroHalfWords)
266 {
267 uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
268
269 /* movz gpr, imm16 */
270 uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
271 if (uImmPart || cZeroHalfWords == 4)
272 {
273 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
274 fMovBase |= RT_BIT_32(29);
275 }
276 /* mov[z/k] gpr, imm16, lsl #16 */
277 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
278 if (uImmPart)
279 {
280 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
281 fMovBase |= RT_BIT_32(29);
282 }
283 /* mov[z/k] gpr, imm16, lsl #32 */
284 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
285 if (uImmPart)
286 {
287 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
288 fMovBase |= RT_BIT_32(29);
289 }
290 /* mov[z/k] gpr, imm16, lsl #48 */
291 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
292 if (uImmPart)
293 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
294 }
295 else
296 {
297 uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
298
299 /* find the first half-word that isn't UINT16_MAX. */
300 uint32_t const iHwNotFfff = (uImm64 & UINT16_MAX) != UINT16_MAX ? 0
301 : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
302 : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
303
304 /* movn gpr, imm16, lsl #iHwNotFfff*16 */
305 uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
306 pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
307 fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
308 /* movk gpr, imm16 */
309 if (iHwNotFfff != 0)
310 {
311 uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
312 if (uImmPart != UINT32_C(0xffff))
313 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
314 }
315 /* movk gpr, imm16, lsl #16 */
316 if (iHwNotFfff != 1)
317 {
318 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
319 if (uImmPart != UINT32_C(0xffff))
320 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
321 }
322 /* movk gpr, imm16, lsl #32 */
323 if (iHwNotFfff != 2)
324 {
325 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
326 if (uImmPart != UINT32_C(0xffff))
327 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
328 }
329 /* movk gpr, imm16, lsl #48 */
330 if (iHwNotFfff != 3)
331 {
332 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
333 if (uImmPart != UINT32_C(0xffff))
334 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
335 }
336 }
337
338 /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
339 * clang 12.x does that, only to use the 'x' version for the
340 * addressing in the following ldr. */
341
342#else
343# error "port me"
344#endif
345 return off;
346}
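/* Editor's note: a minimal sketch of how the 'Ex' variants are intended to be used (this mirrors
   the iemNativeEmitLoadGprImm64 wrapper right below, so the identifiers are real; only the call
   site is hypothetical): the caller reserves the documented worst case up front -- 10 bytes on
   AMD64, 4 words on ARM64 -- and asserts the buffer afterwards:

       off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
       IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
*/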
347
348
349/**
350 * Emits loading a constant into a 64-bit GPR
351 */
352DECL_INLINE_THROW(uint32_t)
353iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
354{
355#ifdef RT_ARCH_AMD64
356 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
357#elif defined(RT_ARCH_ARM64)
358 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
359#else
360# error "port me"
361#endif
362 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
363 return off;
364}
365
366
367/**
368 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
369 * buffer space.
370 *
371 * Max buffer consumption:
372 * - AMD64: 6 instruction bytes.
373 * - ARM64: 2 instruction words (8 bytes).
374 *
375 * @note The top 32 bits will be cleared.
376 */
377DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
378{
379#ifdef RT_ARCH_AMD64
380 if (uImm32 == 0)
381 {
382 /* xor gpr, gpr */
383 if (iGpr >= 8)
384 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
385 pCodeBuf[off++] = 0x33;
386 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
387 }
388 else
389 {
390 /* mov gpr, imm32 */
391 if (iGpr >= 8)
392 pCodeBuf[off++] = X86_OP_REX_B;
393 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
394 pCodeBuf[off++] = RT_BYTE1(uImm32);
395 pCodeBuf[off++] = RT_BYTE2(uImm32);
396 pCodeBuf[off++] = RT_BYTE3(uImm32);
397 pCodeBuf[off++] = RT_BYTE4(uImm32);
398 }
399
400#elif defined(RT_ARCH_ARM64)
401 if ((uImm32 >> 16) == 0)
402 /* movz gpr, imm16 */
403 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
404 else if ((uImm32 & UINT32_C(0xffff)) == 0)
405 /* movz gpr, imm16, lsl #16 */
406 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
407 else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
408 /* movn gpr, imm16, lsl #16 */
409 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
410 else if ((uImm32 >> 16) == UINT32_C(0xffff))
411 /* movn gpr, imm16 */
412 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
413 else
414 {
415 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
416 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
417 }
418
419#else
420# error "port me"
421#endif
422 return off;
423}
424
425
426/**
427 * Emits loading a constant into a 32-bit GPR.
428 * @note The top 32 bits will be cleared.
429 */
430DECL_INLINE_THROW(uint32_t)
431iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
432{
433#ifdef RT_ARCH_AMD64
434 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
435#elif defined(RT_ARCH_ARM64)
436 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
437#else
438# error "port me"
439#endif
440 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
441 return off;
442}
443
444
445/**
446 * Emits loading a constant into an 8-bit GPR
447 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
448 * only the ARM64 version does that.
449 */
450DECL_INLINE_THROW(uint32_t)
451iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
452{
453#ifdef RT_ARCH_AMD64
454 /* mov gpr, imm8 */
455 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
456 if (iGpr >= 8)
457 pbCodeBuf[off++] = X86_OP_REX_B;
458 else if (iGpr >= 4)
459 pbCodeBuf[off++] = X86_OP_REX;
460 pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
461 pbCodeBuf[off++] = RT_BYTE1(uImm8);
462
463#elif defined(RT_ARCH_ARM64)
464 /* movz gpr, imm16, lsl #0 */
465 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
466 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;
467
468#else
469# error "port me"
470#endif
471 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
472 return off;
473}
474
475
476#ifdef RT_ARCH_AMD64
477/**
478 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
479 */
480DECL_FORCE_INLINE(uint32_t)
481iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
482{
483 if (offVCpu < 128)
484 {
485 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
486 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
487 }
488 else
489 {
490 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
491 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
492 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
493 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
494 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
495 }
496 return off;
497}
498
499#elif defined(RT_ARCH_ARM64)
500
501/**
502 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
503 *
504 * @note Loads can use @a iGprReg for large offsets, but stores require a temporary
505 * register (@a iGprTmp).
506 * @note DON'T try this with prefetch.
507 */
508DECL_FORCE_INLINE_THROW(uint32_t)
509iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
510 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
511{
512 /*
513 * There are a couple of ldr variants that take an immediate offset, so we
514 * try to use those if we can; otherwise we have to use the temporary register
515 * to help with the addressing.
516 */
517 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
518 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
519 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
520 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
521 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
522 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
523 else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
524 {
525 /* The offset is too large, so we must load it into a register and use
526 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
527 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
528 if (iGprTmp == UINT8_MAX)
529 iGprTmp = iGprReg;
530 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
531 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
532 }
533 else
534# ifdef IEM_WITH_THROW_CATCH
535 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
536# else
537 AssertReleaseFailedStmt(off = UINT32_MAX);
538# endif
539
540 return off;
541}
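/* Editor's note (illustrative numbers, not from the original source): for an 8-byte field at
   offVCpu = 0x100 the first branch above encodes the scaled unsigned-immediate form directly
   (0x100 / 8 easily fits the 12-bit field); a naturally aligned field outside that range but
   within _4K * cbData bytes of cpum.GstCtx is addressed via the fixed CPUMCTX base register
   instead; anything else materializes offVCpu in a register (iGprTmp, or iGprReg itself for
   loads) and uses the register-indexed ldr/str form. */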
542
543/**
544 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
545 */
546DECL_FORCE_INLINE_THROW(uint32_t)
547iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
548 uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
549{
550 /*
551 * There are a couple of ldr variants that take an immediate offset, so we
552 * try to use those if we can; otherwise we have to use the temporary register
553 * to help with the addressing.
554 */
555 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
556 {
557 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
558 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
559 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
560 }
561 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
562 {
563 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
564 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
565 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
566 }
567 else
568 {
569 /* The offset is too large, so we must load it into a register and use
570 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
571 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
572 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
573 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
574 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
575 IEMNATIVE_REG_FIXED_TMP0);
576 }
577 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
578 return off;
579}
580
581#endif /* RT_ARCH_ARM64 */
582
583
584/**
585 * Emits a 64-bit GPR load of a VCpu value.
586 */
587DECL_FORCE_INLINE_THROW(uint32_t)
588iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
589{
590#ifdef RT_ARCH_AMD64
591 /* mov reg64, mem64 */
592 if (iGpr < 8)
593 pCodeBuf[off++] = X86_OP_REX_W;
594 else
595 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
596 pCodeBuf[off++] = 0x8b;
597 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
598
599#elif defined(RT_ARCH_ARM64)
600 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
601
602#else
603# error "port me"
604#endif
605 return off;
606}
607
608
609/**
610 * Emits a 64-bit GPR load of a VCpu value.
611 */
612DECL_INLINE_THROW(uint32_t)
613iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
614{
615#ifdef RT_ARCH_AMD64
616 off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
617 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
618
619#elif defined(RT_ARCH_ARM64)
620 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
621
622#else
623# error "port me"
624#endif
625 return off;
626}
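#if 0 /* Editor's usage sketch (hypothetical call site; cpum.GstCtx.rip is a real VMCPU member,
         while idxHstReg is assumed to be a host register allocated by the caller). */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleLoadGuestRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
{
    /* Load the guest RIP from the VMCPU structure into idxHstReg. */
    return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
}
#endif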
627
628
629/**
630 * Emits a 32-bit GPR load of a VCpu value.
631 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
632 */
633DECL_INLINE_THROW(uint32_t)
634iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
635{
636#ifdef RT_ARCH_AMD64
637 /* mov reg32, mem32 */
638 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
639 if (iGpr >= 8)
640 pbCodeBuf[off++] = X86_OP_REX_R;
641 pbCodeBuf[off++] = 0x8b;
642 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
644
645#elif defined(RT_ARCH_ARM64)
646 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
647
648#else
649# error "port me"
650#endif
651 return off;
652}
653
654
655/**
656 * Emits a 16-bit GPR load of a VCpu value.
657 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
658 */
659DECL_INLINE_THROW(uint32_t)
660iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
661{
662#ifdef RT_ARCH_AMD64
663 /* movzx reg32, mem16 */
664 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
665 if (iGpr >= 8)
666 pbCodeBuf[off++] = X86_OP_REX_R;
667 pbCodeBuf[off++] = 0x0f;
668 pbCodeBuf[off++] = 0xb7;
669 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
670 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
671
672#elif defined(RT_ARCH_ARM64)
673 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
674
675#else
676# error "port me"
677#endif
678 return off;
679}
680
681
682/**
683 * Emits an 8-bit GPR load of a VCpu value.
684 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
685 */
686DECL_INLINE_THROW(uint32_t)
687iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
688{
689#ifdef RT_ARCH_AMD64
690 /* movzx reg32, mem8 */
691 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
692 if (iGpr >= 8)
693 pbCodeBuf[off++] = X86_OP_REX_R;
694 pbCodeBuf[off++] = 0x0f;
695 pbCodeBuf[off++] = 0xb6;
696 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
697 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
698
699#elif defined(RT_ARCH_ARM64)
700 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
701
702#else
703# error "port me"
704#endif
705 return off;
706}
707
708
709/**
710 * Emits a store of a GPR value to a 64-bit VCpu field.
711 */
712DECL_FORCE_INLINE_THROW(uint32_t)
713iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
714 uint8_t iGprTmp = UINT8_MAX)
715{
716#ifdef RT_ARCH_AMD64
717 /* mov mem64, reg64 */
718 if (iGpr < 8)
719 pCodeBuf[off++] = X86_OP_REX_W;
720 else
721 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
722 pCodeBuf[off++] = 0x89;
723 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
724 RT_NOREF(iGprTmp);
725
726#elif defined(RT_ARCH_ARM64)
727 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
728
729#else
730# error "port me"
731#endif
732 return off;
733}
734
735
736/**
737 * Emits a store of a GPR value to a 64-bit VCpu field.
738 */
739DECL_INLINE_THROW(uint32_t)
740iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
741{
742#ifdef RT_ARCH_AMD64
743 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
744#elif defined(RT_ARCH_ARM64)
745 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
746 IEMNATIVE_REG_FIXED_TMP0);
747#else
748# error "port me"
749#endif
750 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
751 return off;
752}
753
754
755/**
756 * Emits a store of a GPR value to a 32-bit VCpu field.
757 */
758DECL_INLINE_THROW(uint32_t)
759iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
760{
761#ifdef RT_ARCH_AMD64
762 /* mov mem32, reg32 */
763 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
764 if (iGpr >= 8)
765 pbCodeBuf[off++] = X86_OP_REX_R;
766 pbCodeBuf[off++] = 0x89;
767 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
769
770#elif defined(RT_ARCH_ARM64)
771 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
772
773#else
774# error "port me"
775#endif
776 return off;
777}
778
779
780/**
781 * Emits a store of a GPR value to a 16-bit VCpu field.
782 */
783DECL_INLINE_THROW(uint32_t)
784iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
785{
786#ifdef RT_ARCH_AMD64
787 /* mov mem16, reg16 */
788 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
789 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
790 if (iGpr >= 8)
791 pbCodeBuf[off++] = X86_OP_REX_R;
792 pbCodeBuf[off++] = 0x89;
793 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
794 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
795
796#elif defined(RT_ARCH_ARM64)
797 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));
798
799#else
800# error "port me"
801#endif
802 return off;
803}
804
805
806/**
807 * Emits a store of a GPR value to an 8-bit VCpu field.
808 */
809DECL_INLINE_THROW(uint32_t)
810iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
811{
812#ifdef RT_ARCH_AMD64
813 /* mov mem8, reg8 */
814 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
815 if (iGpr >= 8)
816 pbCodeBuf[off++] = X86_OP_REX_R;
817 pbCodeBuf[off++] = 0x88;
818 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
820
821#elif defined(RT_ARCH_ARM64)
822 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
823
824#else
825# error "port me"
826#endif
827 return off;
828}
829
830
831/**
832 * Emits a store of an immediate value to a 32-bit VCpu field.
833 *
834 * @note ARM64: Will allocate temporary registers.
835 */
836DECL_FORCE_INLINE_THROW(uint32_t)
837iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
838{
839#ifdef RT_ARCH_AMD64
840 /* mov mem32, imm32 */
841 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
842 pCodeBuf[off++] = 0xc7;
843 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
844 pCodeBuf[off++] = RT_BYTE1(uImm);
845 pCodeBuf[off++] = RT_BYTE2(uImm);
846 pCodeBuf[off++] = RT_BYTE3(uImm);
847 pCodeBuf[off++] = RT_BYTE4(uImm);
848 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
849
850#elif defined(RT_ARCH_ARM64)
851 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
852 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
853 if (idxRegImm != ARMV8_A64_REG_XZR)
854 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
855
856#else
857# error "port me"
858#endif
859 return off;
860}
861
862
863
864/**
865 * Emits a store of an immediate value to a 16-bit VCpu field.
866 *
867 * @note ARM64: @a idxTmp1 is always required! Whether @a idxTmp2 is needed
868 * depends on whether the offset can be encoded as an immediate or not. The
869 * @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same from CPUMCPU.
870 */
871DECL_FORCE_INLINE_THROW(uint32_t)
872iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
873 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
874{
875#ifdef RT_ARCH_AMD64
876 /* mov mem16, imm16 */
877 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
878 pCodeBuf[off++] = 0xc7;
879 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
880 pCodeBuf[off++] = RT_BYTE1(uImm);
881 pCodeBuf[off++] = RT_BYTE2(uImm);
882 RT_NOREF(idxTmp1, idxTmp2);
883
884#elif defined(RT_ARCH_ARM64)
885 if (idxTmp1 != UINT8_MAX)
886 {
887 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
888 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
889 sizeof(uint16_t), idxTmp2);
890 }
891 else
892# ifdef IEM_WITH_THROW_CATCH
893 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
894# else
895 AssertReleaseFailedStmt(off = UINT32_MAX);
896# endif
897
898#else
899# error "port me"
900#endif
901 return off;
902}
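/* Editor's note (assumed call pattern, not from the original source): on ARM64 the caller passes
   one or two allocated temporaries -- idxTmp1 receives @a uImm via movz, while idxTmp2 is only
   consumed by iemNativeEmitGprByVCpuLdStEx when @a offVCpu cannot be encoded as a scaled unsigned
   immediate (i.e. beyond 8190 bytes from both VMCPU and cpum.GstCtx).  On AMD64 both temporaries
   may be left as UINT8_MAX since the displacement is encoded directly. */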
903
904
905/**
906 * Emits a store of an immediate value to an 8-bit VCpu field.
907 */
908DECL_INLINE_THROW(uint32_t)
909iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
910{
911#ifdef RT_ARCH_AMD64
912 /* mov mem8, imm8 */
913 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
914 pbCodeBuf[off++] = 0xc6;
915 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
916 pbCodeBuf[off++] = bImm;
917 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
918
919#elif defined(RT_ARCH_ARM64)
920 /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
921 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
922 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
923 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
924
925#else
926# error "port me"
927#endif
928 return off;
929}
930
931
932/**
933 * Emits a load of the effective address of a VCpu field into a GPR.
934 */
935DECL_INLINE_THROW(uint32_t)
936iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
937{
938#ifdef RT_ARCH_AMD64
939 /* lea gprdst, [rbx + offDisp] */
940 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
941 if (iGprDst < 8)
942 pbCodeBuf[off++] = X86_OP_REX_W;
943 else
944 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
945 pbCodeBuf[off++] = 0x8d;
946 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);
947
948#elif defined(RT_ARCH_ARM64)
949 if (offVCpu < (unsigned)_4K)
950 {
951 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
952 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
953 }
954 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
955 {
956 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
957 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
958 offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
959 }
960 else
961 {
962 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
963 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
964 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
965 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, iGprDst);
966 }
967
968#else
969# error "port me"
970#endif
971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
972 return off;
973}
974
975
976/** This is just a typesafe alternative to RT_UOFFSETOF. */
977DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
978{
979 uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
980 Assert(off < sizeof(VMCPU));
981 return off;
982}
983
984
985/** This is just a typesafe alternative to RT_UOFFSETOF. */
986DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
987{
988 uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
989 Assert(off < sizeof(VMCPU));
990 return off;
991}
992
993
994/**
995 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
996 *
997 * @note The two temp registers are not required for AMD64. ARM64 always
998 * requires the first, and the 2nd is needed if the offset cannot be
999 * encoded as an immediate.
1000 */
1001DECL_FORCE_INLINE(uint32_t)
1002iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1003{
1004#ifdef RT_ARCH_AMD64
1005 /* inc qword [pVCpu + off] */
1006 pCodeBuf[off++] = X86_OP_REX_W;
1007 pCodeBuf[off++] = 0xff;
1008 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1009 RT_NOREF(idxTmp1, idxTmp2);
1010
1011#elif defined(RT_ARCH_ARM64)
1012 /* Determine how we're to access pVCpu first. */
1013 uint32_t const cbData = sizeof(STAMCOUNTER);
1014 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
1015 {
1016 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1017 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1018 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1019 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1020 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1021 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1022 }
1023 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
1024 {
1025 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1026 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1027 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1028 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1029 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1030 }
1031 else
1032 {
1033 /* The offset is too large, so we must load it into a register and use
1034 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
1035 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1036 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1037 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1038 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1039 }
1040
1041#else
1042# error "port me"
1043#endif
1044 return off;
1045}
1046
1047
1048/**
1049 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1050 *
1051 * @note The two temp registers are not required for AMD64. ARM64 always
1052 * requires the first, and the 2nd is needed if the offset cannot be
1053 * encoded as an immediate.
1054 */
1055DECL_FORCE_INLINE(uint32_t)
1056iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1057{
1058#ifdef RT_ARCH_AMD64
1059 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
1060#elif defined(RT_ARCH_ARM64)
1061 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1062#else
1063# error "port me"
1064#endif
1065 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1066 return off;
1067}
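#if 0 /* Editor's usage sketch (hypothetical: StatExampleCounter is a placeholder member name and
         idxTmp1/idxTmp2 are assumed to be temporaries allocated by the caller). */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleBumpStat(PIEMRECOMPILERSTATE pReNative, uint32_t off, PVMCPU pVCpu,
                             uint8_t idxTmp1, uint8_t idxTmp2)
{
    /* The typesafe offset helper above asserts that the pointer really lies within VMCPU. */
    uint32_t const offVCpu = iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatExampleCounter);
    return iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
}
#endif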
1068
1069
1070/**
1071 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1072 *
1073 * @note The two temp registers are not required for AMD64. ARM64 always
1074 * requires the first, and the 2nd is needed if the offset cannot be
1075 * encoded as an immediate.
1076 */
1077DECL_FORCE_INLINE(uint32_t)
1078iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1079{
1080 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1081#ifdef RT_ARCH_AMD64
1082 /* inc dword [pVCpu + offVCpu] */
1083 pCodeBuf[off++] = 0xff;
1084 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1085 RT_NOREF(idxTmp1, idxTmp2);
1086
1087#elif defined(RT_ARCH_ARM64)
1088 /* Determine how we're to access pVCpu first. */
1089 uint32_t const cbData = sizeof(uint32_t);
1090 if (offVCpu < (unsigned)(_4K * cbData))
1091 {
1092 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1093 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1094 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1095 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1096 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1097 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1098 }
1099 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1100 {
1101 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1102 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1103 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1104 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1105 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1106 }
1107 else
1108 {
1109 /* The offset is too large, so we must load it into a register and use
1110 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1111 of the instruction if that'll reduce the constant to 16 bits. */
1112 if (offVCpu / cbData < (unsigned)UINT16_MAX)
1113 {
1114 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
1115 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1116 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1117 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1118 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1119 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1120 }
1121 else
1122 {
1123 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1124 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1125 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1126 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1127 }
1128 }
1129
1130#else
1131# error "port me"
1132#endif
1133 return off;
1134}
1135
1136
1137/**
1138 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1139 *
1140 * @note The two temp registers are not required for AMD64. ARM64 always
1141 * requires the first, and the 2nd is needed if the offset cannot be
1142 * encoded as an immediate.
1143 */
1144DECL_FORCE_INLINE(uint32_t)
1145iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1146{
1147#ifdef RT_ARCH_AMD64
1148 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
1149#elif defined(RT_ARCH_ARM64)
1150 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1151#else
1152# error "port me"
1153#endif
1154 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1155 return off;
1156}
1157
1158
1159/**
1160 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
1161 *
1162 * @note May allocate temporary registers (not AMD64).
1163 */
1164DECL_FORCE_INLINE(uint32_t)
1165iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1166{
1167 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1168#ifdef RT_ARCH_AMD64
1169 /* or dword [pVCpu + offVCpu], imm8/32 */
1170 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1171 if (fMask < 0x80)
1172 {
1173 pCodeBuf[off++] = 0x83;
1174 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1175 pCodeBuf[off++] = (uint8_t)fMask;
1176 }
1177 else
1178 {
1179 pCodeBuf[off++] = 0x81;
1180 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1181 pCodeBuf[off++] = RT_BYTE1(fMask);
1182 pCodeBuf[off++] = RT_BYTE2(fMask);
1183 pCodeBuf[off++] = RT_BYTE3(fMask);
1184 pCodeBuf[off++] = RT_BYTE4(fMask);
1185 }
1186
1187#elif defined(RT_ARCH_ARM64)
1188 /* If the constant is unwieldy we'll need a register to hold it as well. */
1189 uint32_t uImmSizeLen, uImmRotate;
1190 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1191 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1192
1193 /* We need a temp register for holding the member value we're modifying. */
1194 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1195
1196 /* Determine how we're to access pVCpu first. */
1197 uint32_t const cbData = sizeof(uint32_t);
1198 if (offVCpu < (unsigned)(_4K * cbData))
1199 {
1200 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1201 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1202 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1203 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1204 if (idxTmpMask == UINT8_MAX)
1205 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1206 else
1207 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1208 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1209 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1210 }
1211 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1212 {
1213 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1214 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1215 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1216 if (idxTmpMask == UINT8_MAX)
1217 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1218 else
1219 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1220 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1221 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1222 }
1223 else
1224 {
1225 /* The offset is too large, so we must load it into a register and use
1226 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try use the 'LSL, #2' feature
1227 of the instruction if that'll reduce the constant to 16-bits. */
1228 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1229 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1230 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1231 if (fShifted)
1232 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1233 else
1234 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1235
1236 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1237 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1238
1239 if (idxTmpMask == UINT8_MAX)
1240 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1241 else
1242 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1243
1244 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1245 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1246 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1247 }
1248 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1249 if (idxTmpMask != UINT8_MAX)
1250 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1251
1252#else
1253# error "port me"
1254#endif
1255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1256 return off;
1257}
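/* Editor's note (illustrative masks, not from the original source): a mask that forms a single
   contiguous run of set bits, e.g. 0x00000ff0, is a valid ARM64 logical immediate, so the OR/AND
   emitters above and below get by without an extra register for it; a scattered mask such as
   0x00012345 is not encodable, and iemNativeRegAllocTmpImm is used to materialize it in a
   temporary first. */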
1258
1259
1260/**
1261 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1262 *
1263 * @note May allocate temporary registers (not AMD64).
1264 */
1265DECL_FORCE_INLINE(uint32_t)
1266iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1267{
1268 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1269#ifdef RT_ARCH_AMD64
1270 /* and dword [pVCpu + offVCpu], imm8/32 */
1271 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1272 if (fMask < 0x80)
1273 {
1274 pCodeBuf[off++] = 0x83;
1275 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1276 pCodeBuf[off++] = (uint8_t)fMask;
1277 }
1278 else
1279 {
1280 pCodeBuf[off++] = 0x81;
1281 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1282 pCodeBuf[off++] = RT_BYTE1(fMask);
1283 pCodeBuf[off++] = RT_BYTE2(fMask);
1284 pCodeBuf[off++] = RT_BYTE3(fMask);
1285 pCodeBuf[off++] = RT_BYTE4(fMask);
1286 }
1287
1288#elif defined(RT_ARCH_ARM64)
1289 /* If the constant is unwieldy we'll need a register to hold it as well. */
1290 uint32_t uImmSizeLen, uImmRotate;
1291 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1292 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1293
1294 /* We need a temp register for holding the member value we're modifying. */
1295 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1296
1297 /* Determine how we're to access pVCpu first. */
1298 uint32_t const cbData = sizeof(uint32_t);
1299 if (offVCpu < (unsigned)(_4K * cbData))
1300 {
1301 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1302 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1303 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1304 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1305 if (idxTmpMask == UINT8_MAX)
1306 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1307 else
1308 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1309 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1310 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1311 }
1312 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1313 {
1314 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1315 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1316 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1317 if (idxTmpMask == UINT8_MAX)
1318 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1319 else
1320 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1321 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1322 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1323 }
1324 else
1325 {
1326 /* The offset is too large, so we must load it into a register and use
1327 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1328 of the instruction if that'll reduce the constant to 16 bits. */
1329 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1330 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1331 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1332 if (fShifted)
1333 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1334 else
1335 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1336
1337 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1338 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1339
1340 if (idxTmpMask == UINT8_MAX)
1341 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1342 else
1343 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1344
1345 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1346 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1347 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1348 }
1349 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1350 if (idxTmpMask != UINT8_MAX)
1351 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1352
1353#else
1354# error "port me"
1355#endif
1356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1357 return off;
1358}
1359
1360
1361/**
1362 * Emits a gprdst = gprsrc load.
1363 */
1364DECL_FORCE_INLINE(uint32_t)
1365iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1366{
1367#ifdef RT_ARCH_AMD64
1368 /* mov gprdst, gprsrc */
1369 if ((iGprDst | iGprSrc) >= 8)
1370 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1371 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1372 : X86_OP_REX_W | X86_OP_REX_R;
1373 else
1374 pCodeBuf[off++] = X86_OP_REX_W;
1375 pCodeBuf[off++] = 0x8b;
1376 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1377
1378#elif defined(RT_ARCH_ARM64)
1379 /* mov dst, src; alias for: orr dst, xzr, src */
1380 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1381
1382#else
1383# error "port me"
1384#endif
1385 return off;
1386}
1387
1388
1389/**
1390 * Emits a gprdst = gprsrc load.
1391 */
1392DECL_INLINE_THROW(uint32_t)
1393iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1394{
1395#ifdef RT_ARCH_AMD64
1396 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1397#elif defined(RT_ARCH_ARM64)
1398 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1399#else
1400# error "port me"
1401#endif
1402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1403 return off;
1404}
1405
1406
1407/**
1408 * Emits a gprdst = gprsrc[31:0] load.
1409 * @note Bits 63 thru 32 are cleared.
1410 */
1411DECL_FORCE_INLINE(uint32_t)
1412iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1413{
1414#ifdef RT_ARCH_AMD64
1415 /* mov gprdst, gprsrc */
1416 if ((iGprDst | iGprSrc) >= 8)
1417 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1418 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1419 : X86_OP_REX_R;
1420 pCodeBuf[off++] = 0x8b;
1421 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1422
1423#elif defined(RT_ARCH_ARM64)
1424 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1425 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1426
1427#else
1428# error "port me"
1429#endif
1430 return off;
1431}
1432
1433
1434/**
1435 * Emits a gprdst = gprsrc[31:0] load.
1436 * @note Bits 63 thru 32 are cleared.
1437 */
1438DECL_INLINE_THROW(uint32_t)
1439iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1440{
1441#ifdef RT_ARCH_AMD64
1442 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1443#elif defined(RT_ARCH_ARM64)
1444 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1445#else
1446# error "port me"
1447#endif
1448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1449 return off;
1450}
1451
1452
1453/**
1454 * Emits a gprdst = gprsrc[15:0] load.
1455 * @note Bits 63 thru 16 are cleared.
1456 */
1457DECL_INLINE_THROW(uint32_t)
1458iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1459{
1460#ifdef RT_ARCH_AMD64
1461 /* movzx Gv,Ew */
1462 if ((iGprDst | iGprSrc) >= 8)
1463 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1464 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1465 : X86_OP_REX_R;
1466 pCodeBuf[off++] = 0x0f;
1467 pCodeBuf[off++] = 0xb7;
1468 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1469
1470#elif defined(RT_ARCH_ARM64)
1471 /* and gprdst, gprsrc, #0xffff */
1472# if 1
1473 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1474 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1475# else
1476 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1477 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1478# endif
1479
1480#else
1481# error "port me"
1482#endif
1483 return off;
1484}
1485
1486
1487/**
1488 * Emits a gprdst = gprsrc[15:0] load.
1489 * @note Bits 63 thru 16 are cleared.
1490 */
1491DECL_INLINE_THROW(uint32_t)
1492iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1493{
1494#ifdef RT_ARCH_AMD64
1495 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1496#elif defined(RT_ARCH_ARM64)
1497 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1498#else
1499# error "port me"
1500#endif
1501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1502 return off;
1503}
1504
1505
1506/**
1507 * Emits a gprdst = gprsrc[7:0] load.
1508 * @note Bits 63 thru 8 are cleared.
1509 */
1510DECL_FORCE_INLINE(uint32_t)
1511iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1512{
1513#ifdef RT_ARCH_AMD64
1514 /* movzx Gv,Eb */
1515 if (iGprDst >= 8 || iGprSrc >= 8)
1516 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1517 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1518 : X86_OP_REX_R;
1519 else if (iGprSrc >= 4)
1520 pCodeBuf[off++] = X86_OP_REX;
1521 pCodeBuf[off++] = 0x0f;
1522 pCodeBuf[off++] = 0xb6;
1523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1524
1525#elif defined(RT_ARCH_ARM64)
1526 /* and gprdst, gprsrc, #0xff */
1527 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1528 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1529
1530#else
1531# error "port me"
1532#endif
1533 return off;
1534}
1535
1536
1537/**
1538 * Emits a gprdst = gprsrc[7:0] load.
1539 * @note Bits 63 thru 8 are cleared.
1540 */
1541DECL_INLINE_THROW(uint32_t)
1542iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1543{
1544#ifdef RT_ARCH_AMD64
1545 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1546#elif defined(RT_ARCH_ARM64)
1547 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1548#else
1549# error "port me"
1550#endif
1551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1552 return off;
1553}
1554
1555
1556/**
1557 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1558 * @note Bits 63 thru 8 are cleared.
1559 */
1560DECL_INLINE_THROW(uint32_t)
1561iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1562{
1563#ifdef RT_ARCH_AMD64
1564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1565
1566 /* movzx Gv,Ew */
1567 if ((iGprDst | iGprSrc) >= 8)
1568 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1569 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1570 : X86_OP_REX_R;
1571 pbCodeBuf[off++] = 0x0f;
1572 pbCodeBuf[off++] = 0xb7;
1573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1574
1575 /* shr Ev,8 */
1576 if (iGprDst >= 8)
1577 pbCodeBuf[off++] = X86_OP_REX_B;
1578 pbCodeBuf[off++] = 0xc1;
1579 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1580 pbCodeBuf[off++] = 8;
1581
1582#elif defined(RT_ARCH_ARM64)
1583 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1585 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1586
1587#else
1588# error "port me"
1589#endif
1590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1591 return off;
1592}
1593
1594
1595/**
1596 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1597 */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1600{
1601#ifdef RT_ARCH_AMD64
1602 /* movsxd r64, r/m32 */
1603 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1604 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1605 pbCodeBuf[off++] = 0x63;
1606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1607
1608#elif defined(RT_ARCH_ARM64)
1609 /* sxtw dst, src */
1610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1611 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1612
1613#else
1614# error "port me"
1615#endif
1616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1617 return off;
1618}
1619
1620
1621/**
1622 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1623 */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1626{
1627#ifdef RT_ARCH_AMD64
1628 /* movsx r64, r/m16 */
1629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xbf;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635#elif defined(RT_ARCH_ARM64)
1636 /* sxth dst, src */
1637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1638 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1639
1640#else
1641# error "port me"
1642#endif
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644 return off;
1645}
1646
1647
1648/**
1649 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1650 */
1651DECL_INLINE_THROW(uint32_t)
1652iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1653{
1654#ifdef RT_ARCH_AMD64
1655 /* movsx r32, r/m16 */
1656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1657 if (iGprDst >= 8 || iGprSrc >= 8)
1658 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1659 pbCodeBuf[off++] = 0x0f;
1660 pbCodeBuf[off++] = 0xbf;
1661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 /* sxth dst32, src */
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1666 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1667
1668#else
1669# error "port me"
1670#endif
1671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1672 return off;
1673}
1674
1675
1676/**
1677 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1678 */
1679DECL_INLINE_THROW(uint32_t)
1680iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /* movsx r64, r/m8 */
1684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1686 pbCodeBuf[off++] = 0x0f;
1687 pbCodeBuf[off++] = 0xbe;
1688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1689
1690#elif defined(RT_ARCH_ARM64)
1691 /* sxtb dst, src */
1692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1693 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1694
1695#else
1696# error "port me"
1697#endif
1698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1699 return off;
1700}
1701
1702
1703/**
1704 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1705 * @note Bits 63 thru 32 are cleared.
1706 */
1707DECL_INLINE_THROW(uint32_t)
1708iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1709{
1710#ifdef RT_ARCH_AMD64
1711 /* movsx r32, r/m8 */
1712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1713 if (iGprDst >= 8 || iGprSrc >= 8)
1714 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1715 else if (iGprSrc >= 4)
1716 pbCodeBuf[off++] = X86_OP_REX;
1717 pbCodeBuf[off++] = 0x0f;
1718 pbCodeBuf[off++] = 0xbe;
1719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 /* sxtb dst32, src32 */
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1724 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1725
1726#else
1727# error "port me"
1728#endif
1729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1730 return off;
1731}
1732
1733
1734/**
1735 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1736 * @note Bits 63 thru 16 are cleared.
1737 */
1738DECL_INLINE_THROW(uint32_t)
1739iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1740{
1741#ifdef RT_ARCH_AMD64
1742 /* movsx r16, r/m8 */
1743 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1744 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1745 if (iGprDst >= 8 || iGprSrc >= 8)
1746 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1747 else if (iGprSrc >= 4)
1748 pbCodeBuf[off++] = X86_OP_REX;
1749 pbCodeBuf[off++] = 0x0f;
1750 pbCodeBuf[off++] = 0xbe;
1751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1752
1753 /* movzx r32, r/m16 */
1754 if (iGprDst >= 8)
1755 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1756 pbCodeBuf[off++] = 0x0f;
1757 pbCodeBuf[off++] = 0xb7;
1758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1759
1760#elif defined(RT_ARCH_ARM64)
1761 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1763 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1764 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1766
1767#else
1768# error "port me"
1769#endif
1770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1771 return off;
1772}
1773
1774
1775/**
1776 * Emits a gprdst = gprsrc + addend load.
1777 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1778 */
1779#ifdef RT_ARCH_AMD64
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1782 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1783{
1784 Assert(iAddend != 0);
1785
1786 /* lea gprdst, [gprsrc + iAddend] */
1787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1789 pbCodeBuf[off++] = 0x8d;
1790 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1792 return off;
1793}
1794
1795#elif defined(RT_ARCH_ARM64)
1796DECL_INLINE_THROW(uint32_t)
1797iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1798 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1799{
1800 if ((uint32_t)iAddend < 4096)
1801 {
1802 /* add dst, src, uimm12 */
1803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1805 }
1806 else if ((uint32_t)-iAddend < 4096)
1807 {
1808 /* sub dst, src, uimm12 */
1809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1811 }
1812 else
1813 {
1814 Assert(iGprSrc != iGprDst);
1815 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1818 }
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820 return off;
1821}
1822#else
1823# error "port me"
1824#endif
1825
1826/**
1827 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1828 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1829 */
1830#ifdef RT_ARCH_AMD64
1831DECL_INLINE_THROW(uint32_t)
1832iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1833 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1834#else
1835DECL_INLINE_THROW(uint32_t)
1836iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1837 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1838#endif
1839{
1840 if (iAddend != 0)
1841 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1842 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1843}
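
/* Usage sketch (illustrative only; idxRegDst/idxRegSrc stand for host register
 * indices handed out by the register allocator and are not names from this
 * file):
 *      // dst = src + 16, degrading to a plain register copy if the addend
 *      // happens to be zero at recompile time:
 *      off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegDst, idxRegSrc, 16);
 * On AMD64 this becomes a single lea; on ARM64 it is an add with a uimm12, a
 * sub for small negative addends, or an immediate load followed by an add. */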
1844
1845
1846/**
1847 * Emits a gprdst = gprsrc32 + addend load.
1848 * @note Bits 63 thru 32 are cleared.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1852 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1853{
1854 Assert(iAddend != 0);
1855
1856#ifdef RT_ARCH_AMD64
1857 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1860 if ((iGprDst | iGprSrc) >= 8)
1861 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1862 pbCodeBuf[off++] = 0x8d;
1863 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1864
1865#elif defined(RT_ARCH_ARM64)
1866 if ((uint32_t)iAddend < 4096)
1867 {
1868 /* add dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1871 }
1872 else if ((uint32_t)-iAddend < 4096)
1873 {
1874 /* sub dst, src, uimm12 */
1875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1877 }
1878 else
1879 {
1880 Assert(iGprSrc != iGprDst);
1881 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1884 }
1885
1886#else
1887# error "port me"
1888#endif
1889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1890 return off;
1891}
1892
1893
1894/**
1895 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1896 */
1897DECL_INLINE_THROW(uint32_t)
1898iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1899 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1900{
1901 if (iAddend != 0)
1902 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1903 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1904}
1905
1906
1907/**
1908 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1909 * destination.
1910 */
1911DECL_FORCE_INLINE(uint32_t)
1912iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1913{
1914#ifdef RT_ARCH_AMD64
1915 /* mov reg16, r/m16 */
1916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1917 if (idxDst >= 8 || idxSrc >= 8)
1918 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1919 pCodeBuf[off++] = 0x8b;
1920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1921
1922#elif defined(RT_ARCH_ARM64)
1923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1924 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1925
1926#else
1927# error "Port me!"
1928#endif
1929 return off;
1930}
1931
1932
1933/**
1934 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1935 * destination.
1936 */
1937DECL_INLINE_THROW(uint32_t)
1938iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1939{
1940#ifdef RT_ARCH_AMD64
1941 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1942#elif defined(RT_ARCH_ARM64)
1943 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1944#else
1945# error "Port me!"
1946#endif
1947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1948 return off;
1949}
1950
1951
1952#ifdef RT_ARCH_AMD64
1953/**
1954 * Common bit of iemNativeEmitLoadGprByBp and friends.
1955 */
1956DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1957 PIEMRECOMPILERSTATE pReNativeAssert)
1958{
1959 if (offDisp < 128 && offDisp >= -128)
1960 {
1961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1962 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1963 }
1964 else
1965 {
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1967 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1968 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1969 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1970 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1971 }
1972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1973 return off;
1974}
1975#elif defined(RT_ARCH_ARM64)
1976/**
1977 * Common bit of iemNativeEmitLoadGprByBp and friends.
1978 */
1979DECL_FORCE_INLINE_THROW(uint32_t)
1980iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1981 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1982{
1983 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1984 {
1985 /* str w/ unsigned imm12 (scaled) */
1986 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1988 }
1989 else if (offDisp >= -256 && offDisp <= 256)
1990 {
1991 /* stur w/ signed imm9 (unscaled) */
1992 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1993 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1994 }
1995 else
1996 {
1997 /* Use temporary indexing register. */
1998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2001 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2002 }
2003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2004 return off;
2005}
2006#endif
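
/* Informal illustration of what iemNativeEmitGprByBpDisp appends on AMD64
 * (the caller has already emitted any REX prefix and the opcode); with
 * iGprReg == 0:
 *      offDisp = -8    ->  modrm 0x45, disp8  0xf8               ; [rbp - 8]
 *      offDisp = -512  ->  modrm 0x85, disp32 00 fe ff ff        ; [rbp - 512]
 * i.e. displacements in [-128, 127] use the short X86_MOD_MEM1 form and
 * everything else the four byte X86_MOD_MEM4 form. */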
2007
2008
2009/**
2010 * Emits a 64-bit GPR load instruction with a BP relative source address.
2011 */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2014{
2015#ifdef RT_ARCH_AMD64
2016 /* mov gprdst, qword [rbp + offDisp] */
2017 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2018 if (iGprDst < 8)
2019 pbCodeBuf[off++] = X86_OP_REX_W;
2020 else
2021 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2022 pbCodeBuf[off++] = 0x8b;
2023 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2024
2025#elif defined(RT_ARCH_ARM64)
2026 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2027
2028#else
2029# error "port me"
2030#endif
2031}
2032
2033
2034/**
2035 * Emits a 32-bit GPR load instruction with a BP relative source address.
2036 * @note Bits 63 thru 32 of the GPR will be cleared.
2037 */
2038DECL_INLINE_THROW(uint32_t)
2039iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2040{
2041#ifdef RT_ARCH_AMD64
2042 /* mov gprdst, dword [rbp + offDisp] */
2043 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2044 if (iGprDst >= 8)
2045 pbCodeBuf[off++] = X86_OP_REX_R;
2046 pbCodeBuf[off++] = 0x8b;
2047 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2048
2049#elif defined(RT_ARCH_ARM64)
2050 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2051
2052#else
2053# error "port me"
2054#endif
2055}
2056
2057
2058/**
2059 * Emits a 16-bit GPR load instruction with a BP relative source address.
2060 * @note Bits 63 thru 16 of the GPR will be cleared.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2064{
2065#ifdef RT_ARCH_AMD64
2066 /* movzx gprdst, word [rbp + offDisp] */
2067 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2068 if (iGprDst >= 8)
2069 pbCodeBuf[off++] = X86_OP_REX_R;
2070 pbCodeBuf[off++] = 0x0f;
2071 pbCodeBuf[off++] = 0xb7;
2072 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2073
2074#elif defined(RT_ARCH_ARM64)
2075 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2076
2077#else
2078# error "port me"
2079#endif
2080}
2081
2082
2083/**
2084 * Emits an 8-bit GPR load instruction with a BP relative source address.
2085 * @note Bits 63 thru 8 of the GPR will be cleared.
2086 */
2087DECL_INLINE_THROW(uint32_t)
2088iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2089{
2090#ifdef RT_ARCH_AMD64
2091 /* movzx gprdst, byte [rbp + offDisp] */
2092 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2093 if (iGprDst >= 8)
2094 pbCodeBuf[off++] = X86_OP_REX_R;
2095 pbCodeBuf[off++] = 0x0f;
2096 pbCodeBuf[off++] = 0xb6;
2097 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2098
2099#elif defined(RT_ARCH_ARM64)
2100 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2101
2102#else
2103# error "port me"
2104#endif
2105}
2106
2107
2108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2109/**
2110 * Emits a 128-bit vector register load instruction with a BP relative source address.
2111 */
2112DECL_FORCE_INLINE_THROW(uint32_t)
2113iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2114{
2115#ifdef RT_ARCH_AMD64
2116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2117
2118 /* movdqu reg128, mem128 */
2119 pbCodeBuf[off++] = 0xf3;
2120 if (iVecRegDst >= 8)
2121 pbCodeBuf[off++] = X86_OP_REX_R;
2122 pbCodeBuf[off++] = 0x0f;
2123 pbCodeBuf[off++] = 0x6f;
2124 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2125#elif defined(RT_ARCH_ARM64)
2126 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2127#else
2128# error "port me"
2129#endif
2130}
2131
2132
2133/**
2134 * Emits a 256-bit vector register load instruction with a BP relative source address.
2135 */
2136DECL_FORCE_INLINE_THROW(uint32_t)
2137iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2138{
2139#ifdef RT_ARCH_AMD64
2140 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2141
2142 /* vmovdqu reg256, mem256 */
2143 pbCodeBuf[off++] = X86_OP_VEX2;
2144 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2145 pbCodeBuf[off++] = 0x6f;
2146 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2147#elif defined(RT_ARCH_ARM64)
2148 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2149 Assert(!(iVecRegDst & 0x1));
2150 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2151 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2152#else
2153# error "port me"
2154#endif
2155}
2156
2157#endif
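
/* Note (informal): on ARM64 a 256-bit value is modelled as a pair of
 * consecutive 128-bit registers, which is why the U256 loader above asserts an
 * even iVecRegDst and simply issues two 128-bit loads at offDisp and
 * offDisp + sizeof(RTUINT128U), while the AMD64 build does the whole job with
 * a single vmovdqu ymm instruction. */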
2158
2159
2160/**
2161 * Emits a load effective address to a GPR with a BP relative source address.
2162 */
2163DECL_INLINE_THROW(uint32_t)
2164iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2165{
2166#ifdef RT_ARCH_AMD64
2167 /* lea gprdst, [rbp + offDisp] */
2168 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2169 if (iGprDst < 8)
2170 pbCodeBuf[off++] = X86_OP_REX_W;
2171 else
2172 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2173 pbCodeBuf[off++] = 0x8d;
2174 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2175
2176#elif defined(RT_ARCH_ARM64)
2177 if ((uint32_t)offDisp < (unsigned)_4K)
2178 {
2179 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2181 }
2182 else if ((uint32_t)-offDisp < (unsigned)_4K)
2183 {
2184 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2185 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2186 }
2187 else
2188 {
2189 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2190 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2191 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2192 if (offDisp >= 0)
2193 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2194 else
2195 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2196 }
2197
2198#else
2199# error "port me"
2200#endif
2201
2202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2203 return off;
2204}
2205
2206
2207/**
2208 * Emits a 64-bit GPR store with a BP relative destination address.
2209 *
2210 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2211 */
2212DECL_INLINE_THROW(uint32_t)
2213iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2214{
2215#ifdef RT_ARCH_AMD64
2216 /* mov qword [rbp + offDisp], gprsrc */
2217 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2218 if (iGprSrc < 8)
2219 pbCodeBuf[off++] = X86_OP_REX_W;
2220 else
2221 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2222 pbCodeBuf[off++] = 0x89;
2223 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2224
2225#elif defined(RT_ARCH_ARM64)
2226 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2227 {
2228 /* str w/ unsigned imm12 (scaled) */
2229 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2230 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2231 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2232 }
2233 else if (offDisp >= -256 && offDisp <= 256)
2234 {
2235 /* stur w/ signed imm9 (unscaled) */
2236 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2237 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2238 }
2239 else if ((uint32_t)-offDisp < (unsigned)_4K)
2240 {
2241 /* Use temporary indexing register w/ sub uimm12. */
2242 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2243 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2244 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2245 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2246 }
2247 else
2248 {
2249 /* Use temporary indexing register. */
2250 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2251 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2252 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2253 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2254 }
2255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2256 return off;
2257
2258#else
2259# error "Port me!"
2260#endif
2261}
2262
2263
2264/**
2265 * Emits a 64-bit immediate store with a BP relative destination address.
2266 *
2267 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2268 */
2269DECL_INLINE_THROW(uint32_t)
2270iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2271{
2272#ifdef RT_ARCH_AMD64
2273 if ((int64_t)uImm64 == (int32_t)uImm64)
2274 {
2275 /* mov qword [rbp + offDisp], imm32 - sign extended */
2276 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2277 pbCodeBuf[off++] = X86_OP_REX_W;
2278 pbCodeBuf[off++] = 0xc7;
2279 if (offDisp < 128 && offDisp >= -128)
2280 {
2281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2282 pbCodeBuf[off++] = (uint8_t)offDisp;
2283 }
2284 else
2285 {
2286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2287 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2288 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2289 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2290 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2291 }
2292 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2293 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2294 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2295 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2297 return off;
2298 }
2299#endif
2300
2301 /* Load tmp0, imm64; Store tmp to bp+disp. */
2302 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2303 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2304}
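
/* Illustrative values for the sign-extension shortcut above (informal):
 *      uImm64 = 0x000000007fffffff  -> representable as imm32, single
 *                                      'mov qword [rbp+disp], imm32' store
 *      uImm64 = 0xffffffff80000000  -> also representable (sign-extended)
 *      uImm64 = 0x0000000080000000  -> not representable, falls back to
 *                                      IEMNATIVE_REG_FIXED_TMP0 + store
 * The ARM64 build always takes the temporary register path. */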
2305
2306
2307#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2308/**
2309 * Emits a 128-bit vector register store with a BP relative destination address.
2310 *
2311 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2312 */
2313DECL_INLINE_THROW(uint32_t)
2314iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2315{
2316#ifdef RT_ARCH_AMD64
2317 /* movdqu [rbp + offDisp], vecsrc */
2318 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2319 pbCodeBuf[off++] = 0xf3;
2320 if (iVecRegSrc >= 8)
2321 pbCodeBuf[off++] = X86_OP_REX_R;
2322 pbCodeBuf[off++] = 0x0f;
2323 pbCodeBuf[off++] = 0x7f;
2324 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2325
2326#elif defined(RT_ARCH_ARM64)
2327 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2328 {
2329 /* str w/ unsigned imm12 (scaled) */
2330 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2331 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2332 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2333 }
2334 else if (offDisp >= -256 && offDisp <= 256)
2335 {
2336 /* stur w/ signed imm9 (unscaled) */
2337 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2338 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2339 }
2340 else if ((uint32_t)-offDisp < (unsigned)_4K)
2341 {
2342 /* Use temporary indexing register w/ sub uimm12. */
2343 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2344 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2345 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2346 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2347 }
2348 else
2349 {
2350 /* Use temporary indexing register. */
2351 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2352 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2353 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2354 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2355 }
2356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2357 return off;
2358
2359#else
2360# error "Port me!"
2361#endif
2362}
2363
2364
2365/**
2366 * Emits a 256-bit vector register store with a BP relative destination address.
2367 *
2368 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2369 */
2370DECL_INLINE_THROW(uint32_t)
2371iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2372{
2373#ifdef RT_ARCH_AMD64
2374 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2375
2376 /* vmovdqu mem256, reg256 */
2377 pbCodeBuf[off++] = X86_OP_VEX2;
2378 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2379 pbCodeBuf[off++] = 0x7f;
2380 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2381#elif defined(RT_ARCH_ARM64)
2382 Assert(!(iVecRegSrc & 0x1));
2383 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2384 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2385#else
2386# error "Port me!"
2387#endif
2388}
2389#endif
2390
2391#if defined(RT_ARCH_ARM64)
2392
2393/**
2394 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2395 *
2396 * @note Odd and large @a offDisp values requires a temporary, unless it's a
2397 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2398 * caller does not heed this.
2399 *
2400 * @note DON'T try this with prefetch.
2401 */
2402DECL_FORCE_INLINE_THROW(uint32_t)
2403iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2404 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2405{
2406 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2407 {
2408 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2409 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2410 }
2411 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2412 && iGprReg != iGprBase)
2413 || iGprTmp != UINT8_MAX)
2414 {
2415 /* The offset is too large, so we must load it into a register and use
2416 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2417 /** @todo reduce the offset by >> 3 or >> 2 if it saves instructions? */
2418 if (iGprTmp == UINT8_MAX)
2419 iGprTmp = iGprReg;
2420 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2421 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2422 }
2423 else
2424# ifdef IEM_WITH_THROW_CATCH
2425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2426# else
2427 AssertReleaseFailedStmt(off = UINT32_MAX);
2428# endif
2429 return off;
2430}
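
/* Informal illustration of the scaled imm12 range check above: for a 64-bit
 * access (cbData == 8) the cheap encoding covers offsets 0..0x7ff8 in steps of
 * 8, so e.g. offDisp == 0x40 is encoded directly as imm12 == 8, while
 * offDisp == 0x44 (misaligned) or offDisp == 0x8000 (out of range) forces the
 * register-index fallback and therefore needs iGprTmp (or, for loads, a
 * destination register that differs from the base register). */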
2431
2432/**
2433 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2434 */
2435DECL_FORCE_INLINE_THROW(uint32_t)
2436iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2437 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2438{
2439 /*
2440 * There are a couple of ldr variants that take an immediate offset, so
2441 * try to use those if we can; otherwise we have to use a temporary register
2442 * to help with the addressing.
2443 */
2444 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2445 {
2446 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2447 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2448 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2449 }
2450 else
2451 {
2452 /* The offset is too large, so we must load it into a register and use
2453 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2454 /** @todo reduce the offset by >> 3 or >> 2 if it saves instructions? */
2455 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2456
2457 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2458 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2459
2460 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2461 }
2462 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2463 return off;
2464}
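
/* Design note (informal): the Ex flavour above leaves temporary register
 * management to the caller via iGprTmp, whereas this variant allocates one
 * itself with iemNativeRegAllocTmpImm()/iemNativeRegFreeTmpImm() when the
 * offset does not fit the scaled unsigned imm12 form; the Ex flavour is
 * therefore typically the one used when the instruction buffer has already
 * been ensured by the caller. */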
2465
2466# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2467/**
2468 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2469 *
2470 * @note Odd and large @a offDisp values requires a temporary, unless it's a
2471 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2472 * caller does not heed this.
2473 *
2474 * @note DON'T try this with prefetch.
2475 */
2476DECL_FORCE_INLINE_THROW(uint32_t)
2477iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2478 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2479{
2480 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2481 {
2482 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2483 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2484 }
2485 else if ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2486 || iGprTmp != UINT8_MAX)
2487 {
2488 /* The offset is too large, so we must load it into a register and use
2489 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2490 /** @todo reduce the offset by >> 3 or >> 2 if it saves instructions? */
2491 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2492 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2493 }
2494 else
2495# ifdef IEM_WITH_THROW_CATCH
2496 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2497# else
2498 AssertReleaseFailedStmt(off = UINT32_MAX);
2499# endif
2500 return off;
2501}
2502# endif
2503
2504
2505/**
2506 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2507 */
2508DECL_FORCE_INLINE_THROW(uint32_t)
2509iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2510 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2511{
2512 /*
2513 * There are a couple of ldr variants that take an immediate offset, so
2514 * try to use those if we can; otherwise we have to use a temporary register
2515 * to help with the addressing.
2516 */
2517 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2518 {
2519 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2520 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2521 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2522 }
2523 else
2524 {
2525 /* The offset is too large, so we must load it into a register and use
2526 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2527 /** @todo reduce the offset by >> 3 or >> 2 if it saves instructions? */
2528 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2529
2530 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2531 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2532
2533 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2534 }
2535 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2536 return off;
2537}
2538#endif /* RT_ARCH_ARM64 */
2539
2540/**
2541 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2542 *
2543 * @note ARM64: Misaligned @a offDisp values and values not in the
2544 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2545 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2546 * does not heed this.
2547 */
2548DECL_FORCE_INLINE_THROW(uint32_t)
2549iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2550 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2551{
2552#ifdef RT_ARCH_AMD64
2553 /* mov reg64, mem64 */
2554 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2555 pCodeBuf[off++] = 0x8b;
2556 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2557 RT_NOREF(iGprTmp);
2558
2559#elif defined(RT_ARCH_ARM64)
2560 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2561 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2562
2563#else
2564# error "port me"
2565#endif
2566 return off;
2567}
2568
2569
2570/**
2571 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2572 */
2573DECL_INLINE_THROW(uint32_t)
2574iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2575{
2576#ifdef RT_ARCH_AMD64
2577 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2578 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2579
2580#elif defined(RT_ARCH_ARM64)
2581 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2582
2583#else
2584# error "port me"
2585#endif
2586 return off;
2587}
2588
2589
2590/**
2591 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2592 *
2593 * @note ARM64: Misaligned @a offDisp values and values not in the
2594 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2595 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2596 * caller does not heed this.
2597 *
2598 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2599 */
2600DECL_FORCE_INLINE_THROW(uint32_t)
2601iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2602 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2603{
2604#ifdef RT_ARCH_AMD64
2605 /* mov reg32, mem32 */
2606 if (iGprDst >= 8 || iGprBase >= 8)
2607 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2608 pCodeBuf[off++] = 0x8b;
2609 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2610 RT_NOREF(iGprTmp);
2611
2612#elif defined(RT_ARCH_ARM64)
2613 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2614 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2615
2616#else
2617# error "port me"
2618#endif
2619 return off;
2620}
2621
2622
2623/**
2624 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2625 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2626 */
2627DECL_INLINE_THROW(uint32_t)
2628iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2629{
2630#ifdef RT_ARCH_AMD64
2631 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2633
2634#elif defined(RT_ARCH_ARM64)
2635 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2636
2637#else
2638# error "port me"
2639#endif
2640 return off;
2641}
2642
2643
2644/**
2645 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2646 * sign-extending the value to 64 bits.
2647 *
2648 * @note ARM64: Misaligned @a offDisp values and values not in the
2649 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2650 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2651 * caller does not heed this.
2652 */
2653DECL_FORCE_INLINE_THROW(uint32_t)
2654iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2655 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2656{
2657#ifdef RT_ARCH_AMD64
2658 /* movsxd reg64, mem32 */
2659 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2660 pCodeBuf[off++] = 0x63;
2661 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2662 RT_NOREF(iGprTmp);
2663
2664#elif defined(RT_ARCH_ARM64)
2665 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2666 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2667
2668#else
2669# error "port me"
2670#endif
2671 return off;
2672}
2673
2674
2675/**
2676 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2677 *
2678 * @note ARM64: Misaligned @a offDisp values and values not in the
2679 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2680 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2681 * caller does not heed this.
2682 *
2683 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2684 */
2685DECL_FORCE_INLINE_THROW(uint32_t)
2686iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2687 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2688{
2689#ifdef RT_ARCH_AMD64
2690 /* movzx reg32, mem16 */
2691 if (iGprDst >= 8 || iGprBase >= 8)
2692 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2693 pCodeBuf[off++] = 0x0f;
2694 pCodeBuf[off++] = 0xb7;
2695 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2696 RT_NOREF(iGprTmp);
2697
2698#elif defined(RT_ARCH_ARM64)
2699 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2700 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2701
2702#else
2703# error "port me"
2704#endif
2705 return off;
2706}
2707
2708
2709/**
2710 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2711 * sign-extending the value to 64 bits.
2712 *
2713 * @note ARM64: Misaligned @a offDisp values and values not in the
2714 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2715 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2716 * caller does not heed this.
2717 */
2718DECL_FORCE_INLINE_THROW(uint32_t)
2719iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2720 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2721{
2722#ifdef RT_ARCH_AMD64
2723 /* movsx reg64, mem16 */
2724 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2725 pCodeBuf[off++] = 0x0f;
2726 pCodeBuf[off++] = 0xbf;
2727 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2728 RT_NOREF(iGprTmp);
2729
2730#elif defined(RT_ARCH_ARM64)
2731 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2732 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2733
2734#else
2735# error "port me"
2736#endif
2737 return off;
2738}
2739
2740
2741/**
2742 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2743 * sign-extending the value to 32 bits.
2744 *
2745 * @note ARM64: Misaligned @a offDisp values and values not in the
2746 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2747 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2748 * caller does not heed this.
2749 *
2750 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2751 */
2752DECL_FORCE_INLINE_THROW(uint32_t)
2753iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2754 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2755{
2756#ifdef RT_ARCH_AMD64
2757 /* movsx reg32, mem16 */
2758 if (iGprDst >= 8 || iGprBase >= 8)
2759 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2760 pCodeBuf[off++] = 0x0f;
2761 pCodeBuf[off++] = 0xbf;
2762 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2763 RT_NOREF(iGprTmp);
2764
2765#elif defined(RT_ARCH_ARM64)
2766 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2767 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2768
2769#else
2770# error "port me"
2771#endif
2772 return off;
2773}
2774
2775
2776/**
2777 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2778 *
2779 * @note ARM64: @a offDisp values not in the 0xfff...0xfff range will require a
2780 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2781 * same. Will assert / throw if caller does not heed this.
2782 *
2783 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2784 */
2785DECL_FORCE_INLINE_THROW(uint32_t)
2786iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2787 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2788{
2789#ifdef RT_ARCH_AMD64
2790 /* movzx reg32, mem8 */
2791 if (iGprDst >= 8 || iGprBase >= 8)
2792 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2793 pCodeBuf[off++] = 0x0f;
2794 pCodeBuf[off++] = 0xb6;
2795 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2796 RT_NOREF(iGprTmp);
2797
2798#elif defined(RT_ARCH_ARM64)
2799 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2800 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2801
2802#else
2803# error "port me"
2804#endif
2805 return off;
2806}
2807
2808
2809/**
2810 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2811 * sign-extending the value to 64 bits.
2812 *
2813 * @note ARM64: @a offDisp values not in the 0xfff...0xfff range will require a
2814 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2815 * same. Will assert / throw if caller does not heed this.
2816 */
2817DECL_FORCE_INLINE_THROW(uint32_t)
2818iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2819 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2820{
2821#ifdef RT_ARCH_AMD64
2822 /* movsx reg64, mem8 */
2823 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2824 pCodeBuf[off++] = 0x0f;
2825 pCodeBuf[off++] = 0xbe;
2826 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2827 RT_NOREF(iGprTmp);
2828
2829#elif defined(RT_ARCH_ARM64)
2830 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2831 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2832
2833#else
2834# error "port me"
2835#endif
2836 return off;
2837}
2838
2839
2840/**
2841 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2842 * sign-extending the value to 32 bits.
2843 *
2844 * @note ARM64: @a offDisp values not in the 0xfff...0xfff range will require a
2845 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2846 * same. Will assert / throw if caller does not heed this.
2847 *
2848 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2849 */
2850DECL_FORCE_INLINE_THROW(uint32_t)
2851iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2852 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2853{
2854#ifdef RT_ARCH_AMD64
2855 /* movsx reg32, mem8 */
2856 if (iGprDst >= 8 || iGprBase >= 8)
2857 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2858 pCodeBuf[off++] = 0x0f;
2859 pCodeBuf[off++] = 0xbe;
2860 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2861 RT_NOREF(iGprTmp);
2862
2863#elif defined(RT_ARCH_ARM64)
2864 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2865 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2866
2867#else
2868# error "port me"
2869#endif
2870 return off;
2871}
2872
2873
2874/**
2875 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2876 * sign-extending the value to 16 bits.
2877 *
2878 * @note ARM64: @a offDisp values not in the 0xfff...0xfff range will require a
2879 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2880 * same. Will assert / throw if caller does not heed this.
2881 *
2882 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2883 */
2884DECL_FORCE_INLINE_THROW(uint32_t)
2885iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2886 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2887{
2888#ifdef RT_ARCH_AMD64
2889 /* movsx reg32, mem8 */
2890 if (iGprDst >= 8 || iGprBase >= 8)
2891 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2892 pCodeBuf[off++] = 0x0f;
2893 pCodeBuf[off++] = 0xbe;
2894 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2895# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2896 /* and reg32, 0xffffh */
2897 if (iGprDst >= 8)
2898 pCodeBuf[off++] = X86_OP_REX_B;
2899 pCodeBuf[off++] = 0x81;
2900 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2901 pCodeBuf[off++] = 0xff;
2902 pCodeBuf[off++] = 0xff;
2903 pCodeBuf[off++] = 0;
2904 pCodeBuf[off++] = 0;
2905# else
2906 /* movzx reg32, reg16 */
2907 if (iGprDst >= 8)
2908 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2909 pCodeBuf[off++] = 0x0f;
2910 pCodeBuf[off++] = 0xb7;
2911 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2912# endif
2913 RT_NOREF(iGprTmp);
2914
2915#elif defined(RT_ARCH_ARM64)
2916 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2917 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2918 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2919 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
2920
2921#else
2922# error "port me"
2923#endif
2924 return off;
2925}
2926
2927
2928#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2929/**
2930 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2931 *
2932 * @note ARM64: Misaligned @a offDisp values and values not in the
2933 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2934 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2935 * does not heed this.
2936 */
2937DECL_FORCE_INLINE_THROW(uint32_t)
2938iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2939 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2940{
2941#ifdef RT_ARCH_AMD64
2942 /* movdqu reg128, mem128 */
2943 pCodeBuf[off++] = 0xf3;
2944 if (iVecRegDst >= 8 || iGprBase >= 8)
2945 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2946 pCodeBuf[off++] = 0x0f;
2947 pCodeBuf[off++] = 0x6f;
2948 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
2949 RT_NOREF(iGprTmp);
2950
2951#elif defined(RT_ARCH_ARM64)
2952 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
2953 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
2954
2955#else
2956# error "port me"
2957#endif
2958 return off;
2959}
2960
2961
2962/**
2963 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2964 */
2965DECL_INLINE_THROW(uint32_t)
2966iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
2967{
2968#ifdef RT_ARCH_AMD64
2969 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
2970 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2971
2972#elif defined(RT_ARCH_ARM64)
2973 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2974
2975#else
2976# error "port me"
2977#endif
2978 return off;
2979}
2980
2981
2982/**
2983 * Emits a 256-bit vector register load via a GPR base address with a displacement.
2984 *
2985 * @note ARM64: Misaligned @a offDisp values and values not in the
2986 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2987 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2988 * does not heed this.
2989 */
2990DECL_FORCE_INLINE_THROW(uint32_t)
2991iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2992 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2993{
2994#ifdef RT_ARCH_AMD64
2995 /* vmovdqu reg256, mem256 */
2996 pCodeBuf[off++] = X86_OP_VEX3;
2997 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
2998 | X86_OP_VEX3_BYTE1_X
2999 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3000 | UINT8_C(0x01);
3001 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3002 pCodeBuf[off++] = 0x6f;
3003 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3004 RT_NOREF(iGprTmp);
3005
3006#elif defined(RT_ARCH_ARM64)
3007 Assert(!(iVecRegDst & 0x1));
3008 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3009 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3010 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3011 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3012#else
3013# error "port me"
3014#endif
3015 return off;
3016}
3017
3018
3019/**
3020 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3021 */
3022DECL_INLINE_THROW(uint32_t)
3023iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3024{
3025#ifdef RT_ARCH_AMD64
3026 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3028
3029#elif defined(RT_ARCH_ARM64)
3030 Assert(!(iVecRegDst & 0x1));
3031 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3032 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3033 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3034 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3035
3036#else
3037# error "port me"
3038#endif
3039 return off;
3040}
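/*
 * Example (editorial sketch, not part of the original sources): loading a
 * 256-bit guest value.  On ARM64 the value is split over two consecutive
 * 128-bit host vector registers, so the destination index must be even (see
 * the Assert above).  idxVecRegYmm and idxRegBase are assumed to be host
 * registers the caller has already allocated.
 *
 *    Assert(!(idxVecRegYmm & 1));
 *    off = iemNativeEmitLoadVecRegByGprU256(pReNative, off, idxVecRegYmm,
 *                                           idxRegBase, 0);
 */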
3041#endif
3042
3043
3044/**
3045 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3046 *
3047 * @note ARM64: Misaligned @a offDisp values and values not in the
3048 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3049 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3050 * does not heed this.
3051 */
3052DECL_FORCE_INLINE_THROW(uint32_t)
3053iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3054 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3055{
3056#ifdef RT_ARCH_AMD64
3057 /* mov mem64, reg64 */
3058 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3059 pCodeBuf[off++] = 0x89;
3060 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3061 RT_NOREF(iGprTmp);
3062
3063#elif defined(RT_ARCH_ARM64)
3064 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3065 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3066
3067#else
3068# error "port me"
3069#endif
3070 return off;
3071}
3072
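/*
 * Example (editorial sketch, not from the original sources): a typical call
 * site for the Ex variant above.  The instruction count passed to
 * iemNativeInstrBufEnsure (12) is just a generous guess for this sketch, and
 * idxRegValue, idxRegBase and idxRegTmp are assumed caller-allocated host
 * registers; idxRegTmp is only needed when offDisp is large or misaligned on
 * ARM64.
 *
 *    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
 *    off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegBase,
 *                                         offDisp, idxRegTmp);
 *    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 */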
3073
3074/**
3075 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3076 *
3077 * @note ARM64: Misaligned @a offDisp values and values not in the
3078 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3079 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3080 * does not heed this.
3081 */
3082DECL_FORCE_INLINE_THROW(uint32_t)
3083iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3084 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3085{
3086#ifdef RT_ARCH_AMD64
3087 /* mov mem32, reg32 */
3088 if (iGprSrc >= 8 || iGprBase >= 8)
3089 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3090 pCodeBuf[off++] = 0x89;
3091 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3092 RT_NOREF(iGprTmp);
3093
3094#elif defined(RT_ARCH_ARM64)
3095 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3096 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3097
3098#else
3099# error "port me"
3100#endif
3101 return off;
3102}
3103
3104
3105/**
3106 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3107 *
3108 * @note ARM64: Misaligned @a offDisp values and values not in the
3109 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3110 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3111 * does not heed this.
3112 */
3113DECL_FORCE_INLINE_THROW(uint32_t)
3114iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3115 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3116{
3117#ifdef RT_ARCH_AMD64
3118 /* mov mem16, reg16 */
3119 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3120 if (iGprSrc >= 8 || iGprBase >= 8)
3121 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3122 pCodeBuf[off++] = 0x89;
3123 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3124 RT_NOREF(iGprTmp);
3125
3126#elif defined(RT_ARCH_ARM64)
3127 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3128 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3129
3130#else
3131# error "port me"
3132#endif
3133 return off;
3134}
3135
3136
3137/**
3138 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3139 *
3140 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3141 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3142 * same. Will assert / throw if caller does not heed this.
3143 */
3144DECL_FORCE_INLINE_THROW(uint32_t)
3145iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3146 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3147{
3148#ifdef RT_ARCH_AMD64
3149 /* mov mem8, reg8 */
3150 if (iGprSrc >= 8 || iGprBase >= 8)
3151 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3152 else if (iGprSrc >= 4)
3153 pCodeBuf[off++] = X86_OP_REX;
3154 pCodeBuf[off++] = 0x88;
3155 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3156 RT_NOREF(iGprTmp);
3157
3158#elif defined(RT_ARCH_ARM64)
3159 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3160 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3161
3162#else
3163# error "port me"
3164#endif
3165 return off;
3166}
3167
3168
3169/**
3170 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3171 *
3172 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0); on
3173 * AMD64 it depends on the immediate value.
3174 *
3175 * @note ARM64: Misaligned @a offDisp values and values not in the
3176 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3177 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3178 * does not heed this.
3179 */
3180DECL_FORCE_INLINE_THROW(uint32_t)
3181iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3182 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3183{
3184#ifdef RT_ARCH_AMD64
3185 if ((int32_t)uImm == (int64_t)uImm)
3186 {
3187 /* mov mem64, imm32 (sign-extended) */
3188 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3189 pCodeBuf[off++] = 0xc7;
3190 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3191 pCodeBuf[off++] = RT_BYTE1(uImm);
3192 pCodeBuf[off++] = RT_BYTE2(uImm);
3193 pCodeBuf[off++] = RT_BYTE3(uImm);
3194 pCodeBuf[off++] = RT_BYTE4(uImm);
3195 }
3196 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3197 {
3198 /* require temporary register. */
3199 if (iGprImmTmp == UINT8_MAX)
3200 iGprImmTmp = iGprTmp;
3201 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3202 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3203 }
3204 else
3205# ifdef IEM_WITH_THROW_CATCH
3206 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3207# else
3208 AssertReleaseFailedStmt(off = UINT32_MAX);
3209# endif
3210
3211#elif defined(RT_ARCH_ARM64)
3212 if (uImm == 0)
3213 iGprImmTmp = ARMV8_A64_REG_XZR;
3214 else
3215 {
3216 Assert(iGprImmTmp < 31);
3217 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3218 }
3219 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3220
3221#else
3222# error "port me"
3223#endif
3224 return off;
3225}
3226
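/*
 * Example (editorial sketch): storing a 64-bit constant through a base
 * register.  A zero immediate can be stored without scratch registers (ARM64
 * uses XZR), whereas any other value needs iGprImmTmp; idxRegBase and
 * idxRegImmTmp are assumed caller-allocated registers for this sketch.
 *
 *    off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, 0, idxRegBase);
 *    off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x123456789a),
 *                                         idxRegBase, idxRegImmTmp);
 */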
3227
3228/**
3229 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3230 *
3231 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3232 *
3233 * @note ARM64: Misaligned @a offDisp values and values not in the
3234 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3235 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3236 * does not heed this.
3237 */
3238DECL_FORCE_INLINE_THROW(uint32_t)
3239iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3240 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3241{
3242#ifdef RT_ARCH_AMD64
3243 /* mov mem32, imm32 */
3244 if (iGprBase >= 8)
3245 pCodeBuf[off++] = X86_OP_REX_B;
3246 pCodeBuf[off++] = 0xc7;
3247 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3248 pCodeBuf[off++] = RT_BYTE1(uImm);
3249 pCodeBuf[off++] = RT_BYTE2(uImm);
3250 pCodeBuf[off++] = RT_BYTE3(uImm);
3251 pCodeBuf[off++] = RT_BYTE4(uImm);
3252 RT_NOREF(iGprImmTmp, iGprTmp);
3253
3254#elif defined(RT_ARCH_ARM64)
3256 if (uImm == 0)
3257 iGprImmTmp = ARMV8_A64_REG_XZR;
3258 else
3259 {
3260 Assert(iGprImmTmp < 31);
3261 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3262 }
3263 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3264 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3265
3266#else
3267# error "port me"
3268#endif
3269 return off;
3270}
3271
3272
3273/**
3274 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3275 *
3276 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3277 *
3278 * @note ARM64: Misaligned @a offDisp values and values not in the
3279 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3280 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3281 * does not heed this.
3282 */
3283DECL_FORCE_INLINE_THROW(uint32_t)
3284iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3285 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3286{
3287#ifdef RT_ARCH_AMD64
3288 /* mov mem16, imm16 */
3289 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3290 if (iGprBase >= 8)
3291 pCodeBuf[off++] = X86_OP_REX_B;
3292 pCodeBuf[off++] = 0xc7;
3293 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3294 pCodeBuf[off++] = RT_BYTE1(uImm);
3295 pCodeBuf[off++] = RT_BYTE2(uImm);
3296 RT_NOREF(iGprImmTmp, iGprTmp);
3297
3298#elif defined(RT_ARCH_ARM64)
3299 if (uImm == 0)
3300 iGprImmTmp = ARMV8_A64_REG_XZR;
3301 else
3302 {
3303 Assert(iGprImmTmp < 31);
3304 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3305 }
3306 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3307 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3308
3309#else
3310# error "port me"
3311#endif
3312 return off;
3313}
3314
3315
3316/**
3317 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3318 *
3319 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3320 *
3321 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3322 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3323 * same. Will assert / throw if caller does not heed this.
3324 */
3325DECL_FORCE_INLINE_THROW(uint32_t)
3326iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3327 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3328{
3329#ifdef RT_ARCH_AMD64
3330 /* mov mem8, imm8 */
3332 if (iGprBase >= 8)
3333 pCodeBuf[off++] = X86_OP_REX_B;
3334 pCodeBuf[off++] = 0xc6;
3335 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3336 pCodeBuf[off++] = uImm;
3337 RT_NOREF(iGprImmTmp, iGprTmp);
3338
3339#elif defined(RT_ARCH_ARM64)
3340 if (uImm == 0)
3341 iGprImmTmp = ARMV8_A64_REG_XZR;
3342 else
3343 {
3344 Assert(iGprImmTmp < 31);
3345 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3346 }
3347 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3348 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3349
3350#else
3351# error "port me"
3352#endif
3353 return off;
3354}
3355
3356
3357#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3358/**
3359 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3360 *
3361 * @note ARM64: Misaligned @a offDisp values and values not in the
3362 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3363 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3364 * does not heed this.
3365 */
3366DECL_FORCE_INLINE_THROW(uint32_t)
3367iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3368 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3369{
3370#ifdef RT_ARCH_AMD64
3371 /* movdqu mem128, reg128 */
3372 pCodeBuf[off++] = 0xf3;
3373 if (iVecRegDst >= 8 || iGprBase >= 8)
3374 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3375 pCodeBuf[off++] = 0x0f;
3376 pCodeBuf[off++] = 0x7f;
3377 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3378 RT_NOREF(iGprTmp);
3379
3380#elif defined(RT_ARCH_ARM64)
3381 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3382 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3383
3384#else
3385# error "port me"
3386#endif
3387 return off;
3388}
3389
3390
3391/**
3392 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3393 */
3394DECL_INLINE_THROW(uint32_t)
3395iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3396{
3397#ifdef RT_ARCH_AMD64
3398 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3400
3401#elif defined(RT_ARCH_ARM64)
3402 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3403
3404#else
3405# error "port me"
3406#endif
3407 return off;
3408}
3409
3410
3411/**
3412 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3413 *
3414 * @note ARM64: Misaligned @a offDisp values and values not in the
3415 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3416 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3417 * does not heed this.
3418 */
3419DECL_FORCE_INLINE_THROW(uint32_t)
3420iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3421 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3422{
3423#ifdef RT_ARCH_AMD64
3424 /* vmovdqu mem256, reg256 */
3425 pCodeBuf[off++] = X86_OP_VEX3;
3426 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3427 | X86_OP_VEX3_BYTE1_X
3428 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3429 | UINT8_C(0x01);
3430 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3431 pCodeBuf[off++] = 0x7f;
3432 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3433 RT_NOREF(iGprTmp);
3434
3435#elif defined(RT_ARCH_ARM64)
3436 Assert(!(iVecRegDst & 0x1));
3437 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3438 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3439 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3440 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3441#else
3442# error "port me"
3443#endif
3444 return off;
3445}
3446
3447
3448/**
3449 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3450 */
3451DECL_INLINE_THROW(uint32_t)
3452iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3453{
3454#ifdef RT_ARCH_AMD64
3455 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3457
3458#elif defined(RT_ARCH_ARM64)
3459 Assert(!(iVecRegDst & 0x1));
3460 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3461 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3462 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3463 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3464
3465#else
3466# error "port me"
3467#endif
3468 return off;
3469}
3470#endif
3471
3472
3473
3474/*********************************************************************************************************************************
3475* Subtraction and Additions *
3476*********************************************************************************************************************************/
3477
3478/**
3479 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3480 * @note The AMD64 version sets flags.
3481 */
3482DECL_INLINE_THROW(uint32_t)
3483iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3484{
3485#if defined(RT_ARCH_AMD64)
3486 /* sub Gv,Ev */
3487 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3488 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3489 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3490 pbCodeBuf[off++] = 0x2b;
3491 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3492
3493#elif defined(RT_ARCH_ARM64)
3494 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3495 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3496
3497#else
3498# error "Port me"
3499#endif
3500 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3501 return off;
3502}
3503
3504
3505/**
3506 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3507 * @note The AMD64 version sets flags.
3508 */
3509DECL_FORCE_INLINE(uint32_t)
3510iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3511{
3512#if defined(RT_ARCH_AMD64)
3513 /* sub Gv,Ev */
3514 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3515 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3516 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3517 pCodeBuf[off++] = 0x2b;
3518 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3519
3520#elif defined(RT_ARCH_ARM64)
3521 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3522
3523#else
3524# error "Port me"
3525#endif
3526 return off;
3527}
3528
3529
3530/**
3531 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3532 * @note The AMD64 version sets flags.
3533 */
3534DECL_INLINE_THROW(uint32_t)
3535iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3536{
3537#if defined(RT_ARCH_AMD64)
3538 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3539#elif defined(RT_ARCH_ARM64)
3540 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3541#else
3542# error "Port me"
3543#endif
3544 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3545 return off;
3546}
3547
3548
3549/**
3550 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3551 *
3552 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3553 *
3554 * @note Larger constants will require a temporary register. Failing to specify
3555 * one when needed will trigger fatal assertion / throw.
3556 */
3557DECL_FORCE_INLINE_THROW(uint32_t)
3558iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3559 uint8_t iGprTmp = UINT8_MAX)
3560{
3561#ifdef RT_ARCH_AMD64
3562 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3563 if (iSubtrahend == 1)
3564 {
3565 /* dec r/m64 */
3566 pCodeBuf[off++] = 0xff;
3567 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3568 }
3569 else if (iSubtrahend == -1)
3570 {
3571 /* inc r/m64 */
3572 pCodeBuf[off++] = 0xff;
3573 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3574 }
3575 else if ((int8_t)iSubtrahend == iSubtrahend)
3576 {
3577 /* sub r/m64, imm8 */
3578 pCodeBuf[off++] = 0x83;
3579 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3580 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3581 }
3582 else if ((int32_t)iSubtrahend == iSubtrahend)
3583 {
3584 /* sub r/m64, imm32 */
3585 pCodeBuf[off++] = 0x81;
3586 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3587 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3588 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3589 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3590 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3591 }
3592 else if (iGprTmp != UINT8_MAX)
3593 {
3594 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend); /* overwrites the REX prefix emitted above */
3595 /* sub r/m64, r64 */
3596 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3597 pCodeBuf[off++] = 0x29;
3598 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3599 }
3600 else
3601# ifdef IEM_WITH_THROW_CATCH
3602 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3603# else
3604 AssertReleaseFailedStmt(off = UINT32_MAX);
3605# endif
3606
3607#elif defined(RT_ARCH_ARM64)
3608 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3609 if (uAbsSubtrahend < 4096)
3610 {
3611 if (iSubtrahend >= 0)
3612 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3613 else
3614 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3615 }
3616 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3617 {
3618 if (iSubtrahend >= 0)
3619 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3620 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3621 else
3622 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3623 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3624 }
3625 else if (iGprTmp != UINT8_MAX)
3626 {
3627 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3628 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3629 }
3630 else
3631# ifdef IEM_WITH_THROW_CATCH
3632 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3633# else
3634 AssertReleaseFailedStmt(off = UINT32_MAX);
3635# endif
3636
3637#else
3638# error "Port me"
3639#endif
3640 return off;
3641}
3642
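/*
 * Example (editorial sketch): subtracting constants.  Small values are encoded
 * directly, but a full 64-bit constant needs the optional scratch register, so
 * the second call below would assert / throw without idxRegTmp.  The register
 * indexes are assumptions of the sketch.
 *
 *    off = iemNativeEmitSubGprImmEx(pCodeBuf, off, idxRegDst, 32);
 *    off = iemNativeEmitSubGprImmEx(pCodeBuf, off, idxRegDst,
 *                                   INT64_C(0x123456789abc), idxRegTmp);
 */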
3643
3644/**
3645 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3646 *
3647 * @note Larger constants will require a temporary register. Failing to specify
3648 * one when needed will trigger fatal assertion / throw.
3649 */
3650DECL_INLINE_THROW(uint32_t)
3651iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3652 uint8_t iGprTmp = UINT8_MAX)
3653
3654{
3655#ifdef RT_ARCH_AMD64
3656 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3657#elif defined(RT_ARCH_ARM64)
3658 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3659#else
3660# error "Port me"
3661#endif
3662 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3663 return off;
3664}
3665
3666
3667/**
3668 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3669 *
3670 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3671 *
3672 * @note ARM64: Larger constants will require a temporary register. Failing to
3673 * specify one when needed will trigger fatal assertion / throw.
3674 */
3675DECL_FORCE_INLINE_THROW(uint32_t)
3676iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3677 uint8_t iGprTmp = UINT8_MAX)
3678{
3679#ifdef RT_ARCH_AMD64
3680 if (iGprDst >= 8)
3681 pCodeBuf[off++] = X86_OP_REX_B;
3682 if (iSubtrahend == 1)
3683 {
3684 /* dec r/m32 */
3685 pCodeBuf[off++] = 0xff;
3686 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3687 }
3688 else if (iSubtrahend == -1)
3689 {
3690 /* inc r/m32 */
3691 pCodeBuf[off++] = 0xff;
3692 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3693 }
3694 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3695 {
3696 /* sub r/m32, imm8 */
3697 pCodeBuf[off++] = 0x83;
3698 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3699 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3700 }
3701 else
3702 {
3703 /* sub r/m32, imm32 */
3704 pCodeBuf[off++] = 0x81;
3705 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3706 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3707 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3708 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3709 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3710 }
3711 RT_NOREF(iGprTmp);
3712
3713#elif defined(RT_ARCH_ARM64)
3714 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3715 if (uAbsSubtrahend < 4096)
3716 {
3717 if (iSubtrahend >= 0)
3718 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3719 else
3720 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3721 }
3722 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3723 {
3724 if (iSubtrahend >= 0)
3725 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3726 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3727 else
3728 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3729 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3730 }
3731 else if (iGprTmp != UINT8_MAX)
3732 {
3733 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3734 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3735 }
3736 else
3737# ifdef IEM_WITH_THROW_CATCH
3738 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3739# else
3740 AssertReleaseFailedStmt(off = UINT32_MAX);
3741# endif
3742
3743#else
3744# error "Port me"
3745#endif
3746 return off;
3747}
3748
3749
3750/**
3751 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3752 *
3753 * @note ARM64: Larger constants will require a temporary register. Failing to
3754 * specify one when needed will trigger fatal assertion / throw.
3755 */
3756DECL_INLINE_THROW(uint32_t)
3757iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3758 uint8_t iGprTmp = UINT8_MAX)
3759
3760{
3761#ifdef RT_ARCH_AMD64
3762 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3763#elif defined(RT_ARCH_ARM64)
3764 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3765#else
3766# error "Port me"
3767#endif
3768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3769 return off;
3770}
3771
3772
3773/**
3774 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3775 *
3776 * This will optimize using DEC/INC/whatever, and the ARM64 version will not set
3777 * flags, so it is not suitable as a base for conditional jumps.
3778 *
3779 * @note AMD64: Will only update the lower 16 bits of the register.
3780 * @note ARM64: Will update the entire register.
3781 * @note ARM64: Larger constants will require a temporary register. Failing to
3782 * specify one when needed will trigger fatal assertion / throw.
3783 */
3784DECL_FORCE_INLINE_THROW(uint32_t)
3785iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3786 uint8_t iGprTmp = UINT8_MAX)
3787{
3788#ifdef RT_ARCH_AMD64
3789 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3790 if (iGprDst >= 8)
3791 pCodeBuf[off++] = X86_OP_REX_B;
3792 if (iSubtrahend == 1)
3793 {
3794 /* dec r/m16 */
3795 pCodeBuf[off++] = 0xff;
3796 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3797 }
3798 else if (iSubtrahend == -1)
3799 {
3800 /* inc r/m16 */
3801 pCodeBuf[off++] = 0xff;
3802 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3803 }
3804 else if ((int8_t)iSubtrahend == iSubtrahend)
3805 {
3806 /* sub r/m16, imm8 */
3807 pCodeBuf[off++] = 0x83;
3808 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3809 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3810 }
3811 else
3812 {
3813 /* sub r/m16, imm16 */
3814 pCodeBuf[off++] = 0x81;
3815 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3816 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3817 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3818 }
3819 RT_NOREF(iGprTmp);
3820
3821#elif defined(RT_ARCH_ARM64)
3822 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3823 if (uAbsSubtrahend < 4096)
3824 {
3825 if (iSubtrahend >= 0)
3826 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3827 else
3828 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3829 }
3830 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3831 {
3832 if (iSubtrahend >= 0)
3833 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3834 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3835 else
3836 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3837 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3838 }
3839 else if (iGprTmp != UINT8_MAX)
3840 {
3841 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3842 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3843 }
3844 else
3845# ifdef IEM_WITH_THROW_CATCH
3846 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3847# else
3848 AssertReleaseFailedStmt(off = UINT32_MAX);
3849# endif
3850 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3851
3852#else
3853# error "Port me"
3854#endif
3855 return off;
3856}
3857
3858
3859/**
3860 * Emits adding a 64-bit GPR to another, storing the result in the first.
3861 * @note The AMD64 version sets flags.
3862 */
3863DECL_FORCE_INLINE(uint32_t)
3864iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3865{
3866#if defined(RT_ARCH_AMD64)
3867 /* add Gv,Ev */
3868 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3869 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3870 pCodeBuf[off++] = 0x03;
3871 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3872
3873#elif defined(RT_ARCH_ARM64)
3874 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3875
3876#else
3877# error "Port me"
3878#endif
3879 return off;
3880}
3881
3882
3883/**
3884 * Emits adding a 64-bit GPR to another, storing the result in the first.
3885 * @note The AMD64 version sets flags.
3886 */
3887DECL_INLINE_THROW(uint32_t)
3888iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3889{
3890#if defined(RT_ARCH_AMD64)
3891 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3892#elif defined(RT_ARCH_ARM64)
3893 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3894#else
3895# error "Port me"
3896#endif
3897 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3898 return off;
3899}
3900
3901
3902/**
3903 * Emits adding a 32-bit GPR to another, storing the result in the first.
3904 * @note The AMD64 version sets flags.
3905 */
3906DECL_FORCE_INLINE(uint32_t)
3907iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3908{
3909#if defined(RT_ARCH_AMD64)
3910 /* add Gv,Ev */
3911 if (iGprDst >= 8 || iGprAddend >= 8)
3912 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3913 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3914 pCodeBuf[off++] = 0x03;
3915 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3916
3917#elif defined(RT_ARCH_ARM64)
3918 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3919
3920#else
3921# error "Port me"
3922#endif
3923 return off;
3924}
3925
3926
3927/**
3928 * Emits adding a 32-bit GPR to another, storing the result in the first.
3929 * @note The AMD64 version sets flags.
3930 */
3931DECL_INLINE_THROW(uint32_t)
3932iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3933{
3934#if defined(RT_ARCH_AMD64)
3935 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3936#elif defined(RT_ARCH_ARM64)
3937 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3938#else
3939# error "Port me"
3940#endif
3941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3942 return off;
3943}
3944
3945
3946/**
3947 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3948 */
3949DECL_INLINE_THROW(uint32_t)
3950iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3951{
3952#if defined(RT_ARCH_AMD64)
3953 /* add or inc */
3954 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3955 if (iImm8 != 1)
3956 {
3957 pCodeBuf[off++] = 0x83;
3958 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3959 pCodeBuf[off++] = (uint8_t)iImm8;
3960 }
3961 else
3962 {
3963 pCodeBuf[off++] = 0xff;
3964 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3965 }
3966
3967#elif defined(RT_ARCH_ARM64)
3968 if (iImm8 >= 0)
3969 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3970 else
3971 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3972
3973#else
3974# error "Port me"
3975#endif
3976 return off;
3977}
3978
3979
3980/**
3981 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3982 */
3983DECL_INLINE_THROW(uint32_t)
3984iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3985{
3986#if defined(RT_ARCH_AMD64)
3987 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3988#elif defined(RT_ARCH_ARM64)
3989 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3990#else
3991# error "Port me"
3992#endif
3993 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3994 return off;
3995}
3996
3997
3998/**
3999 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4000 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4001 */
4002DECL_FORCE_INLINE(uint32_t)
4003iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4004{
4005#if defined(RT_ARCH_AMD64)
4006 /* add or inc */
4007 if (iGprDst >= 8)
4008 pCodeBuf[off++] = X86_OP_REX_B;
4009 if (iImm8 != 1)
4010 {
4011 pCodeBuf[off++] = 0x83;
4012 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4013 pCodeBuf[off++] = (uint8_t)iImm8;
4014 }
4015 else
4016 {
4017 pCodeBuf[off++] = 0xff;
4018 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4019 }
4020
4021#elif defined(RT_ARCH_ARM64)
4022 if (iImm8 >= 0)
4023 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4024 else
4025 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4026
4027#else
4028# error "Port me"
4029#endif
4030 return off;
4031}
4032
4033
4034/**
4035 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4036 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4037 */
4038DECL_INLINE_THROW(uint32_t)
4039iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4040{
4041#if defined(RT_ARCH_AMD64)
4042 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4043#elif defined(RT_ARCH_ARM64)
4044 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4045#else
4046# error "Port me"
4047#endif
4048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4049 return off;
4050}
4051
4052
4053/**
4054 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4055 *
4056 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4057 */
4058DECL_FORCE_INLINE_THROW(uint32_t)
4059iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4060{
4061#if defined(RT_ARCH_AMD64)
4062 if ((int8_t)iAddend == iAddend)
4063 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4064
4065 if ((int32_t)iAddend == iAddend)
4066 {
4067 /* add grp, imm32 */
4068 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4069 pCodeBuf[off++] = 0x81;
4070 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4071 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4072 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4073 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4074 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4075 }
4076 else if (iGprTmp != UINT8_MAX)
4077 {
4078 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4079
4080 /* add dst, tmpreg */
4081 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4082 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4083 pCodeBuf[off++] = 0x03;
4084 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4085 }
4086 else
4087# ifdef IEM_WITH_THROW_CATCH
4088 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4089# else
4090 AssertReleaseFailedStmt(off = UINT32_MAX);
4091# endif
4092
4093#elif defined(RT_ARCH_ARM64)
4094 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4095 if (uAbsAddend < 4096)
4096 {
4097 if (iAddend >= 0)
4098 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4099 else
4100 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4101 }
4102 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4103 {
4104 if (iAddend >= 0)
4105 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4106 true /*f64Bit*/, true /*fShift12*/);
4107 else
4108 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4109 true /*f64Bit*/, true /*fShift12*/);
4110 }
4111 else if (iGprTmp != UINT8_MAX)
4112 {
4113 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4114 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4115 }
4116 else
4117# ifdef IEM_WITH_THROW_CATCH
4118 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4119# else
4120 AssertReleaseFailedStmt(off = UINT32_MAX);
4121# endif
4122
4123#else
4124# error "Port me"
4125#endif
4126 return off;
4127}
4128
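/*
 * Example (editorial sketch): the add counterpart behaves like the subtract
 * emitter earlier; only addends outside the directly encodable ranges need the
 * scratch register.  idxRegDst and idxRegTmp are assumed caller-allocated
 * registers.
 *
 *    off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxRegDst, -8);
 *    off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxRegDst,
 *                                   INT64_C(0x876543210), idxRegTmp);
 */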
4129
4130/**
4131 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4132 */
4133DECL_INLINE_THROW(uint32_t)
4134iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4135{
4136#if defined(RT_ARCH_AMD64)
4137 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4138 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4139
4140 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4141 {
4142 /* add grp, imm32 */
4143 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4144 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4145 pbCodeBuf[off++] = 0x81;
4146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4147 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4148 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4149 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4150 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4151 }
4152 else
4153 {
4154 /* Best to use a temporary register to deal with this in the simplest way: */
4155 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4156
4157 /* add dst, tmpreg */
4158 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4159 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4160 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4161 pbCodeBuf[off++] = 0x03;
4162 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4163
4164 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4165 }
4166
4167#elif defined(RT_ARCH_ARM64)
4168 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4169 {
4170 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4171 if (iAddend >= 0)
4172 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
4173 else
4174 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
4175 }
4176 else
4177 {
4178 /* Use temporary register for the immediate. */
4179 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4180
4181 /* add gprdst, gprdst, tmpreg */
4182 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4183 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
4184
4185 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4186 }
4187
4188#else
4189# error "Port me"
4190#endif
4191 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4192 return off;
4193}
4194
4195
4196/**
4197 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4198 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4199 * @note For ARM64 the iAddend value must be in the range 0x000..0xfff,
4200 * or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
4201 * the lower 12 bits always zero). The negative ranges are also allowed,
4202 * making it behave like a subtraction. If the constant does not conform,
4203 * the function will assert / throw.
4204 */
4205DECL_FORCE_INLINE_THROW(uint32_t)
4206iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4207{
4208#if defined(RT_ARCH_AMD64)
4209 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4210 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4211
4212 /* add grp, imm32 */
4213 if (iGprDst >= 8)
4214 pCodeBuf[off++] = X86_OP_REX_B;
4215 pCodeBuf[off++] = 0x81;
4216 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4217 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4218 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4219 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4220 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4221
4222#elif defined(RT_ARCH_ARM64)
4223 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4224 if (uAbsAddend <= 0xfff)
4225 {
4226 if (iAddend >= 0)
4227 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4228 else
4229 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4230 }
4231 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4232 {
4233 if (iAddend >= 0)
4234 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4235 false /*f64Bit*/, true /*fShift12*/);
4236 else
4237 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4238 false /*f64Bit*/, true /*fShift12*/);
4239 }
4240 else
4241# ifdef IEM_WITH_THROW_CATCH
4242 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4243# else
4244 AssertReleaseFailedStmt(off = UINT32_MAX);
4245# endif
4246
4247#else
4248# error "Port me"
4249#endif
4250 return off;
4251}
4252
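/*
 * Example (editorial sketch): constants the ARM64 path of the Ex variant above
 * encodes without help.  0x930 fits the plain 12-bit immediate and 0x42000
 * fits the 12-bit-shifted form (0x42 << 12); something like 0x1234567 fits
 * neither and would assert / throw.  idxRegDst is an assumed register index.
 *
 *    off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegDst, 0x930);
 *    off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegDst, 0x42000);
 */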
4253
4254/**
4255 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4256 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4257 */
4258DECL_INLINE_THROW(uint32_t)
4259iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4260{
4261#if defined(RT_ARCH_AMD64)
4262 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4263
4264#elif defined(RT_ARCH_ARM64)
4265 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4266 {
4267 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4268 if (iAddend >= 0)
4269 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
4270 else
4271 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
4272 }
4273 else
4274 {
4275 /* Use temporary register for the immediate. */
4276 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
4277
4278 /* add gprdst, gprdst, tmpreg */
4279 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4280 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4281
4282 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4283 }
4284
4285#else
4286# error "Port me"
4287#endif
4288 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4289 return off;
4290}
4291
4292
4293/**
4294 * Emits a 16-bit GPR add with a signed immediate addend.
4295 *
4296 * This will optimize using INC/DEC/whatever, and the ARM64 version will not set
4297 * flags, so it is not suitable as a base for conditional jumps.
4298 *
4299 * @note AMD64: Will only update the lower 16 bits of the register.
4300 * @note ARM64: Will update the entire register.
4301 * @note ARM64: Larger constants will require a temporary register. Failing to
4302 * specify one when needed will trigger fatal assertion / throw.
4303 * @sa iemNativeEmitSubGpr16ImmEx
4304 */
4305DECL_FORCE_INLINE_THROW(uint32_t)
4306iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
4307 uint8_t iGprTmp = UINT8_MAX)
4308{
4309#ifdef RT_ARCH_AMD64
4310 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4311 if (iGprDst >= 8)
4312 pCodeBuf[off++] = X86_OP_REX_B;
4313 if (iAddend == 1)
4314 {
4315 /* inc r/m16 */
4316 pCodeBuf[off++] = 0xff;
4317 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4318 }
4319 else if (iAddend == -1)
4320 {
4321 /* dec r/m16 */
4322 pCodeBuf[off++] = 0xff;
4323 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4324 }
4325 else if ((int8_t)iAddend == iAddend)
4326 {
4327 /* add r/m16, imm8 */
4328 pCodeBuf[off++] = 0x83;
4329 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4330 pCodeBuf[off++] = (uint8_t)iAddend;
4331 }
4332 else
4333 {
4334 /* add r/m16, imm16 */
4335 pCodeBuf[off++] = 0x81;
4336 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4337 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4338 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4339 }
4340 RT_NOREF(iGprTmp);
4341
4342#elif defined(RT_ARCH_ARM64)
4343 uint32_t uAbsAddend = RT_ABS(iAddend);
4344 if (uAbsAddend < 4096)
4345 {
4346 if (iAddend >= 0)
4347 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4348 else
4349 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4350 }
4351 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4352 {
4353 if (iAddend >= 0)
4354 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4355 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4356 else
4357 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4358 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4359 }
4360 else if (iGprTmp != UINT8_MAX)
4361 {
4362 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
4363 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4364 }
4365 else
4366# ifdef IEM_WITH_THROW_CATCH
4367 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4368# else
4369 AssertReleaseFailedStmt(off = UINT32_MAX);
4370# endif
4371 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4372
4373#else
4374# error "Port me"
4375#endif
4376 return off;
4377}
4378
4379
4380
4381/**
4382 * Adds two 64-bit GPRs together, storing the result in a third register.
4383 */
4384DECL_FORCE_INLINE(uint32_t)
4385iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4386{
4387#ifdef RT_ARCH_AMD64
4388 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4389 {
4390 /** @todo consider LEA */
4391 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4392 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4393 }
4394 else
4395 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4396
4397#elif defined(RT_ARCH_ARM64)
4398 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4399
4400#else
4401# error "Port me!"
4402#endif
4403 return off;
4404}
4405
4406
4407
4408/**
4409 * Adds two 32-bit GPRs together, storing the result in a third register.
4410 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4411 */
4412DECL_FORCE_INLINE(uint32_t)
4413iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4414{
4415#ifdef RT_ARCH_AMD64
4416 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4417 {
4418 /** @todo consider LEA */
4419 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4420 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4421 }
4422 else
4423 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4424
4425#elif defined(RT_ARCH_ARM64)
4426 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4427
4428#else
4429# error "Port me!"
4430#endif
4431 return off;
4432}
4433
4434
4435/**
4436 * Adds a 64-bit GPR and a 64-bit unsigned constant, storing the result in a
4437 * third register.
4438 *
4439 * @note The ARM64 version does not work for non-trivial constants if the
4440 * two registers are the same. Will assert / throw exception.
4441 */
4442DECL_FORCE_INLINE_THROW(uint32_t)
4443iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4444{
4445#ifdef RT_ARCH_AMD64
4446 /** @todo consider LEA */
4447 if ((int8_t)iImmAddend == iImmAddend)
4448 {
4449 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4450 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4451 }
4452 else
4453 {
4454 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4455 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4456 }
4457
4458#elif defined(RT_ARCH_ARM64)
4459 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4460 if (uAbsImmAddend < 4096)
4461 {
4462 if (iImmAddend >= 0)
4463 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4464 else
4465 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4466 }
4467 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4468 {
4469 if (iImmAddend >= 0)
4470 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4471 else
4472 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4473 }
4474 else if (iGprDst != iGprAddend)
4475 {
4476 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4477 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4478 }
4479 else
4480# ifdef IEM_WITH_THROW_CATCH
4481 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4482# else
4483 AssertReleaseFailedStmt(off = UINT32_MAX);
4484# endif
4485
4486#else
4487# error "Port me!"
4488#endif
4489 return off;
4490}
4491
4492
4493/**
4494 * Adds a 32-bit GPR and a 32-bit unsigned constant, storing the result in a
4495 * third register.
4496 *
4497 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4498 *
4499 * @note The ARM64 version does not work for non-trivial constants if the
4500 * two registers are the same. Will assert / throw exception.
4501 */
4502DECL_FORCE_INLINE_THROW(uint32_t)
4503iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4504{
4505#ifdef RT_ARCH_AMD64
4506 /** @todo consider LEA */
4507 if ((int8_t)iImmAddend == iImmAddend)
4508 {
4509 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4510 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4511 }
4512 else
4513 {
4514 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4515 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4516 }
4517
4518#elif defined(RT_ARCH_ARM64)
4519 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4520 if (uAbsImmAddend < 4096)
4521 {
4522 if (iImmAddend >= 0)
4523 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4524 else
4525 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4526 }
4527 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4528 {
4529 if (iImmAddend >= 0)
4530 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4531 else
4532 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4533 }
4534 else if (iGprDst != iGprAddend)
4535 {
4536 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4537 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4538 }
4539 else
4540# ifdef IEM_WITH_THROW_CATCH
4541 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4542# else
4543 AssertReleaseFailedStmt(off = UINT32_MAX);
4544# endif
4545
4546#else
4547# error "Port me!"
4548#endif
4549 return off;
4550}
4551
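/*
 * Example (editorial sketch): computing dst = addend + constant in one go.
 * For constants that do not fit the ARM64 immediate forms the destination is
 * used as scratch, so iGprDst must differ from iGprAddend in that case (see
 * the note above).  The register indexes are assumptions of the sketch.
 *
 *    off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off, idxRegResult,
 *                                           idxRegBase, 0x20);
 */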
4552
4553/*********************************************************************************************************************************
4554* Unary Operations *
4555*********************************************************************************************************************************/
4556
4557/**
4558 * Emits code for two's complement negation of a 64-bit GPR.
4559 */
4560DECL_FORCE_INLINE_THROW(uint32_t)
4561iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4562{
4563#if defined(RT_ARCH_AMD64)
4564 /* neg Ev */
4565 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4566 pCodeBuf[off++] = 0xf7;
4567 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4568
4569#elif defined(RT_ARCH_ARM64)
4570 /* sub dst, xzr, dst */
4571 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4572
4573#else
4574# error "Port me"
4575#endif
4576 return off;
4577}
4578
4579
4580/**
4581 * Emits code for two's complement negation of a 64-bit GPR.
4582 */
4583DECL_INLINE_THROW(uint32_t)
4584iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4585{
4586#if defined(RT_ARCH_AMD64)
4587 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4588#elif defined(RT_ARCH_ARM64)
4589 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4590#else
4591# error "Port me"
4592#endif
4593 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4594 return off;
4595}
4596
4597
4598/**
4599 * Emits code for two's complement negation of a 32-bit GPR.
4600 * @note Bits 32 thru 63 are set to zero.
4601 */
4602DECL_FORCE_INLINE_THROW(uint32_t)
4603iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4604{
4605#if defined(RT_ARCH_AMD64)
4606 /* neg Ev */
4607 if (iGprDst >= 8)
4608 pCodeBuf[off++] = X86_OP_REX_B;
4609 pCodeBuf[off++] = 0xf7;
4610 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4611
4612#elif defined(RT_ARCH_ARM64)
4613 /* sub dst, xzr, dst */
4614 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4615
4616#else
4617# error "Port me"
4618#endif
4619 return off;
4620}
4621
4622
4623/**
4624 * Emits code for the two's complement negation of a 32-bit GPR.
4625 * @note Bits 32 thru 63 are set to zero.
4626 */
4627DECL_INLINE_THROW(uint32_t)
4628iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4629{
4630#if defined(RT_ARCH_AMD64)
4631 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4632#elif defined(RT_ARCH_ARM64)
4633 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4634#else
4635# error "Port me"
4636#endif
4637 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4638 return off;
4639}
4640
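/*
 * Worked example (illustrative only, not part of the API): negating the 32-bit
 * value 1 leaves 0xffffffff in bits 31:0, and since this is the 32-bit form both
 * hosts clear bits 63:32 of the register.  Here idxReg is a hypothetical host
 * register index owned by the caller.
 *
 *     off = iemNativeEmitNegGpr32(pReNative, off, idxReg);
 */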
4641
4642
4643/*********************************************************************************************************************************
4644* Bit Operations *
4645*********************************************************************************************************************************/
4646
4647/**
4648 * Emits code for clearing bits 16 thru 63 in the GPR.
4649 */
4650DECL_INLINE_THROW(uint32_t)
4651iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4652{
4653#if defined(RT_ARCH_AMD64)
4654 /* movzx Gv,Ew */
4655 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4656 if (iGprDst >= 8)
4657 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4658 pbCodeBuf[off++] = 0x0f;
4659 pbCodeBuf[off++] = 0xb7;
4660 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4661
4662#elif defined(RT_ARCH_ARM64)
4663 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4664# if 1
4665 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4666# else
4667 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4668 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4669# endif
4670#else
4671# error "Port me"
4672#endif
4673 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4674 return off;
4675}
4676
4677
4678/**
4679 * Emits code for AND'ing two 64-bit GPRs.
4680 *
4681 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4682 * and ARM64 hosts.
4683 */
4684DECL_FORCE_INLINE(uint32_t)
4685iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4686{
4687#if defined(RT_ARCH_AMD64)
4688 /* and Gv, Ev */
4689 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4690 pCodeBuf[off++] = 0x23;
4691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4692 RT_NOREF(fSetFlags);
4693
4694#elif defined(RT_ARCH_ARM64)
4695 if (!fSetFlags)
4696 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4697 else
4698 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4699
4700#else
4701# error "Port me"
4702#endif
4703 return off;
4704}
4705
4706
4707/**
4708 * Emits code for AND'ing two 64-bit GPRs.
4709 *
4710 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4711 * and ARM64 hosts.
4712 */
4713DECL_INLINE_THROW(uint32_t)
4714iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4715{
4716#if defined(RT_ARCH_AMD64)
4717 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4718#elif defined(RT_ARCH_ARM64)
4719 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4720#else
4721# error "Port me"
4722#endif
4723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4724 return off;
4725}
4726
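/*
 * Usage sketch (illustrative only): passing fSetFlags=true is what makes an
 * "AND, then branch if zero" sequence work on both hosts, since only ANDS updates
 * NZCV on ARM64 (AMD64 AND always sets the flags).  idxRegValue, idxRegMask and
 * idxLabelSkip are hypothetical placeholders owned by the caller.
 *
 *     off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegValue, idxRegMask, true);
 *     off = iemNativeEmitJzToLabel(pReNative, off, idxLabelSkip);
 */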
4727
4728/**
4729 * Emits code for AND'ing two 32-bit GPRs.
4730 */
4731DECL_FORCE_INLINE(uint32_t)
4732iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4733{
4734#if defined(RT_ARCH_AMD64)
4735 /* and Gv, Ev */
4736 if (iGprDst >= 8 || iGprSrc >= 8)
4737 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4738 pCodeBuf[off++] = 0x23;
4739 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4740 RT_NOREF(fSetFlags);
4741
4742#elif defined(RT_ARCH_ARM64)
4743 if (!fSetFlags)
4744 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4745 else
4746 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4747
4748#else
4749# error "Port me"
4750#endif
4751 return off;
4752}
4753
4754
4755/**
4756 * Emits code for AND'ing two 32-bit GPRs.
4757 */
4758DECL_INLINE_THROW(uint32_t)
4759iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4760{
4761#if defined(RT_ARCH_AMD64)
4762 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4763#elif defined(RT_ARCH_ARM64)
4764 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4765#else
4766# error "Port me"
4767#endif
4768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4769 return off;
4770}
4771
4772
4773/**
4774 * Emits code for AND'ing a 64-bit GPR with a constant.
4775 *
4776 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4777 * and ARM64 hosts.
4778 */
4779DECL_INLINE_THROW(uint32_t)
4780iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4781{
4782#if defined(RT_ARCH_AMD64)
4783 if ((int64_t)uImm == (int8_t)uImm)
4784 {
4785 /* and Ev, imm8 */
4786 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4787 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4788 pbCodeBuf[off++] = 0x83;
4789 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4790 pbCodeBuf[off++] = (uint8_t)uImm;
4791 }
4792 else if ((int64_t)uImm == (int32_t)uImm)
4793 {
4794 /* and Ev, imm32 */
4795 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4796 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4797 pbCodeBuf[off++] = 0x81;
4798 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4799 pbCodeBuf[off++] = RT_BYTE1(uImm);
4800 pbCodeBuf[off++] = RT_BYTE2(uImm);
4801 pbCodeBuf[off++] = RT_BYTE3(uImm);
4802 pbCodeBuf[off++] = RT_BYTE4(uImm);
4803 }
4804 else
4805 {
4806 /* Use temporary register for the 64-bit immediate. */
4807 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4808 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4809 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4810 }
4811 RT_NOREF(fSetFlags);
4812
4813#elif defined(RT_ARCH_ARM64)
4814 uint32_t uImmR = 0;
4815 uint32_t uImmNandS = 0;
4816 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4817 {
4818 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4819 if (!fSetFlags)
4820 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4821 else
4822 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4823 }
4824 else
4825 {
4826 /* Use temporary register for the 64-bit immediate. */
4827 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4828 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4829 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4830 }
4831
4832#else
4833# error "Port me"
4834#endif
4835 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4836 return off;
4837}
4838
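/*
 * Illustrative note (a sketch, not authoritative): on ARM64 the single-instruction
 * path above is only taken when Armv8A64ConvertMask64ToImmRImmS succeeds, i.e. for
 * constants that are valid ARM64 logical immediates (repeating patterns of
 * consecutive set bits).  With a hypothetical host register idxRegDst:
 *
 *     off = iemNativeEmitAndGprByImm(pReNative, off, idxRegDst, UINT64_C(0x00000000ffffffff));
 *     off = iemNativeEmitAndGprByImm(pReNative, off, idxRegDst, UINT64_C(0x0000000123456789));
 *
 * The first constant encodes as one AND immediate on ARM64; the second does not
 * and falls back to the temporary register path on both hosts.
 */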
4839
4840/**
4841 * Emits code for AND'ing a 32-bit GPR with a constant.
4842 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4843 * @note For ARM64 this only supports @a uImm values that can be expressed using
4844 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4845 * make sure this is possible!
4846 */
4847DECL_FORCE_INLINE_THROW(uint32_t)
4848iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4849{
4850#if defined(RT_ARCH_AMD64)
4851 /* and Ev, imm */
4852 if (iGprDst >= 8)
4853 pCodeBuf[off++] = X86_OP_REX_B;
4854 if ((int32_t)uImm == (int8_t)uImm)
4855 {
4856 pCodeBuf[off++] = 0x83;
4857 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4858 pCodeBuf[off++] = (uint8_t)uImm;
4859 }
4860 else
4861 {
4862 pCodeBuf[off++] = 0x81;
4863 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4864 pCodeBuf[off++] = RT_BYTE1(uImm);
4865 pCodeBuf[off++] = RT_BYTE2(uImm);
4866 pCodeBuf[off++] = RT_BYTE3(uImm);
4867 pCodeBuf[off++] = RT_BYTE4(uImm);
4868 }
4869 RT_NOREF(fSetFlags);
4870
4871#elif defined(RT_ARCH_ARM64)
4872 uint32_t uImmR = 0;
4873 uint32_t uImmNandS = 0;
4874 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4875 {
4876 if (!fSetFlags)
4877 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4878 else
4879 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4880 }
4881 else
4882# ifdef IEM_WITH_THROW_CATCH
4883 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4884# else
4885 AssertReleaseFailedStmt(off = UINT32_MAX);
4886# endif
4887
4888#else
4889# error "Port me"
4890#endif
4891 return off;
4892}
4893
4894
4895/**
4896 * Emits code for AND'ing a 32-bit GPR with a constant.
4897 *
4898 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4899 */
4900DECL_INLINE_THROW(uint32_t)
4901iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4902{
4903#if defined(RT_ARCH_AMD64)
4904 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4905
4906#elif defined(RT_ARCH_ARM64)
4907 uint32_t uImmR = 0;
4908 uint32_t uImmNandS = 0;
4909 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4910 {
4911 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4912 if (!fSetFlags)
4913 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4914 else
4915 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4916 }
4917 else
4918 {
4919 /* Use temporary register for the 64-bit immediate. */
4920 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4921 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4922 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4923 }
4924
4925#else
4926# error "Port me"
4927#endif
4928 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4929 return off;
4930}
4931
4932
4933/**
4934 * Emits code for AND'ing a 64-bit GPR with a constant.
4935 *
4936 * @note For ARM64, any complicated immediate without an AND/ANDS compatible
4937 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc are
4938 * the same.
4939 */
4940DECL_FORCE_INLINE_THROW(uint32_t)
4941iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4942 bool fSetFlags = false)
4943{
4944#if defined(RT_ARCH_AMD64)
4945 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4946 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4947 RT_NOREF(fSetFlags);
4948
4949#elif defined(RT_ARCH_ARM64)
4950 uint32_t uImmR = 0;
4951 uint32_t uImmNandS = 0;
4952 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4953 {
4954 if (!fSetFlags)
4955 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4956 else
4957 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4958 }
4959 else if (iGprDst != iGprSrc)
4960 {
4961 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4962 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4963 }
4964 else
4965# ifdef IEM_WITH_THROW_CATCH
4966 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4967# else
4968 AssertReleaseFailedStmt(off = UINT32_MAX);
4969# endif
4970
4971#else
4972# error "Port me"
4973#endif
4974 return off;
4975}
4976
4977/**
4978 * Emits code for AND'ing a 32-bit GPR with a constant.
4979 *
4980 * @note For ARM64, any complicated immediate without an AND/ANDS compatible
4981 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc are
4982 * the same.
4983 *
4984 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4985 */
4986DECL_FORCE_INLINE_THROW(uint32_t)
4987iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4988 bool fSetFlags = false)
4989{
4990#if defined(RT_ARCH_AMD64)
4991 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4992 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4993 RT_NOREF(fSetFlags);
4994
4995#elif defined(RT_ARCH_ARM64)
4996 uint32_t uImmR = 0;
4997 uint32_t uImmNandS = 0;
4998 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4999 {
5000 if (!fSetFlags)
5001 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5002 else
5003 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5004 }
5005 else if (iGprDst != iGprSrc)
5006 {
5007 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5008 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5009 }
5010 else
5011# ifdef IEM_WITH_THROW_CATCH
5012 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5013# else
5014 AssertReleaseFailedStmt(off = UINT32_MAX);
5015# endif
5016
5017#else
5018# error "Port me"
5019#endif
5020 return off;
5021}
5022
5023
5024/**
5025 * Emits code for OR'ing two 64-bit GPRs.
5026 */
5027DECL_FORCE_INLINE(uint32_t)
5028iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5029{
5030#if defined(RT_ARCH_AMD64)
5031 /* or Gv, Ev */
5032 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5033 pCodeBuf[off++] = 0x0b;
5034 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5035
5036#elif defined(RT_ARCH_ARM64)
5037 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5038
5039#else
5040# error "Port me"
5041#endif
5042 return off;
5043}
5044
5045
5046/**
5047 * Emits code for OR'ing two 64-bit GPRs.
5048 */
5049DECL_INLINE_THROW(uint32_t)
5050iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5051{
5052#if defined(RT_ARCH_AMD64)
5053 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5054#elif defined(RT_ARCH_ARM64)
5055 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5056#else
5057# error "Port me"
5058#endif
5059 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5060 return off;
5061}
5062
5063
5064/**
5065 * Emits code for OR'ing two 32-bit GPRs.
5066 * @note Bits 63:32 of the destination GPR will be cleared.
5067 */
5068DECL_FORCE_INLINE(uint32_t)
5069iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5070{
5071#if defined(RT_ARCH_AMD64)
5072 /* or Gv, Ev */
5073 if (iGprDst >= 8 || iGprSrc >= 8)
5074 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5075 pCodeBuf[off++] = 0x0b;
5076 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5077
5078#elif defined(RT_ARCH_ARM64)
5079 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5080
5081#else
5082# error "Port me"
5083#endif
5084 return off;
5085}
5086
5087
5088/**
5089 * Emits code for OR'ing two 32-bit GPRs.
5090 * @note Bits 63:32 of the destination GPR will be cleared.
5091 */
5092DECL_INLINE_THROW(uint32_t)
5093iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5094{
5095#if defined(RT_ARCH_AMD64)
5096 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5097#elif defined(RT_ARCH_ARM64)
5098 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5099#else
5100# error "Port me"
5101#endif
5102 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5103 return off;
5104}
5105
5106
5107/**
5108 * Emits code for OR'ing a 64-bit GPR with a constant.
5109 */
5110DECL_INLINE_THROW(uint32_t)
5111iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5112{
5113#if defined(RT_ARCH_AMD64)
5114 if ((int64_t)uImm == (int8_t)uImm)
5115 {
5116 /* or Ev, imm8 */
5117 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5118 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5119 pbCodeBuf[off++] = 0x83;
5120 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5121 pbCodeBuf[off++] = (uint8_t)uImm;
5122 }
5123 else if ((int64_t)uImm == (int32_t)uImm)
5124 {
5125 /* or Ev, imm32 */
5126 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5127 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5128 pbCodeBuf[off++] = 0x81;
5129 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5130 pbCodeBuf[off++] = RT_BYTE1(uImm);
5131 pbCodeBuf[off++] = RT_BYTE2(uImm);
5132 pbCodeBuf[off++] = RT_BYTE3(uImm);
5133 pbCodeBuf[off++] = RT_BYTE4(uImm);
5134 }
5135 else
5136 {
5137 /* Use temporary register for the 64-bit immediate. */
5138 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5139 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5140 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5141 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5142 }
5143
5144#elif defined(RT_ARCH_ARM64)
5145 uint32_t uImmR = 0;
5146 uint32_t uImmNandS = 0;
5147 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5148 {
5149 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5150 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5151 }
5152 else
5153 {
5154 /* Use temporary register for the 64-bit immediate. */
5155 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5156 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5157 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5158 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5159 }
5160
5161#else
5162# error "Port me"
5163#endif
5164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5165 return off;
5166}
5167
5168
5169/**
5170 * Emits code for OR'ing a 32-bit GPR with a constant.
5171 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5172 * @note For ARM64 this only supports @a uImm values that can be expressed using
5173 * the two 6-bit immediates of the OR instructions. The caller must make
5174 * sure this is possible!
5175 */
5176DECL_FORCE_INLINE_THROW(uint32_t)
5177iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5178{
5179#if defined(RT_ARCH_AMD64)
5180 /* or Ev, imm */
5181 if (iGprDst >= 8)
5182 pCodeBuf[off++] = X86_OP_REX_B;
5183 if ((int32_t)uImm == (int8_t)uImm)
5184 {
5185 pCodeBuf[off++] = 0x83;
5186 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5187 pCodeBuf[off++] = (uint8_t)uImm;
5188 }
5189 else
5190 {
5191 pCodeBuf[off++] = 0x81;
5192 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5193 pCodeBuf[off++] = RT_BYTE1(uImm);
5194 pCodeBuf[off++] = RT_BYTE2(uImm);
5195 pCodeBuf[off++] = RT_BYTE3(uImm);
5196 pCodeBuf[off++] = RT_BYTE4(uImm);
5197 }
5198
5199#elif defined(RT_ARCH_ARM64)
5200 uint32_t uImmR = 0;
5201 uint32_t uImmNandS = 0;
5202 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5203 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5204 else
5205# ifdef IEM_WITH_THROW_CATCH
5206 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5207# else
5208 AssertReleaseFailedStmt(off = UINT32_MAX);
5209# endif
5210
5211#else
5212# error "Port me"
5213#endif
5214 return off;
5215}
5216
5217
5218/**
5219 * Emits code for OR'ing a 32-bit GPR with a constant.
5220 *
5221 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5222 */
5223DECL_INLINE_THROW(uint32_t)
5224iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5225{
5226#if defined(RT_ARCH_AMD64)
5227 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5228
5229#elif defined(RT_ARCH_ARM64)
5230 uint32_t uImmR = 0;
5231 uint32_t uImmNandS = 0;
5232 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5233 {
5234 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5235 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5236 }
5237 else
5238 {
5239 /* Use temporary register for the 64-bit immediate. */
5240 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5241 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5242 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5243 }
5244
5245#else
5246# error "Port me"
5247#endif
5248 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5249 return off;
5250}
5251
5252
5253
5254/**
5255 * ORs two 64-bit GPRs together, storing the result in a third register.
5256 */
5257DECL_FORCE_INLINE(uint32_t)
5258iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5259{
5260#ifdef RT_ARCH_AMD64
5261 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5262 {
5263 /** @todo consider LEA */
5264 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5265 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5266 }
5267 else
5268 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5269
5270#elif defined(RT_ARCH_ARM64)
5271 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5272
5273#else
5274# error "Port me!"
5275#endif
5276 return off;
5277}
5278
5279
5280
5281/**
5282 * ORs two 32-bit GPRs together, storing the result in a third register.
5283 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5284 */
5285DECL_FORCE_INLINE(uint32_t)
5286iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5287{
5288#ifdef RT_ARCH_AMD64
5289 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5290 {
5291 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5292 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5293 }
5294 else
5295 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5296
5297#elif defined(RT_ARCH_ARM64)
5298 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5299
5300#else
5301# error "Port me!"
5302#endif
5303 return off;
5304}
5305
5306
5307/**
5308 * Emits code for XOR'ing two 64-bit GPRs.
5309 */
5310DECL_INLINE_THROW(uint32_t)
5311iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5312{
5313#if defined(RT_ARCH_AMD64)
5314 /* xor Gv, Ev */
5315 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5316 pCodeBuf[off++] = 0x33;
5317 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5318
5319#elif defined(RT_ARCH_ARM64)
5320 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5321
5322#else
5323# error "Port me"
5324#endif
5325 return off;
5326}
5327
5328
5329/**
5330 * Emits code for XOR'ing two 64-bit GPRs.
5331 */
5332DECL_INLINE_THROW(uint32_t)
5333iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5334{
5335#if defined(RT_ARCH_AMD64)
5336 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5337#elif defined(RT_ARCH_ARM64)
5338 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5339#else
5340# error "Port me"
5341#endif
5342 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5343 return off;
5344}
5345
5346
5347/**
5348 * Emits code for XOR'ing two 32-bit GPRs.
5349 */
5350DECL_INLINE_THROW(uint32_t)
5351iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5352{
5353#if defined(RT_ARCH_AMD64)
5354 /* xor Gv, Ev */
5355 if (iGprDst >= 8 || iGprSrc >= 8)
5356 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5357 pCodeBuf[off++] = 0x33;
5358 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5359
5360#elif defined(RT_ARCH_ARM64)
5361 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5362
5363#else
5364# error "Port me"
5365#endif
5366 return off;
5367}
5368
5369
5370/**
5371 * Emits code for XOR'ing two 32-bit GPRs.
5372 */
5373DECL_INLINE_THROW(uint32_t)
5374iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5375{
5376#if defined(RT_ARCH_AMD64)
5377 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5378#elif defined(RT_ARCH_ARM64)
5379 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5380#else
5381# error "Port me"
5382#endif
5383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5384 return off;
5385}
5386
5387
5388/**
5389 * Emits code for XOR'ing a 32-bit GPR with a constant.
5390 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5391 * @note For ARM64 this only supports @a uImm values that can be expressed using
5392 * the two 6-bit immediates of the EOR instructions. The caller must make
5393 * sure this is possible!
5394 */
5395DECL_FORCE_INLINE_THROW(uint32_t)
5396iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5397{
5398#if defined(RT_ARCH_AMD64)
5399 /* xor Ev, imm */
5400 if (iGprDst >= 8)
5401 pCodeBuf[off++] = X86_OP_REX_B;
5402 if ((int32_t)uImm == (int8_t)uImm)
5403 {
5404 pCodeBuf[off++] = 0x83;
5405 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5406 pCodeBuf[off++] = (uint8_t)uImm;
5407 }
5408 else
5409 {
5410 pCodeBuf[off++] = 0x81;
5411 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5412 pCodeBuf[off++] = RT_BYTE1(uImm);
5413 pCodeBuf[off++] = RT_BYTE2(uImm);
5414 pCodeBuf[off++] = RT_BYTE3(uImm);
5415 pCodeBuf[off++] = RT_BYTE4(uImm);
5416 }
5417
5418#elif defined(RT_ARCH_ARM64)
5419 uint32_t uImmR = 0;
5420 uint32_t uImmNandS = 0;
5421 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5422 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5423 else
5424# ifdef IEM_WITH_THROW_CATCH
5425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5426# else
5427 AssertReleaseFailedStmt(off = UINT32_MAX);
5428# endif
5429
5430#else
5431# error "Port me"
5432#endif
5433 return off;
5434}
5435
5436
5437/*********************************************************************************************************************************
5438* Shifting *
5439*********************************************************************************************************************************/
5440
5441/**
5442 * Emits code for shifting a GPR a fixed number of bits to the left.
5443 */
5444DECL_FORCE_INLINE(uint32_t)
5445iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5446{
5447 Assert(cShift > 0 && cShift < 64);
5448
5449#if defined(RT_ARCH_AMD64)
5450 /* shl dst, cShift */
5451 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5452 if (cShift != 1)
5453 {
5454 pCodeBuf[off++] = 0xc1;
5455 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5456 pCodeBuf[off++] = cShift;
5457 }
5458 else
5459 {
5460 pCodeBuf[off++] = 0xd1;
5461 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5462 }
5463
5464#elif defined(RT_ARCH_ARM64)
5465 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5466
5467#else
5468# error "Port me"
5469#endif
5470 return off;
5471}
5472
5473
5474/**
5475 * Emits code for shifting a GPR a fixed number of bits to the left.
5476 */
5477DECL_INLINE_THROW(uint32_t)
5478iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5479{
5480#if defined(RT_ARCH_AMD64)
5481 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5482#elif defined(RT_ARCH_ARM64)
5483 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5484#else
5485# error "Port me"
5486#endif
5487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5488 return off;
5489}
5490
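/*
 * Usage sketch (illustrative only): a left shift by a constant is the usual way to
 * scale an index by a power of two, e.g. multiplying by 8.  idxRegIndex is a
 * hypothetical host register owned by the caller.
 *
 *     off = iemNativeEmitShiftGprLeft(pReNative, off, idxRegIndex, 3);
 *
 * Note that the AMD64 encoder above picks the shorter 0xd1 form for a shift count
 * of 1 and the 0xc1 /4 ib form otherwise; ARM64 always uses a single LSL immediate.
 */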
5491
5492/**
5493 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5494 */
5495DECL_FORCE_INLINE(uint32_t)
5496iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5497{
5498 Assert(cShift > 0 && cShift < 32);
5499
5500#if defined(RT_ARCH_AMD64)
5501 /* shl dst, cShift */
5502 if (iGprDst >= 8)
5503 pCodeBuf[off++] = X86_OP_REX_B;
5504 if (cShift != 1)
5505 {
5506 pCodeBuf[off++] = 0xc1;
5507 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5508 pCodeBuf[off++] = cShift;
5509 }
5510 else
5511 {
5512 pCodeBuf[off++] = 0xd1;
5513 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5514 }
5515
5516#elif defined(RT_ARCH_ARM64)
5517 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5518
5519#else
5520# error "Port me"
5521#endif
5522 return off;
5523}
5524
5525
5526/**
5527 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5528 */
5529DECL_INLINE_THROW(uint32_t)
5530iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5531{
5532#if defined(RT_ARCH_AMD64)
5533 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5534#elif defined(RT_ARCH_ARM64)
5535 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5536#else
5537# error "Port me"
5538#endif
5539 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5540 return off;
5541}
5542
5543
5544/**
5545 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5546 */
5547DECL_FORCE_INLINE(uint32_t)
5548iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5549{
5550 Assert(cShift > 0 && cShift < 64);
5551
5552#if defined(RT_ARCH_AMD64)
5553 /* shr dst, cShift */
5554 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5555 if (cShift != 1)
5556 {
5557 pCodeBuf[off++] = 0xc1;
5558 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5559 pCodeBuf[off++] = cShift;
5560 }
5561 else
5562 {
5563 pCodeBuf[off++] = 0xd1;
5564 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5565 }
5566
5567#elif defined(RT_ARCH_ARM64)
5568 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5569
5570#else
5571# error "Port me"
5572#endif
5573 return off;
5574}
5575
5576
5577/**
5578 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5579 */
5580DECL_INLINE_THROW(uint32_t)
5581iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5582{
5583#if defined(RT_ARCH_AMD64)
5584 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5585#elif defined(RT_ARCH_ARM64)
5586 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5587#else
5588# error "Port me"
5589#endif
5590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5591 return off;
5592}
5593
5594
5595/**
5596 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5597 * right.
5598 */
5599DECL_FORCE_INLINE(uint32_t)
5600iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5601{
5602 Assert(cShift > 0 && cShift < 32);
5603
5604#if defined(RT_ARCH_AMD64)
5605 /* shr dst, cShift */
5606 if (iGprDst >= 8)
5607 pCodeBuf[off++] = X86_OP_REX_B;
5608 if (cShift != 1)
5609 {
5610 pCodeBuf[off++] = 0xc1;
5611 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5612 pCodeBuf[off++] = cShift;
5613 }
5614 else
5615 {
5616 pCodeBuf[off++] = 0xd1;
5617 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5618 }
5619
5620#elif defined(RT_ARCH_ARM64)
5621 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5622
5623#else
5624# error "Port me"
5625#endif
5626 return off;
5627}
5628
5629
5630/**
5631 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5632 * right.
5633 */
5634DECL_INLINE_THROW(uint32_t)
5635iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5636{
5637#if defined(RT_ARCH_AMD64)
5638 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5639#elif defined(RT_ARCH_ARM64)
5640 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5641#else
5642# error "Port me"
5643#endif
5644 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5645 return off;
5646}
5647
5648
5649/**
5650 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5651 * right and assigning it to a different GPR.
5652 */
5653DECL_INLINE_THROW(uint32_t)
5654iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5655{
5656 Assert(cShift > 0); Assert(cShift < 32);
5657#if defined(RT_ARCH_AMD64)
5658 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5659 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5660
5661#elif defined(RT_ARCH_ARM64)
5662 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5663
5664#else
5665# error "Port me"
5666#endif
5667 return off;
5668}
5669
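/*
 * Usage sketch (illustrative only): the copy-and-shift form is handy for pulling a
 * field out of a packed 32-bit value without clobbering the source, e.g. fetching
 * the upper 16 bits.  It assumes pCodeBuf was obtained via iemNativeInstrBufEnsure
 * with room for a few instructions; idxRegDst and idxRegSrc are hypothetical
 * host registers.
 *
 *     off = iemNativeEmitGpr32EqGprShiftRightImmEx(pCodeBuf, off, idxRegDst, idxRegSrc, 16);
 *
 * On ARM64 this is a single LSR immediate; on AMD64 it expands to a mov + shr pair.
 */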
5670
5671/**
5672 * Emits code for rotating a GPR a fixed number of bits to the left.
5673 */
5674DECL_FORCE_INLINE(uint32_t)
5675iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5676{
5677 Assert(cShift > 0 && cShift < 64);
5678
5679#if defined(RT_ARCH_AMD64)
5680 /* rol dst, cShift */
5681 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5682 if (cShift != 1)
5683 {
5684 pCodeBuf[off++] = 0xc1;
5685 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5686 pCodeBuf[off++] = cShift;
5687 }
5688 else
5689 {
5690 pCodeBuf[off++] = 0xd1;
5691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5692 }
5693
5694#elif defined(RT_ARCH_ARM64)
5695 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5696
5697#else
5698# error "Port me"
5699#endif
5700 return off;
5701}
5702
5703
5704#if defined(RT_ARCH_AMD64)
5705/**
5706 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5707 */
5708DECL_FORCE_INLINE(uint32_t)
5709iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5710{
5711 Assert(cShift > 0 && cShift < 32);
5712
5713 /* rcl dst, cShift */
5714 if (iGprDst >= 8)
5715 pCodeBuf[off++] = X86_OP_REX_B;
5716 if (cShift != 1)
5717 {
5718 pCodeBuf[off++] = 0xc1;
5719 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5720 pCodeBuf[off++] = cShift;
5721 }
5722 else
5723 {
5724 pCodeBuf[off++] = 0xd1;
5725 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5726 }
5727
5728 return off;
5729}
5730#endif /* RT_ARCH_AMD64 */
5731
5732
5733
5734/**
5735 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5736 * @note Only bits 15:0 hold the swapped value; AMD64 leaves bits 63:16 untouched, while ARM64 clears bits 63:32.
5737 */
5738DECL_FORCE_INLINE(uint32_t)
5739iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5740{
5741#if defined(RT_ARCH_AMD64)
5742 /*
5743 * There is no bswap r16 on x86 (the encoding exists but does not work).
5744 * So just use a rol (gcc -O2 is doing that).
5745 *
5746 * rol r16, 0x8
5747 */
5748 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5749 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5750 if (iGpr >= 8)
5751 pbCodeBuf[off++] = X86_OP_REX_B;
5752 pbCodeBuf[off++] = 0xc1;
5753 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5754 pbCodeBuf[off++] = 0x08;
5755#elif defined(RT_ARCH_ARM64)
5756 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5757
5758 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5759#else
5760# error "Port me"
5761#endif
5762
5763 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5764 return off;
5765}
5766
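/*
 * Worked example (illustrative only): rotating the low 16 bits left by 8 swaps the
 * two bytes, which is exactly what a 16-bit byte order reversal needs:
 *
 *     0x1234 -> rol 8 -> 0x3412
 *
 * so with a hypothetical host register idxReg:
 *
 *     off = iemNativeEmitBswapGpr16(pReNative, off, idxReg);
 *
 * leaves the swapped value in bits 15:0 of idxReg.
 */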
5767
5768/**
5769 * Emits code for reversing the byte order in a 32-bit GPR.
5770 * @note Bits 63:32 of the destination GPR will be cleared.
5771 */
5772DECL_FORCE_INLINE(uint32_t)
5773iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5774{
5775#if defined(RT_ARCH_AMD64)
5776 /* bswap r32 */
5777 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5778
5779 if (iGpr >= 8)
5780 pbCodeBuf[off++] = X86_OP_REX_B;
5781 pbCodeBuf[off++] = 0x0f;
5782 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5783#elif defined(RT_ARCH_ARM64)
5784 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5785
5786 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5787#else
5788# error "Port me"
5789#endif
5790
5791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5792 return off;
5793}
5794
5795
5796/**
5797 * Emits code for reversing the byte order in a 64-bit GPR.
5798 */
5799DECL_FORCE_INLINE(uint32_t)
5800iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5801{
5802#if defined(RT_ARCH_AMD64)
5803 /* bswap r64 */
5804 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5805
5806 if (iGpr >= 8)
5807 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5808 else
5809 pbCodeBuf[off++] = X86_OP_REX_W;
5810 pbCodeBuf[off++] = 0x0f;
5811 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5812#elif defined(RT_ARCH_ARM64)
5813 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5814
5815 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5816#else
5817# error "Port me"
5818#endif
5819
5820 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5821 return off;
5822}
5823
5824
5825/*********************************************************************************************************************************
5826* Compare and Testing *
5827*********************************************************************************************************************************/
5828
5829
5830#ifdef RT_ARCH_ARM64
5831/**
5832 * Emits an ARM64 compare instruction.
5833 */
5834DECL_INLINE_THROW(uint32_t)
5835iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5836 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5837{
5838 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5839 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5840 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5841 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5842 return off;
5843}
5844#endif
5845
5846
5847/**
5848 * Emits a compare of two 64-bit GPRs, setting the status flags for use
5849 * with conditional instructions.
5850 */
5851DECL_FORCE_INLINE(uint32_t)
5852iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5853{
5854#ifdef RT_ARCH_AMD64
5855 /* cmp Gv, Ev */
5856 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5857 pCodeBuf[off++] = 0x3b;
5858 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5859
5860#elif defined(RT_ARCH_ARM64)
5861 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
5862
5863#else
5864# error "Port me!"
5865#endif
5866 return off;
5867}
5868
5869
5870/**
5871 * Emits a compare of two 64-bit GPRs, setting the status flags for use
5872 * with conditional instructions.
5873 */
5874DECL_INLINE_THROW(uint32_t)
5875iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5876{
5877#ifdef RT_ARCH_AMD64
5878 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5879#elif defined(RT_ARCH_ARM64)
5880 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5881#else
5882# error "Port me!"
5883#endif
5884 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5885 return off;
5886}
5887
5888
5889/**
5890 * Emits a compare of two 32-bit GPRs, setting the status flags for use
5891 * with conditional instructions.
5892 */
5893DECL_FORCE_INLINE(uint32_t)
5894iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5895{
5896#ifdef RT_ARCH_AMD64
5897 /* cmp Gv, Ev */
5898 if (iGprLeft >= 8 || iGprRight >= 8)
5899 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5900 pCodeBuf[off++] = 0x3b;
5901 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5902
5903#elif defined(RT_ARCH_ARM64)
5904 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
5905
5906#else
5907# error "Port me!"
5908#endif
5909 return off;
5910}
5911
5912
5913/**
5914 * Emits a compare of two 32-bit GPRs, setting the status flags for use
5915 * with conditional instructions.
5916 */
5917DECL_INLINE_THROW(uint32_t)
5918iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5919{
5920#ifdef RT_ARCH_AMD64
5921 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5922#elif defined(RT_ARCH_ARM64)
5923 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5924#else
5925# error "Port me!"
5926#endif
5927 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5928 return off;
5929}
5930
5931
5932/**
5933 * Emits a compare of a 64-bit GPR with a constant value, setting the status
5934 * flags for use with conditional instructions.
5935 */
5936DECL_INLINE_THROW(uint32_t)
5937iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
5938{
5939#ifdef RT_ARCH_AMD64
5940 if (uImm <= UINT32_C(0x7f))
5941 {
5942 /* cmp Ev, Ib */
5943 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5944 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5945 pbCodeBuf[off++] = 0x83;
5946 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5947 pbCodeBuf[off++] = (uint8_t)uImm;
5948 }
5949 else if ((int64_t)uImm == (int32_t)uImm)
5950 {
5951 /* cmp Ev, imm */
5952 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5953 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5954 pbCodeBuf[off++] = 0x81;
5955 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5956 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5957 pbCodeBuf[off++] = RT_BYTE1(uImm);
5958 pbCodeBuf[off++] = RT_BYTE2(uImm);
5959 pbCodeBuf[off++] = RT_BYTE3(uImm);
5960 pbCodeBuf[off++] = RT_BYTE4(uImm);
5961 }
5962 else
5963 {
5964 /* Use temporary register for the immediate. */
5965 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5966 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5967 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5968 }
5969
5970#elif defined(RT_ARCH_ARM64)
5971 /** @todo guess there are cleverer things we can do here... */
5972 if (uImm < _4K)
5973 {
5974 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5975 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5976 true /*64Bit*/, true /*fSetFlags*/);
5977 }
5978 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5979 {
5980 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5981 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5982 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5983 }
5984 else
5985 {
5986 /* Use temporary register for the immediate. */
5987 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5988 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5989 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5990 }
5991
5992#else
5993# error "Port me!"
5994#endif
5995
5996 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5997 return off;
5998}
5999
6000
6001/**
6002 * Emits a compare of a 32-bit GPR with a constant value, setting the status
6003 * flags for use with conditional instructions.
6004 *
6005 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6006 * range shifted 12 bits to the left (i.e. 0x1000..0xfff000 with the lower 12
6007 * bits all zero). Will release-assert or throw an exception if the caller
6008 * violates this restriction.
6009 */
6010DECL_FORCE_INLINE_THROW(uint32_t)
6011iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6012{
6013#ifdef RT_ARCH_AMD64
6014 if (iGprLeft >= 8)
6015 pCodeBuf[off++] = X86_OP_REX_B;
6016 if (uImm <= UINT32_C(0x7f))
6017 {
6018 /* cmp Ev, Ib */
6019 pCodeBuf[off++] = 0x83;
6020 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6021 pCodeBuf[off++] = (uint8_t)uImm;
6022 }
6023 else
6024 {
6025 /* cmp Ev, imm */
6026 pCodeBuf[off++] = 0x81;
6027 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6028 pCodeBuf[off++] = RT_BYTE1(uImm);
6029 pCodeBuf[off++] = RT_BYTE2(uImm);
6030 pCodeBuf[off++] = RT_BYTE3(uImm);
6031 pCodeBuf[off++] = RT_BYTE4(uImm);
6032 }
6033
6034#elif defined(RT_ARCH_ARM64)
6035 /** @todo guess there are cleverer things we can do here... */
6036 if (uImm < _4K)
6037 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6038 false /*64Bit*/, true /*fSetFlags*/);
6039 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6040 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6041 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6042 else
6043# ifdef IEM_WITH_THROW_CATCH
6044 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6045# else
6046 AssertReleaseFailedStmt(off = UINT32_MAX);
6047# endif
6048
6049#else
6050# error "Port me!"
6051#endif
6052 return off;
6053}
6054
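/*
 * Illustrative note (a sketch, not authoritative): on ARM64 the Ex variant above
 * maps straight onto SUBS with a 12-bit immediate, optionally shifted left by 12.
 * Constants such as 0x123 or 0x7f000 therefore encode in a single instruction,
 * whereas something like 0x1001 does not and has to go through the non-Ex helper
 * below, which falls back to a temporary register.  idxRegLeft is a hypothetical
 * host register owned by the caller.
 *
 *     off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegLeft, UINT32_C(0x1001));
 */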
6055
6056/**
6057 * Emits a compare of a 32-bit GPR with a constant value, setting the status
6058 * flags for use with conditional instructions.
6059 */
6060DECL_INLINE_THROW(uint32_t)
6061iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6062{
6063#ifdef RT_ARCH_AMD64
6064 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6065
6066#elif defined(RT_ARCH_ARM64)
6067 /** @todo guess there are cleverer things we can do here... */
6068 if (uImm < _4K)
6069 {
6070 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6071 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6072 false /*64Bit*/, true /*fSetFlags*/);
6073 }
6074 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6075 {
6076 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6077 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6078 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6079 }
6080 else
6081 {
6082 /* Use temporary register for the immediate. */
6083 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6084 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6085 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6086 }
6087
6088#else
6089# error "Port me!"
6090#endif
6091
6092 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6093 return off;
6094}
6095
6096
6097/**
6098 * Emits a compare of a 16-bit GPR with a constant value, setting the status
6099 * flags for use with conditional instructions.
6100 *
6101 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
6102 * 16-bit value from @a iGprLeft.
6103 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6104 * range shifted 12 bits to the left (i.e. 0x1000..0xfff000 with the lower 12
6105 * bits all zero). Will release-assert or throw an exception if the caller
6106 * violates this restriction.
6107 */
6108DECL_FORCE_INLINE_THROW(uint32_t)
6109iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6110 uint8_t idxTmpReg = UINT8_MAX)
6111{
6112#ifdef RT_ARCH_AMD64
6113 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6114 if (iGprLeft >= 8)
6115 pCodeBuf[off++] = X86_OP_REX_B;
6116 if (uImm <= UINT32_C(0x7f))
6117 {
6118 /* cmp Ev, Ib */
6119 pCodeBuf[off++] = 0x83;
6120 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6121 pCodeBuf[off++] = (uint8_t)uImm;
6122 }
6123 else
6124 {
6125 /* cmp Ev, imm */
6126 pCodeBuf[off++] = 0x81;
6127 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6128 pCodeBuf[off++] = RT_BYTE1(uImm);
6129 pCodeBuf[off++] = RT_BYTE2(uImm);
6130 }
6131 RT_NOREF(idxTmpReg);
6132
6133#elif defined(RT_ARCH_ARM64)
6134# ifdef IEM_WITH_THROW_CATCH
6135 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6136# else
6137 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6138# endif
6139 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6140 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6141 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6142
6143#else
6144# error "Port me!"
6145#endif
6146 return off;
6147}
6148
6149
6150/**
6151 * Emits a compare of a 16-bit GPR with a constant value, setting the status
6152 * flags for use with conditional instructions.
6153 *
6154 * @note ARM64: Helper register is required (idxTmpReg).
6155 */
6156DECL_INLINE_THROW(uint32_t)
6157iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6158 uint8_t idxTmpReg = UINT8_MAX)
6159{
6160#ifdef RT_ARCH_AMD64
6161 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6162#elif defined(RT_ARCH_ARM64)
6163 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6164#else
6165# error "Port me!"
6166#endif
6167 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6168 return off;
6169}
6170
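/*
 * Usage sketch (illustrative only): on ARM64 the 16-bit compare needs a scratch
 * register so that the low 16 bits of the operand can be isolated first; on AMD64
 * the scratch register is ignored and an operand-size prefixed cmp is emitted
 * instead.  idxRegLeft and idxRegTmp are hypothetical registers owned by the caller.
 *
 *     off = iemNativeEmitCmpGpr16WithImm(pReNative, off, idxRegLeft, UINT16_C(0x20), idxRegTmp);
 */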
6171
6172
6173/*********************************************************************************************************************************
6174* Branching *
6175*********************************************************************************************************************************/
6176
6177/**
6178 * Emits a JMP rel32 / B imm26 to the given label.
6179 */
6180DECL_FORCE_INLINE_THROW(uint32_t)
6181iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6182{
6183 Assert(idxLabel < pReNative->cLabels);
6184
6185#ifdef RT_ARCH_AMD64
6186 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6187 {
6188 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6189 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6190 {
6191 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6192 pCodeBuf[off++] = (uint8_t)offRel;
6193 }
6194 else
6195 {
6196 offRel -= 3;
6197 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6198 pCodeBuf[off++] = RT_BYTE1(offRel);
6199 pCodeBuf[off++] = RT_BYTE2(offRel);
6200 pCodeBuf[off++] = RT_BYTE3(offRel);
6201 pCodeBuf[off++] = RT_BYTE4(offRel);
6202 }
6203 }
6204 else
6205 {
6206 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6207 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6208 pCodeBuf[off++] = 0xfe;
6209 pCodeBuf[off++] = 0xff;
6210 pCodeBuf[off++] = 0xff;
6211 pCodeBuf[off++] = 0xff;
6212 }
6213 pCodeBuf[off++] = 0xcc; /* int3 poison */
6214
6215#elif defined(RT_ARCH_ARM64)
6216 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6217 {
6218 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6219 off++;
6220 }
6221 else
6222 {
6223 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6224 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6225 }
6226
6227#else
6228# error "Port me!"
6229#endif
6230 return off;
6231}
6232
6233
6234/**
6235 * Emits a JMP rel32 / B imm26 to the given label.
6236 */
6237DECL_INLINE_THROW(uint32_t)
6238iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6239{
6240#ifdef RT_ARCH_AMD64
6241 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6242#elif defined(RT_ARCH_ARM64)
6243 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6244#else
6245# error "Port me!"
6246#endif
6247 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6248 return off;
6249}
6250
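/*
 * Usage sketch (illustrative only): a forward jump is normally emitted against a
 * label that has not been given an offset yet; the emitter then writes a rel32 /
 * imm26 placeholder and records a fixup that is resolved once the label is
 * defined.  enmSomeLabelType is a hypothetical IEMNATIVELABELTYPE value, not a
 * name from this file.
 *
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType, UINT32_MAX, 0);
 *     off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
 */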
6251
6252/**
6253 * Emits a JMP rel32 / B imm26 to a new undefined label.
6254 */
6255DECL_INLINE_THROW(uint32_t)
6256iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6257{
6258 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6259 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6260}
6261
6262/** Condition type. */
6263#ifdef RT_ARCH_AMD64
6264typedef enum IEMNATIVEINSTRCOND : uint8_t
6265{
6266 kIemNativeInstrCond_o = 0,
6267 kIemNativeInstrCond_no,
6268 kIemNativeInstrCond_c,
6269 kIemNativeInstrCond_nc,
6270 kIemNativeInstrCond_e,
6271 kIemNativeInstrCond_ne,
6272 kIemNativeInstrCond_be,
6273 kIemNativeInstrCond_nbe,
6274 kIemNativeInstrCond_s,
6275 kIemNativeInstrCond_ns,
6276 kIemNativeInstrCond_p,
6277 kIemNativeInstrCond_np,
6278 kIemNativeInstrCond_l,
6279 kIemNativeInstrCond_nl,
6280 kIemNativeInstrCond_le,
6281 kIemNativeInstrCond_nle
6282} IEMNATIVEINSTRCOND;
6283#elif defined(RT_ARCH_ARM64)
6284typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6285# define kIemNativeInstrCond_o todo_conditional_codes
6286# define kIemNativeInstrCond_no todo_conditional_codes
6287# define kIemNativeInstrCond_c todo_conditional_codes
6288# define kIemNativeInstrCond_nc todo_conditional_codes
6289# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6290# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6291# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6292# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6293# define kIemNativeInstrCond_s todo_conditional_codes
6294# define kIemNativeInstrCond_ns todo_conditional_codes
6295# define kIemNativeInstrCond_p todo_conditional_codes
6296# define kIemNativeInstrCond_np todo_conditional_codes
6297# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6298# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6299# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6300# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6301#else
6302# error "Port me!"
6303#endif
6304
6305
6306/**
6307 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6308 */
6309DECL_FORCE_INLINE_THROW(uint32_t)
6310iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6311 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6312{
6313 Assert(idxLabel < pReNative->cLabels);
6314
6315 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6316#ifdef RT_ARCH_AMD64
6317 if (offLabel >= off)
6318 {
6319 /* jcc rel32 */
6320 pCodeBuf[off++] = 0x0f;
6321 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6322 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6323 pCodeBuf[off++] = 0x00;
6324 pCodeBuf[off++] = 0x00;
6325 pCodeBuf[off++] = 0x00;
6326 pCodeBuf[off++] = 0x00;
6327 }
6328 else
6329 {
6330 int32_t offDisp = offLabel - (off + 2);
6331 if ((int8_t)offDisp == offDisp)
6332 {
6333 /* jcc rel8 */
6334 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6335 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6336 }
6337 else
6338 {
6339 /* jcc rel32 */
6340 offDisp -= 4;
6341 pCodeBuf[off++] = 0x0f;
6342 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6343 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6344 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6345 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6346 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6347 }
6348 }
6349
6350#elif defined(RT_ARCH_ARM64)
6351 if (offLabel >= off)
6352 {
6353 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6354 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6355 }
6356 else
6357 {
6358 Assert(off - offLabel <= 0x3ffffU);
6359 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6360 off++;
6361 }
6362
6363#else
6364# error "Port me!"
6365#endif
6366 return off;
6367}
6368
6369
6370/**
6371 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6372 */
6373DECL_INLINE_THROW(uint32_t)
6374iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6375{
6376#ifdef RT_ARCH_AMD64
6377 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6378#elif defined(RT_ARCH_ARM64)
6379 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6380#else
6381# error "Port me!"
6382#endif
6383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6384 return off;
6385}
6386
6387
6388/**
6389 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6390 */
6391DECL_INLINE_THROW(uint32_t)
6392iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6393 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6394{
6395 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6396 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6397}
6398
6399
6400/**
6401 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6402 */
6403DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6404{
6405#ifdef RT_ARCH_AMD64
6406 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6407#elif defined(RT_ARCH_ARM64)
6408 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6409#else
6410# error "Port me!"
6411#endif
6412}
6413
6414/**
6415 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6416 */
6417DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6418 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6419{
6420#ifdef RT_ARCH_AMD64
6421 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6422#elif defined(RT_ARCH_ARM64)
6423 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6424#else
6425# error "Port me!"
6426#endif
6427}
6428
6429
6430/**
6431 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6432 */
6433DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6434{
6435#ifdef RT_ARCH_AMD64
6436 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6437#elif defined(RT_ARCH_ARM64)
6438 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6439#else
6440# error "Port me!"
6441#endif
6442}
6443
6444/**
6445 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6446 */
6447DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6448 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6449{
6450#ifdef RT_ARCH_AMD64
6451 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6452#elif defined(RT_ARCH_ARM64)
6453 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6454#else
6455# error "Port me!"
6456#endif
6457}
6458
6459
6460/**
6461 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6462 */
6463DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6464{
6465#ifdef RT_ARCH_AMD64
6466 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6467#elif defined(RT_ARCH_ARM64)
6468 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6469#else
6470# error "Port me!"
6471#endif
6472}
6473
6474/**
6475 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6476 */
6477DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6478 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6479{
6480#ifdef RT_ARCH_AMD64
6481 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6482#elif defined(RT_ARCH_ARM64)
6483 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6484#else
6485# error "Port me!"
6486#endif
6487}
6488
6489
6490/**
6491 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6492 */
6493DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6494{
6495#ifdef RT_ARCH_AMD64
6496 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6497#elif defined(RT_ARCH_ARM64)
6498 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6499#else
6500# error "Port me!"
6501#endif
6502}
6503
6504/**
6505 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6506 */
6507DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6508 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6509{
6510#ifdef RT_ARCH_AMD64
6511 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6512#elif defined(RT_ARCH_ARM64)
6513 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6514#else
6515# error "Port me!"
6516#endif
6517}
6518
6519
6520/**
6521 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6522 */
6523DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6524{
6525#ifdef RT_ARCH_AMD64
6526 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6527#elif defined(RT_ARCH_ARM64)
6528 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6529#else
6530# error "Port me!"
6531#endif
6532}
6533
6534/**
6535 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6536 */
6537DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6538 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6539{
6540#ifdef RT_ARCH_AMD64
6541 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6542#elif defined(RT_ARCH_ARM64)
6543 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6544#else
6545# error "Port me!"
6546#endif
6547}
6548
6549
6550/**
6551 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6552 *
6553 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6554 *
6555 * Only use hardcoded jumps forward when emitting for exactly one
6556 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6557 * the right target address on all platforms!
6558 *
6559 * Please also note that on x86 it is necessary to pass off + 256 or higher
6560 * for @a offTarget if one believes the intervening code is more than 127
6561 * bytes long.
6562 */
6563DECL_FORCE_INLINE(uint32_t)
6564iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6565{
6566#ifdef RT_ARCH_AMD64
6567 /* jcc rel8 / rel32 */
6568 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6569 if (offDisp < 128 && offDisp >= -128)
6570 {
6571 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6572 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6573 }
6574 else
6575 {
6576 offDisp -= 4;
6577 pCodeBuf[off++] = 0x0f;
6578 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6579 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6580 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6581 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6582 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6583 }
6584
6585#elif defined(RT_ARCH_ARM64)
6586 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6587 off++;
6588#else
6589# error "Port me!"
6590#endif
6591 return off;
6592}
6593
6594
6595/**
6596 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6597 *
6598 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6599 *
6600 * Only use hardcoded jumps forward when emitting for exactly one
6601 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6602 * the right target address on all platforms!
6603 *
6604 * Please also note that on x86 it is necessary to pass off + 256 or higher
6605 * for @a offTarget if one believes the intervening code is more than 127
6606 * bytes long.
6607 */
6608DECL_INLINE_THROW(uint32_t)
6609iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6610{
6611#ifdef RT_ARCH_AMD64
6612 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6613#elif defined(RT_ARCH_ARM64)
6614 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6615#else
6616# error "Port me!"
6617#endif
6618 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6619 return off;
6620}
6621
6622
6623/**
6624 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6625 *
6626 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6627 */
6628DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6629{
6630#ifdef RT_ARCH_AMD64
6631 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6632#elif defined(RT_ARCH_ARM64)
6633 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6634#else
6635# error "Port me!"
6636#endif
6637}
6638
6639
6640/**
6641 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6642 *
6643 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6644 */
6645DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6646{
6647#ifdef RT_ARCH_AMD64
6648 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6649#elif defined(RT_ARCH_ARM64)
6650 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6651#else
6652# error "Port me!"
6653#endif
6654}
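
/*
 * Purely illustrative sketch, not part of the recompiler: a conditional
 * branch backwards to an already emitted offset recorded by the caller
 * (e.g. a loop head).  Because the target lies behind the current position
 * it is exact on both hosts, so no fixup is needed; on AMD64 the rel8 form
 * is picked automatically when the distance allows it.  The condition flags
 * are assumed to have been set by a compare/test emitted just before.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleLoopWhileNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offLoopHead)
{
    return iemNativeEmitJnzToFixed(pReNative, off, offLoopHead);
}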
6655
6656
6657/**
6658 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6659 *
6660 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6661 */
6662DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6663{
6664#ifdef RT_ARCH_AMD64
6665 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6666#elif defined(RT_ARCH_ARM64)
6667 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6668#else
6669# error "Port me!"
6670#endif
6671}
6672
6673
6674/**
6675 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6676 *
6677 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6678 */
6679DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6680{
6681#ifdef RT_ARCH_AMD64
6682 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6683#elif defined(RT_ARCH_ARM64)
6684 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6685#else
6686# error "Port me!"
6687#endif
6688}
6689
6690
6691/**
6692 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6693 *
6694 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6695 */
6696DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6697{
6698#ifdef RT_ARCH_AMD64
6699 /* jmp rel8 or rel32 */
6700 int32_t offDisp = offTarget - (off + 2);
6701 if (offDisp < 128 && offDisp >= -128)
6702 {
6703 pCodeBuf[off++] = 0xeb;
6704 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6705 }
6706 else
6707 {
6708 offDisp -= 3;
6709 pCodeBuf[off++] = 0xe9;
6710 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6711 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6712 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6713 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6714 }
6715
6716#elif defined(RT_ARCH_ARM64)
6717 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6718 off++;
6719
6720#else
6721# error "Port me!"
6722#endif
6723 return off;
6724}
6725
6726
6727/**
6728 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6729 *
6730 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6731 */
6732DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6733{
6734#ifdef RT_ARCH_AMD64
6735 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6736#elif defined(RT_ARCH_ARM64)
6737 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6738#else
6739# error "Port me!"
6740#endif
6741 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6742 return off;
6743}
6744
6745
6746/**
6747 * Fixes up a conditional jump to a fixed label.
6748 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6749 * iemNativeEmitJzToFixed, ...
6750 */
6751DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6752{
6753#ifdef RT_ARCH_AMD64
6754 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6755 uint8_t const bOpcode = pbCodeBuf[offFixup];
6756 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6757 {
6758 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6759 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6760 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6761 }
6762 else
6763 {
6764 if (bOpcode != 0x0f)
6765 Assert(bOpcode == 0xe9);
6766 else
6767 {
6768 offFixup += 1;
6769 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
6770 }
6771 uint32_t const offRel32 = offTarget - (offFixup + 5);
6772 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6773 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6774 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6775 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6776 }
6777
6778#elif defined(RT_ARCH_ARM64)
6779 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6780 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6781 {
6782 /* B.COND + BC.COND */
6783 int32_t const offDisp = offTarget - offFixup;
6784 Assert(offDisp >= -262144 && offDisp < 262144);
6785 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6786 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6787 }
6788 else
6789 {
6790 /* B imm26 */
6791 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6792 int32_t const offDisp = offTarget - offFixup;
6793 Assert(offDisp >= -33554432 && offDisp < 33554432);
6794 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6795 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6796 }
6797
6798#else
6799# error "Port me!"
6800#endif
6801}
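
/*
 * Purely illustrative sketch of the pattern recommended in the notes above:
 * emit the conditional branch with a deliberately distant placeholder target
 * (off + 256 forces the rel32 form on AMD64), emit the code to be skipped,
 * then patch the branch with iemNativeFixupFixedJump once the real target
 * offset is known.  The condition flags are assumed to have been set by a
 * preceding compare/test emitter.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleSkipWhenZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Remember where the branch is and use a placeholder target beyond rel8 range. */
    uint32_t const offFixup       = off;
    uint32_t const offPlaceholder = off + 256;
    off = iemNativeEmitJzToFixed(pReNative, off, offPlaceholder);

    /* ... emit the code that should be skipped when ZF is set here ... */

    /* Patch the branch so it lands at the current offset on both hosts. */
    iemNativeFixupFixedJump(pReNative, offFixup, off);
    return off;
}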
6802
6803
6804#ifdef RT_ARCH_AMD64
6805/**
6806 * Emits a bt instruction for testing bit @a iBitNo in @a iGprSrc, leaving the result in CF.
6807 */
6808DECL_INLINE_THROW(uint32_t)
6809iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6810{
6811 Assert(iBitNo < 64);
6812 /* bt Ev, imm8 */
6813 if (iBitNo >= 32)
6814 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6815 else if (iGprSrc >= 8)
6816 pCodeBuf[off++] = X86_OP_REX_B;
6817 pCodeBuf[off++] = 0x0f;
6818 pCodeBuf[off++] = 0xba;
6819 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6820 pCodeBuf[off++] = iBitNo;
6821 return off;
6822}
6823#endif /* RT_ARCH_AMD64 */
6824
6825
6826/**
6827 * Internal helper, don't call directly.
6828 */
6829DECL_INLINE_THROW(uint32_t)
6830iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6831 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6832{
6833 Assert(iBitNo < 64);
6834#ifdef RT_ARCH_AMD64
6835 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6836 if (iBitNo < 8)
6837 {
6838 /* test Eb, imm8 */
6839 if (iGprSrc >= 4)
6840 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6841 pbCodeBuf[off++] = 0xf6;
6842 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6843 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6844 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6845 }
6846 else
6847 {
6848 /* bt Ev, imm8 */
6849 if (iBitNo >= 32)
6850 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6851 else if (iGprSrc >= 8)
6852 pbCodeBuf[off++] = X86_OP_REX_B;
6853 pbCodeBuf[off++] = 0x0f;
6854 pbCodeBuf[off++] = 0xba;
6855 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6856 pbCodeBuf[off++] = iBitNo;
6857 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6858 }
6859
6860#elif defined(RT_ARCH_ARM64)
6861 /* Use the TBNZ instruction here. */
6862 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6863 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
6864 {
6865 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
6866 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
6867 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
6868 //if (offLabel == UINT32_MAX)
6869 {
6870 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6871 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6872 }
6873 //else
6874 //{
6875 // RT_BREAKPOINT();
6876 // Assert(off - offLabel <= 0x1fffU);
6877 // pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
6878 //
6879 //}
6880 }
6881 else
6882 {
6883 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
6884 pu32CodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
6885 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6886 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
6887 }
6888
6889#else
6890# error "Port me!"
6891#endif
6892 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6893 return off;
6894}
6895
6896
6897/**
6898 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
6899 * @a iGprSrc.
6900 *
6901 * @note On ARM64 the range is only +/-8191 instructions.
6902 */
6903DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6904 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6905{
6906 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
6907}
6908
6909
6910/**
6911 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
6912 * _set_ in @a iGprSrc.
6913 *
6914 * @note On ARM64 the range is only +/-8191 instructions.
6915 */
6916DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6917 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6918{
6919 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
6920}
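
/*
 * Purely illustrative sketch, not part of the recompiler: the same (not yet
 * defined) label can be targeted by several emitters, each branch recording
 * its own fixup.  Here the code branches to idxLabel when either of two
 * caller chosen bits is set in iGprSrc; see the ARM64 assertion in the
 * internal helper above regarding labels that already have an offset.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleJmpIfEitherBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
                                      uint8_t iBitNo1, uint8_t iBitNo2, uint32_t idxLabel)
{
    off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, iGprSrc, iBitNo1, idxLabel);
    off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, iGprSrc, iBitNo2, idxLabel);
    return off;
}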
6921
6922
6923/**
6924 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
6925 * flags accordingly.
6926 */
6927DECL_INLINE_THROW(uint32_t)
6928iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
6929{
6930 Assert(fBits != 0);
6931#ifdef RT_ARCH_AMD64
6932
6933 if (fBits >= UINT32_MAX)
6934 {
6935 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6936
6937 /* test Ev,Gv */
6938 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6939 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
6940 pbCodeBuf[off++] = 0x85;
6941        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
6942
6943 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6944 }
6945 else if (fBits <= UINT32_MAX)
6946 {
6947 /* test Eb, imm8 or test Ev, imm32 */
6948 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6949 if (fBits <= UINT8_MAX)
6950 {
6951 if (iGprSrc >= 4)
6952 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6953 pbCodeBuf[off++] = 0xf6;
6954 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6955 pbCodeBuf[off++] = (uint8_t)fBits;
6956 }
6957 else
6958 {
6959 if (iGprSrc >= 8)
6960 pbCodeBuf[off++] = X86_OP_REX_B;
6961 pbCodeBuf[off++] = 0xf7;
6962 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6963 pbCodeBuf[off++] = RT_BYTE1(fBits);
6964 pbCodeBuf[off++] = RT_BYTE2(fBits);
6965 pbCodeBuf[off++] = RT_BYTE3(fBits);
6966 pbCodeBuf[off++] = RT_BYTE4(fBits);
6967 }
6968 }
6969 /** @todo implement me. */
6970 else
6971 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
6972
6973#elif defined(RT_ARCH_ARM64)
6974 uint32_t uImmR = 0;
6975 uint32_t uImmNandS = 0;
6976 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
6977 {
6978 /* ands xzr, iGprSrc, #fBits */
6979 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6980 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
6981 }
6982 else
6983 {
6984 /* ands xzr, iGprSrc, iTmpReg */
6985 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6986 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6987 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
6988 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6989 }
6990
6991#else
6992# error "Port me!"
6993#endif
6994 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6995 return off;
6996}
6997
6998
6999/**
7000 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7001 * @a iGprSrc, setting CPU flags accordingly.
7002 *
7003 * @note For ARM64 this only supports @a fBits values that can be expressed
7004 * using the two 6-bit immediates of the ANDS instruction. The caller
7005 * must make sure this is possible!
7006 */
7007DECL_FORCE_INLINE_THROW(uint32_t)
7008iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
7009{
7010 Assert(fBits != 0);
7011
7012#ifdef RT_ARCH_AMD64
7013 if (fBits <= UINT8_MAX)
7014 {
7015 /* test Eb, imm8 */
7016 if (iGprSrc >= 4)
7017 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7018 pCodeBuf[off++] = 0xf6;
7019 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7020 pCodeBuf[off++] = (uint8_t)fBits;
7021 }
7022 else
7023 {
7024 /* test Ev, imm32 */
7025 if (iGprSrc >= 8)
7026 pCodeBuf[off++] = X86_OP_REX_B;
7027 pCodeBuf[off++] = 0xf7;
7028 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7029 pCodeBuf[off++] = RT_BYTE1(fBits);
7030 pCodeBuf[off++] = RT_BYTE2(fBits);
7031 pCodeBuf[off++] = RT_BYTE3(fBits);
7032 pCodeBuf[off++] = RT_BYTE4(fBits);
7033 }
7034
7035#elif defined(RT_ARCH_ARM64)
7036 /* ands xzr, src, #fBits */
7037 uint32_t uImmR = 0;
7038 uint32_t uImmNandS = 0;
7039 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7040 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7041 else
7042# ifdef IEM_WITH_THROW_CATCH
7043 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7044# else
7045 AssertReleaseFailedStmt(off = UINT32_MAX);
7046# endif
7047
7048#else
7049# error "Port me!"
7050#endif
7051 return off;
7052}
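
/*
 * Purely illustrative sketch, not part of the recompiler: one way a caller
 * could honour the ARM64 restriction documented above, falling back to the
 * general emitter (which can allocate a temporary register) whenever the
 * mask cannot be encoded as a logical immediate.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleTestAnyBitsInGpr32Safe(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
{
#ifdef RT_ARCH_ARM64
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (!Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
        /* Not encodable as an ANDS immediate: use the temporary register capable variant. */
        return iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
#endif
    off = iemNativeEmitTestAnyBitsInGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprSrc, fBits);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}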
7053
7054
7055
7056/**
7057 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7058 * @a iGprSrc, setting CPU flags accordingly.
7059 *
7060 * @note For ARM64 this only supports @a fBits values that can be expressed
7061 * using the two 6-bit immediates of the ANDS instruction. The caller
7062 * must make sure this is possible!
7063 */
7064DECL_FORCE_INLINE_THROW(uint32_t)
7065iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7066{
7067 Assert(fBits != 0);
7068
7069#ifdef RT_ARCH_AMD64
7070 /* test Eb, imm8 */
7071 if (iGprSrc >= 4)
7072 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7073 pCodeBuf[off++] = 0xf6;
7074 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7075 pCodeBuf[off++] = fBits;
7076
7077#elif defined(RT_ARCH_ARM64)
7078 /* ands xzr, src, #fBits */
7079 uint32_t uImmR = 0;
7080 uint32_t uImmNandS = 0;
7081 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7082 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7083 else
7084# ifdef IEM_WITH_THROW_CATCH
7085 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7086# else
7087 AssertReleaseFailedStmt(off = UINT32_MAX);
7088# endif
7089
7090#else
7091# error "Port me!"
7092#endif
7093 return off;
7094}
7095
7096
7097/**
7098 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7099 * @a iGprSrc, setting CPU flags accordingly.
7100 */
7101DECL_INLINE_THROW(uint32_t)
7102iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7103{
7104 Assert(fBits != 0);
7105
7106#ifdef RT_ARCH_AMD64
7107 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7108
7109#elif defined(RT_ARCH_ARM64)
7110 /* ands xzr, src, [tmp|#imm] */
7111 uint32_t uImmR = 0;
7112 uint32_t uImmNandS = 0;
7113 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7114 {
7115 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7116 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7117 }
7118 else
7119 {
7120 /* Use temporary register for the 64-bit immediate. */
7121 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7122 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7123 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7124 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7125 }
7126
7127#else
7128# error "Port me!"
7129#endif
7130 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7131 return off;
7132}
7133
7134
7135/**
7136 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
7137 * are set in @a iGprSrc.
7138 */
7139DECL_INLINE_THROW(uint32_t)
7140iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7141 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7142{
7143 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7144
7145 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7146 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7147
7148 return off;
7149}
7150
7151
7152/**
7153 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
7154 * are set in @a iGprSrc.
7155 */
7156DECL_INLINE_THROW(uint32_t)
7157iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7158 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7159{
7160 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7161
7162 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7163 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7164
7165 return off;
7166}
7167
7168
7169/**
7170 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7171 *
7172 * The operand size is given by @a f64Bit.
7173 */
7174DECL_FORCE_INLINE_THROW(uint32_t)
7175iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7176 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7177{
7178 Assert(idxLabel < pReNative->cLabels);
7179
7180#ifdef RT_ARCH_AMD64
7181 /* test reg32,reg32 / test reg64,reg64 */
7182 if (f64Bit)
7183 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7184 else if (iGprSrc >= 8)
7185 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7186 pCodeBuf[off++] = 0x85;
7187 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7188
7189 /* jnz idxLabel */
7190 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7191 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7192
7193#elif defined(RT_ARCH_ARM64)
7194 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7195 {
7196 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7197 iGprSrc, f64Bit);
7198 off++;
7199 }
7200 else
7201 {
7202 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7203 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7204 }
7205
7206#else
7207# error "Port me!"
7208#endif
7209 return off;
7210}
7211
7212
7213/**
7214 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7215 *
7216 * The operand size is given by @a f64Bit.
7217 */
7218DECL_FORCE_INLINE_THROW(uint32_t)
7219iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7220 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7221{
7222#ifdef RT_ARCH_AMD64
7223 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7224 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7225#elif defined(RT_ARCH_ARM64)
7226 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7227 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7228#else
7229# error "Port me!"
7230#endif
7231 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7232 return off;
7233}
7234
7235
7236/* if (Gpr1 == 0) Jmp idxLabel; */
7237
7238/**
7239 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7240 *
7241 * The operand size is given by @a f64Bit.
7242 */
7243DECL_FORCE_INLINE_THROW(uint32_t)
7244iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7245 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7246{
7247 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7248 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7249}
7250
7251
7252/**
7253 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7254 *
7255 * The operand size is given by @a f64Bit.
7256 */
7257DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7258 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7259{
7260 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7261}
7262
7263
7264/**
7265 * Emits code that jumps to a new label if @a iGprSrc is zero.
7266 *
7267 * The operand size is given by @a f64Bit.
7268 */
7269DECL_INLINE_THROW(uint32_t)
7270iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7271 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7272{
7273 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7274 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7275}
7276
7277
7278/* if (Gpr1 != 0) Jmp idxLabel; */
7279
7280/**
7281 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7282 *
7283 * The operand size is given by @a f64Bit.
7284 */
7285DECL_FORCE_INLINE_THROW(uint32_t)
7286iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7287 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7288{
7289 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7290 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7291}
7292
7293
7294/**
7295 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7296 *
7297 * The operand size is given by @a f64Bit.
7298 */
7299DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7300 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7301{
7302 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7303}
7304
7305
7306/**
7307 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7308 *
7309 * The operand size is given by @a f64Bit.
7310 */
7311DECL_INLINE_THROW(uint32_t)
7312iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7313 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7314{
7315 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7316 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7317}
7318
7319
7320/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7321
7322/**
7323 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7324 * differs.
7325 */
7326DECL_INLINE_THROW(uint32_t)
7327iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7328 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7329{
7330 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7331 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7332 return off;
7333}
7334
7335
7336/**
7337 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differs.
7338 */
7339DECL_INLINE_THROW(uint32_t)
7340iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7341 uint8_t iGprLeft, uint8_t iGprRight,
7342 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7343{
7344 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7345 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7346}
7347
7348
7349/* if (Gpr != Imm) Jmp idxLabel; */
7350
7351/**
7352 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7353 */
7354DECL_INLINE_THROW(uint32_t)
7355iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7356 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7357{
7358 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7359 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7360 return off;
7361}
7362
7363
7364/**
7365 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7366 */
7367DECL_INLINE_THROW(uint32_t)
7368iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7369 uint8_t iGprSrc, uint64_t uImm,
7370 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7371{
7372 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7373 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7374}
7375
7376
7377/**
7378 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7379 * @a uImm.
7380 */
7381DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7382 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7383{
7384 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7385 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7386 return off;
7387}
7388
7389
7390/**
7391 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7392 * @a uImm.
7393 */
7394DECL_INLINE_THROW(uint32_t)
7395iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7396 uint8_t iGprSrc, uint32_t uImm,
7397 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7398{
7399 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7400 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7401}
7402
7403
7404/**
7405 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7406 * @a uImm.
7407 */
7408DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7409 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7410{
7411 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7412 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7413 return off;
7414}
7415
7416
7417/**
7418 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7419 * @a uImm.
7420 */
7421DECL_INLINE_THROW(uint32_t)
7422iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7423 uint8_t iGprSrc, uint16_t uImm,
7424 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7425{
7426 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7427 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7428}
7429
7430
7431/* if (Gpr == Imm) Jmp idxLabel; */
7432
7433/**
7434 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7435 */
7436DECL_INLINE_THROW(uint32_t)
7437iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7438 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7439{
7440 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7441 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7442 return off;
7443}
7444
7445
7446/**
7447 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7448 */
7449DECL_INLINE_THROW(uint32_t)
7450iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7451 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7452{
7453 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7454 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7455}
7456
7457
7458/**
7459 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7460 */
7461DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7462 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7463{
7464 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7465 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7466 return off;
7467}
7468
7469
7470/**
7471 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7472 */
7473DECL_INLINE_THROW(uint32_t)
7474iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7475 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7476{
7477 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7478 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7479}
7480
7481
7482/**
7483 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7484 *
7485 * @note ARM64: Helper register is required (idxTmpReg).
7486 */
7487DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7488 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7489 uint8_t idxTmpReg = UINT8_MAX)
7490{
7491 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7492 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7493 return off;
7494}
7495
7496
7497/**
7498 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7499 *
7500 * @note ARM64: Helper register is required (idxTmpReg).
7501 */
7502DECL_INLINE_THROW(uint32_t)
7503iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7504 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7505 uint8_t idxTmpReg = UINT8_MAX)
7506{
7507 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7508 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7509}
7510
7511
7512/*********************************************************************************************************************************
7513* Calls. *
7514*********************************************************************************************************************************/
7515
7516/**
7517 * Emits a call to a 64-bit address.
7518 */
7519DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7520{
7521#ifdef RT_ARCH_AMD64
7522 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7523
7524 /* call rax */
7525 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7526 pbCodeBuf[off++] = 0xff;
7527 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7528
7529#elif defined(RT_ARCH_ARM64)
7530 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7531
7532 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7533 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7534
7535#else
7536# error "port me"
7537#endif
7538 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7539 return off;
7540}
7541
7542
7543/**
7544 * Emits code to load a stack variable into an argument GPR.
7545 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7546 */
7547DECL_FORCE_INLINE_THROW(uint32_t)
7548iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7549 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7550 bool fSpilledVarsInVolatileRegs = false)
7551{
7552 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7553 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7554 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7555
7556 uint8_t const idxRegVar = pVar->idxReg;
7557 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7558 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7559 || !fSpilledVarsInVolatileRegs ))
7560 {
7561 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7562 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7563 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7564 if (!offAddend)
7565 {
7566 if (idxRegArg != idxRegVar)
7567 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7568 }
7569 else
7570 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7571 }
7572 else
7573 {
7574 uint8_t const idxStackSlot = pVar->idxStackSlot;
7575 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7576 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7577 if (offAddend)
7578 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7579 }
7580 return off;
7581}
7582
7583
7584/**
7585 * Emits code to load a stack or immediate variable value into an argument GPR,
7586 * optionally with an addend.
7587 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7588 */
7589DECL_FORCE_INLINE_THROW(uint32_t)
7590iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7591 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7592 bool fSpilledVarsInVolatileRegs = false)
7593{
7594 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7595 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7596 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7597 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7598 else
7599 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7600 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7601 return off;
7602}
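
/*
 * Purely illustrative sketch, not part of the recompiler: preparing a single
 * call argument from a stack or immediate variable and then calling a C
 * helper.  The argument register number and the helper address are caller
 * supplied here; real code picks the register from the target calling
 * convention.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleLoadArgAndCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                   uint8_t idxRegArg, uint8_t idxVar, uintptr_t pfnHelper)
{
    /* Load the variable value (immediate or stack) into the argument register... */
    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArg, idxVar);
    /* ... and emit the call; iemNativeEmitCallImm loads the address into a
       scratch register (RAX / TMP0) and emits call rax / blr. */
    return iemNativeEmitCallImm(pReNative, off, pfnHelper);
}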
7603
7604
7605/**
7606 * Emits code to load the variable address into an argument GPR.
7607 *
7608 * This only works for uninitialized and stack variables.
7609 */
7610DECL_FORCE_INLINE_THROW(uint32_t)
7611iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7612 bool fFlushShadows)
7613{
7614 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7615 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7616 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7617 || pVar->enmKind == kIemNativeVarKind_Stack,
7618 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7619 AssertStmt(!pVar->fSimdReg,
7620 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7621
7622 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7623 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7624
7625 uint8_t const idxRegVar = pVar->idxReg;
7626 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7627 {
7628 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7629 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7630 Assert(pVar->idxReg == UINT8_MAX);
7631 }
7632 Assert( pVar->idxStackSlot != UINT8_MAX
7633 && pVar->idxReg == UINT8_MAX);
7634
7635 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7636}
7637
7638
7639#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7640/**
7641 * Emits code to load the variable address into an argument GPR.
7642 *
7643 * This is a special variant intended for SIMD variables only and is only called
7644 * by the TLB miss path in the memory fetch/store code, because there the value is
7645 * passed by reference and both the register and the stack copy are needed,
7646 * depending on which path is taken (TLB hit vs. miss).
7647 */
7648DECL_FORCE_INLINE_THROW(uint32_t)
7649iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7650 bool fSyncRegWithStack = true)
7651{
7652 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7653 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7654 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7655 || pVar->enmKind == kIemNativeVarKind_Stack,
7656 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7657 AssertStmt(pVar->fSimdReg,
7658 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7659 Assert( pVar->idxStackSlot != UINT8_MAX
7660 && pVar->idxReg != UINT8_MAX);
7661
7662 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7663 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7664
7665 uint8_t const idxRegVar = pVar->idxReg;
7666 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7667 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7668
7669 if (fSyncRegWithStack)
7670 {
7671 if (pVar->cbVar == sizeof(RTUINT128U))
7672 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
7673 else
7674 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
7675 }
7676
7677 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7678}
7679
7680
7681/**
7682 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
7683 *
7684 * This is a special helper and is only called
7685 * by the TLB miss path in the memory fetch/store code, because there the value is
7686 * passed by reference and ends up on the stack after a TLB miss, so the assigned
7687 * host register must be re-synced from the stack copy.
7688 */
7689DECL_FORCE_INLINE_THROW(uint32_t)
7690iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
7691{
7692 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7693 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7694 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7695 || pVar->enmKind == kIemNativeVarKind_Stack,
7696 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7697 AssertStmt(pVar->fSimdReg,
7698 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7699 Assert( pVar->idxStackSlot != UINT8_MAX
7700 && pVar->idxReg != UINT8_MAX);
7701
7702 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7703 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7704
7705 uint8_t const idxRegVar = pVar->idxReg;
7706 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7707 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7708
7709 if (pVar->cbVar == sizeof(RTUINT128U))
7710 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
7711 else
7712 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
7713
7714 return off;
7715}
7716
7717
7718/**
7719 * Emits a gprdst = ~gprsrc store.
7720 */
7721DECL_FORCE_INLINE_THROW(uint32_t)
7722iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7723{
7724#ifdef RT_ARCH_AMD64
7725 if (iGprDst != iGprSrc)
7726 {
7727 /* mov gprdst, gprsrc. */
7728 if (f64Bit)
7729 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7730 else
7731 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7732 }
7733
7734 /* not gprdst */
7735 if (f64Bit || iGprDst >= 8)
7736 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7737 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7738 pCodeBuf[off++] = 0xf7;
7739 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7740#elif defined(RT_ARCH_ARM64)
7741 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7742#else
7743# error "port me"
7744#endif
7745 return off;
7746}
7747
7748
7749/**
7750 * Emits a gprdst = ~gprsrc store.
7751 */
7752DECL_INLINE_THROW(uint32_t)
7753iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7754{
7755#ifdef RT_ARCH_AMD64
7756 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7757#elif defined(RT_ARCH_ARM64)
7758 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7759#else
7760# error "port me"
7761#endif
7762 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7763 return off;
7764}
7765
7766
7767/**
7768 * Emits a 128-bit vector register store to a VCpu value.
7769 */
7770DECL_FORCE_INLINE_THROW(uint32_t)
7771iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7772{
7773#ifdef RT_ARCH_AMD64
7774 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7775 pCodeBuf[off++] = 0x66;
7776 if (iVecReg >= 8)
7777 pCodeBuf[off++] = X86_OP_REX_R;
7778 pCodeBuf[off++] = 0x0f;
7779 pCodeBuf[off++] = 0x7f;
7780 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7781#elif defined(RT_ARCH_ARM64)
7782 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7783
7784#else
7785# error "port me"
7786#endif
7787 return off;
7788}
7789
7790
7791/**
7792 * Emits a 128-bit vector register store to a VCpu value.
7793 */
7794DECL_INLINE_THROW(uint32_t)
7795iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7796{
7797#ifdef RT_ARCH_AMD64
7798 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7799#elif defined(RT_ARCH_ARM64)
7800 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7801#else
7802# error "port me"
7803#endif
7804 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7805 return off;
7806}
7807
7808
7809/**
7810 * Emits a high 128-bit vector register store to a VCpu value.
7811 */
7812DECL_FORCE_INLINE_THROW(uint32_t)
7813iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7814{
7815#ifdef RT_ARCH_AMD64
7816 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7817 pCodeBuf[off++] = X86_OP_VEX3;
7818 if (iVecReg >= 8)
7819 pCodeBuf[off++] = 0x63;
7820 else
7821 pCodeBuf[off++] = 0xe3;
7822 pCodeBuf[off++] = 0x7d;
7823 pCodeBuf[off++] = 0x39;
7824 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7825 pCodeBuf[off++] = 0x01; /* Immediate */
7826#elif defined(RT_ARCH_ARM64)
7827 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7828#else
7829# error "port me"
7830#endif
7831 return off;
7832}
7833
7834
7835/**
7836 * Emits a high 128-bit vector register store to a VCpu value.
7837 */
7838DECL_INLINE_THROW(uint32_t)
7839iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7840{
7841#ifdef RT_ARCH_AMD64
7842 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7843#elif defined(RT_ARCH_ARM64)
7844 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7845 Assert(!(iVecReg & 0x1));
7846 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7847#else
7848# error "port me"
7849#endif
7850 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7851 return off;
7852}
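
/*
 * Purely illustrative sketch, not part of the recompiler: storing a full
 * 256-bit value as its two 128-bit halves.  The two VCpu offsets are caller
 * supplied assumptions; on ARM64 the high half lives in the adjacent host
 * register (iVecReg + 1), as asserted by the High variant above.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleSimdStoreVecRegToVCpuU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
                                              uint32_t offVCpuLow, uint32_t offVCpuHigh)
{
    off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, iVecReg, offVCpuLow);
    return iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, iVecReg, offVCpuHigh);
}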
7853
7854
7855/**
7856 * Emits a 128-bit vector register load of a VCpu value.
7857 */
7858DECL_FORCE_INLINE_THROW(uint32_t)
7859iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7860{
7861#ifdef RT_ARCH_AMD64
7862 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
7863 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7864 if (iVecReg >= 8)
7865 pCodeBuf[off++] = X86_OP_REX_R;
7866 pCodeBuf[off++] = 0x0f;
7867 pCodeBuf[off++] = 0x6f;
7868 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7869#elif defined(RT_ARCH_ARM64)
7870 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7871
7872#else
7873# error "port me"
7874#endif
7875 return off;
7876}
7877
7878
7879/**
7880 * Emits a 128-bit vector register load of a VCpu value.
7881 */
7882DECL_INLINE_THROW(uint32_t)
7883iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7884{
7885#ifdef RT_ARCH_AMD64
7886 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7887#elif defined(RT_ARCH_ARM64)
7888 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7889#else
7890# error "port me"
7891#endif
7892 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7893 return off;
7894}
7895
7896
7897/**
7898 * Emits a high 128-bit vector register load of a VCpu value.
7899 */
7900DECL_FORCE_INLINE_THROW(uint32_t)
7901iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7902{
7903#ifdef RT_ARCH_AMD64
7904 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
7905 pCodeBuf[off++] = X86_OP_VEX3;
7906 if (iVecReg >= 8)
7907 pCodeBuf[off++] = 0x63;
7908 else
7909 pCodeBuf[off++] = 0xe3;
7910 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
7911 pCodeBuf[off++] = 0x38;
7912 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7913 pCodeBuf[off++] = 0x01; /* Immediate */
7914#elif defined(RT_ARCH_ARM64)
7915 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7916#else
7917# error "port me"
7918#endif
7919 return off;
7920}
7921
7922
7923/**
7924 * Emits a high 128-bit vector register load of a VCpu value.
7925 */
7926DECL_INLINE_THROW(uint32_t)
7927iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7928{
7929#ifdef RT_ARCH_AMD64
7930 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7931#elif defined(RT_ARCH_ARM64)
7932 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7933 Assert(!(iVecReg & 0x1));
7934 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7935#else
7936# error "port me"
7937#endif
7938 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7939 return off;
7940}
7941
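/* Usage sketch (illustrative only; the register index and the offVCpuLo/offVCpuHi offsets are
 * placeholders, not taken from any caller): spilling and reloading a 256-bit guest value as two
 * 128-bit halves with the emitters above.  On ARM64 the register index must be even, since the
 * high half lives in the adjacent register (idxVecReg + 1):
 *     off = iemNativeEmitSimdStoreVecRegToVCpuLowU128( pReNative, off, idxVecReg, offVCpuLo);
 *     off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxVecReg, offVCpuHi);
 *     ...
 *     off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128( pReNative, off, idxVecReg, offVCpuLo);
 *     off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxVecReg, offVCpuHi);
 */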
7942
7943/**
7944 * Emits a vecdst = vecsrc load.
7945 */
7946DECL_FORCE_INLINE(uint32_t)
7947iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7948{
7949#ifdef RT_ARCH_AMD64
7950 /* movdqu vecdst, vecsrc */
7951 pCodeBuf[off++] = 0xf3;
7952
7953 if ((iVecRegDst | iVecRegSrc) >= 8)
7954 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
7955 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
7956 : X86_OP_REX_R;
7957 pCodeBuf[off++] = 0x0f;
7958 pCodeBuf[off++] = 0x6f;
7959 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7960
7961#elif defined(RT_ARCH_ARM64)
7962 /* mov dst, src; alias for: orr dst, src, src */
7963 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
7964
7965#else
7966# error "port me"
7967#endif
7968 return off;
7969}
7970
7971
7972/**
7973 * Emits a vecdst = vecsrc load, 128-bit.
7974 */
7975DECL_INLINE_THROW(uint32_t)
7976iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7977{
7978#ifdef RT_ARCH_AMD64
7979 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
7980#elif defined(RT_ARCH_ARM64)
7981 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
7982#else
7983# error "port me"
7984#endif
7985 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7986 return off;
7987}
7988
7989
7990/**
7991 * Emits a vecdst = vecsrc load, 256-bit.
7992 */
7993DECL_INLINE_THROW(uint32_t)
7994iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7995{
7996#ifdef RT_ARCH_AMD64
7997 /* vmovdqa ymm, ymm */
7998 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7999 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
8000 {
8001 pbCodeBuf[off++] = X86_OP_VEX3;
8002 pbCodeBuf[off++] = 0x41;
8003 pbCodeBuf[off++] = 0x7d;
8004 pbCodeBuf[off++] = 0x6f;
8005 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8006 }
8007 else
8008 {
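        /* Only one (or neither) of the registers is extended here, so the 2-byte VEX prefix
           suffices.  When it is the source that needs extending, the store form (0x7f) is
           emitted instead, so the extended register lands in ModRM.reg where VEX.R can reach it. */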
8009 pbCodeBuf[off++] = X86_OP_VEX2;
8010 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
8011 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
8012 pbCodeBuf[off++] = iVecRegSrc >= 8
8013 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
8014 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8015 }
8016#elif defined(RT_ARCH_ARM64)
8017 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8018 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
8019 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
8020 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
8021#else
8022# error "port me"
8023#endif
8024 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8025 return off;
8026}
8027
8028
8029/**
8030 * Emits a vecdst[128:255] = vecsrc[0:127] load.
8031 */
8032DECL_FORCE_INLINE(uint32_t)
8033iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8034{
8035#ifdef RT_ARCH_AMD64
8036 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
8037 pCodeBuf[off++] = X86_OP_VEX3;
8038 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8039 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8040 pCodeBuf[off++] = 0x38;
8041 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8042 pCodeBuf[off++] = 0x01; /* Immediate */
8043
8044#elif defined(RT_ARCH_ARM64)
8045 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8046 /* mov dst, src; alias for: orr dst, src, src */
8047 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
8048
8049#else
8050# error "port me"
8051#endif
8052 return off;
8053}
8054
8055
8056/**
8057 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
8058 */
8059DECL_INLINE_THROW(uint32_t)
8060iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8061{
8062#ifdef RT_ARCH_AMD64
8063 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8064#elif defined(RT_ARCH_ARM64)
8065 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8066#else
8067# error "port me"
8068#endif
8069 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8070 return off;
8071}
8072
8073
8074/**
8075 * Emits a gprdst = vecsrc[x] load, 64-bit.
8076 */
8077DECL_FORCE_INLINE(uint32_t)
8078iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8079{
8080#ifdef RT_ARCH_AMD64
8081 if (iQWord >= 2)
8082 {
8083 /** @todo Currently not used. */
8084 AssertReleaseFailed();
8085 }
8086 else
8087 {
8088 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
8089 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8090 pCodeBuf[off++] = X86_OP_REX_W
8091 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8092 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8093 pCodeBuf[off++] = 0x0f;
8094 pCodeBuf[off++] = 0x3a;
8095 pCodeBuf[off++] = 0x16;
8096 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8097 pCodeBuf[off++] = iQWord;
8098 }
8099#elif defined(RT_ARCH_ARM64)
8100 /* umov gprdst, vecsrc[iQWord] */
8101 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8102#else
8103# error "port me"
8104#endif
8105 return off;
8106}
8107
8108
8109/**
8110 * Emits a gprdst = vecsrc[x] load, 64-bit.
8111 */
8112DECL_INLINE_THROW(uint32_t)
8113iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8114{
8115 Assert(iQWord <= 3);
8116
8117#ifdef RT_ARCH_AMD64
8118 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
8119#elif defined(RT_ARCH_ARM64)
8120 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8121 Assert(!(iVecRegSrc & 0x1));
8122 /* Need to access the "high" 128-bit vector register. */
8123 if (iQWord >= 2)
8124 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
8125 else
8126 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
8127#else
8128# error "port me"
8129#endif
8130 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8131 return off;
8132}
8133
8134
8135/**
8136 * Emits a gprdst = vecsrc[x] load, 32-bit.
8137 */
8138DECL_FORCE_INLINE(uint32_t)
8139iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8140{
8141#ifdef RT_ARCH_AMD64
8142 if (iDWord >= 4)
8143 {
8144 /** @todo Currently not used. */
8145 AssertReleaseFailed();
8146 }
8147 else
8148 {
8149 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
8150 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8151 if (iGprDst >= 8 || iVecRegSrc >= 8)
8152 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8153 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8154 pCodeBuf[off++] = 0x0f;
8155 pCodeBuf[off++] = 0x3a;
8156 pCodeBuf[off++] = 0x16;
8157 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8158 pCodeBuf[off++] = iDWord;
8159 }
8160#elif defined(RT_ARCH_ARM64)
8161 /* umov gprdst, vecsrc[iDWord] */
8162 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
8163#else
8164# error "port me"
8165#endif
8166 return off;
8167}
8168
8169
8170/**
8171 * Emits a gprdst = vecsrc[x] load, 32-bit.
8172 */
8173DECL_INLINE_THROW(uint32_t)
8174iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8175{
8176 Assert(iDWord <= 7);
8177
8178#ifdef RT_ARCH_AMD64
8179 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
8180#elif defined(RT_ARCH_ARM64)
8181 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8182 Assert(!(iVecRegSrc & 0x1));
8183 /* Need to access the "high" 128-bit vector register. */
8184 if (iDWord >= 4)
8185 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
8186 else
8187 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
8188#else
8189# error "port me"
8190#endif
8191 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8192 return off;
8193}
8194
8195
8196/**
8197 * Emits a gprdst = vecsrc[x] load, 16-bit.
8198 */
8199DECL_FORCE_INLINE(uint32_t)
8200iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8201{
8202#ifdef RT_ARCH_AMD64
8203 if (iWord >= 8)
8204 {
8205 /** @todo Currently not used. */
8206 AssertReleaseFailed();
8207 }
8208 else
8209 {
8210 /* pextrw gpr, vecsrc, #iWord */
8211 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8212 if (iGprDst >= 8 || iVecRegSrc >= 8)
8213 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
8214 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
8215 pCodeBuf[off++] = 0x0f;
8216 pCodeBuf[off++] = 0xc5;
8217 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
8218 pCodeBuf[off++] = iWord;
8219 }
8220#elif defined(RT_ARCH_ARM64)
8221 /* umov gprdst, vecsrc[iWord] */
8222 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
8223#else
8224# error "port me"
8225#endif
8226 return off;
8227}
8228
8229
8230/**
8231 * Emits a gprdst = vecsrc[x] load, 16-bit.
8232 */
8233DECL_INLINE_THROW(uint32_t)
8234iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8235{
8236 Assert(iWord <= 15);
8237
8238#ifdef RT_ARCH_AMD64
8239 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
8240#elif defined(RT_ARCH_ARM64)
8241 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8242 Assert(!(iVecRegSrc & 0x1));
8243 /* Need to access the "high" 128-bit vector register. */
8244 if (iWord >= 8)
8245 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
8246 else
8247 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
8248#else
8249# error "port me"
8250#endif
8251 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8252 return off;
8253}
8254
8255
8256/**
8257 * Emits a gprdst = vecsrc[x] load, 8-bit.
8258 */
8259DECL_FORCE_INLINE(uint32_t)
8260iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8261{
8262#ifdef RT_ARCH_AMD64
8263 if (iByte >= 16)
8264 {
8265 /** @todo Currently not used. */
8266 AssertReleaseFailed();
8267 }
8268 else
8269 {
8270 /* pextrb gpr, vecsrc, #iByte */
8271 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8272 if (iGprDst >= 8 || iVecRegSrc >= 8)
8273 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8274 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8275 pCodeBuf[off++] = 0x0f;
8276 pCodeBuf[off++] = 0x3a;
8277 pCodeBuf[off++] = 0x14;
8278 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8279 pCodeBuf[off++] = iByte;
8280 }
8281#elif defined(RT_ARCH_ARM64)
8282 /* umov gprdst, vecsrc[iByte] */
8283 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
8284#else
8285# error "port me"
8286#endif
8287 return off;
8288}
8289
8290
8291/**
8292 * Emits a gprdst = vecsrc[x] load, 8-bit.
8293 */
8294DECL_INLINE_THROW(uint32_t)
8295iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8296{
8297 Assert(iByte <= 31);
8298
8299#ifdef RT_ARCH_AMD64
8300 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
8301#elif defined(RT_ARCH_ARM64)
8302 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8303 Assert(!(iVecRegSrc & 0x1));
8304 /* Need to access the "high" 128-bit vector register. */
8305 if (iByte >= 16)
8306 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
8307 else
8308 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
8309#else
8310# error "port me"
8311#endif
8312 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8313 return off;
8314}
8315
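/* Usage sketch (illustrative only; lane indices and register numbers are placeholders): reading
 * individual lanes out of a vector register with the emitters above, e.g. the second quadword
 * and the lowest byte:
 *     off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGprTmp, idxVecReg, 1 /#iQWord#/);
 *     off = iemNativeEmitSimdLoadGprFromVecRegU8( pReNative, off, idxGprTmp, idxVecReg, 0 /#iByte#/);
 * Lane indices beyond the low 128 bits are currently only implemented on ARM64 (even register
 * pairs); the AMD64 paths assert on them for now.
 */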
8316
8317/**
8318 * Emits a vecdst[x] = gprsrc store, 64-bit.
8319 */
8320DECL_FORCE_INLINE(uint32_t)
8321iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8322{
8323#ifdef RT_ARCH_AMD64
8324 /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
8325 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8326 pCodeBuf[off++] = X86_OP_REX_W
8327 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8328 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8329 pCodeBuf[off++] = 0x0f;
8330 pCodeBuf[off++] = 0x3a;
8331 pCodeBuf[off++] = 0x22;
8332 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8333 pCodeBuf[off++] = iQWord;
8334#elif defined(RT_ARCH_ARM64)
8335 /* ins vecdst[iQWord], gpr */
8336 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8337#else
8338# error "port me"
8339#endif
8340 return off;
8341}
8342
8343
8344/**
8345 * Emits a vecdst[x] = gprsrc store, 64-bit.
8346 */
8347DECL_INLINE_THROW(uint32_t)
8348iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8349{
8350 Assert(iQWord <= 1);
8351
8352#ifdef RT_ARCH_AMD64
8353 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
8354#elif defined(RT_ARCH_ARM64)
8355 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
8356#else
8357# error "port me"
8358#endif
8359 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8360 return off;
8361}
8362
8363
8364/**
8365 * Emits a vecdst[x] = gprsrc store, 32-bit.
8366 */
8367DECL_FORCE_INLINE(uint32_t)
8368iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8369{
8370#ifdef RT_ARCH_AMD64
8371 /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
8372 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8373 if (iVecRegDst >= 8 || iGprSrc >= 8)
8374 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8375 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8376 pCodeBuf[off++] = 0x0f;
8377 pCodeBuf[off++] = 0x3a;
8378 pCodeBuf[off++] = 0x22;
8379 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8380 pCodeBuf[off++] = iDWord;
8381#elif defined(RT_ARCH_ARM64)
8382 /* ins vecdst[iDWord], gpr */
8383 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
8384#else
8385# error "port me"
8386#endif
8387 return off;
8388}
8389
8390
8391/**
8392 * Emits a vecdst[x] = gprsrc store, 32-bit.
8393 */
8394DECL_INLINE_THROW(uint32_t)
8395iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8396{
8397 Assert(iDWord <= 3);
8398
8399#ifdef RT_ARCH_AMD64
8400 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord);
8401#elif defined(RT_ARCH_ARM64)
8402 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
8403#else
8404# error "port me"
8405#endif
8406 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8407 return off;
8408}
8409
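/* Usage sketch (illustrative only; indices and register numbers are placeholders): writing a
 * 64-bit GPR into the low quadword of a vector register:
 *     off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVecReg, idxGprSrc, 0 /#iQWord#/);
 * The 32-bit variant above works the same way but addresses dword lanes 0..3.
 */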
8410
8411/**
8412 * Emits a vecdst.au32[iDWord] = 0 store.
8413 */
8414DECL_FORCE_INLINE(uint32_t)
8415iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8416{
8417 Assert(iDWord <= 7);
8418
8419#ifdef RT_ARCH_AMD64
8420 /*
8421 * xor tmp0, tmp0
8422 * pinsrd xmm, tmp0, iDword
8423 */
8424 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
8425 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8426 pCodeBuf[off++] = 0x33;
8427 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
8428 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
8429#elif defined(RT_ARCH_ARM64)
8430 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8431 Assert(!(iVecReg & 0x1));
8432 /* ins vecsrc[iDWord], wzr */
8433 if (iDWord >= 4)
8434 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
8435 else
8436 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
8437#else
8438# error "port me"
8439#endif
8440 return off;
8441}
8442
8443
8444/**
8445 * Emits a vecdst.au32[iDWord] = 0 store.
8446 */
8447DECL_INLINE_THROW(uint32_t)
8448iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8449{
8451#ifdef RT_ARCH_AMD64
8452 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
8453#elif defined(RT_ARCH_ARM64)
8454 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
8455#else
8456# error "port me"
8457#endif
8458 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8459 return off;
8460}
8461
8462
8463/**
8464 * Emits a vecdst[0:127] = 0 store.
8465 */
8466DECL_FORCE_INLINE(uint32_t)
8467iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8468{
8469#ifdef RT_ARCH_AMD64
8470 /* pxor xmm, xmm */
8471 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8472 if (iVecReg >= 8)
8473 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
8474 pCodeBuf[off++] = 0x0f;
8475 pCodeBuf[off++] = 0xef;
8476 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8477#elif defined(RT_ARCH_ARM64)
8478 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8479 Assert(!(iVecReg & 0x1));
8480 /* eor vecreg, vecreg, vecreg */
8481 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8482#else
8483# error "port me"
8484#endif
8485 return off;
8486}
8487
8488
8489/**
8490 * Emits a vecdst[0:127] = 0 store.
8491 */
8492DECL_INLINE_THROW(uint32_t)
8493iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8494{
8495#ifdef RT_ARCH_AMD64
8496 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8497#elif defined(RT_ARCH_ARM64)
8498 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8499#else
8500# error "port me"
8501#endif
8502 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8503 return off;
8504}
8505
8506
8507/**
8508 * Emits a vecdst[128:255] = 0 store.
8509 */
8510DECL_FORCE_INLINE(uint32_t)
8511iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8512{
8513#ifdef RT_ARCH_AMD64
8514 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
8515 if (iVecReg < 8)
8516 {
8517 pCodeBuf[off++] = X86_OP_VEX2;
8518 pCodeBuf[off++] = 0xf9;
8519 }
8520 else
8521 {
8522 pCodeBuf[off++] = X86_OP_VEX3;
8523 pCodeBuf[off++] = 0x41;
8524 pCodeBuf[off++] = 0x79;
8525 }
8526 pCodeBuf[off++] = 0x6f;
8527 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8528#elif defined(RT_ARCH_ARM64)
8529 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8530 Assert(!(iVecReg & 0x1));
8531 /* eor vecreg, vecreg, vecreg */
8532 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8533#else
8534# error "port me"
8535#endif
8536 return off;
8537}
8538
8539
8540/**
8541 * Emits a vecdst[128:255] = 0 store.
8542 */
8543DECL_INLINE_THROW(uint32_t)
8544iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8545{
8546#ifdef RT_ARCH_AMD64
8547 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
8548#elif defined(RT_ARCH_ARM64)
8549 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8550#else
8551# error "port me"
8552#endif
8553 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8554 return off;
8555}
8556
8557
8558/**
8559 * Emits a vecdst[0:255] = 0 store.
8560 */
8561DECL_FORCE_INLINE(uint32_t)
8562iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8563{
8564#ifdef RT_ARCH_AMD64
8565 /* vpxor ymm, ymm, ymm */
8566 if (iVecReg < 8)
8567 {
8568 pCodeBuf[off++] = X86_OP_VEX2;
8569 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8570 }
8571 else
8572 {
8573 pCodeBuf[off++] = X86_OP_VEX3;
8574 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
8575 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8576 }
8577 pCodeBuf[off++] = 0xef;
8578 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8579#elif defined(RT_ARCH_ARM64)
8580 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8581 Assert(!(iVecReg & 0x1));
8582 /* eor vecreg, vecreg, vecreg */
8583 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8584 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8585#else
8586# error "port me"
8587#endif
8588 return off;
8589}
8590
8591
8592/**
8593 * Emits a vecdst[0:255] = 0 store.
8594 */
8595DECL_INLINE_THROW(uint32_t)
8596iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8597{
8598#ifdef RT_ARCH_AMD64
8599 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8600#elif defined(RT_ARCH_ARM64)
8601 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
8602#else
8603# error "port me"
8604#endif
8605 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8606 return off;
8607}
8608
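/* Usage sketch (illustrative only; the register index is a placeholder): giving a register a
 * defined all-zero state before use:
 *     off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxVecReg);
 * Clearing only bits 255:128 (matching what a VEX.128 write does to the upper half) is what
 * iemNativeEmitSimdZeroVecRegHighU128() above is for.
 */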
8609
8610/**
8611 * Emits a vecdst = gprsrc broadcast, 8-bit.
8612 */
8613DECL_FORCE_INLINE(uint32_t)
8614iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8615{
8616#ifdef RT_ARCH_AMD64
8617 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE 4.1) */
8618 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8619 if (iVecRegDst >= 8 || iGprSrc >= 8)
8620 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8621 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8622 pCodeBuf[off++] = 0x0f;
8623 pCodeBuf[off++] = 0x3a;
8624 pCodeBuf[off++] = 0x20;
8625 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8626 pCodeBuf[off++] = 0x00;
8627
8628 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
8629 pCodeBuf[off++] = X86_OP_VEX3;
8630 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8631 | 0x02 /* opcode map. */
8632 | ( iVecRegDst >= 8
8633 ? 0
8634 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8635 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8636 pCodeBuf[off++] = 0x78;
8637 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8638#elif defined(RT_ARCH_ARM64)
8639 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8640 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8641
8642 /* dup vecsrc, gpr */
8643 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
8644 if (f256Bit)
8645 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
8646#else
8647# error "port me"
8648#endif
8649 return off;
8650}
8651
8652
8653/**
8654 * Emits a vecdst = gprsrc broadcast, 8-bit.
8655 */
8656DECL_INLINE_THROW(uint32_t)
8657iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8658{
8659#ifdef RT_ARCH_AMD64
8660 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8661#elif defined(RT_ARCH_ARM64)
8662 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8663#else
8664# error "port me"
8665#endif
8666 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8667 return off;
8668}
8669
8670
8671/**
8672 * Emits a vecdst = gprsrc broadcast, 16-bit.
8673 */
8674DECL_FORCE_INLINE(uint32_t)
8675iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8676{
8677#ifdef RT_ARCH_AMD64
8678 /* pinsrw vecdst, gpr, #0 */
8679 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8680 if (iVecRegDst >= 8 || iGprSrc >= 8)
8681 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8682 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8683 pCodeBuf[off++] = 0x0f;
8684 pCodeBuf[off++] = 0xc4;
8685 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8686 pCodeBuf[off++] = 0x00;
8687
8688 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
8689 pCodeBuf[off++] = X86_OP_VEX3;
8690 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8691 | 0x02 /* opcode map. */
8692 | ( iVecRegDst >= 8
8693 ? 0
8694 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8695 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8696 pCodeBuf[off++] = 0x79;
8697 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8698#elif defined(RT_ARCH_ARM64)
8699 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8700 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8701
8702 /* dup vecsrc, gpr */
8703 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
8704 if (f256Bit)
8705 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
8706#else
8707# error "port me"
8708#endif
8709 return off;
8710}
8711
8712
8713/**
8714 * Emits a vecdst = gprsrc broadcast, 16-bit.
8715 */
8716DECL_INLINE_THROW(uint32_t)
8717iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8718{
8719#ifdef RT_ARCH_AMD64
8720 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8721#elif defined(RT_ARCH_ARM64)
8722 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8723#else
8724# error "port me"
8725#endif
8726 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8727 return off;
8728}
8729
8730
8731/**
8732 * Emits a vecdst = gprsrc broadcast, 32-bit.
8733 */
8734DECL_FORCE_INLINE(uint32_t)
8735iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8736{
8737#ifdef RT_ARCH_AMD64
8738 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
8739 * vbroadcast needs a memory operand or another xmm register to work... */
8740
8741 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
8742 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8743 if (iVecRegDst >= 8 || iGprSrc >= 8)
8744 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8745 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8746 pCodeBuf[off++] = 0x0f;
8747 pCodeBuf[off++] = 0x3a;
8748 pCodeBuf[off++] = 0x22;
8749 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8750 pCodeBuf[off++] = 0x00;
8751
8752 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
8753 pCodeBuf[off++] = X86_OP_VEX3;
8754 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8755 | 0x02 /* opcode map. */
8756 | ( iVecRegDst >= 8
8757 ? 0
8758 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8759 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8760 pCodeBuf[off++] = 0x58;
8761 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8762#elif defined(RT_ARCH_ARM64)
8763 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8764 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8765
8766 /* dup vecsrc, gpr */
8767 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
8768 if (f256Bit)
8769 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
8770#else
8771# error "port me"
8772#endif
8773 return off;
8774}
8775
8776
8777/**
8778 * Emits a vecdst = gprsrc broadcast, 32-bit.
8779 */
8780DECL_INLINE_THROW(uint32_t)
8781iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8782{
8783#ifdef RT_ARCH_AMD64
8784 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8785#elif defined(RT_ARCH_ARM64)
8786 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8787#else
8788# error "port me"
8789#endif
8790 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8791 return off;
8792}
8793
8794
8795/**
8796 * Emits a vecdst = gprsrc broadcast, 64-bit.
8797 */
8798DECL_FORCE_INLINE(uint32_t)
8799iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8800{
8801#ifdef RT_ARCH_AMD64
8802 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
8803 * vbroadcast needs a memory operand or another xmm register to work... */
8804
8805 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
8806 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8807 pCodeBuf[off++] = X86_OP_REX_W
8808 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8809 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8810 pCodeBuf[off++] = 0x0f;
8811 pCodeBuf[off++] = 0x3a;
8812 pCodeBuf[off++] = 0x22;
8813 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8814 pCodeBuf[off++] = 0x00;
8815
8816 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
8817 pCodeBuf[off++] = X86_OP_VEX3;
8818 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8819 | 0x02 /* opcode map. */
8820 | ( iVecRegDst >= 8
8821 ? 0
8822 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8823 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8824 pCodeBuf[off++] = 0x59;
8825 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8826#elif defined(RT_ARCH_ARM64)
8827 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8828 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8829
8830 /* dup vecsrc, gpr */
8831 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
8832 if (f256Bit)
8833 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
8834#else
8835# error "port me"
8836#endif
8837 return off;
8838}
8839
8840
8841/**
8842 * Emits a vecdst = gprsrc broadcast, 64-bit.
8843 */
8844DECL_INLINE_THROW(uint32_t)
8845iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8846{
8847#ifdef RT_ARCH_AMD64
8848 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
8849#elif defined(RT_ARCH_ARM64)
8850 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8851#else
8852# error "port me"
8853#endif
8854 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8855 return off;
8856}
8857
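/* Usage sketch (illustrative only; register numbers are placeholders): splatting a GPR value
 * into every dword of a full 256-bit register:
 *     off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxVecReg, idxGprSrc, true /#f256Bit#/);
 * On AMD64 this is emitted as pinsrd + vpbroadcastd, on ARM64 as two dup instructions, which is
 * why the destination must be the even half of an adjacent register pair when f256Bit is set.
 */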
8858
8859/**
8860 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
8861 */
8862DECL_FORCE_INLINE(uint32_t)
8863iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8864{
8865#ifdef RT_ARCH_AMD64
8866 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
8867
8868 /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
8869 pCodeBuf[off++] = X86_OP_VEX3;
8870 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8871 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8872 pCodeBuf[off++] = 0x38;
8873 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8874 pCodeBuf[off++] = 0x01; /* Immediate */
8875#elif defined(RT_ARCH_ARM64)
8876 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8877 Assert(!(iVecRegDst & 0x1));
8878
8879 /* mov dst, src; alias for: orr dst, src, src */
8880 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
8881 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
8882#else
8883# error "port me"
8884#endif
8885 return off;
8886}
8887
8888
8889/**
8890 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
8891 */
8892DECL_INLINE_THROW(uint32_t)
8893iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8894{
8895#ifdef RT_ARCH_AMD64
8896 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
8897#elif defined(RT_ARCH_ARM64)
8898 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
8899#else
8900# error "port me"
8901#endif
8902 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8903 return off;
8904}
8905
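/* Usage sketch (illustrative only; register numbers are placeholders): replicating the low
 * 128 bits of one register into both halves of another, roughly vbroadcasti128 with a register
 * source (on ARM64 idxVecRegDst must again be even):
 *     off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxVecRegDst, idxVecRegSrc);
 */
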
8906#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8907
8908/** @} */
8909
8910#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
8911