VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h @ 103942

Last change on this file was r103942, checked in by vboxsync, 11 months ago:

VMM/IEM: Implement memory stores from SIMD registers and implement native emitters for IEM_MC_STORE_MEM_U128_ALIGN_SSE()/IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(), bugref:10614

/* $Id: IEMN8veRecompilerEmit.h 103942 2024-03-20 10:22:38Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline    Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = 0xd503201f;

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

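/*
 * Illustration: the AMD64 marker above stuffs uInfo into the disp32 field of
 * the recommended multi-byte NOP (0F 1F /0 with mod=00, r/m=101), so e.g.
 * uInfo=0x1234 disassembles as 'nop dword [rip+0x1234]' and the marker value
 * can be read straight out of the disassembly.
 */
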
/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */

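/*
 * Worked example: with iGprReg=X86_GREG_xAX, iGprBase=X86_GREG_xSP and
 * offDisp=8 the helper emits 44 24 08 - the ModRM byte (mod=01, reg=rax,
 * r/m=100), the mandatory SIB byte selecting RSP with no index, and the
 * disp8.  The caller has already emitted the REX prefix and opcode.
 */
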
/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *     - AMD64: 10 instruction bytes.
 *     - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29).  So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
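    /*
     * Worked example: uImm64=0x0000123400005678 has two non-zero half-words,
     * so the code below emits
     *     movz gpr, #0x5678             ; bits 15:0, remainder zeroed
     *     movk gpr, #0x1234, lsl #32    ; bits 47:32, the rest is kept
     * while an all-ones dominated value like 0xffffffffffff1234 takes the
     * movn path: 'movn gpr, #0xedcb' inverts the immediate and produces the
     * full value in a single instruction.
     */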
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = ( uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

    /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
     *        clang 12.x does that, only to use the 'x' version for the
     *        addressing in the following ldr. */

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *     - AMD64: 6 instruction bytes.
 *     - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}


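/*
 * Worked example: in the ARM64 path a value like 0xffffabcd is emitted as a
 * single 'movn gpr, #0x5432' (the 32-bit movn inverts the immediate, giving
 * 0xffffabcd), while 0x12345678 needs the two-instruction fallback:
 * 'movz gpr, #0x5678' followed by 'movk gpr, #0x1234, lsl #16'.
 */
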
/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

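/*
 * Note: on AMD64 the pVCpu pointer lives in RBX (IEMNATIVE_REG_FIXED_PVMCPU,
 * see the lea emitter further down), so small offsets encode compactly as
 * e.g. 'mov rax, [rbx+40h]' with a single disp8 byte, while anything at
 * offset 128 or above falls back to the four-byte disp32 form.
 */
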
#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use a temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}

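/*
 * For instance, an 8-byte field at offVCpu=0x7ff8 is within 4K*8 of the
 * VMCPU base and 8-byte aligned, so it encodes as a single scaled-uimm12
 * 'ldr Xt, [<pVCpu reg>, #0x7ff8]'; only unaligned or more distant fields
 * need the CPUMCTX rebase or the register-index fallback.
 */
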
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use a temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


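/*
 * Note: the narrow loads above and below rely on x86-64 implicitly zeroing
 * bits 63:32 when a 32-bit destination is written, so 'movzx reg32, mem16'
 * (and mem8) already clears the whole upper part of the 64-bit register;
 * no REX.W-sized encoding is needed.
 */
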
/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: @a idxTmp1 is always required!  Whether @a idxTmp2 is needed
 *       depends on whether the offset can be encoded as an immediate.  The
 *       @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same
 *       from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load of the effective address of a VCpu field into a GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offVCpu] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                         offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


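/*
 * Typical use is materializing the address of a VMCPU member so it can be
 * handed to a helper, e.g. (idxRegArg1 is a stand-in for whatever register
 * the caller has allocated; sketch only):
 *     off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxRegArg1,
 *                                     RT_UOFFSETOF(VMCPU, iem.s));
 */
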
/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


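/*
 * Usage sketch (the statistics member named here is illustrative only):
 *     off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
 *               iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomeCounter));
 * The helper asserts that the counter actually lives inside the VMCPU
 * structure before handing the offset to the emitters below.
 */
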
/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + offVCpu] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off].  (Word-sized
           accesses here: the original Ld_Dword/St_Dword would scale the
           immediate by 8 and touch 64 bits of a 32-bit member.) */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to
           16 bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to
           16 bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


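/*
 * Note: Armv8A64ConvertMask32ToImmRImmS succeeds only for masks expressible
 * as ARM64 logical immediates (rotated runs of set bits, e.g. 0x000000ff or
 * 0x00ff00ff); a pattern like 0xdeadbeef is not encodable and forces the
 * temporary-register path in the OR emitter above and the AND emitter below.
 */
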
/**
 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* and dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to
           16 bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_W | X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_W | X86_OP_REX_R;
    else
        pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst, src;   alias for: orr dst, xzr, src */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


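/*
 * Encoding example: copying r8 into rax goes through the first ternary arm
 * above (iGprDst < 8, so REX.W|REX.B) and produces 49 8B C0, i.e.
 * 'mov rax, r8'; on ARM64 the same copy is the single 'orr x0, xzr, x8'.
 */
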
/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst32, src32;   alias for: orr dst32, wzr, src32 */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movzx Gv,Ew */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb7;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* and gprdst, gprsrc, #0xffff */
# if 1
    Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
# else
    Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
1491DECL_INLINE_THROW(uint32_t)
1492iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1493{
1494#ifdef RT_ARCH_AMD64
1495 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1496#elif defined(RT_ARCH_ARM64)
1497 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1498#else
1499# error "port me"
1500#endif
1501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1502 return off;
1503}
1504
1505
1506/**
1507 * Emits a gprdst = gprsrc[7:0] load.
1508 * @note Bits 63 thru 8 are cleared.
1509 */
1510DECL_FORCE_INLINE(uint32_t)
1511iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1512{
1513#ifdef RT_ARCH_AMD64
1514 /* movzx Gv,Eb */
1515 if (iGprDst >= 8 || iGprSrc >= 8)
1516 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1517 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1518 : X86_OP_REX_R;
1519 else if (iGprSrc >= 4)
1520 pCodeBuf[off++] = X86_OP_REX;
1521 pCodeBuf[off++] = 0x0f;
1522 pCodeBuf[off++] = 0xb6;
1523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1524
1525#elif defined(RT_ARCH_ARM64)
1526 /* and gprdst, gprsrc, #0xff */
1527 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1528 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1529
1530#else
1531# error "port me"
1532#endif
1533 return off;
1534}
1535
1536
1537/**
1538 * Emits a gprdst = gprsrc[7:0] load.
1539 * @note Bits 63 thru 8 are cleared.
1540 */
1541DECL_INLINE_THROW(uint32_t)
1542iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1543{
1544#ifdef RT_ARCH_AMD64
1545 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1546#elif defined(RT_ARCH_ARM64)
1547 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1548#else
1549# error "port me"
1550#endif
1551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1552 return off;
1553}
1554
1555
1556/**
1557 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1558 * @note Bits 63 thru 8 are cleared.
1559 */
1560DECL_INLINE_THROW(uint32_t)
1561iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1562{
1563#ifdef RT_ARCH_AMD64
1564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1565
1566 /* movzx Gv,Ew */
1567 if ((iGprDst | iGprSrc) >= 8)
1568 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1569 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1570 : X86_OP_REX_R;
1571 pbCodeBuf[off++] = 0x0f;
1572 pbCodeBuf[off++] = 0xb7;
1573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1574
1575 /* shr Ev,8 */
1576 if (iGprDst >= 8)
1577 pbCodeBuf[off++] = X86_OP_REX_B;
1578 pbCodeBuf[off++] = 0xc1;
1579 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1580 pbCodeBuf[off++] = 8;
1581
1582#elif defined(RT_ARCH_ARM64)
1583 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1585 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1586
1587#else
1588# error "port me"
1589#endif
1590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1591 return off;
1592}
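
/*
 * Usage sketch (illustrative, not from the original source; the register
 * indexes are made up): fetching a guest AH-style high byte from the host
 * register shadowing RAX into another host register:
 */
#if 0
    off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, 5 /*iGprDst*/, 3 /*iGprSrc*/);
    /* Host register 5 now holds gprsrc[15:8], zero-extended to 64 bits. */
#endif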
1593
1594
1595/**
1596 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1597 */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1600{
1601#ifdef RT_ARCH_AMD64
1602 /* movsxd r64, r/m32 */
1603 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1604 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1605 pbCodeBuf[off++] = 0x63;
1606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1607
1608#elif defined(RT_ARCH_ARM64)
1609 /* sxtw dst, src */
1610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1611 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1612
1613#else
1614# error "port me"
1615#endif
1616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1617 return off;
1618}
1619
1620
1621/**
1622 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1623 */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1626{
1627#ifdef RT_ARCH_AMD64
1628 /* movsx r64, r/m16 */
1629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xbf;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635#elif defined(RT_ARCH_ARM64)
1636 /* sxth dst, src */
1637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1638 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1639
1640#else
1641# error "port me"
1642#endif
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644 return off;
1645}
1646
1647
1648/**
1649 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1650 */
1651DECL_INLINE_THROW(uint32_t)
1652iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1653{
1654#ifdef RT_ARCH_AMD64
1655 /* movsx r32, r/m16 */
1656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1657 if (iGprDst >= 8 || iGprSrc >= 8)
1658 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1659 pbCodeBuf[off++] = 0x0f;
1660 pbCodeBuf[off++] = 0xbf;
1661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 /* sxth dst32, src */
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1666 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1667
1668#else
1669# error "port me"
1670#endif
1671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1672 return off;
1673}
1674
1675
1676/**
1677 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1678 */
1679DECL_INLINE_THROW(uint32_t)
1680iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /* movsx r64, r/m8 */
1684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1686 pbCodeBuf[off++] = 0x0f;
1687 pbCodeBuf[off++] = 0xbe;
1688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1689
1690#elif defined(RT_ARCH_ARM64)
1691 /* sxtb dst, src */
1692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1693 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1694
1695#else
1696# error "port me"
1697#endif
1698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1699 return off;
1700}
1701
1702
1703/**
1704 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1705 * @note Bits 63 thru 32 are cleared.
1706 */
1707DECL_INLINE_THROW(uint32_t)
1708iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1709{
1710#ifdef RT_ARCH_AMD64
1711 /* movsx r32, r/m8 */
1712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1713 if (iGprDst >= 8 || iGprSrc >= 8)
1714 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1715 else if (iGprSrc >= 4)
1716 pbCodeBuf[off++] = X86_OP_REX;
1717 pbCodeBuf[off++] = 0x0f;
1718 pbCodeBuf[off++] = 0xbe;
1719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 /* sxtb dst32, src32 */
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1724 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1725
1726#else
1727# error "port me"
1728#endif
1729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1730 return off;
1731}
1732
1733
1734/**
1735 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1736 * @note Bits 63 thru 16 are cleared.
1737 */
1738DECL_INLINE_THROW(uint32_t)
1739iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1740{
1741#ifdef RT_ARCH_AMD64
1742 /* movsx r16, r/m8 */
1743 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1744 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1745 if (iGprDst >= 8 || iGprSrc >= 8)
1746 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1747 else if (iGprSrc >= 4)
1748 pbCodeBuf[off++] = X86_OP_REX;
1749 pbCodeBuf[off++] = 0x0f;
1750 pbCodeBuf[off++] = 0xbe;
1751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1752
1753 /* movzx r32, r/m16 */
1754 if (iGprDst >= 8)
1755 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1756 pbCodeBuf[off++] = 0x0f;
1757 pbCodeBuf[off++] = 0xb7;
1758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1759
1760#elif defined(RT_ARCH_ARM64)
1761 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1763 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1764 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1766
1767#else
1768# error "port me"
1769#endif
1770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1771 return off;
1772}
1773
1774
1775/**
1776 * Emits a gprdst = gprsrc + addend load.
1777 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1778 */
1779#ifdef RT_ARCH_AMD64
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1782 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1783{
1784 Assert(iAddend != 0);
1785
1786 /* lea gprdst, [gprsrc + iAddend] */
1787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1789 pbCodeBuf[off++] = 0x8d;
1790 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1792 return off;
1793}
1794
1795#elif defined(RT_ARCH_ARM64)
1796DECL_INLINE_THROW(uint32_t)
1797iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1798 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1799{
1800 if ((uint64_t)iAddend < 4096)
1801 {
1802 /* add dst, src, uimm12 */
1803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1805 }
1806 else if ((uint64_t)-iAddend < 4096)
1807 {
1808 /* sub dst, src, uimm12 */
1809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1811 }
1812 else
1813 {
1814 Assert(iGprSrc != iGprDst);
1815 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1818 }
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820 return off;
1821}
1822#else
1823# error "port me"
1824#endif
1825
1826/**
1827 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1828 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1829 */
1830#ifdef RT_ARCH_AMD64
1831DECL_INLINE_THROW(uint32_t)
1832iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1833 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1834#else
1835DECL_INLINE_THROW(uint32_t)
1836iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1837 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1838#endif
1839{
1840 if (iAddend != 0)
1841 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1842 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1843}
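
/*
 * The MaybeZero variant lets callers forward a computed addend without
 * special-casing zero themselves; a zero addend degenerates into a plain
 * register copy. Usage sketch (illustrative; cbToAdd is a made-up variable):
 */
#if 0
    off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off,
                                                         4 /*iGprDst*/, 2 /*iGprSrc*/, cbToAdd);
#endif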
1844
1845
1846/**
1847 * Emits a gprdst = gprsrc32 + addend load.
1848 * @note Bits 63 thru 32 are cleared.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1852 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1853{
1854 Assert(iAddend != 0);
1855
1856#ifdef RT_ARCH_AMD64
1857 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1860 if ((iGprDst | iGprSrc) >= 8)
1861 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1862 pbCodeBuf[off++] = 0x8d;
1863 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1864
1865#elif defined(RT_ARCH_ARM64)
1866 if ((uint32_t)iAddend < 4096)
1867 {
1868 /* add dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1871 }
1872 else if ((uint32_t)-iAddend < 4096)
1873 {
1874 /* sub dst, src, uimm12 */
1875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1877 }
1878 else
1879 {
1880 Assert(iGprSrc != iGprDst);
1881 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1884 }
1885
1886#else
1887# error "port me"
1888#endif
1889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1890 return off;
1891}
1892
1893
1894/**
1895 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1896 */
1897DECL_INLINE_THROW(uint32_t)
1898iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1899 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1900{
1901 if (iAddend != 0)
1902 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1903 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1904}
1905
1906
1907/**
1908 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1909 * destination.
1910 */
1911DECL_FORCE_INLINE(uint32_t)
1912iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1913{
1914#ifdef RT_ARCH_AMD64
1915 /* mov reg16, r/m16 */
1916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1917 if (idxDst >= 8 || idxSrc >= 8)
1918 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1919 pCodeBuf[off++] = 0x8b;
1920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1921
1922#elif defined(RT_ARCH_ARM64)
1923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1924 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1925
1926#else
1927# error "Port me!"
1928#endif
1929 return off;
1930}
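
/*
 * A minimal C model of the merge semantics above (a sketch, not from the
 * original source): only the low 16 bits of the destination are replaced.
 */
#if 0 /* illustrative only */
static uint64_t iemNativeExampleMergeInGpr16(uint64_t uDst, uint64_t uSrc)
{
    return (uDst & ~(uint64_t)0xffff) | (uSrc & 0xffff);
}
#endif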
1931
1932
1933/**
1934 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1935 * destination.
1936 */
1937DECL_INLINE_THROW(uint32_t)
1938iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1939{
1940#ifdef RT_ARCH_AMD64
1941 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1942#elif defined(RT_ARCH_ARM64)
1943 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1944#else
1945# error "Port me!"
1946#endif
1947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1948 return off;
1949}
1950
1951
1952#ifdef RT_ARCH_AMD64
1953/**
1954 * Common bit of iemNativeEmitLoadGprByBp and friends.
1955 */
1956DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1957 PIEMRECOMPILERSTATE pReNativeAssert)
1958{
1959 if (offDisp < 128 && offDisp >= -128)
1960 {
1961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1962 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1963 }
1964 else
1965 {
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1967 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1968 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1969 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1970 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1971 }
1972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1973 return off;
1974}
1975#elif defined(RT_ARCH_ARM64)
1976/**
1977 * Common bit of iemNativeEmitLoadGprByBp and friends.
1978 */
1979DECL_FORCE_INLINE_THROW(uint32_t)
1980iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1981 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1982{
1983 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1984 {
1985 /* str w/ unsigned imm12 (scaled) */
1986 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1988 }
1989 else if (offDisp >= -256 && offDisp <= 256)
1990 {
1991 /* stur w/ signed imm9 (unscaled) */
1992 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1993 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1994 }
1995 else
1996 {
1997 /* Use temporary indexing register. */
1998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2001 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2002 }
2003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2004 return off;
2005}
2006#endif
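
/*
 * Decision model for the three ARM64 addressing forms used above (a sketch,
 * not from the original source). Note that ldur/stur proper takes a signed
 * imm9 in the -256..255 range; the emitter's upper bound of 256 is harmless
 * because an offset of 256 is always caught by the scaled imm12 form first.
 */
#if 0 /* illustrative only */
typedef enum { EXAMPLE_UIMM12_SCALED, EXAMPLE_SIMM9_UNSCALED, EXAMPLE_REG_INDEX } EXAMPLEADDRFORM;
static EXAMPLEADDRFORM iemNativeExamplePickBpAddrForm(int32_t offDisp, unsigned cbData)
{
    if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
        return EXAMPLE_UIMM12_SCALED;   /* ldr/str Rt, [bp, #uimm12 * cbData] */
    if (offDisp >= -256 && offDisp <= 255)
        return EXAMPLE_SIMM9_UNSCALED;  /* ldur/stur Rt, [bp, #simm9] */
    return EXAMPLE_REG_INDEX;           /* materialize the offset in a temporary register */
}
#endif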
2007
2008
2009/**
2010 * Emits a 64-bit GPR load instruction with a BP relative source address.
2011 */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2014{
2015#ifdef RT_ARCH_AMD64
2016 /* mov gprdst, qword [rbp + offDisp] */
2017 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2018 if (iGprDst < 8)
2019 pbCodeBuf[off++] = X86_OP_REX_W;
2020 else
2021 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2022 pbCodeBuf[off++] = 0x8b;
2023 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2024
2025#elif defined(RT_ARCH_ARM64)
2026 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2027
2028#else
2029# error "port me"
2030#endif
2031}
2032
2033
2034/**
2035 * Emits a 32-bit GPR load instruction with a BP relative source address.
2036 * @note Bits 63 thru 32 of the GPR will be cleared.
2037 */
2038DECL_INLINE_THROW(uint32_t)
2039iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2040{
2041#ifdef RT_ARCH_AMD64
2042 /* mov gprdst, dword [rbp + offDisp] */
2043 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2044 if (iGprDst >= 8)
2045 pbCodeBuf[off++] = X86_OP_REX_R;
2046 pbCodeBuf[off++] = 0x8b;
2047 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2048
2049#elif defined(RT_ARCH_ARM64)
2050 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2051
2052#else
2053# error "port me"
2054#endif
2055}
2056
2057
2058/**
2059 * Emits a 16-bit GPR load instruction with a BP relative source address.
2060 * @note Bits 63 thru 16 of the GPR will be cleared.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2064{
2065#ifdef RT_ARCH_AMD64
2066 /* movzx gprdst, word [rbp + offDisp] */
2067 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2068 if (iGprDst >= 8)
2069 pbCodeBuf[off++] = X86_OP_REX_R;
2070 pbCodeBuf[off++] = 0x0f;
2071 pbCodeBuf[off++] = 0xb7;
2072 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2073
2074#elif defined(RT_ARCH_ARM64)
2075 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2076
2077#else
2078# error "port me"
2079#endif
2080}
2081
2082
2083/**
2084 * Emits an 8-bit GPR load instruction with a BP relative source address.
2085 * @note Bits 63 thru 8 of the GPR will be cleared.
2086 */
2087DECL_INLINE_THROW(uint32_t)
2088iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2089{
2090#ifdef RT_ARCH_AMD64
2091 /* movzx gprdst, byte [rbp + offDisp] */
2092 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2093 if (iGprDst >= 8)
2094 pbCodeBuf[off++] = X86_OP_REX_R;
2095 pbCodeBuf[off++] = 0x0f;
2096 pbCodeBuf[off++] = 0xb6;
2097 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2098
2099#elif defined(RT_ARCH_ARM64)
2100 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2101
2102#else
2103# error "port me"
2104#endif
2105}
2106
2107
2108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2109/**
2110 * Emits a 128-bit vector register load instruction with a BP relative source address.
2111 */
2112DECL_FORCE_INLINE_THROW(uint32_t)
2113iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2114{
2115#ifdef RT_ARCH_AMD64
2116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2117
2118 /* movdqu reg128, mem128 */
2119 pbCodeBuf[off++] = 0xf3;
2120 if (iVecRegDst >= 8)
2121 pbCodeBuf[off++] = X86_OP_REX_R;
2122 pbCodeBuf[off++] = 0x0f;
2123 pbCodeBuf[off++] = 0x6f;
2124 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2125#elif defined(RT_ARCH_ARM64)
2126 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2127#else
2128# error "port me"
2129#endif
2130}
2131
2132
2133/**
2134 * Emits a 256-bit vector register load instruction with a BP relative source address.
2135 */
2136DECL_FORCE_INLINE_THROW(uint32_t)
2137iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2138{
2139#ifdef RT_ARCH_AMD64
2140 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2141
2142 /* vmovdqu reg256, mem256 */
2143 pbCodeBuf[off++] = X86_OP_VEX2;
2144 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2145 pbCodeBuf[off++] = 0x6f;
2146 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2147#elif defined(RT_ARCH_ARM64)
2148 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2149 Assert(!(iVecRegDst & 0x1));
2150 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2151 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2152#else
2153# error "port me"
2154#endif
2155}
2156
2157#endif
2158
2159
2160/**
2161 * Emits a load effective address to a GPR with a BP relative source address.
2162 */
2163DECL_INLINE_THROW(uint32_t)
2164iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2165{
2166#ifdef RT_ARCH_AMD64
2167 /* lea gprdst, [rbp + offDisp] */
2168 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2169 if (iGprDst < 8)
2170 pbCodeBuf[off++] = X86_OP_REX_W;
2171 else
2172 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2173 pbCodeBuf[off++] = 0x8d;
2174 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2175
2176#elif defined(RT_ARCH_ARM64)
2177 if ((uint32_t)offDisp < (unsigned)_4K)
2178 {
2179 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2181 }
2182 else if ((uint32_t)-offDisp < (unsigned)_4K)
2183 {
2184 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2185 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2186 }
2187 else
2188 {
2189 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2190 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2191 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2192 if (offDisp >= 0)
2193 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2194 else
2195 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2196 }
2197
2198#else
2199# error "port me"
2200#endif
2201
2202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2203 return off;
2204}
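
/*
 * Usage sketch (illustrative; the frame offset and register index are made
 * up): materializing the address of a stack slot, e.g. for passing a pointer
 * to an out parameter of a helper:
 */
#if 0
    off = iemNativeEmitLeaGprByBp(pReNative, off, 2 /*iGprDst*/, -0x18 /*offDisp*/);
    /* Host register 2 now holds rbp/bp - 0x18. */
#endif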
2205
2206
2207/**
2208 * Emits a 64-bit GPR store with a BP relative destination address.
2209 *
2210 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2211 */
2212DECL_INLINE_THROW(uint32_t)
2213iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2214{
2215#ifdef RT_ARCH_AMD64
2216 /* mov qword [rbp + offDisp], gprsrc */
2217 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2218 if (iGprSrc < 8)
2219 pbCodeBuf[off++] = X86_OP_REX_W;
2220 else
2221 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2222 pbCodeBuf[off++] = 0x89;
2223 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2224
2225#elif defined(RT_ARCH_ARM64)
2226 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2227 {
2228 /* str w/ unsigned imm12 (scaled) */
2229 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2230 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2231 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2232 }
2233 else if (offDisp >= -256 && offDisp <= 256)
2234 {
2235 /* stur w/ signed imm9 (unscaled) */
2236 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2237 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2238 }
2239 else if ((uint32_t)-offDisp < (unsigned)_4K)
2240 {
2241 /* Use temporary indexing register w/ sub uimm12. */
2242 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2243 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2244 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2245 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2246 }
2247 else
2248 {
2249 /* Use temporary indexing register. */
2250 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2251 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2252 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2253 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2254 }
2255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2256 return off;
2257
2258#else
2259# error "Port me!"
2260#endif
2261}
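
/*
 * Usage sketch (illustrative; register index and frame offset are made up):
 * pairing the BP-relative store with the matching load to spill a host
 * register and restore it after some clobbering code:
 */
#if 0
    off = iemNativeEmitStoreGprByBp(pReNative, off, -0x20 /*offDisp*/, 3 /*iGprSrc*/);
    /* ... code that clobbers host register 3 ... */
    off = iemNativeEmitLoadGprByBp(pReNative, off, 3 /*iGprDst*/, -0x20 /*offDisp*/);
#endif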
2262
2263
2264/**
2265 * Emits a 64-bit immediate store with a BP relative destination address.
2266 *
2267 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2268 */
2269DECL_INLINE_THROW(uint32_t)
2270iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2271{
2272#ifdef RT_ARCH_AMD64
2273 if ((int64_t)uImm64 == (int32_t)uImm64)
2274 {
2275 /* mov qword [rbp + offDisp], imm32 - sign extended */
2276 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2277 pbCodeBuf[off++] = X86_OP_REX_W;
2278 pbCodeBuf[off++] = 0xc7;
2279 if (offDisp < 128 && offDisp >= -128)
2280 {
2281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2282 pbCodeBuf[off++] = (uint8_t)offDisp;
2283 }
2284 else
2285 {
2286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2287 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2288 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2289 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2290 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2291 }
2292 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2293 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2294 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2295 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2297 return off;
2298 }
2299#endif
2300
2301 /* Load tmp0, imm64; Store tmp to bp+disp. */
2302 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2303 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2304}
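
/*
 * The fast path above applies exactly when the constant survives a
 * sign-extending 32-bit immediate store. A minimal predicate (a sketch, not
 * from the original source):
 */
#if 0 /* illustrative only */
static bool iemNativeExampleFitsSignExtImm32(uint64_t uImm64)
{
    /* True for UINT64_C(0x7fffffff) and UINT64_C(0xffffffff80000000);
       false for UINT64_C(0x80000000), which must take the tmp-reg path. */
    return (int64_t)uImm64 == (int32_t)uImm64;
}
#endif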
2305
2306
2307#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2308/**
2309 * Emits a 128-bit vector register store with a BP relative destination address.
2310 *
2311 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2312 */
2313DECL_INLINE_THROW(uint32_t)
2314iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2315{
2316#ifdef RT_ARCH_AMD64
2317 /* movdqu [rbp + offDisp], vecsrc */
2318 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2319 pbCodeBuf[off++] = 0xf3;
2320 if (iVecRegSrc >= 8)
2321 pbCodeBuf[off++] = X86_OP_REX_R;
2322 pbCodeBuf[off++] = 0x0f;
2323 pbCodeBuf[off++] = 0x7f;
2324 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2325
2326#elif defined(RT_ARCH_ARM64)
2327 if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2328 {
2329 /* str w/ unsigned imm12 (scaled by the 16-byte access size) */
2330 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2331 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2332 ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2333 }
2334 else if (offDisp >= -256 && offDisp <= 256)
2335 {
2336 /* stur w/ signed imm9 (unscaled) */
2337 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2338 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2339 }
2340 else if ((uint32_t)-offDisp < (unsigned)_4K)
2341 {
2342 /* Use temporary indexing register w/ sub uimm12. */
2343 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2344 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2345 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2346 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2347 }
2348 else
2349 {
2350 /* Use temporary indexing register. */
2351 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2352 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2353 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2354 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2355 }
2356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2357 return off;
2358
2359#else
2360# error "Port me!"
2361#endif
2362}
2363
2364
2365/**
2366 * Emits a 256-bit vector register store with a BP relative destination address.
2367 *
2368 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2369 */
2370DECL_INLINE_THROW(uint32_t)
2371iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2372{
2373#ifdef RT_ARCH_AMD64
2374 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2375
2376 /* vmovdqu mem256, reg256 */
2377 pbCodeBuf[off++] = X86_OP_VEX2;
2378 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2379 pbCodeBuf[off++] = 0x7f;
2380 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2381#elif defined(RT_ARCH_ARM64)
2382 Assert(!(iVecRegSrc & 0x1));
2383 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2384 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2385#else
2386# error "Port me!"
2387#endif
2388}
2389#endif
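
/*
 * Usage sketch (illustrative; register index and offset are made up, and
 * IEMNATIVE_WITH_SIMD_REG_ALLOCATOR must be defined): spilling a 128-bit
 * host SIMD register to the frame and restoring it afterwards:
 */
#if 0
    off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, -0x40 /*offDisp*/, 1 /*iVecRegSrc*/);
    /* ... code that clobbers host vector register 1 ... */
    off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, 1 /*iVecRegDst*/, -0x40 /*offDisp*/);
#endif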
2390
2391#if defined(RT_ARCH_ARM64)
2392
2393/**
2394 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2395 *
2396 * @note Odd and large @a offDisp values require a temporary, unless it's a
2397 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2398 * caller does not heed this.
2399 *
2400 * @note DON'T try this with prefetch.
2401 */
2402DECL_FORCE_INLINE_THROW(uint32_t)
2403iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2404 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2405{
2406 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2407 {
2408 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2409 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2410 }
2411 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2412 && iGprReg != iGprBase)
2413 || iGprTmp != UINT8_MAX)
2414 {
2415 /* The offset is too large, so we must load it into a register and use
2416 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2417 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2418 if (iGprTmp == UINT8_MAX)
2419 iGprTmp = iGprReg;
2420 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2421 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2422 }
2423 else
2424# ifdef IEM_WITH_THROW_CATCH
2425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2426# else
2427 AssertReleaseFailedStmt(off = UINT32_MAX);
2428# endif
2429 return off;
2430}
2431
2432/**
2433 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2434 */
2435DECL_FORCE_INLINE_THROW(uint32_t)
2436iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2437 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2438{
2439 /*
2440 * There are a couple of ldr variants that take an immediate offset, so
2441 * try to use those if we can; otherwise we have to use a temporary register
2442 * to help with the addressing.
2443 */
2444 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2445 {
2446 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2447 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2448 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2449 }
2450 else
2451 {
2452 /* The offset is too large, so we must load it into a register and use
2453 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2454 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2455 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2456
2457 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2458 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2459
2460 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2461 }
2462 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2463 return off;
2464}
2465
2466# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2467/**
2468 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2469 *
2470 * @note Odd and large @a offDisp values require a temporary (@a iGprTmp),
2471 * as a vector register cannot double as the address scratch register.
2472 * Will assert / throw if the caller does not heed this.
2473 *
2474 * @note DON'T try this with prefetch.
2475 */
2476DECL_FORCE_INLINE_THROW(uint32_t)
2477iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2478 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2479{
2480 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2481 {
2482 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2483 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2484 }
2485 else if (iGprTmp != UINT8_MAX)
2487 {
2488 /* The offset is too large, so we must load it into a register and use
2489 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2490 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2491 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2492 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2493 }
2494 else
2495# ifdef IEM_WITH_THROW_CATCH
2496 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2497# else
2498 AssertReleaseFailedStmt(off = UINT32_MAX);
2499# endif
2500 return off;
2501}
2502# endif
2503
2504
2505/**
2506 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2507 */
2508DECL_FORCE_INLINE_THROW(uint32_t)
2509iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2510 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2511{
2512 /*
2513 * There are a couple of ldr variants that take an immediate offset, so
2514 * try to use those if we can; otherwise we have to use a temporary register
2515 * to help with the addressing.
2516 */
2517 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2518 {
2519 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2520 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2521 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2522 }
2523 else
2524 {
2525 /* The offset is too large, so we must load it into a register and use
2526 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2527 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2528 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2529
2530 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2531 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2532
2533 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2534 }
2535 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2536 return off;
2537}
2538#endif /* RT_ARCH_ARM64 */
2539
2540/**
2541 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2542 *
2543 * @note ARM64: Misaligned @a offDisp values and values not in the
2544 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2545 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2546 * does not heed this.
2547 */
2548DECL_FORCE_INLINE_THROW(uint32_t)
2549iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2550 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2551{
2552#ifdef RT_ARCH_AMD64
2553 /* mov reg64, mem64 */
2554 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2555 pCodeBuf[off++] = 0x8b;
2556 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2557 RT_NOREF(iGprTmp);
2558
2559#elif defined(RT_ARCH_ARM64)
2560 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2561 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2562
2563#else
2564# error "port me"
2565#endif
2566 return off;
2567}
2568
2569
2570/**
2571 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2572 */
2573DECL_INLINE_THROW(uint32_t)
2574iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2575{
2576#ifdef RT_ARCH_AMD64
2577 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2578 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2579
2580#elif defined(RT_ARCH_ARM64)
2581 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2582
2583#else
2584# error "port me"
2585#endif
2586 return off;
2587}
2588
2589
2590/**
2591 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2592 *
2593 * @note ARM64: Misaligned @a offDisp values and values not in the
2594 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2595 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2596 * caller does not heed this.
2597 *
2598 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2599 */
2600DECL_FORCE_INLINE_THROW(uint32_t)
2601iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2602 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2603{
2604#ifdef RT_ARCH_AMD64
2605 /* mov reg32, mem32 */
2606 if (iGprDst >= 8 || iGprBase >= 8)
2607 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2608 pCodeBuf[off++] = 0x8b;
2609 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2610 RT_NOREF(iGprTmp);
2611
2612#elif defined(RT_ARCH_ARM64)
2613 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2614 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2615
2616#else
2617# error "port me"
2618#endif
2619 return off;
2620}
2621
2622
2623/**
2624 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2625 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2626 */
2627DECL_INLINE_THROW(uint32_t)
2628iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2629{
2630#ifdef RT_ARCH_AMD64
2631 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2633
2634#elif defined(RT_ARCH_ARM64)
2635 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2636
2637#else
2638# error "port me"
2639#endif
2640 return off;
2641}
2642
2643
2644/**
2645 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2646 * sign-extending the value to 64 bits.
2647 *
2648 * @note ARM64: Misaligned @a offDisp values and values not in the
2649 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2650 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2651 * caller does not heed this.
2652 */
2653DECL_FORCE_INLINE_THROW(uint32_t)
2654iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2655 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2656{
2657#ifdef RT_ARCH_AMD64
2658 /* movsxd reg64, mem32 */
2659 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2660 pCodeBuf[off++] = 0x63;
2661 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2662 RT_NOREF(iGprTmp);
2663
2664#elif defined(RT_ARCH_ARM64)
2665 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2666 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2667
2668#else
2669# error "port me"
2670#endif
2671 return off;
2672}
2673
2674
2675/**
2676 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2677 *
2678 * @note ARM64: Misaligned @a offDisp values and values not in the
2679 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2680 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2681 * caller does not heed this.
2682 *
2683 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2684 */
2685DECL_FORCE_INLINE_THROW(uint32_t)
2686iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2687 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2688{
2689#ifdef RT_ARCH_AMD64
2690 /* movzx reg32, mem16 */
2691 if (iGprDst >= 8 || iGprBase >= 8)
2692 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2693 pCodeBuf[off++] = 0x0f;
2694 pCodeBuf[off++] = 0xb7;
2695 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2696 RT_NOREF(iGprTmp);
2697
2698#elif defined(RT_ARCH_ARM64)
2699 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2700 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2701
2702#else
2703# error "port me"
2704#endif
2705 return off;
2706}
2707
2708
2709/**
2710 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2711 * sign-extending the value to 64 bits.
2712 *
2713 * @note ARM64: Misaligned @a offDisp values and values not in the
2714 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2715 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2716 * caller does not heed this.
2717 */
2718DECL_FORCE_INLINE_THROW(uint32_t)
2719iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2720 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2721{
2722#ifdef RT_ARCH_AMD64
2723 /* movsx reg64, mem16 */
2724 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2725 pCodeBuf[off++] = 0x0f;
2726 pCodeBuf[off++] = 0xbf;
2727 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2728 RT_NOREF(iGprTmp);
2729
2730#elif defined(RT_ARCH_ARM64)
2731 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2732 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2733
2734#else
2735# error "port me"
2736#endif
2737 return off;
2738}
2739
2740
2741/**
2742 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2743 * sign-extending the value to 32 bits.
2744 *
2745 * @note ARM64: Misaligned @a offDisp values and values not in the
2746 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2747 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2748 * caller does not heed this.
2749 *
2750 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2751 */
2752DECL_FORCE_INLINE_THROW(uint32_t)
2753iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2754 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2755{
2756#ifdef RT_ARCH_AMD64
2757 /* movsx reg32, mem16 */
2758 if (iGprDst >= 8 || iGprBase >= 8)
2759 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2760 pCodeBuf[off++] = 0x0f;
2761 pCodeBuf[off++] = 0xbf;
2762 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2763 RT_NOREF(iGprTmp);
2764
2765#elif defined(RT_ARCH_ARM64)
2766 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2767 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2768
2769#else
2770# error "port me"
2771#endif
2772 return off;
2773}
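
/*
 * A minimal C model of the three 16-bit load flavours above (a sketch, not
 * from the original source), showing the resulting 64-bit register values:
 */
#if 0 /* illustrative only */
static void iemNativeExampleLoad16Flavours(uint16_t uMem, uint64_t *puZx, uint64_t *puSx64, uint64_t *puSx32)
{
    *puZx   = uMem;                             /* movzx / ldrh:        0x8001 -> 0x0000000000008001 */
    *puSx64 = (uint64_t)(int64_t)(int16_t)uMem; /* movsx r64 / ldrsh x: 0x8001 -> 0xffffffffffff8001 */
    *puSx32 = (uint32_t)(int32_t)(int16_t)uMem; /* movsx r32 / ldrsh w: 0x8001 -> 0x00000000ffff8001 */
}
#endif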
2774
2775
2776/**
2777 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2778 *
2779 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2780 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2781 * same. Will assert / throw if caller does not heed this.
2782 *
2783 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2784 */
2785DECL_FORCE_INLINE_THROW(uint32_t)
2786iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2787 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2788{
2789#ifdef RT_ARCH_AMD64
2790 /* movzx reg32, mem8 */
2791 if (iGprDst >= 8 || iGprBase >= 8)
2792 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2793 pCodeBuf[off++] = 0x0f;
2794 pCodeBuf[off++] = 0xb6;
2795 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2796 RT_NOREF(iGprTmp);
2797
2798#elif defined(RT_ARCH_ARM64)
2799 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2800 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2801
2802#else
2803# error "port me"
2804#endif
2805 return off;
2806}
2807
2808
2809/**
2810 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2811 * sign-extending the value to 64 bits.
2812 *
2813 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2814 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2815 * same. Will assert / throw if caller does not heed this.
2816 */
2817DECL_FORCE_INLINE_THROW(uint32_t)
2818iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2819 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2820{
2821#ifdef RT_ARCH_AMD64
2822 /* movsx reg64, mem8 */
2823 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2824 pCodeBuf[off++] = 0x0f;
2825 pCodeBuf[off++] = 0xbe;
2826 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2827 RT_NOREF(iGprTmp);
2828
2829#elif defined(RT_ARCH_ARM64)
2830 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2831 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2832
2833#else
2834# error "port me"
2835#endif
2836 return off;
2837}
2838
2839
2840/**
2841 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2842 * sign-extending the value to 32 bits.
2843 *
2844 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2845 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2846 * same. Will assert / throw if caller does not heed this.
2847 *
2848 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2849 */
2850DECL_FORCE_INLINE_THROW(uint32_t)
2851iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2852 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2853{
2854#ifdef RT_ARCH_AMD64
2855 /* movsx reg32, mem8 */
2856 if (iGprDst >= 8 || iGprBase >= 8)
2857 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2858 pCodeBuf[off++] = 0x0f;
2859 pCodeBuf[off++] = 0xbe;
2860 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2861 RT_NOREF(iGprTmp);
2862
2863#elif defined(RT_ARCH_ARM64)
2864 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2865 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2866
2867#else
2868# error "port me"
2869#endif
2870 return off;
2871}
2872
2873
2874/**
2875 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2876 * sign-extending the value to 16 bits.
2877 *
2878 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2879 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2880 * same. Will assert / throw if caller does not heed this.
2881 *
2882 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2883 */
2884DECL_FORCE_INLINE_THROW(uint32_t)
2885iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2886 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2887{
2888#ifdef RT_ARCH_AMD64
2889 /* movsx reg32, mem8 */
2890 if (iGprDst >= 8 || iGprBase >= 8)
2891 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2892 pCodeBuf[off++] = 0x0f;
2893 pCodeBuf[off++] = 0xbe;
2894 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2895# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2896 /* and reg32, 0xffff */
2897 if (iGprDst >= 8)
2898 pCodeBuf[off++] = X86_OP_REX_B;
2899 pCodeBuf[off++] = 0x81;
2900 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2901 pCodeBuf[off++] = 0xff;
2902 pCodeBuf[off++] = 0xff;
2903 pCodeBuf[off++] = 0;
2904 pCodeBuf[off++] = 0;
2905# else
2906 /* movzx reg32, reg16 */
2907 if (iGprDst >= 8)
2908 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2909 pCodeBuf[off++] = 0x0f;
2910 pCodeBuf[off++] = 0xb7;
2911 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2912# endif
2913 RT_NOREF(iGprTmp);
2914
2915#elif defined(RT_ARCH_ARM64)
2916 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2917 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2918 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2919 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
2920
2921#else
2922# error "port me"
2923#endif
2924 return off;
2925}
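
/*
 * A minimal C model of the sign-extend-to-16 load above (a sketch, not from
 * the original source): the byte is sign-extended into bits 15:0 and
 * everything above bit 15 is cleared.
 */
#if 0 /* illustrative only */
static uint64_t iemNativeExampleSx8To16(uint8_t bMem)
{
    return (uint16_t)(int16_t)(int8_t)bMem; /* 0x80 -> 0x000000000000ff80 */
}
#endif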
2926
2927
2928#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2929/**
2930 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2931 *
2932 * @note ARM64: Misaligned @a offDisp values and values not in the
2933 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2934 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2935 * does not heed this.
2936 */
2937DECL_FORCE_INLINE_THROW(uint32_t)
2938iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2939 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2940{
2941#ifdef RT_ARCH_AMD64
2942 /* movdqu reg128, mem128 */
2943 pCodeBuf[off++] = 0xf3;
2944 if (iVecRegDst >= 8 || iGprBase >= 8)
2945 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2946 pCodeBuf[off++] = 0x0f;
2947 pCodeBuf[off++] = 0x6f;
2948 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
2949 RT_NOREF(iGprTmp);
2950
2951#elif defined(RT_ARCH_ARM64)
2952 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
2953 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
2954
2955#else
2956# error "port me"
2957#endif
2958 return off;
2959}
2960
2961
2962/**
2963 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2964 */
2965DECL_INLINE_THROW(uint32_t)
2966iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
2967{
2968#ifdef RT_ARCH_AMD64
2969 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
2970 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2971
2972#elif defined(RT_ARCH_ARM64)
2973 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2974
2975#else
2976# error "port me"
2977#endif
2978 return off;
2979}
2980
2981
2982/**
2983 * Emits a 256-bit vector register load via a GPR base address with a displacement.
2984 *
2985 * @note ARM64: Misaligned @a offDisp values and values not in the
2986 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2987 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2988 * does not heed this.
2989 */
2990DECL_FORCE_INLINE_THROW(uint32_t)
2991iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2992 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2993{
2994#ifdef RT_ARCH_AMD64
2995 /* vmovdqu reg256, mem256 */
2996 pCodeBuf[off++] = X86_OP_VEX3;
2997 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
2998 | X86_OP_VEX3_BYTE1_X
2999 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3000 | UINT8_C(0x01);
3001 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3002 pCodeBuf[off++] = 0x6f;
3003 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3004 RT_NOREF(iGprTmp);
3005
3006#elif defined(RT_ARCH_ARM64)
3007 Assert(!(iVecRegDst & 0x1));
3008 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3009 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3010 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3011 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3012#else
3013# error "port me"
3014#endif
3015 return off;
3016}
3017
3018
3019/**
3020 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3021 */
3022DECL_INLINE_THROW(uint32_t)
3023iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3024{
3025#ifdef RT_ARCH_AMD64
3026    off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3028
3029#elif defined(RT_ARCH_ARM64)
3030 Assert(!(iVecRegDst & 0x1));
3031 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3032 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3033 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3034 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3035
3036#else
3037# error "port me"
3038#endif
3039 return off;
3040}
3041#endif
3042
3043
3044/**
3045 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3046 *
3047 * @note ARM64: Misaligned @a offDisp values and values not in the
3048 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3049 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3050 * does not heed this.
3051 */
3052DECL_FORCE_INLINE_THROW(uint32_t)
3053iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3054 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3055{
3056#ifdef RT_ARCH_AMD64
3057 /* mov mem64, reg64 */
3058 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3059 pCodeBuf[off++] = 0x89;
3060 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3061 RT_NOREF(iGprTmp);
3062
3063#elif defined(RT_ARCH_ARM64)
3064 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3065 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3066
3067#else
3068# error "port me"
3069#endif
3070 return off;
3071}
3072
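/* A purely illustrative sketch of the contract above; the host register
 * indices (3, 4, 5) are made up and pCodeBuf/off must come from a real
 * iemNativeInstrBufEnsure() call: */
#if 0
/* Aligned displacement within the encodable range: no temporary needed. */
off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, 3 /*iGprSrc*/, 4 /*iGprBase*/, 0x40);
/* Large displacement: pass a temporary so the ARM64 path can always build the address. */
off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, 3 /*iGprSrc*/, 4 /*iGprBase*/, 0x10000, 5 /*iGprTmp*/);
#endif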
3073
3074/**
3075 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3076 *
3077 * @note ARM64: Misaligned @a offDisp values and values not in the
3078 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3079 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3080 * does not heed this.
3081 */
3082DECL_FORCE_INLINE_THROW(uint32_t)
3083iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3084 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3085{
3086#ifdef RT_ARCH_AMD64
3087 /* mov mem32, reg32 */
3088 if (iGprSrc >= 8 || iGprBase >= 8)
3089 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3090 pCodeBuf[off++] = 0x89;
3091 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3092 RT_NOREF(iGprTmp);
3093
3094#elif defined(RT_ARCH_ARM64)
3095 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3096 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3097
3098#else
3099# error "port me"
3100#endif
3101 return off;
3102}
3103
3104
3105/**
3106 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3107 *
3108 * @note ARM64: Misaligned @a offDisp values and values not in the
3109 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3110 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3111 * does not heed this.
3112 */
3113DECL_FORCE_INLINE_THROW(uint32_t)
3114iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3115 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3116{
3117#ifdef RT_ARCH_AMD64
3118 /* mov mem16, reg16 */
3119 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3120 if (iGprSrc >= 8 || iGprBase >= 8)
3121 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3122 pCodeBuf[off++] = 0x89;
3123 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3124 RT_NOREF(iGprTmp);
3125
3126#elif defined(RT_ARCH_ARM64)
3127 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3128 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3129
3130#else
3131# error "port me"
3132#endif
3133 return off;
3134}
3135
3136
3137/**
3138 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3139 *
3140 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3141 * temporary register (@a iGprTmp) if @a iGprSrc and @a iGprBase are the
3142 * same. Will assert / throw if caller does not heed this.
3143 */
3144DECL_FORCE_INLINE_THROW(uint32_t)
3145iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3146 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3147{
3148#ifdef RT_ARCH_AMD64
3149 /* mov mem8, reg8 */
3150 if (iGprSrc >= 8 || iGprBase >= 8)
3151 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3152 else if (iGprSrc >= 4)
3153 pCodeBuf[off++] = X86_OP_REX;
3154 pCodeBuf[off++] = 0x88;
3155 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3156 RT_NOREF(iGprTmp);
3157
3158#elif defined(RT_ARCH_ARM64)
3159 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3160 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3161
3162#else
3163# error "port me"
3164#endif
3165 return off;
3166}
3167
3168
3169/**
3170 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3171 *
3172 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0),
3173 * on AMD64 it depends on the immediate value.
3174 *
3175 * @note ARM64: Misaligned @a offDisp values and values not in the
3176 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3177 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3178 * does not heed this.
3179 */
3180DECL_FORCE_INLINE_THROW(uint32_t)
3181iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3182 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3183{
3184#ifdef RT_ARCH_AMD64
3185 if ((int32_t)uImm == (int64_t)uImm)
3186 {
3187 /* mov mem64, imm32 (sign-extended) */
3188 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3189 pCodeBuf[off++] = 0xc7;
3190 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3191 pCodeBuf[off++] = RT_BYTE1(uImm);
3192 pCodeBuf[off++] = RT_BYTE2(uImm);
3193 pCodeBuf[off++] = RT_BYTE3(uImm);
3194 pCodeBuf[off++] = RT_BYTE4(uImm);
3195 }
3196 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3197 {
3198 /* require temporary register. */
3199 if (iGprImmTmp == UINT8_MAX)
3200 iGprImmTmp = iGprTmp;
3201 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3202 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3203 }
3204 else
3205# ifdef IEM_WITH_THROW_CATCH
3206 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3207# else
3208 AssertReleaseFailedStmt(off = UINT32_MAX);
3209# endif
3210
3211#elif defined(RT_ARCH_ARM64)
3212 if (uImm == 0)
3213 iGprImmTmp = ARMV8_A64_REG_XZR;
3214 else
3215 {
3216 Assert(iGprImmTmp < 31);
3217 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3218 }
3219 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3220
3221#else
3222# error "port me"
3223#endif
3224 return off;
3225}
3226
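/* Illustrative only (register indices made up): the immediate value decides
 * what the emitter needs.  A constant that fits a sign-extended imm32 needs
 * no temporary on AMD64 but ARM64 still wants @a iGprImmTmp, while zero is
 * free on ARM64 because XZR is used: */
#if 0
off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0xffffffffffff8000), 4 /*iGprBase*/, 5 /*iGprImmTmp*/);
off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, 0 /*uImm*/, 4 /*iGprBase*/);
#endif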
3227
3228/**
3229 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3230 *
3231 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3232 *
3233 * @note ARM64: Misaligned @a offDisp values and values not in the
3234 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3235 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3236 * does not heed this.
3237 */
3238DECL_FORCE_INLINE_THROW(uint32_t)
3239iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3240 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3241{
3242#ifdef RT_ARCH_AMD64
3243 /* mov mem32, imm32 */
3244 if (iGprBase >= 8)
3245 pCodeBuf[off++] = X86_OP_REX_B;
3246 pCodeBuf[off++] = 0xc7;
3247 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3248 pCodeBuf[off++] = RT_BYTE1(uImm);
3249 pCodeBuf[off++] = RT_BYTE2(uImm);
3250 pCodeBuf[off++] = RT_BYTE3(uImm);
3251 pCodeBuf[off++] = RT_BYTE4(uImm);
3252 RT_NOREF(iGprImmTmp, iGprTmp);
3253
3254#elif defined(RT_ARCH_ARM64)
3256 if (uImm == 0)
3257 iGprImmTmp = ARMV8_A64_REG_XZR;
3258 else
3259 {
3260 Assert(iGprImmTmp < 31);
3261 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3262 }
3263 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3264 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3265
3266#else
3267# error "port me"
3268#endif
3269 return off;
3270}
3271
3272
3273/**
3274 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3275 *
3276 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3277 *
3278 * @note ARM64: Misaligned @a offDisp values and values not in the
3279 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3280 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3281 * does not heed this.
3282 */
3283DECL_FORCE_INLINE_THROW(uint32_t)
3284iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3285 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3286{
3287#ifdef RT_ARCH_AMD64
3288 /* mov mem16, imm16 */
3289 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3290 if (iGprBase >= 8)
3291 pCodeBuf[off++] = X86_OP_REX_B;
3292 pCodeBuf[off++] = 0xc7;
3293 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3294 pCodeBuf[off++] = RT_BYTE1(uImm);
3295 pCodeBuf[off++] = RT_BYTE2(uImm);
3296 RT_NOREF(iGprImmTmp, iGprTmp);
3297
3298#elif defined(RT_ARCH_ARM64)
3299 if (uImm == 0)
3300 iGprImmTmp = ARMV8_A64_REG_XZR;
3301 else
3302 {
3303 Assert(iGprImmTmp < 31);
3304 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3305 }
3306 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3307 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3308
3309#else
3310# error "port me"
3311#endif
3312 return off;
3313}
3314
3315
3316/**
3317 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3318 *
3319 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3320 *
3321 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3322 * temporary register (@a iGprTmp) if @a iGprImmTmp and @a iGprBase are the
3323 * same. Will assert / throw if caller does not heed this.
3324 */
3325DECL_FORCE_INLINE_THROW(uint32_t)
3326iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3327 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3328{
3329#ifdef RT_ARCH_AMD64
3330 /* mov mem8, imm8 */
3332 if (iGprBase >= 8)
3333 pCodeBuf[off++] = X86_OP_REX_B;
3334 pCodeBuf[off++] = 0xc6;
3335 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3336 pCodeBuf[off++] = uImm;
3337 RT_NOREF(iGprImmTmp, iGprTmp);
3338
3339#elif defined(RT_ARCH_ARM64)
3340 if (uImm == 0)
3341 iGprImmTmp = ARMV8_A64_REG_XZR;
3342 else
3343 {
3344 Assert(iGprImmTmp < 31);
3345 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3346 }
3347 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3348 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3349
3350#else
3351# error "port me"
3352#endif
3353 return off;
3354}
3355
3356
3357#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3358/**
3359 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3360 *
3361 * @note ARM64: Misaligned @a offDisp values and values not in the
3362 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3363 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3364 * does not heed this.
3365 */
3366DECL_FORCE_INLINE_THROW(uint32_t)
3367iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3368 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3369{
3370#ifdef RT_ARCH_AMD64
3371 /* movdqu mem128, reg128 */
3372 pCodeBuf[off++] = 0xf3;
3373 if (iVecRegDst >= 8 || iGprBase >= 8)
3374 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3375 pCodeBuf[off++] = 0x0f;
3376 pCodeBuf[off++] = 0x7f;
3377 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3378 RT_NOREF(iGprTmp);
3379
3380#elif defined(RT_ARCH_ARM64)
3381 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3382 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3383
3384#else
3385# error "port me"
3386#endif
3387 return off;
3388}
3389
3390
3391/**
3392 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3393 */
3394DECL_INLINE_THROW(uint32_t)
3395iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3396{
3397#ifdef RT_ARCH_AMD64
3398 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3400
3401#elif defined(RT_ARCH_ARM64)
3402 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3403
3404#else
3405# error "port me"
3406#endif
3407 return off;
3408}
3409
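/* Illustrative sketch (register indices made up): copying 16 bytes between
 * two addresses held in host GPRs, bounced through host vector register 0: */
#if 0
off = iemNativeEmitLoadVecRegByGprU128(pReNative, off, 0 /*iVecReg*/, 2 /*iGprBase*/, 0 /*offDisp*/);
off = iemNativeEmitStoreVecRegByGprU128(pReNative, off, 0 /*iVecReg*/, 3 /*iGprBase*/, 0 /*offDisp*/);
#endif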
3410
3411/**
3412 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3413 *
3414 * @note ARM64: Misaligned @a offDisp values and values not in the
3415 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3416 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3417 * does not heed this.
3418 */
3419DECL_FORCE_INLINE_THROW(uint32_t)
3420iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3421 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3422{
3423#ifdef RT_ARCH_AMD64
3424    /* vmovdqu mem256, reg256 */
3425    AssertFailed();
3426 pCodeBuf[off++] = X86_OP_VEX3;
3427 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3428 | X86_OP_VEX3_BYTE1_X
3429 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3430 | UINT8_C(0x01);
3431 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3432 pCodeBuf[off++] = 0x7f;
3433 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3434 RT_NOREF(iGprTmp);
3435
3436#elif defined(RT_ARCH_ARM64)
3437 Assert(!(iVecRegDst & 0x1));
3438    off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3439                                         kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3440    off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3441                                         kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3442#else
3443# error "port me"
3444#endif
3445 return off;
3446}
3447
3448
3449/**
3450 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3451 */
3452DECL_INLINE_THROW(uint32_t)
3453iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3454{
3455#ifdef RT_ARCH_AMD64
3456 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3457 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3458
3459#elif defined(RT_ARCH_ARM64)
3460 Assert(!(iVecRegDst & 0x1));
3461 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3462 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3463 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3464 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3465
3466#else
3467# error "port me"
3468#endif
3469 return off;
3470}
3471#endif
3472
3473
3474
3475/*********************************************************************************************************************************
3476* Subtraction and Additions *
3477*********************************************************************************************************************************/
3478
3479/**
3480 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3481 * @note The AMD64 version sets flags.
3482 */
3483DECL_INLINE_THROW(uint32_t)
3484iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3485{
3486#if defined(RT_ARCH_AMD64)
3487 /* sub Gv,Ev */
3488 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3489 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3490 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3491 pbCodeBuf[off++] = 0x2b;
3492 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3493
3494#elif defined(RT_ARCH_ARM64)
3495 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3496 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3497
3498#else
3499# error "Port me"
3500#endif
3501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3502 return off;
3503}
3504
3505
3506/**
3507 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3508 * @note The AMD64 version sets flags.
3509 */
3510DECL_FORCE_INLINE(uint32_t)
3511iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3512{
3513#if defined(RT_ARCH_AMD64)
3514 /* sub Gv,Ev */
3515 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3516 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3517 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3518 pCodeBuf[off++] = 0x2b;
3519 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3520
3521#elif defined(RT_ARCH_ARM64)
3522 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3523
3524#else
3525# error "Port me"
3526#endif
3527 return off;
3528}
3529
3530
3531/**
3532 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3533 * @note The AMD64 version sets flags.
3534 */
3535DECL_INLINE_THROW(uint32_t)
3536iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3537{
3538#if defined(RT_ARCH_AMD64)
3539 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3540#elif defined(RT_ARCH_ARM64)
3541 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3542#else
3543# error "Port me"
3544#endif
3545 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3546 return off;
3547}
3548
3549
3550/**
3551 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3552 *
3553 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3554 *
3555 * @note Larger constants will require a temporary register. Failing to specify
3556 * one when needed will trigger fatal assertion / throw.
3557 */
3558DECL_FORCE_INLINE_THROW(uint32_t)
3559iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3560 uint8_t iGprTmp = UINT8_MAX)
3561{
3562#ifdef RT_ARCH_AMD64
3563 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3564 if (iSubtrahend == 1)
3565 {
3566 /* dec r/m64 */
3567 pCodeBuf[off++] = 0xff;
3568 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3569 }
3570 else if (iSubtrahend == -1)
3571 {
3572 /* inc r/m64 */
3573 pCodeBuf[off++] = 0xff;
3574 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3575 }
3576 else if ((int8_t)iSubtrahend == iSubtrahend)
3577 {
3578 /* sub r/m64, imm8 */
3579 pCodeBuf[off++] = 0x83;
3580 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3581 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3582 }
3583 else if ((int32_t)iSubtrahend == iSubtrahend)
3584 {
3585 /* sub r/m64, imm32 */
3586 pCodeBuf[off++] = 0x81;
3587 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3588 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3589 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3590 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3591 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3592 }
3593 else if (iGprTmp != UINT8_MAX)
3594 {
3595        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend); /* (the -1 backs up over the REX.W prefix emitted at the top) */
3596 /* sub r/m64, r64 */
3597 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3598 pCodeBuf[off++] = 0x29;
3599 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3600 }
3601 else
3602# ifdef IEM_WITH_THROW_CATCH
3603 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3604# else
3605 AssertReleaseFailedStmt(off = UINT32_MAX);
3606# endif
3607
3608#elif defined(RT_ARCH_ARM64)
3609 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3610 if (uAbsSubtrahend < 4096)
3611 {
3612 if (iSubtrahend >= 0)
3613 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3614 else
3615 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3616 }
3617 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3618 {
3619 if (iSubtrahend >= 0)
3620 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3621 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3622 else
3623 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3624 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3625 }
3626 else if (iGprTmp != UINT8_MAX)
3627 {
3628 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3629 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3630 }
3631 else
3632# ifdef IEM_WITH_THROW_CATCH
3633 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3634# else
3635 AssertReleaseFailedStmt(off = UINT32_MAX);
3636# endif
3637
3638#else
3639# error "Port me"
3640#endif
3641 return off;
3642}
3643
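/* How the cases above play out for a few sample constants (illustrative):
 *      iSubtrahend = 1        -> AMD64 'dec', ARM64 'sub #1'
 *      iSubtrahend = 0x5000   -> AMD64 'sub imm32', ARM64 'sub #5, lsl #12'
 *      iSubtrahend = 0x123456 -> AMD64 'sub imm32', but ARM64 must load the
 *                                constant into @a iGprTmp first, so one is
 *                                required there. */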
3644
3645/**
3646 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3647 *
3648 * @note Larger constants will require a temporary register. Failing to specify
3649 * one when needed will trigger fatal assertion / throw.
3650 */
3651DECL_INLINE_THROW(uint32_t)
3652iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3653 uint8_t iGprTmp = UINT8_MAX)
3654
3655{
3656#ifdef RT_ARCH_AMD64
3657 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3658#elif defined(RT_ARCH_ARM64)
3659 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3660#else
3661# error "Port me"
3662#endif
3663 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3664 return off;
3665}
3666
3667
3668/**
3669 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3670 *
3671 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3672 *
3673 * @note ARM64: Larger constants will require a temporary register. Failing to
3674 * specify one when needed will trigger fatal assertion / throw.
3675 */
3676DECL_FORCE_INLINE_THROW(uint32_t)
3677iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3678 uint8_t iGprTmp = UINT8_MAX)
3679{
3680#ifdef RT_ARCH_AMD64
3681 if (iGprDst >= 8)
3682 pCodeBuf[off++] = X86_OP_REX_B;
3683 if (iSubtrahend == 1)
3684 {
3685 /* dec r/m32 */
3686 pCodeBuf[off++] = 0xff;
3687 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3688 }
3689 else if (iSubtrahend == -1)
3690 {
3691 /* inc r/m32 */
3692 pCodeBuf[off++] = 0xff;
3693 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3694 }
3695 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3696 {
3697 /* sub r/m32, imm8 */
3698 pCodeBuf[off++] = 0x83;
3699 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3700 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3701 }
3702 else
3703 {
3704 /* sub r/m32, imm32 */
3705 pCodeBuf[off++] = 0x81;
3706 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3707 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3708 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3709 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3710 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3711 }
3712 RT_NOREF(iGprTmp);
3713
3714#elif defined(RT_ARCH_ARM64)
3715 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3716 if (uAbsSubtrahend < 4096)
3717 {
3718 if (iSubtrahend >= 0)
3719 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3720 else
3721 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3722 }
3723 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3724 {
3725 if (iSubtrahend >= 0)
3726 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3727 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3728 else
3729 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3730 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3731 }
3732 else if (iGprTmp != UINT8_MAX)
3733 {
3734 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3735 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3736 }
3737 else
3738# ifdef IEM_WITH_THROW_CATCH
3739 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3740# else
3741 AssertReleaseFailedStmt(off = UINT32_MAX);
3742# endif
3743
3744#else
3745# error "Port me"
3746#endif
3747 return off;
3748}
3749
3750
3751/**
3752 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3753 *
3754 * @note ARM64: Larger constants will require a temporary register. Failing to
3755 * specify one when needed will trigger fatal assertion / throw.
3756 */
3757DECL_INLINE_THROW(uint32_t)
3758iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3759 uint8_t iGprTmp = UINT8_MAX)
3760
3761{
3762#ifdef RT_ARCH_AMD64
3763    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3764#elif defined(RT_ARCH_ARM64)
3765    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3766#else
3767# error "Port me"
3768#endif
3769 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3770 return off;
3771}
3772
3773
3774/**
3775 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3776 *
3777 * This will optimize using DEC/INC/whatever and ARM64 will not set flags,
3778 * so not suitable as a base for conditional jumps.
3779 *
3780 * @note AMD64: Will only update the lower 16 bits of the register.
3781 * @note ARM64: Will update the entire register.
3782 * @note ARM64: Larger constants will require a temporary register. Failing to
3783 * specify one when needed will trigger fatal assertion / throw.
3784 */
3785DECL_FORCE_INLINE_THROW(uint32_t)
3786iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3787 uint8_t iGprTmp = UINT8_MAX)
3788{
3789#ifdef RT_ARCH_AMD64
3790 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3791 if (iGprDst >= 8)
3792 pCodeBuf[off++] = X86_OP_REX_B;
3793 if (iSubtrahend == 1)
3794 {
3795 /* dec r/m16 */
3796 pCodeBuf[off++] = 0xff;
3797 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3798 }
3799 else if (iSubtrahend == -1)
3800 {
3801 /* inc r/m16 */
3802 pCodeBuf[off++] = 0xff;
3803 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3804 }
3805 else if ((int8_t)iSubtrahend == iSubtrahend)
3806 {
3807 /* sub r/m16, imm8 */
3808 pCodeBuf[off++] = 0x83;
3809 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3810 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3811 }
3812 else
3813 {
3814 /* sub r/m16, imm16 */
3815 pCodeBuf[off++] = 0x81;
3816 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3817 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3818 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3819 }
3820 RT_NOREF(iGprTmp);
3821
3822#elif defined(RT_ARCH_ARM64)
3823 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3824 if (uAbsSubtrahend < 4096)
3825 {
3826 if (iSubtrahend >= 0)
3827 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3828 else
3829 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3830 }
3831 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3832 {
3833 if (iSubtrahend >= 0)
3834 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3835 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3836 else
3837 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3838 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3839 }
3840 else if (iGprTmp != UINT8_MAX)
3841 {
3842 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3843 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3844 }
3845 else
3846# ifdef IEM_WITH_THROW_CATCH
3847 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3848# else
3849 AssertReleaseFailedStmt(off = UINT32_MAX);
3850# endif
3851 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3852
3853#else
3854# error "Port me"
3855#endif
3856 return off;
3857}
3858
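/* Worked example of the difference noted above (value hypothetical): with
 * the low 32 bits of the destination holding 0xffff0001, subtracting 2
 * yields 0xffffffff on AMD64 (bits 16 and up untouched) but 0x0000ffff on
 * ARM64 (whole register replaced by the zero-extended 16-bit result). */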
3859
3860/**
3861 * Emits adding a 64-bit GPR to another, storing the result in the first.
3862 * @note The AMD64 version sets flags.
3863 */
3864DECL_FORCE_INLINE(uint32_t)
3865iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3866{
3867#if defined(RT_ARCH_AMD64)
3868 /* add Gv,Ev */
3869 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3870 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3871 pCodeBuf[off++] = 0x03;
3872 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3873
3874#elif defined(RT_ARCH_ARM64)
3875 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3876
3877#else
3878# error "Port me"
3879#endif
3880 return off;
3881}
3882
3883
3884/**
3885 * Emits adding a 64-bit GPR to another, storing the result in the first.
3886 * @note The AMD64 version sets flags.
3887 */
3888DECL_INLINE_THROW(uint32_t)
3889iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3890{
3891#if defined(RT_ARCH_AMD64)
3892 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3893#elif defined(RT_ARCH_ARM64)
3894 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3895#else
3896# error "Port me"
3897#endif
3898 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3899 return off;
3900}
3901
3902
3903/**
3904 * Emits adding a 32-bit GPR to another, storing the result in the first.
3905 * @note The AMD64 version sets flags.
3906 */
3907DECL_FORCE_INLINE(uint32_t)
3908iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3909{
3910#if defined(RT_ARCH_AMD64)
3911 /* add Gv,Ev */
3912 if (iGprDst >= 8 || iGprAddend >= 8)
3913 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3914 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3915 pCodeBuf[off++] = 0x03;
3916 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3917
3918#elif defined(RT_ARCH_ARM64)
3919 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3920
3921#else
3922# error "Port me"
3923#endif
3924 return off;
3925}
3926
3927
3928/**
3929 * Emits adding a 32-bit GPR to another, storing the result in the first.
3930 * @note The AMD64 version sets flags.
3931 */
3932DECL_INLINE_THROW(uint32_t)
3933iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3934{
3935#if defined(RT_ARCH_AMD64)
3936 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3937#elif defined(RT_ARCH_ARM64)
3938 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3939#else
3940# error "Port me"
3941#endif
3942 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3943 return off;
3944}
3945
3946
3947/**
3948 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3949 */
3950DECL_INLINE_THROW(uint32_t)
3951iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3952{
3953#if defined(RT_ARCH_AMD64)
3954 /* add or inc */
3955 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3956 if (iImm8 != 1)
3957 {
3958 pCodeBuf[off++] = 0x83;
3959 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3960 pCodeBuf[off++] = (uint8_t)iImm8;
3961 }
3962 else
3963 {
3964 pCodeBuf[off++] = 0xff;
3965 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3966 }
3967
3968#elif defined(RT_ARCH_ARM64)
3969 if (iImm8 >= 0)
3970 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3971 else
3972 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3973
3974#else
3975# error "Port me"
3976#endif
3977 return off;
3978}
3979
3980
3981/**
3982 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3983 */
3984DECL_INLINE_THROW(uint32_t)
3985iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3986{
3987#if defined(RT_ARCH_AMD64)
3988 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3989#elif defined(RT_ARCH_ARM64)
3990 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3991#else
3992# error "Port me"
3993#endif
3994 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3995 return off;
3996}
3997
3998
3999/**
4000 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4001 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4002 */
4003DECL_FORCE_INLINE(uint32_t)
4004iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4005{
4006#if defined(RT_ARCH_AMD64)
4007 /* add or inc */
4008 if (iGprDst >= 8)
4009 pCodeBuf[off++] = X86_OP_REX_B;
4010 if (iImm8 != 1)
4011 {
4012 pCodeBuf[off++] = 0x83;
4013 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4014 pCodeBuf[off++] = (uint8_t)iImm8;
4015 }
4016 else
4017 {
4018 pCodeBuf[off++] = 0xff;
4019 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4020 }
4021
4022#elif defined(RT_ARCH_ARM64)
4023 if (iImm8 >= 0)
4024 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4025 else
4026 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4027
4028#else
4029# error "Port me"
4030#endif
4031 return off;
4032}
4033
4034
4035/**
4036 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4037 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4038 */
4039DECL_INLINE_THROW(uint32_t)
4040iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4041{
4042#if defined(RT_ARCH_AMD64)
4043 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4044#elif defined(RT_ARCH_ARM64)
4045 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4046#else
4047# error "Port me"
4048#endif
4049 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4050 return off;
4051}
4052
4053
4054/**
4055 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4056 *
4057 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4058 */
4059DECL_FORCE_INLINE_THROW(uint32_t)
4060iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4061{
4062#if defined(RT_ARCH_AMD64)
4063 if ((int8_t)iAddend == iAddend)
4064 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4065
4066 if ((int32_t)iAddend == iAddend)
4067 {
4068 /* add grp, imm32 */
4069 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4070 pCodeBuf[off++] = 0x81;
4071 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4072 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4073 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4074 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4075 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4076 }
4077 else if (iGprTmp != UINT8_MAX)
4078 {
4079 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4080
4081 /* add dst, tmpreg */
4082 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4083 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4084 pCodeBuf[off++] = 0x03;
4085 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4086 }
4087 else
4088# ifdef IEM_WITH_THROW_CATCH
4089 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4090# else
4091 AssertReleaseFailedStmt(off = UINT32_MAX);
4092# endif
4093
4094#elif defined(RT_ARCH_ARM64)
4095 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4096 if (uAbsAddend < 4096)
4097 {
4098 if (iAddend >= 0)
4099 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4100 else
4101 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4102 }
4103 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4104 {
4105 if (iAddend >= 0)
4106            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4107                                                       true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4108        else
4109            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4110                                                       true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4111 }
4112 else if (iGprTmp != UINT8_MAX)
4113 {
4114 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4115 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4116 }
4117 else
4118# ifdef IEM_WITH_THROW_CATCH
4119 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4120# else
4121 AssertReleaseFailedStmt(off = UINT32_MAX);
4122# endif
4123
4124#else
4125# error "Port me"
4126#endif
4127 return off;
4128}
4129
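/* Illustrative only (register indices made up): 0x5000 is encodable on both
 * hosts without help, while 0x12345678 fits the AMD64 imm32 form but has low
 * bits set and thus needs @a iGprTmp on ARM64: */
#if 0
off = iemNativeEmitAddGprImmEx(pCodeBuf, off, 3 /*iGprDst*/, 0x5000);
off = iemNativeEmitAddGprImmEx(pCodeBuf, off, 3 /*iGprDst*/, INT64_C(0x12345678), 5 /*iGprTmp*/);
#endif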
4130
4131/**
4132 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4133 */
4134DECL_INLINE_THROW(uint32_t)
4135iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4136{
4137#if defined(RT_ARCH_AMD64)
4138 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4139 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4140
4141 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4142 {
4143 /* add grp, imm32 */
4144 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4145 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4146 pbCodeBuf[off++] = 0x81;
4147 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4148 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4149 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4150 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4151 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4152 }
4153 else
4154 {
4155 /* Best to use a temporary register to deal with this in the simplest way: */
4156 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4157
4158 /* add dst, tmpreg */
4159 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4160 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4161 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4162 pbCodeBuf[off++] = 0x03;
4163 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4164
4165 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4166 }
4167
4168#elif defined(RT_ARCH_ARM64)
4169 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4170 {
4171 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4172 if (iAddend >= 0)
4173 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
4174 else
4175 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
4176 }
4177 else
4178 {
4179 /* Use temporary register for the immediate. */
4180 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4181
4182 /* add gprdst, gprdst, tmpreg */
4183 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4184 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
4185
4186 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4187 }
4188
4189#else
4190# error "Port me"
4191#endif
4192 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4193 return off;
4194}
4195
4196
4197/**
4198 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4199 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4200 * @note For ARM64 the @a iAddend value must be in the range 0x000..0xfff,
4201 * or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
4202 * the lower 12 bits always zero). The negative ranges are also allowed,
4203 * making it behave like a subtraction. If the constant does not conform,
4204 * the function will assert / throw.
4205 */
4206DECL_FORCE_INLINE_THROW(uint32_t)
4207iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4208{
4209#if defined(RT_ARCH_AMD64)
4210 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4211 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4212
4213 /* add grp, imm32 */
4214 if (iGprDst >= 8)
4215 pCodeBuf[off++] = X86_OP_REX_B;
4216 pCodeBuf[off++] = 0x81;
4217 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4218 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4219 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4220 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4221 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4222
4223#elif defined(RT_ARCH_ARM64)
4224 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4225 if (uAbsAddend <= 0xfff)
4226 {
4227 if (iAddend >= 0)
4228 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4229 else
4230 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4231 }
4232 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4233 {
4234 if (iAddend >= 0)
4235            pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4236                                                          false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4237        else
4238            pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4239                                                          false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4240 }
4241 else
4242# ifdef IEM_WITH_THROW_CATCH
4243 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4244# else
4245 AssertReleaseFailedStmt(off = UINT32_MAX);
4246# endif
4247
4248#else
4249# error "Port me"
4250#endif
4251 return off;
4252}
4253
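/* What the ARM64 note above means for sample constants (illustrative):
 *      iAddend = 0xfff    - encodable as-is
 *      iAddend = 0x1000   - encodable using the 12-bit left shift
 *      iAddend = -0x2000  - encodable, emitted as a shifted subtraction
 *      iAddend = 0x1001   - does not conform and will assert / throw */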
4254
4255/**
4256 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4257 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4258 */
4259DECL_INLINE_THROW(uint32_t)
4260iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4261{
4262#if defined(RT_ARCH_AMD64)
4263 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4264
4265#elif defined(RT_ARCH_ARM64)
4266 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4267 {
4268 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4269 if (iAddend >= 0)
4270 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
4271 else
4272 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
4273 }
4274 else
4275 {
4276 /* Use temporary register for the immediate. */
4277 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
4278
4279 /* add gprdst, gprdst, tmpreg */
4280 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4281 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4282
4283 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4284 }
4285
4286#else
4287# error "Port me"
4288#endif
4289 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4290 return off;
4291}
4292
4293
4294/**
4295 * Emits a 16-bit GPR add with a signed immediate addend.
4296 *
4297 * This will optimize using INC/DEC/whatever and ARM64 will not set flags,
4298 * so not suitable as a base for conditional jumps.
4299 *
4300 * @note AMD64: Will only update the lower 16 bits of the register.
4301 * @note ARM64: Will update the entire register.
4302 * @note ARM64: Larger constants will require a temporary register. Failing to
4303 * specify one when needed will trigger fatal assertion / throw.
4304 * @sa iemNativeEmitSubGpr16ImmEx
4305 */
4306DECL_FORCE_INLINE_THROW(uint32_t)
4307iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
4308 uint8_t iGprTmp = UINT8_MAX)
4309{
4310#ifdef RT_ARCH_AMD64
4311 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4312 if (iGprDst >= 8)
4313 pCodeBuf[off++] = X86_OP_REX_B;
4314 if (iAddend == 1)
4315 {
4316 /* inc r/m16 */
4317 pCodeBuf[off++] = 0xff;
4318 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4319 }
4320 else if (iAddend == -1)
4321 {
4322 /* dec r/m16 */
4323 pCodeBuf[off++] = 0xff;
4324 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4325 }
4326 else if ((int8_t)iAddend == iAddend)
4327 {
4328 /* add r/m16, imm8 */
4329 pCodeBuf[off++] = 0x83;
4330 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4331 pCodeBuf[off++] = (uint8_t)iAddend;
4332 }
4333 else
4334 {
4335 /* add r/m16, imm16 */
4336 pCodeBuf[off++] = 0x81;
4337 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4338 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4339 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4340 }
4341 RT_NOREF(iGprTmp);
4342
4343#elif defined(RT_ARCH_ARM64)
4344 uint32_t uAbsAddend = RT_ABS(iAddend);
4345 if (uAbsAddend < 4096)
4346 {
4347 if (iAddend >= 0)
4348 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4349 else
4350 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4351 }
4352 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4353 {
4354 if (iAddend >= 0)
4355 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4356 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4357 else
4358 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4359 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4360 }
4361 else if (iGprTmp != UINT8_MAX)
4362 {
4363 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
4364 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4365 }
4366 else
4367# ifdef IEM_WITH_THROW_CATCH
4368 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4369# else
4370 AssertReleaseFailedStmt(off = UINT32_MAX);
4371# endif
4372 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4373
4374#else
4375# error "Port me"
4376#endif
4377 return off;
4378}
4379
4380
4381
4382/**
4383 * Adds two 64-bit GPRs together, storing the result in a third register.
4384 */
4385DECL_FORCE_INLINE(uint32_t)
4386iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4387{
4388#ifdef RT_ARCH_AMD64
4389 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4390 {
4391 /** @todo consider LEA */
4392 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4393 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4394 }
4395 else
4396 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4397
4398#elif defined(RT_ARCH_ARM64)
4399 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4400
4401#else
4402# error "Port me!"
4403#endif
4404 return off;
4405}
4406
4407
4408
4409/**
4410 * Adds two 32-bit GPRs together, storing the result in a third register.
4411 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4412 */
4413DECL_FORCE_INLINE(uint32_t)
4414iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4415{
4416#ifdef RT_ARCH_AMD64
4417 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4418 {
4419 /** @todo consider LEA */
4420 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4421 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4422 }
4423 else
4424 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4425
4426#elif defined(RT_ARCH_ARM64)
4427 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4428
4429#else
4430# error "Port me!"
4431#endif
4432 return off;
4433}
4434
4435
4436/**
4437 * Adds a 64-bit GPR and a 64-bit unsigned constant, storing the result in a
4438 * third register.
4439 *
4440 * @note Does not work for non-trivial constants if @a iGprDst and @a iGprAddend
4441 * are the same register; the ARM64 version will assert / throw in that case.
4442 */
4443DECL_FORCE_INLINE_THROW(uint32_t)
4444iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4445{
4446#ifdef RT_ARCH_AMD64
4447 /** @todo consider LEA */
4448 if ((int8_t)iImmAddend == iImmAddend)
4449 {
4450 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4451 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4452 }
4453 else
4454 {
4455 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4456 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4457 }
4458
4459#elif defined(RT_ARCH_ARM64)
4460 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4461 if (uAbsImmAddend < 4096)
4462 {
4463 if (iImmAddend >= 0)
4464 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4465 else
4466 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4467 }
4468 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4469 {
4470 if (iImmAddend >= 0)
4471            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4472        else
4473            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4474 }
4475 else if (iGprDst != iGprAddend)
4476 {
4477 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4478 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4479 }
4480 else
4481# ifdef IEM_WITH_THROW_CATCH
4482 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4483# else
4484 AssertReleaseFailedStmt(off = UINT32_MAX);
4485# endif
4486
4487#else
4488# error "Port me!"
4489#endif
4490 return off;
4491}
4492
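/* Illustrative sketch (register indices made up) of the restriction noted
 * above: a large constant is fine as long as the destination and addend
 * registers differ, since the destination then doubles as scratch: */
#if 0
off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, 3 /*iGprDst*/, 4 /*iGprAddend*/, INT64_C(0x123456789));
#endif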
4493
4494/**
4495 * Adds a 32-bit GPR and a 32-bit unsigned constant, storing the result in a
4496 * third register.
4497 *
4498 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4499 *
4500 * @note Does not work for non-trivial constants if @a iGprDst and @a iGprAddend
4501 * are the same register; the ARM64 version will assert / throw in that case.
4502 */
4503DECL_FORCE_INLINE_THROW(uint32_t)
4504iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4505{
4506#ifdef RT_ARCH_AMD64
4507 /** @todo consider LEA */
4508 if ((int8_t)iImmAddend == iImmAddend)
4509 {
4510 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4511 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4512 }
4513 else
4514 {
4515 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4516        off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4517 }
4518
4519#elif defined(RT_ARCH_ARM64)
4520 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4521 if (uAbsImmAddend < 4096)
4522 {
4523 if (iImmAddend >= 0)
4524 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4525 else
4526 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4527 }
4528 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4529 {
4530 if (iImmAddend >= 0)
4531            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4532        else
4533            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4534 }
4535 else if (iGprDst != iGprAddend)
4536 {
4537 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4538 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4539 }
4540 else
4541# ifdef IEM_WITH_THROW_CATCH
4542 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4543# else
4544 AssertReleaseFailedStmt(off = UINT32_MAX);
4545# endif
4546
4547#else
4548# error "Port me!"
4549#endif
4550 return off;
4551}
4552
4553
4554/*********************************************************************************************************************************
4555* Unary Operations *
4556*********************************************************************************************************************************/
4557
4558/**
4559 * Emits code for two's complement negation of a 64-bit GPR.
4560 */
4561DECL_FORCE_INLINE_THROW(uint32_t)
4562iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4563{
4564#if defined(RT_ARCH_AMD64)
4565 /* neg Ev */
4566 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4567 pCodeBuf[off++] = 0xf7;
4568 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4569
4570#elif defined(RT_ARCH_ARM64)
4571 /* sub dst, xzr, dst */
4572 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4573
4574#else
4575# error "Port me"
4576#endif
4577 return off;
4578}
4579
4580
4581/**
4582 * Emits code for two's complement negation of a 64-bit GPR.
4583 */
4584DECL_INLINE_THROW(uint32_t)
4585iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4586{
4587#if defined(RT_ARCH_AMD64)
4588 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4589#elif defined(RT_ARCH_ARM64)
4590 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4591#else
4592# error "Port me"
4593#endif
4594 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4595 return off;
4596}
4597
4598
4599/**
4600 * Emits code for two's complement negation of a 32-bit GPR.
4601 * @note bits 32 thru 63 are set to zero.
4602 */
4603DECL_FORCE_INLINE_THROW(uint32_t)
4604iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4605{
4606#if defined(RT_ARCH_AMD64)
4607 /* neg Ev */
4608 if (iGprDst >= 8)
4609 pCodeBuf[off++] = X86_OP_REX_B;
4610 pCodeBuf[off++] = 0xf7;
4611 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4612
4613#elif defined(RT_ARCH_ARM64)
4614 /* sub dst, xzr, dst */
4615 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4616
4617#else
4618# error "Port me"
4619#endif
4620 return off;
4621}
4622
4623
4624/**
4625 * Emits code for two's complement negation of a 32-bit GPR.
4626 * @note bits 32 thru 63 are set to zero.
4627 */
4628DECL_INLINE_THROW(uint32_t)
4629iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4630{
4631#if defined(RT_ARCH_AMD64)
4632 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4633#elif defined(RT_ARCH_ARM64)
4634 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4635#else
4636# error "Port me"
4637#endif
4638 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4639 return off;
4640}
4641
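/*
 * A minimal call-site sketch for the negation emitters above; illustrative
 * only, and assumes the iemNativeRegAllocTmp/iemNativeRegFreeTmp allocators
 * plus the usual pReNative/off conventions.
 */
#if 0 /* illustrative only */
uint8_t const iGprTmp = iemNativeRegAllocTmp(pReNative, &off); /* assumed allocator */
off = iemNativeEmitNegGpr32(pReNative, off, iGprTmp);          /* negates bits 31:0, zeroes bits 63:32 */
iemNativeRegFreeTmp(pReNative, iGprTmp);
#endif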
4642
4643
4644/*********************************************************************************************************************************
4645* Bit Operations *
4646*********************************************************************************************************************************/
4647
4648/**
4649 * Emits code for clearing bits 16 thru 63 in the GPR.
4650 */
4651DECL_INLINE_THROW(uint32_t)
4652iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4653{
4654#if defined(RT_ARCH_AMD64)
4655 /* movzx Gv,Ew */
4656 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4657 if (iGprDst >= 8)
4658 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4659 pbCodeBuf[off++] = 0x0f;
4660 pbCodeBuf[off++] = 0xb7;
4661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4662
4663#elif defined(RT_ARCH_ARM64)
4664 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4665# if 1
4666 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4667# else
4668 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4669 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4670# endif
4671#else
4672# error "Port me"
4673#endif
4674 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4675 return off;
4676}
4677
4678
4679/**
4680 * Emits code for AND'ing two 64-bit GPRs.
4681 *
4682 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4683 * and ARM64 hosts.
4684 */
4685DECL_FORCE_INLINE(uint32_t)
4686iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4687{
4688#if defined(RT_ARCH_AMD64)
4689 /* and Gv, Ev */
4690 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4691 pCodeBuf[off++] = 0x23;
4692 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4693 RT_NOREF(fSetFlags);
4694
4695#elif defined(RT_ARCH_ARM64)
4696 if (!fSetFlags)
4697 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4698 else
4699 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4700
4701#else
4702# error "Port me"
4703#endif
4704 return off;
4705}
4706
4707
4708/**
4709 * Emits code for AND'ing two 64-bit GPRs.
4710 *
4711 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4712 * and ARM64 hosts.
4713 */
4714DECL_INLINE_THROW(uint32_t)
4715iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4716{
4717#if defined(RT_ARCH_AMD64)
4718 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4719#elif defined(RT_ARCH_ARM64)
4720 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4721#else
4722# error "Port me"
4723#endif
4724 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4725 return off;
4726}
4727
4728
4729/**
4730 * Emits code for AND'ing two 32-bit GPRs.
4731 */
4732DECL_FORCE_INLINE(uint32_t)
4733iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4734{
4735#if defined(RT_ARCH_AMD64)
4736 /* and Gv, Ev */
4737 if (iGprDst >= 8 || iGprSrc >= 8)
4738 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4739 pCodeBuf[off++] = 0x23;
4740 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4741 RT_NOREF(fSetFlags);
4742
4743#elif defined(RT_ARCH_ARM64)
4744 if (!fSetFlags)
4745 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4746 else
4747 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4748
4749#else
4750# error "Port me"
4751#endif
4752 return off;
4753}
4754
4755
4756/**
4757 * Emits code for AND'ing two 32-bit GPRs.
4758 */
4759DECL_INLINE_THROW(uint32_t)
4760iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4761{
4762#if defined(RT_ARCH_AMD64)
4763 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4764#elif defined(RT_ARCH_ARM64)
4765 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4766#else
4767# error "Port me"
4768#endif
4769 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4770 return off;
4771}
4772
4773
4774/**
4775 * Emits code for AND'ing a 64-bit GPR with a constant.
4776 *
4777 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4778 * and ARM64 hosts.
4779 */
4780DECL_INLINE_THROW(uint32_t)
4781iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4782{
4783#if defined(RT_ARCH_AMD64)
4784 if ((int64_t)uImm == (int8_t)uImm)
4785 {
4786 /* and Ev, imm8 */
4787 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4789 pbCodeBuf[off++] = 0x83;
4790 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4791 pbCodeBuf[off++] = (uint8_t)uImm;
4792 }
4793 else if ((int64_t)uImm == (int32_t)uImm)
4794 {
4795 /* and Ev, imm32 */
4796 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4797 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4798 pbCodeBuf[off++] = 0x81;
4799 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4800 pbCodeBuf[off++] = RT_BYTE1(uImm);
4801 pbCodeBuf[off++] = RT_BYTE2(uImm);
4802 pbCodeBuf[off++] = RT_BYTE3(uImm);
4803 pbCodeBuf[off++] = RT_BYTE4(uImm);
4804 }
4805 else
4806 {
4807 /* Use temporary register for the 64-bit immediate. */
4808 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4809 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4810 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4811 }
4812 RT_NOREF(fSetFlags);
4813
4814#elif defined(RT_ARCH_ARM64)
4815 uint32_t uImmR = 0;
4816 uint32_t uImmNandS = 0;
4817 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4818 {
4819 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4820 if (!fSetFlags)
4821 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4822 else
4823 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4824 }
4825 else
4826 {
4827 /* Use temporary register for the 64-bit immediate. */
4828 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4829 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4830 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4831 }
4832
4833#else
4834# error "Port me"
4835#endif
4836 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4837 return off;
4838}
4839
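/*
 * Illustrative only: the single-instruction ARM64 path above is taken when
 * Armv8A64ConvertMask64ToImmRImmS() can express the constant as a repeating
 * bit pattern; irregular constants fall back to a temporary register.
 */
#if 0 /* illustrative only */
off = iemNativeEmitAndGprByImm(pReNative, off, iGprDst, UINT64_C(0x00ff00ff00ff00ff)); /* encodable bitmask */
off = iemNativeEmitAndGprByImm(pReNative, off, iGprDst, UINT64_C(0x123456789abcdef0)); /* needs a tmp register */
#endif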
4840
4841/**
4842 * Emits code for AND'ing a 32-bit GPR with a constant.
4843 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4844 * @note For ARM64 this only supports @a uImm values that can be expressed using
4845 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4846 * make sure this is possible!
4847 */
4848DECL_FORCE_INLINE_THROW(uint32_t)
4849iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4850{
4851#if defined(RT_ARCH_AMD64)
4852 /* and Ev, imm */
4853 if (iGprDst >= 8)
4854 pCodeBuf[off++] = X86_OP_REX_B;
4855 if ((int32_t)uImm == (int8_t)uImm)
4856 {
4857 pCodeBuf[off++] = 0x83;
4858 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4859 pCodeBuf[off++] = (uint8_t)uImm;
4860 }
4861 else
4862 {
4863 pCodeBuf[off++] = 0x81;
4864 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4865 pCodeBuf[off++] = RT_BYTE1(uImm);
4866 pCodeBuf[off++] = RT_BYTE2(uImm);
4867 pCodeBuf[off++] = RT_BYTE3(uImm);
4868 pCodeBuf[off++] = RT_BYTE4(uImm);
4869 }
4870 RT_NOREF(fSetFlags);
4871
4872#elif defined(RT_ARCH_ARM64)
4873 uint32_t uImmR = 0;
4874 uint32_t uImmNandS = 0;
4875 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4876 {
4877 if (!fSetFlags)
4878 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4879 else
4880 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4881 }
4882 else
4883# ifdef IEM_WITH_THROW_CATCH
4884 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4885# else
4886 AssertReleaseFailedStmt(off = UINT32_MAX);
4887# endif
4888
4889#else
4890# error "Port me"
4891#endif
4892 return off;
4893}
4894
4895
4896/**
4897 * Emits code for AND'ing a 32-bit GPR with a constant.
4898 *
4899 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4900 */
4901DECL_INLINE_THROW(uint32_t)
4902iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4903{
4904#if defined(RT_ARCH_AMD64)
4905 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4906
4907#elif defined(RT_ARCH_ARM64)
4908 uint32_t uImmR = 0;
4909 uint32_t uImmNandS = 0;
4910 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4911 {
4912 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4913 if (!fSetFlags)
4914 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4915 else
4916 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4917 }
4918 else
4919 {
4920 /* Use temporary register for the 64-bit immediate. */
4921 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4922 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4923 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4924 }
4925
4926#else
4927# error "Port me"
4928#endif
4929 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4930 return off;
4931}
4932
4933
4934/**
4935 * Emits code for AND'ing a 64-bit GPR with a constant.
4936 *
4937 * @note For ARM64, immediates without an AND/ANDS-compatible encoding will
4938 * assert / throw an exception if @a iGprDst and @a iGprSrc are the same.
4940 */
4941DECL_FORCE_INLINE_THROW(uint32_t)
4942iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4943 bool fSetFlags = false)
4944{
4945#if defined(RT_ARCH_AMD64)
4946 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4947 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4948 RT_NOREF(fSetFlags);
4949
4950#elif defined(RT_ARCH_ARM64)
4951 uint32_t uImmR = 0;
4952 uint32_t uImmNandS = 0;
4953 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4954 {
4955 if (!fSetFlags)
4956 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4957 else
4958 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4959 }
4960 else if (iGprDst != iGprSrc)
4961 {
4962 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4963 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4964 }
4965 else
4966# ifdef IEM_WITH_THROW_CATCH
4967 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4968# else
4969 AssertReleaseFailedStmt(off = UINT32_MAX);
4970# endif
4971
4972#else
4973# error "Port me"
4974#endif
4975 return off;
4976}
4977
4978/**
4979 * Emits code for AND'ing a 32-bit GPR with a constant.
4980 *
4981 * @note For ARM64, immediates without an AND/ANDS-compatible encoding will
4982 * assert / throw an exception if @a iGprDst and @a iGprSrc are the same.
4984 *
4985 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4986 */
4987DECL_FORCE_INLINE_THROW(uint32_t)
4988iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4989 bool fSetFlags = false)
4990{
4991#if defined(RT_ARCH_AMD64)
4992 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4993 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4994 RT_NOREF(fSetFlags);
4995
4996#elif defined(RT_ARCH_ARM64)
4997 uint32_t uImmR = 0;
4998 uint32_t uImmNandS = 0;
4999 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5000 {
5001 if (!fSetFlags)
5002 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5003 else
5004 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5005 }
5006 else if (iGprDst != iGprSrc)
5007 {
5008 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5009 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5010 }
5011 else
5012# ifdef IEM_WITH_THROW_CATCH
5013 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5014# else
5015 AssertReleaseFailedStmt(off = UINT32_MAX);
5016# endif
5017
5018#else
5019# error "Port me"
5020#endif
5021 return off;
5022}
5023
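/*
 * Illustrative only: for masks ARM64 cannot encode, the Ex variants above
 * must materialize the immediate in the destination first, which is why they
 * assert/throw when @a iGprDst and @a iGprSrc alias.
 */
#if 0 /* illustrative only */
off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, iGprDst, iGprSrc, UINT32_C(0x12345678)); /* requires iGprDst != iGprSrc on ARM64 */
#endif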
5024
5025/**
5026 * Emits code for OR'ing two 64-bit GPRs.
5027 */
5028DECL_FORCE_INLINE(uint32_t)
5029iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5030{
5031#if defined(RT_ARCH_AMD64)
5032 /* or Gv, Ev */
5033 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5034 pCodeBuf[off++] = 0x0b;
5035 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5036
5037#elif defined(RT_ARCH_ARM64)
5038 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5039
5040#else
5041# error "Port me"
5042#endif
5043 return off;
5044}
5045
5046
5047/**
5048 * Emits code for OR'ing two 64-bit GPRs.
5049 */
5050DECL_INLINE_THROW(uint32_t)
5051iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5052{
5053#if defined(RT_ARCH_AMD64)
5054 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5055#elif defined(RT_ARCH_ARM64)
5056 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5057#else
5058# error "Port me"
5059#endif
5060 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5061 return off;
5062}
5063
5064
5065/**
5066 * Emits code for OR'ing two 32-bit GPRs.
5067 * @note Bits 63:32 of the destination GPR will be cleared.
5068 */
5069DECL_FORCE_INLINE(uint32_t)
5070iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5071{
5072#if defined(RT_ARCH_AMD64)
5073 /* or Gv, Ev */
5074 if (iGprDst >= 8 || iGprSrc >= 8)
5075 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5076 pCodeBuf[off++] = 0x0b;
5077 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5078
5079#elif defined(RT_ARCH_ARM64)
5080 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5081
5082#else
5083# error "Port me"
5084#endif
5085 return off;
5086}
5087
5088
5089/**
5090 * Emits code for OR'ing two 32-bit GPRs.
5091 * @note Bits 63:32 of the destination GPR will be cleared.
5092 */
5093DECL_INLINE_THROW(uint32_t)
5094iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5095{
5096#if defined(RT_ARCH_AMD64)
5097 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5098#elif defined(RT_ARCH_ARM64)
5099 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5100#else
5101# error "Port me"
5102#endif
5103 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5104 return off;
5105}
5106
5107
5108/**
5109 * Emits code for OR'ing a 64-bit GPR with a constant.
5110 */
5111DECL_INLINE_THROW(uint32_t)
5112iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5113{
5114#if defined(RT_ARCH_AMD64)
5115 if ((int64_t)uImm == (int8_t)uImm)
5116 {
5117 /* or Ev, imm8 */
5118 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5119 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5120 pbCodeBuf[off++] = 0x83;
5121 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5122 pbCodeBuf[off++] = (uint8_t)uImm;
5123 }
5124 else if ((int64_t)uImm == (int32_t)uImm)
5125 {
5126 /* or Ev, imm32 */
5127 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5128 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5129 pbCodeBuf[off++] = 0x81;
5130 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5131 pbCodeBuf[off++] = RT_BYTE1(uImm);
5132 pbCodeBuf[off++] = RT_BYTE2(uImm);
5133 pbCodeBuf[off++] = RT_BYTE3(uImm);
5134 pbCodeBuf[off++] = RT_BYTE4(uImm);
5135 }
5136 else
5137 {
5138 /* Use temporary register for the 64-bit immediate. */
5139 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5140 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5141 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5142 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5143 }
5144
5145#elif defined(RT_ARCH_ARM64)
5146 uint32_t uImmR = 0;
5147 uint32_t uImmNandS = 0;
5148 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5149 {
5150 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5151 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5152 }
5153 else
5154 {
5155 /* Use temporary register for the 64-bit immediate. */
5156 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5157 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5158 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5159 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5160 }
5161
5162#else
5163# error "Port me"
5164#endif
5165 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5166 return off;
5167}
5168
5169
5170/**
5171 * Emits code for OR'ing a 32-bit GPR with a constant.
5172 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5173 * @note For ARM64 this only supports @a uImm values that can be expressed using
5174 * the two 6-bit immediates of the ORR instruction. The caller must make
5175 * sure this is possible!
5176 */
5177DECL_FORCE_INLINE_THROW(uint32_t)
5178iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5179{
5180#if defined(RT_ARCH_AMD64)
5181 /* or Ev, imm */
5182 if (iGprDst >= 8)
5183 pCodeBuf[off++] = X86_OP_REX_B;
5184 if ((int32_t)uImm == (int8_t)uImm)
5185 {
5186 pCodeBuf[off++] = 0x83;
5187 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5188 pCodeBuf[off++] = (uint8_t)uImm;
5189 }
5190 else
5191 {
5192 pCodeBuf[off++] = 0x81;
5193 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5194 pCodeBuf[off++] = RT_BYTE1(uImm);
5195 pCodeBuf[off++] = RT_BYTE2(uImm);
5196 pCodeBuf[off++] = RT_BYTE3(uImm);
5197 pCodeBuf[off++] = RT_BYTE4(uImm);
5198 }
5199
5200#elif defined(RT_ARCH_ARM64)
5201 uint32_t uImmR = 0;
5202 uint32_t uImmNandS = 0;
5203 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5204 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5205 else
5206# ifdef IEM_WITH_THROW_CATCH
5207 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5208# else
5209 AssertReleaseFailedStmt(off = UINT32_MAX);
5210# endif
5211
5212#else
5213# error "Port me"
5214#endif
5215 return off;
5216}
5217
5218
5219/**
5220 * Emits code for OR'ing a 32-bit GPR with a constant.
5221 *
5222 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5223 */
5224DECL_INLINE_THROW(uint32_t)
5225iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5226{
5227#if defined(RT_ARCH_AMD64)
5228 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5229
5230#elif defined(RT_ARCH_ARM64)
5231 uint32_t uImmR = 0;
5232 uint32_t uImmNandS = 0;
5233 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5234 {
5235 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5236 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5237 }
5238 else
5239 {
5240 /* Use temporary register for the 64-bit immediate. */
5241 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5242 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5243 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5244 }
5245
5246#else
5247# error "Port me"
5248#endif
5249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5250 return off;
5251}
5252
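/*
 * Illustrative only: single-bit masks such as RT_BIT_32(n) are always valid
 * ARM64 bitmask immediates, so setting a flag bit stays one instruction on
 * both hosts.
 */
#if 0 /* illustrative only */
off = iemNativeEmitOrGpr32ByImm(pReNative, off, iGprDst, RT_BIT_32(9));
#endif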
5253
5254
5255/**
5256 * ORs two 64-bit GPRs together, storing the result in a third register.
5257 */
5258DECL_FORCE_INLINE(uint32_t)
5259iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5260{
5261#ifdef RT_ARCH_AMD64
5262 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5263 {
5264 /** @todo consider LEA */
5265 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5266 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5267 }
5268 else
5269 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5270
5271#elif defined(RT_ARCH_ARM64)
5272 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5273
5274#else
5275# error "Port me!"
5276#endif
5277 return off;
5278}
5279
5280
5281
5282/**
5283 * ORs two 32-bit GPRs together, storing the result in a third register.
5284 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5285 */
5286DECL_FORCE_INLINE(uint32_t)
5287iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5288{
5289#ifdef RT_ARCH_AMD64
5290 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5291 {
5292 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5293 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5294 }
5295 else
5296 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5297
5298#elif defined(RT_ARCH_ARM64)
5299 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5300
5301#else
5302# error "Port me!"
5303#endif
5304 return off;
5305}
5306
5307
5308/**
5309 * Emits code for XOR'ing two 64-bit GPRs.
5310 */
5311DECL_INLINE_THROW(uint32_t)
5312iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5313{
5314#if defined(RT_ARCH_AMD64)
5315 /* xor Gv, Ev */
5316 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5317 pCodeBuf[off++] = 0x33;
5318 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5319
5320#elif defined(RT_ARCH_ARM64)
5321 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5322
5323#else
5324# error "Port me"
5325#endif
5326 return off;
5327}
5328
5329
5330/**
5331 * Emits code for XOR'ing two 64-bit GPRs.
5332 */
5333DECL_INLINE_THROW(uint32_t)
5334iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5335{
5336#if defined(RT_ARCH_AMD64)
5337 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5338#elif defined(RT_ARCH_ARM64)
5339 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5340#else
5341# error "Port me"
5342#endif
5343 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5344 return off;
5345}
5346
5347
5348/**
5349 * Emits code for XOR'ing two 32-bit GPRs.
5350 */
5351DECL_INLINE_THROW(uint32_t)
5352iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5353{
5354#if defined(RT_ARCH_AMD64)
5355 /* xor Gv, Ev */
5356 if (iGprDst >= 8 || iGprSrc >= 8)
5357 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5358 pCodeBuf[off++] = 0x33;
5359 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5360
5361#elif defined(RT_ARCH_ARM64)
5362 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5363
5364#else
5365# error "Port me"
5366#endif
5367 return off;
5368}
5369
5370
5371/**
5372 * Emits code for XOR'ing two 32-bit GPRs.
5373 */
5374DECL_INLINE_THROW(uint32_t)
5375iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5376{
5377#if defined(RT_ARCH_AMD64)
5378 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5379#elif defined(RT_ARCH_ARM64)
5380 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5381#else
5382# error "Port me"
5383#endif
5384 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5385 return off;
5386}
5387
5388
5389/**
5390 * Emits code for XOR'ing a 32-bit GPR with a constant.
5391 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5392 * @note For ARM64 this only supports @a uImm values that can be expressed using
5393 * the two 6-bit immediates of the EOR instructions. The caller must make
5394 * sure this is possible!
5395 */
5396DECL_FORCE_INLINE_THROW(uint32_t)
5397iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5398{
5399#if defined(RT_ARCH_AMD64)
5400 /* xor Ev, imm */
5401 if (iGprDst >= 8)
5402 pCodeBuf[off++] = X86_OP_REX_B;
5403 if ((int32_t)uImm == (int8_t)uImm)
5404 {
5405 pCodeBuf[off++] = 0x83;
5406 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5407 pCodeBuf[off++] = (uint8_t)uImm;
5408 }
5409 else
5410 {
5411 pCodeBuf[off++] = 0x81;
5412 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5413 pCodeBuf[off++] = RT_BYTE1(uImm);
5414 pCodeBuf[off++] = RT_BYTE2(uImm);
5415 pCodeBuf[off++] = RT_BYTE3(uImm);
5416 pCodeBuf[off++] = RT_BYTE4(uImm);
5417 }
5418
5419#elif defined(RT_ARCH_ARM64)
5420 uint32_t uImmR = 0;
5421 uint32_t uImmNandS = 0;
5422 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5423 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5424 else
5425# ifdef IEM_WITH_THROW_CATCH
5426 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5427# else
5428 AssertReleaseFailedStmt(off = UINT32_MAX);
5429# endif
5430
5431#else
5432# error "Port me"
5433#endif
5434 return off;
5435}
5436
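/*
 * Illustrative only: XOR'ing a register with itself is the classic zeroing
 * idiom and works with the emitters above on both hosts.
 */
#if 0 /* illustrative only */
off = iemNativeEmitXorGprByGpr(pReNative, off, iGprDst, iGprDst); /* iGprDst = 0 */
#endif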
5437
5438/*********************************************************************************************************************************
5439* Shifting *
5440*********************************************************************************************************************************/
5441
5442/**
5443 * Emits code for shifting a GPR a fixed number of bits to the left.
5444 */
5445DECL_FORCE_INLINE(uint32_t)
5446iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5447{
5448 Assert(cShift > 0 && cShift < 64);
5449
5450#if defined(RT_ARCH_AMD64)
5451 /* shl dst, cShift */
5452 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5453 if (cShift != 1)
5454 {
5455 pCodeBuf[off++] = 0xc1;
5456 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5457 pCodeBuf[off++] = cShift;
5458 }
5459 else
5460 {
5461 pCodeBuf[off++] = 0xd1;
5462 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5463 }
5464
5465#elif defined(RT_ARCH_ARM64)
5466 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5467
5468#else
5469# error "Port me"
5470#endif
5471 return off;
5472}
5473
5474
5475/**
5476 * Emits code for shifting a GPR a fixed number of bits to the left.
5477 */
5478DECL_INLINE_THROW(uint32_t)
5479iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5480{
5481#if defined(RT_ARCH_AMD64)
5482 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5483#elif defined(RT_ARCH_ARM64)
5484 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5485#else
5486# error "Port me"
5487#endif
5488 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5489 return off;
5490}
5491
5492
5493/**
5494 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5495 */
5496DECL_FORCE_INLINE(uint32_t)
5497iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5498{
5499 Assert(cShift > 0 && cShift < 32);
5500
5501#if defined(RT_ARCH_AMD64)
5502 /* shl dst, cShift */
5503 if (iGprDst >= 8)
5504 pCodeBuf[off++] = X86_OP_REX_B;
5505 if (cShift != 1)
5506 {
5507 pCodeBuf[off++] = 0xc1;
5508 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5509 pCodeBuf[off++] = cShift;
5510 }
5511 else
5512 {
5513 pCodeBuf[off++] = 0xd1;
5514 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5515 }
5516
5517#elif defined(RT_ARCH_ARM64)
5518 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5519
5520#else
5521# error "Port me"
5522#endif
5523 return off;
5524}
5525
5526
5527/**
5528 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5529 */
5530DECL_INLINE_THROW(uint32_t)
5531iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5532{
5533#if defined(RT_ARCH_AMD64)
5534 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5535#elif defined(RT_ARCH_ARM64)
5536 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5537#else
5538# error "Port me"
5539#endif
5540 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5541 return off;
5542}
5543
5544
5545/**
5546 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5547 */
5548DECL_FORCE_INLINE(uint32_t)
5549iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5550{
5551 Assert(cShift > 0 && cShift < 64);
5552
5553#if defined(RT_ARCH_AMD64)
5554 /* shr dst, cShift */
5555 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5556 if (cShift != 1)
5557 {
5558 pCodeBuf[off++] = 0xc1;
5559 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5560 pCodeBuf[off++] = cShift;
5561 }
5562 else
5563 {
5564 pCodeBuf[off++] = 0xd1;
5565 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5566 }
5567
5568#elif defined(RT_ARCH_ARM64)
5569 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5570
5571#else
5572# error "Port me"
5573#endif
5574 return off;
5575}
5576
5577
5578/**
5579 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5580 */
5581DECL_INLINE_THROW(uint32_t)
5582iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5583{
5584#if defined(RT_ARCH_AMD64)
5585 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5586#elif defined(RT_ARCH_ARM64)
5587 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5588#else
5589# error "Port me"
5590#endif
5591 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5592 return off;
5593}
5594
5595
5596/**
5597 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5598 * right.
5599 */
5600DECL_FORCE_INLINE(uint32_t)
5601iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5602{
5603 Assert(cShift > 0 && cShift < 32);
5604
5605#if defined(RT_ARCH_AMD64)
5606 /* shr dst, cShift */
5607 if (iGprDst >= 8)
5608 pCodeBuf[off++] = X86_OP_REX_B;
5609 if (cShift != 1)
5610 {
5611 pCodeBuf[off++] = 0xc1;
5612 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5613 pCodeBuf[off++] = cShift;
5614 }
5615 else
5616 {
5617 pCodeBuf[off++] = 0xd1;
5618 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5619 }
5620
5621#elif defined(RT_ARCH_ARM64)
5622 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5623
5624#else
5625# error "Port me"
5626#endif
5627 return off;
5628}
5629
5630
5631/**
5632 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5633 * right.
5634 */
5635DECL_INLINE_THROW(uint32_t)
5636iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5637{
5638#if defined(RT_ARCH_AMD64)
5639 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5640#elif defined(RT_ARCH_ARM64)
5641 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5642#else
5643# error "Port me"
5644#endif
5645 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5646 return off;
5647}
5648
5649
5650/**
5651 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5652 * right and assigning it to a different GPR.
5653 */
5654DECL_INLINE_THROW(uint32_t)
5655iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5656{
5657 Assert(cShift > 0); Assert(cShift < 32);
5658#if defined(RT_ARCH_AMD64)
5659 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5660 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5661
5662#elif defined(RT_ARCH_ARM64)
5663 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5664
5665#else
5666# error "Port me"
5667#endif
5668 return off;
5669}
5670
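/*
 * Illustrative only: extracting a byte field by composing the Ex emitters
 * above (the caller is assumed to have ensured sufficient buffer space).
 */
#if 0 /* illustrative only */
off = iemNativeEmitGpr32EqGprShiftRightImmEx(pCodeBuf, off, iGprDst, iGprSrc, 8); /* bits 15:8 -> bits 7:0 */
off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, iGprDst, UINT32_C(0xff));       /* 0xff is ARM64 encodable */
#endif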
5671
5672/**
5673 * Emits code for rotating a GPR a fixed number of bits to the left.
5674 */
5675DECL_FORCE_INLINE(uint32_t)
5676iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5677{
5678 Assert(cShift > 0 && cShift < 64);
5679
5680#if defined(RT_ARCH_AMD64)
5681 /* rol dst, cShift */
5682 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5683 if (cShift != 1)
5684 {
5685 pCodeBuf[off++] = 0xc1;
5686 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5687 pCodeBuf[off++] = cShift;
5688 }
5689 else
5690 {
5691 pCodeBuf[off++] = 0xd1;
5692 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5693 }
5694
5695#elif defined(RT_ARCH_ARM64)
5696 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5697
5698#else
5699# error "Port me"
5700#endif
5701 return off;
5702}
5703
5704
5705#if defined(RT_ARCH_AMD64)
5706/**
5707 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5708 */
5709DECL_FORCE_INLINE(uint32_t)
5710iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5711{
5712 Assert(cShift > 0 && cShift < 32);
5713
5714 /* rcl dst, cShift */
5715 if (iGprDst >= 8)
5716 pCodeBuf[off++] = X86_OP_REX_B;
5717 if (cShift != 1)
5718 {
5719 pCodeBuf[off++] = 0xc1;
5720 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5721 pCodeBuf[off++] = cShift;
5722 }
5723 else
5724 {
5725 pCodeBuf[off++] = 0xd1;
5726 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5727 }
5728
5729 return off;
5730}
5731#endif /* RT_ARCH_AMD64 */
5732
5733
5734
5735/**
5736 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5737 * @note ARM64 clears bits 63:32 of the destination GPR; the AMD64 variant (rol r16) leaves bits 63:16 untouched.
5738 */
5739DECL_FORCE_INLINE(uint32_t)
5740iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5741{
5742#if defined(RT_ARCH_AMD64)
5743 /*
5744 * There is no bswap r16 on x86 (the encoding exists but does not work).
5745 * So just use a rol instead (gcc -O2 does the same).
5746 *
5747 * rol r16, 0x8
5748 */
5749 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5750 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5751 if (iGpr >= 8)
5752 pbCodeBuf[off++] = X86_OP_REX_B;
5753 pbCodeBuf[off++] = 0xc1;
5754 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5755 pbCodeBuf[off++] = 0x08;
5756#elif defined(RT_ARCH_ARM64)
5757 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5758
5759 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5760#else
5761# error "Port me"
5762#endif
5763
5764 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5765 return off;
5766}
5767
5768
5769/**
5770 * Emits code for reversing the byte order in a 32-bit GPR.
5771 * @note Bits 63:32 of the destination GPR will be cleared.
5772 */
5773DECL_FORCE_INLINE(uint32_t)
5774iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5775{
5776#if defined(RT_ARCH_AMD64)
5777 /* bswap r32 */
5778 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5779
5780 if (iGpr >= 8)
5781 pbCodeBuf[off++] = X86_OP_REX_B;
5782 pbCodeBuf[off++] = 0x0f;
5783 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5784#elif defined(RT_ARCH_ARM64)
5785 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5786
5787 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5788#else
5789# error "Port me"
5790#endif
5791
5792 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5793 return off;
5794}
5795
5796
5797/**
5798 * Emits code for reversing the byte order in a 64-bit GPR.
5799 */
5800DECL_FORCE_INLINE(uint32_t)
5801iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5802{
5803#if defined(RT_ARCH_AMD64)
5804 /* bswap r64 */
5805 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5806
5807 if (iGpr >= 8)
5808 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5809 else
5810 pbCodeBuf[off++] = X86_OP_REX_W;
5811 pbCodeBuf[off++] = 0x0f;
5812 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5813#elif defined(RT_ARCH_ARM64)
5814 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5815
5816 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5817#else
5818# error "Port me"
5819#endif
5820
5821 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5822 return off;
5823}
5824
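/*
 * Illustrative only: a zero-extended 16-bit byte swap can also be composed
 * from the 32-bit emitters above, one instruction longer but with
 * well-defined bits 63:16 on both hosts.
 */
#if 0 /* illustrative only */
off = iemNativeEmitBswapGpr32(pReNative, off, iGpr);          /* 0x0000aabb -> 0xbbaa0000 */
off = iemNativeEmitShiftGpr32Right(pReNative, off, iGpr, 16); /* 0xbbaa0000 -> 0x0000bbaa */
#endif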
5825
5826/*********************************************************************************************************************************
5827* Compare and Testing *
5828*********************************************************************************************************************************/
5829
5830
5831#ifdef RT_ARCH_ARM64
5832/**
5833 * Emits an ARM64 compare instruction.
5834 */
5835DECL_INLINE_THROW(uint32_t)
5836iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5837 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5838{
5839 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5840 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5841 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5842 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5843 return off;
5844}
5845#endif
5846
5847
5848/**
5849 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5850 * with conditional instructions.
5851 */
5852DECL_FORCE_INLINE(uint32_t)
5853iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5854{
5855#ifdef RT_ARCH_AMD64
5856 /* cmp Gv, Ev */
5857 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5858 pCodeBuf[off++] = 0x3b;
5859 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5860
5861#elif defined(RT_ARCH_ARM64)
5862 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
5863
5864#else
5865# error "Port me!"
5866#endif
5867 return off;
5868}
5869
5870
5871/**
5872 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5873 * with conditional instructions.
5874 */
5875DECL_INLINE_THROW(uint32_t)
5876iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5877{
5878#ifdef RT_ARCH_AMD64
5879 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5880#elif defined(RT_ARCH_ARM64)
5881 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5882#else
5883# error "Port me!"
5884#endif
5885 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5886 return off;
5887}
5888
5889
5890/**
5891 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5892 * with conditional instructions.
5893 */
5894DECL_FORCE_INLINE(uint32_t)
5895iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5896{
5897#ifdef RT_ARCH_AMD64
5898 /* cmp Gv, Ev */
5899 if (iGprLeft >= 8 || iGprRight >= 8)
5900 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5901 pCodeBuf[off++] = 0x3b;
5902 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5903
5904#elif defined(RT_ARCH_ARM64)
5905 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
5906
5907#else
5908# error "Port me!"
5909#endif
5910 return off;
5911}
5912
5913
5914/**
5915 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5916 * with conditional instructions.
5917 */
5918DECL_INLINE_THROW(uint32_t)
5919iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5920{
5921#ifdef RT_ARCH_AMD64
5922 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5923#elif defined(RT_ARCH_ARM64)
5924 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5925#else
5926# error "Port me!"
5927#endif
5928 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5929 return off;
5930}
5931
5932
5933/**
5934 * Emits a compare of a 64-bit GPR with a constant value, setting status
5935 * flags/whatever for use with conditional instructions.
5936 */
5937DECL_INLINE_THROW(uint32_t)
5938iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
5939{
5940#ifdef RT_ARCH_AMD64
5941 if ((int64_t)uImm == (int8_t)uImm)
5942 {
5943 /* cmp Ev, Ib */
5944 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5945 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5946 pbCodeBuf[off++] = 0x83;
5947 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5948 pbCodeBuf[off++] = (uint8_t)uImm;
5949 }
5950 else if ((int64_t)uImm == (int32_t)uImm)
5951 {
5952 /* cmp Ev, imm */
5953 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5954 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5955 pbCodeBuf[off++] = 0x81;
5956 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5958 pbCodeBuf[off++] = RT_BYTE1(uImm);
5959 pbCodeBuf[off++] = RT_BYTE2(uImm);
5960 pbCodeBuf[off++] = RT_BYTE3(uImm);
5961 pbCodeBuf[off++] = RT_BYTE4(uImm);
5962 }
5963 else
5964 {
5965 /* Use temporary register for the immediate. */
5966 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5967 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5968 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5969 }
5970
5971#elif defined(RT_ARCH_ARM64)
5972 /** @todo guess there are cleverer things we can do here... */
5973 if (uImm < _4K)
5974 {
5975 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5976 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5977 true /*64Bit*/, true /*fSetFlags*/);
5978 }
5979 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5980 {
5981 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5982 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5983 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5984 }
5985 else
5986 {
5987 /* Use temporary register for the immediate. */
5988 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5989 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5990 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5991 }
5992
5993#else
5994# error "Port me!"
5995#endif
5996
5997 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5998 return off;
5999}
6000
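/*
 * Illustrative only: the three ARM64 paths of the emitter above, by example
 * immediate value.
 */
#if 0 /* illustrative only */
off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprLeft, UINT64_C(0xfff));    /* cmp xN, #imm12 */
off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprLeft, UINT64_C(0x7ff000)); /* cmp xN, #imm12, lsl #12 */
off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprLeft, UINT64_C(0x12345));  /* tmp load + cmp xN, xTmp */
#endif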
6001
6002/**
6003 * Emits a compare of a 32-bit GPR with a constant value, setting status
6004 * flags/whatever for use with conditional instructions.
6005 *
6006 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6007 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6008 * bits all zero). Will release assert or throw exception if the caller
6009 * violates this restriction.
6010 */
6011DECL_FORCE_INLINE_THROW(uint32_t)
6012iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6013{
6014#ifdef RT_ARCH_AMD64
6015 if (iGprLeft >= 8)
6016 pCodeBuf[off++] = X86_OP_REX_B;
6017 if (uImm <= UINT32_C(0x7f))
6018 {
6019 /* cmp Ev, Ib */
6020 pCodeBuf[off++] = 0x83;
6021 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6022 pCodeBuf[off++] = (uint8_t)uImm;
6023 }
6024 else
6025 {
6026 /* cmp Ev, imm */
6027 pCodeBuf[off++] = 0x81;
6028 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6029 pCodeBuf[off++] = RT_BYTE1(uImm);
6030 pCodeBuf[off++] = RT_BYTE2(uImm);
6031 pCodeBuf[off++] = RT_BYTE3(uImm);
6032 pCodeBuf[off++] = RT_BYTE4(uImm);
6033 }
6034
6035#elif defined(RT_ARCH_ARM64)
6036 /** @todo guess there are cleverer things we can do here... */
6037 if (uImm < _4K)
6038 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6039 false /*64Bit*/, true /*fSetFlags*/);
6040 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6041 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6042 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6043 else
6044# ifdef IEM_WITH_THROW_CATCH
6045 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6046# else
6047 AssertReleaseFailedStmt(off = UINT32_MAX);
6048# endif
6049
6050#else
6051# error "Port me!"
6052#endif
6053 return off;
6054}
6055
6056
6057/**
6058 * Emits a compare of a 32-bit GPR with a constant value, setting status
6059 * flags/whatever for use with conditional instructions.
6060 */
6061DECL_INLINE_THROW(uint32_t)
6062iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6063{
6064#ifdef RT_ARCH_AMD64
6065 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6066
6067#elif defined(RT_ARCH_ARM64)
6068 /** @todo guess there are cleverer things we can do here... */
6069 if (uImm < _4K)
6070 {
6071 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6072 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6073 false /*64Bit*/, true /*fSetFlags*/);
6074 }
6075 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6076 {
6077 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6078 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6079 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6080 }
6081 else
6082 {
6083 /* Use temporary register for the immediate. */
6084 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6085 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6086 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6087 }
6088
6089#else
6090# error "Port me!"
6091#endif
6092
6093 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6094 return off;
6095}
6096
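/*
 * Illustrative only: a typical pairing with the branching emitters further
 * down - compare, then branch on the resulting flags.
 */
#if 0 /* illustrative only */
off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprLeft, 0);
off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e); /* taken if equal/zero */
#endif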
6097
6098/**
6099 * Emits a compare of a 16-bit GPR with a constant value, setting status
6100 * flags/whatever for use with conditional instructions.
6101 *
6102 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
6103 * 16-bit value from @a iGprLeft.
6104 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6105 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6106 * bits all zero). Will release assert or throw exception if the caller
6107 * violates this restriction.
6108 */
6109DECL_FORCE_INLINE_THROW(uint32_t)
6110iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6111 uint8_t idxTmpReg = UINT8_MAX)
6112{
6113#ifdef RT_ARCH_AMD64
6114 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6115 if (iGprLeft >= 8)
6116 pCodeBuf[off++] = X86_OP_REX_B;
6117 if (uImm <= UINT32_C(0x7f))
6118 {
6119 /* cmp Ev, Ib */
6120 pCodeBuf[off++] = 0x83;
6121 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6122 pCodeBuf[off++] = (uint8_t)uImm;
6123 }
6124 else
6125 {
6126 /* cmp Ev, imm */
6127 pCodeBuf[off++] = 0x81;
6128 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6129 pCodeBuf[off++] = RT_BYTE1(uImm);
6130 pCodeBuf[off++] = RT_BYTE2(uImm);
6131 }
6132 RT_NOREF(idxTmpReg);
6133
6134#elif defined(RT_ARCH_ARM64)
6135# ifdef IEM_WITH_THROW_CATCH
6136 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6137# else
6138 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6139# endif
6140 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6141 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6142 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6143
6144#else
6145# error "Port me!"
6146#endif
6147 return off;
6148}
6149
6150
6151/**
6152 * Emits a compare of a 16-bit GPR with a constant value, setting status
6153 * flags/whatever for use with conditional instructions.
6154 *
6155 * @note ARM64: Helper register is required (idxTmpReg).
6156 */
6157DECL_INLINE_THROW(uint32_t)
6158iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6159 uint8_t idxTmpReg = UINT8_MAX)
6160{
6161#ifdef RT_ARCH_AMD64
6162 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6163#elif defined(RT_ARCH_ARM64)
6164 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6165#else
6166# error "Port me!"
6167#endif
6168 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6169 return off;
6170}
6171
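/*
 * Illustrative only: an ARM64-safe call site supplies the scratch register
 * needed to isolate the low 16 bits before comparing (assumes the
 * iemNativeRegAllocTmp/iemNativeRegFreeTmp allocators).
 */
#if 0 /* illustrative only */
uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off); /* assumed allocator */
off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprLeft, UINT16_C(0x2a), idxTmpReg);
iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif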
6172
6173
6174/*********************************************************************************************************************************
6175* Branching *
6176*********************************************************************************************************************************/
6177
6178/**
6179 * Emits a JMP rel32 / B imm26 to the given label.
6180 */
6181DECL_FORCE_INLINE_THROW(uint32_t)
6182iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6183{
6184 Assert(idxLabel < pReNative->cLabels);
6185
6186#ifdef RT_ARCH_AMD64
6187 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6188 {
6189 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6190 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6191 {
6192 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6193 pCodeBuf[off++] = (uint8_t)offRel;
6194 }
6195 else
6196 {
6197 offRel -= 3;
6198 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6199 pCodeBuf[off++] = RT_BYTE1(offRel);
6200 pCodeBuf[off++] = RT_BYTE2(offRel);
6201 pCodeBuf[off++] = RT_BYTE3(offRel);
6202 pCodeBuf[off++] = RT_BYTE4(offRel);
6203 }
6204 }
6205 else
6206 {
6207 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6208 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6209 pCodeBuf[off++] = 0xfe;
6210 pCodeBuf[off++] = 0xff;
6211 pCodeBuf[off++] = 0xff;
6212 pCodeBuf[off++] = 0xff;
6213 }
6214 pCodeBuf[off++] = 0xcc; /* int3 poison */
6215
6216#elif defined(RT_ARCH_ARM64)
6217 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6218 pCodeBuf[off++] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6219 else
6220 {
6221 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6222 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6223 }
6224
6225#else
6226# error "Port me!"
6227#endif
6228 return off;
6229}
6230
6231
6232/**
6233 * Emits a JMP rel32 / B imm26 to the given label.
6234 */
6235DECL_INLINE_THROW(uint32_t)
6236iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6237{
6238#ifdef RT_ARCH_AMD64
6239 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6240#elif defined(RT_ARCH_ARM64)
6241 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6242#else
6243# error "Port me!"
6244#endif
6245 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6246 return off;
6247}
6248
6249
6250/**
6251 * Emits a JMP rel32 / B imm26 to a new undefined label.
6252 */
6253DECL_INLINE_THROW(uint32_t)
6254iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6255{
6256 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6257 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6258}
6259
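/*
 * Illustrative only: the label/fixup flow behind the jump emitters above.
 * Jumping to a not-yet-defined label records a fixup that is resolved once
 * the label gets its offset (iemNativeLabelDefine is assumed to be the
 * defining helper).
 */
#if 0 /* illustrative only */
uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, 0 /*uData*/);
off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel); /* emits jmp rel32 / b imm26 plus a fixup record */
/* ... emit the code being jumped over ... */
iemNativeLabelDefine(pReNative, idxLabel, off);          /* assumed: gives the label its offset */
#endif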
6260/** Condition type. */
6261#ifdef RT_ARCH_AMD64
6262typedef enum IEMNATIVEINSTRCOND : uint8_t
6263{
6264 kIemNativeInstrCond_o = 0,
6265 kIemNativeInstrCond_no,
6266 kIemNativeInstrCond_c,
6267 kIemNativeInstrCond_nc,
6268 kIemNativeInstrCond_e,
6269 kIemNativeInstrCond_ne,
6270 kIemNativeInstrCond_be,
6271 kIemNativeInstrCond_nbe,
6272 kIemNativeInstrCond_s,
6273 kIemNativeInstrCond_ns,
6274 kIemNativeInstrCond_p,
6275 kIemNativeInstrCond_np,
6276 kIemNativeInstrCond_l,
6277 kIemNativeInstrCond_nl,
6278 kIemNativeInstrCond_le,
6279 kIemNativeInstrCond_nle
6280} IEMNATIVEINSTRCOND;
6281#elif defined(RT_ARCH_ARM64)
6282typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6283# define kIemNativeInstrCond_o todo_conditional_codes
6284# define kIemNativeInstrCond_no todo_conditional_codes
6285# define kIemNativeInstrCond_c todo_conditional_codes
6286# define kIemNativeInstrCond_nc todo_conditional_codes
6287# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6288# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6289# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6290# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6291# define kIemNativeInstrCond_s todo_conditional_codes
6292# define kIemNativeInstrCond_ns todo_conditional_codes
6293# define kIemNativeInstrCond_p todo_conditional_codes
6294# define kIemNativeInstrCond_np todo_conditional_codes
6295# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6296# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6297# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6298# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6299#else
6300# error "Port me!"
6301#endif
6302
6303
6304/**
6305 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6306 */
6307DECL_FORCE_INLINE_THROW(uint32_t)
6308iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6309 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6310{
6311 Assert(idxLabel < pReNative->cLabels);
6312
6313 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6314#ifdef RT_ARCH_AMD64
6315 if (offLabel >= off)
6316 {
6317 /* jcc rel32 */
6318 pCodeBuf[off++] = 0x0f;
6319 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6320 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6321 pCodeBuf[off++] = 0x00;
6322 pCodeBuf[off++] = 0x00;
6323 pCodeBuf[off++] = 0x00;
6324 pCodeBuf[off++] = 0x00;
6325 }
6326 else
6327 {
6328 int32_t offDisp = offLabel - (off + 2);
6329 if ((int8_t)offDisp == offDisp)
6330 {
6331 /* jcc rel8 */
6332 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6333 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6334 }
6335 else
6336 {
6337 /* jcc rel32 */
6338 offDisp -= 4;
6339 pCodeBuf[off++] = 0x0f;
6340 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6341 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6342 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6343 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6344 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6345 }
6346 }
6347
6348#elif defined(RT_ARCH_ARM64)
6349 if (offLabel >= off)
6350 {
6351 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6352 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6353 }
6354 else
6355 {
6356 Assert(off - offLabel <= 0x3ffffU);
6357 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6358 }
6359
6360#else
6361# error "Port me!"
6362#endif
6363 return off;
6364}
6365
6366
6367/**
6368 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6369 */
6370DECL_INLINE_THROW(uint32_t)
6371iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6372{
6373#ifdef RT_ARCH_AMD64
6374 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6375#elif defined(RT_ARCH_ARM64)
6376 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6377#else
6378# error "Port me!"
6379#endif
6380 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6381 return off;
6382}
6383
6384
6385/**
6386 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6387 */
6388DECL_INLINE_THROW(uint32_t)
6389iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6390 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6391{
6392 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6393 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6394}
6395
6396
6397/**
6398 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6399 */
6400DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6401{
6402#ifdef RT_ARCH_AMD64
6403 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6404#elif defined(RT_ARCH_ARM64)
6405 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6406#else
6407# error "Port me!"
6408#endif
6409}
6410
6411/**
6412 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6413 */
6414DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6415 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6416{
6417#ifdef RT_ARCH_AMD64
6418 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6419#elif defined(RT_ARCH_ARM64)
6420 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6421#else
6422# error "Port me!"
6423#endif
6424}
6425
6426
6427/**
6428 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6429 */
6430DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6431{
6432#ifdef RT_ARCH_AMD64
6433 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6434#elif defined(RT_ARCH_ARM64)
6435 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6436#else
6437# error "Port me!"
6438#endif
6439}
6440
6441/**
6442 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6443 */
6444DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6445 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6446{
6447#ifdef RT_ARCH_AMD64
6448 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6449#elif defined(RT_ARCH_ARM64)
6450 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6451#else
6452# error "Port me!"
6453#endif
6454}
6455
6456
6457/**
6458 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6459 */
6460DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6461{
6462#ifdef RT_ARCH_AMD64
6463 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6464#elif defined(RT_ARCH_ARM64)
6465 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6466#else
6467# error "Port me!"
6468#endif
6469}
6470
6471/**
6472 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6473 */
6474DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6475 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6476{
6477#ifdef RT_ARCH_AMD64
6478 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6479#elif defined(RT_ARCH_ARM64)
6480 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6481#else
6482# error "Port me!"
6483#endif
6484}
6485
6486
6487/**
6488 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6489 */
6490DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6491{
6492#ifdef RT_ARCH_AMD64
6493 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6494#elif defined(RT_ARCH_ARM64)
6495 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6496#else
6497# error "Port me!"
6498#endif
6499}
6500
6501/**
6502 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6503 */
6504DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6505 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6506{
6507#ifdef RT_ARCH_AMD64
6508 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6509#elif defined(RT_ARCH_ARM64)
6510 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6511#else
6512# error "Port me!"
6513#endif
6514}
6515
6516
6517/**
6518 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6519 */
6520DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6521{
6522#ifdef RT_ARCH_AMD64
6523 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6524#elif defined(RT_ARCH_ARM64)
6525 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6526#else
6527# error "Port me!"
6528#endif
6529}
6530
6531/**
6532 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6533 */
6534DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6535 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6536{
6537#ifdef RT_ARCH_AMD64
6538 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6539#elif defined(RT_ARCH_ARM64)
6540 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6541#else
6542# error "Port me!"
6543#endif
6544}
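
/**
 * Usage sketch (illustrative only): the Jcc wrappers above select the matching
 * condition encoding per host architecture, so callers stay portable.  Uses
 * the iemNativeEmitCmpGprWithImm() helper referenced elsewhere in this file;
 * the register and label arguments are hypothetical.
 */
#if 0 /* usage sketch, not part of the API */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleCmpAndJbe(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprCount, uint32_t idxLabelLoop)
{
    /* cmp iGprCount, 16 ; jbe idxLabelLoop  (CMP + B.LS on ARM64). */
    off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprCount, 16);
    return iemNativeEmitJbeToLabel(pReNative, off, idxLabelLoop);
}
#endif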
6545
6546
6547/**
6548 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6549 *
6550 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6551 *
6552 * Only use hardcoded jumps forward when emitting for exactly one
6553 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6554 * the right target address on all platforms!
6555 *
6556 * Please also note that on x86 it is necessary to pass off + 256 or higher
6557 * for @a offTarget if one believes the intervening code is more than 127
6558 * bytes long.
6559 */
6560DECL_FORCE_INLINE(uint32_t)
6561iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6562{
6563#ifdef RT_ARCH_AMD64
6564 /* jcc rel8 / rel32 */
6565 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6566 if (offDisp < 128 && offDisp >= -128)
6567 {
6568 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6569 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6570 }
6571 else
6572 {
6573 offDisp -= 4;
6574 pCodeBuf[off++] = 0x0f;
6575 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6576 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6577 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6578 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6579 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6580 }
6581
6582#elif defined(RT_ARCH_ARM64)
6583 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6584
6585#else
6586# error "Port me!"
6587#endif
6588 return off;
6589}
6590
6591
6592/**
6593 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6594 *
6595 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6596 *
6597 * Only use hardcoded jumps forward when emitting for exactly one
6598 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6599 * the right target address on all platforms!
6600 *
6601 * Please also note that on x86 it is necessary to pass off + 256 or higher
6602 * for @a offTarget if one believes the intervening code is more than 127
6603 * bytes long.
6604 */
6605DECL_INLINE_THROW(uint32_t)
6606iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6607{
6608#ifdef RT_ARCH_AMD64
6609 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6610#elif defined(RT_ARCH_ARM64)
6611 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6612#else
6613# error "Port me!"
6614#endif
6615 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6616 return off;
6617}
6618
6619
6620/**
6621 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6622 *
6623 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6624 */
6625DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6626{
6627#ifdef RT_ARCH_AMD64
6628 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6629#elif defined(RT_ARCH_ARM64)
6630 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6631#else
6632# error "Port me!"
6633#endif
6634}
6635
6636
6637/**
6638 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6639 *
6640 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6641 */
6642DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6643{
6644#ifdef RT_ARCH_AMD64
6645 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6646#elif defined(RT_ARCH_ARM64)
6647 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6648#else
6649# error "Port me!"
6650#endif
6651}
6652
6653
6654/**
6655 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6656 *
6657 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6658 */
6659DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6660{
6661#ifdef RT_ARCH_AMD64
6662 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6663#elif defined(RT_ARCH_ARM64)
6664 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6665#else
6666# error "Port me!"
6667#endif
6668}
6669
6670
6671/**
6672 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6673 *
6674 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6675 */
6676DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6677{
6678#ifdef RT_ARCH_AMD64
6679 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6680#elif defined(RT_ARCH_ARM64)
6681 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6682#else
6683# error "Port me!"
6684#endif
6685}
6686
6687
6688/**
6689 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6690 *
6691 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6692 */
6693DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6694{
6695#ifdef RT_ARCH_AMD64
6696 /* jmp rel8 or rel32 */
6697 int32_t offDisp = offTarget - (off + 2);
6698 if (offDisp < 128 && offDisp >= -128)
6699 {
6700 pCodeBuf[off++] = 0xeb;
6701 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6702 }
6703 else
6704 {
6705 offDisp -= 3;
6706 pCodeBuf[off++] = 0xe9;
6707 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6708 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6709 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6710 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6711 }
6712
6713#elif defined(RT_ARCH_ARM64)
6714 pCodeBuf[off++] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6715
6716#else
6717# error "Port me!"
6718#endif
6719 return off;
6720}
6721
6722
6723/**
6724 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6725 *
6726 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6727 */
6728DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6729{
6730#ifdef RT_ARCH_AMD64
6731 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6732#elif defined(RT_ARCH_ARM64)
6733 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6734#else
6735# error "Port me!"
6736#endif
6737 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6738 return off;
6739}
6740
6741
6742/**
6743 * Fixes up a conditional jump to a fixed label.
6744 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6745 * iemNativeEmitJzToFixed, ...
6746 */
6747DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6748{
6749#ifdef RT_ARCH_AMD64
6750 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6751 uint8_t const bOpcode = pbCodeBuf[offFixup];
6752 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6753 {
6754 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6755 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6756 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6757 }
6758 else
6759 {
6760 if (bOpcode != 0x0f)
6761 Assert(bOpcode == 0xe9);
6762 else
6763 {
6764 offFixup += 1;
6765 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
6766 }
6767 uint32_t const offRel32 = offTarget - (offFixup + 5);
6768 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6769 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6770 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6771 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6772 }
6773
6774#elif defined(RT_ARCH_ARM64)
6775 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6776 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6777 {
6778 /* B.COND + BC.COND */
6779 int32_t const offDisp = offTarget - offFixup;
6780 Assert(offDisp >= -262144 && offDisp < 262144);
6781 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6782 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6783 }
6784 else
6785 {
6786 /* B imm26 */
6787 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6788 int32_t const offDisp = offTarget - offFixup;
6789 Assert(offDisp >= -33554432 && offDisp < 33554432);
6790 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6791 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6792 }
6793
6794#else
6795# error "Port me!"
6796#endif
6797}
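
/**
 * Usage sketch (illustrative only): the fixed-jump plus fixup pattern for
 * skipping over a code block whose length is not known up front.  The skipped
 * instruction here is just a breakpoint to keep the example short.
 */
#if 0 /* usage sketch, not part of the API */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleSkipOverCode(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Emit JNZ with a placeholder target at least 256 bytes out so the rel32
       form is chosen on AMD64 (see the note on @a offTarget above). */
    uint32_t const offFixup = off;
    off = iemNativeEmitJnzToFixed(pReNative, off, off + 256 /*placeholder*/);
    off = iemNativeEmitBrk(pReNative, off, 0x1234);     /* the code being skipped */
    iemNativeFixupFixedJump(pReNative, offFixup, off);  /* patch to the real target */
    return off;
}
#endif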
6798
6799
6800#ifdef RT_ARCH_AMD64
6801/**
6802 * For doing a bt (bit test) on a register.
6803 */
6804DECL_INLINE_THROW(uint32_t)
6805iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6806{
6807 Assert(iBitNo < 64);
6808 /* bt Ev, imm8 */
6809 if (iBitNo >= 32)
6810 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6811 else if (iGprSrc >= 8)
6812 pCodeBuf[off++] = X86_OP_REX_B;
6813 pCodeBuf[off++] = 0x0f;
6814 pCodeBuf[off++] = 0xba;
6815 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6816 pCodeBuf[off++] = iBitNo;
6817 return off;
6818}
6819#endif /* RT_ARCH_AMD64 */
6820
6821
6822/**
6823 * Internal helper, don't call directly.
6824 */
6825DECL_INLINE_THROW(uint32_t)
6826iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6827 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6828{
6829 Assert(iBitNo < 64);
6830#ifdef RT_ARCH_AMD64
6831 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6832 if (iBitNo < 8)
6833 {
6834 /* test Eb, imm8 */
6835 if (iGprSrc >= 4)
6836 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6837 pbCodeBuf[off++] = 0xf6;
6838 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6839 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6840 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6841 }
6842 else
6843 {
6844 /* bt Ev, imm8 */
6845 if (iBitNo >= 32)
6846 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6847 else if (iGprSrc >= 8)
6848 pbCodeBuf[off++] = X86_OP_REX_B;
6849 pbCodeBuf[off++] = 0x0f;
6850 pbCodeBuf[off++] = 0xba;
6851 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6852 pbCodeBuf[off++] = iBitNo;
6853 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6854 }
6855
6856#elif defined(RT_ARCH_ARM64)
6857 /* Use the TBZ/TBNZ instructions here. */
6858 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6859 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
6860 {
6861 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
6862 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
6863 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
6864 //if (offLabel == UINT32_MAX)
6865 {
6866 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6867 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6868 }
6869 //else
6870 //{
6871 // RT_BREAKPOINT();
6872 // Assert(off - offLabel <= 0x1fffU);
6873 // pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
6874 //
6875 //}
6876 }
6877 else
6878 {
6879 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
6880 pu32CodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
6881 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6882 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
6883 }
6884
6885#else
6886# error "Port me!"
6887#endif
6888 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6889 return off;
6890}
6891
6892
6893/**
6894 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
6895 * @a iGprSrc.
6896 *
6897 * @note On ARM64 the range is only +/-8191 instructions.
6898 */
6899DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6900 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6901{
6902 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
6903}
6904
6905
6906/**
6907 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
6908 * _set_ in @a iGprSrc.
6909 *
6910 * @note On ARM64 the range is only +/-8191 instructions.
6911 */
6912DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6913 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6914{
6915 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
6916}
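
/**
 * Usage sketch (illustrative only): branching on a single EFLAGS bit, here the
 * zero flag at bit position 6.  The source register and label index are
 * hypothetical.
 */
#if 0 /* usage sketch, not part of the API */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleJmpIfZfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprEfl, uint32_t idxLabel)
{
    /* TEST/BT + Jcc on AMD64, a single TBNZ on ARM64 (+/-8191 instr range). */
    return iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, iGprEfl, 6 /*ZF*/, idxLabel);
}
#endif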
6917
6918
6919/**
6920 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
6921 * flags accordingly.
6922 */
6923DECL_INLINE_THROW(uint32_t)
6924iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
6925{
6926 Assert(fBits != 0);
6927#ifdef RT_ARCH_AMD64
6928
6929 if (fBits >= UINT32_MAX)
6930 {
6931 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6932
6933 /* test Ev,Gv */
6934 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6935 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
6936 pbCodeBuf[off++] = 0x85;
6937 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
6938
6939 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6940 }
6941 else if (fBits <= UINT32_MAX)
6942 {
6943 /* test Eb, imm8 or test Ev, imm32 */
6944 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6945 if (fBits <= UINT8_MAX)
6946 {
6947 if (iGprSrc >= 4)
6948 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6949 pbCodeBuf[off++] = 0xf6;
6950 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6951 pbCodeBuf[off++] = (uint8_t)fBits;
6952 }
6953 else
6954 {
6955 if (iGprSrc >= 8)
6956 pbCodeBuf[off++] = X86_OP_REX_B;
6957 pbCodeBuf[off++] = 0xf7;
6958 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6959 pbCodeBuf[off++] = RT_BYTE1(fBits);
6960 pbCodeBuf[off++] = RT_BYTE2(fBits);
6961 pbCodeBuf[off++] = RT_BYTE3(fBits);
6962 pbCodeBuf[off++] = RT_BYTE4(fBits);
6963 }
6964 }
6965 /** @todo implement me. */
6966 else
6967 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
6968
6969#elif defined(RT_ARCH_ARM64)
6970 uint32_t uImmR = 0;
6971 uint32_t uImmNandS = 0;
6972 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
6973 {
6974 /* ands xzr, iGprSrc, #fBits */
6975 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6976 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
6977 }
6978 else
6979 {
6980 /* ands xzr, iGprSrc, iTmpReg */
6981 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6982 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6983 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
6984 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6985 }
6986
6987#else
6988# error "Port me!"
6989#endif
6990 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6991 return off;
6992}
6993
6994
6995/**
6996 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
6997 * @a iGprSrc, setting CPU flags accordingly.
6998 *
6999 * @note For ARM64 this only supports @a fBits values that can be expressed
7000 * using the two 6-bit immediates of the ANDS instruction. The caller
7001 * must make sure this is possible!
7002 */
7003DECL_FORCE_INLINE_THROW(uint32_t)
7004iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
7005{
7006 Assert(fBits != 0);
7007
7008#ifdef RT_ARCH_AMD64
7009 if (fBits <= UINT8_MAX)
7010 {
7011 /* test Eb, imm8 */
7012 if (iGprSrc >= 4)
7013 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7014 pCodeBuf[off++] = 0xf6;
7015 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7016 pCodeBuf[off++] = (uint8_t)fBits;
7017 }
7018 else
7019 {
7020 /* test Ev, imm32 */
7021 if (iGprSrc >= 8)
7022 pCodeBuf[off++] = X86_OP_REX_B;
7023 pCodeBuf[off++] = 0xf7;
7024 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7025 pCodeBuf[off++] = RT_BYTE1(fBits);
7026 pCodeBuf[off++] = RT_BYTE2(fBits);
7027 pCodeBuf[off++] = RT_BYTE3(fBits);
7028 pCodeBuf[off++] = RT_BYTE4(fBits);
7029 }
7030
7031#elif defined(RT_ARCH_ARM64)
7032 /* ands xzr, src, #fBits */
7033 uint32_t uImmR = 0;
7034 uint32_t uImmNandS = 0;
7035 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7036 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7037 else
7038# ifdef IEM_WITH_THROW_CATCH
7039 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7040# else
7041 AssertReleaseFailedStmt(off = UINT32_MAX);
7042# endif
7043
7044#else
7045# error "Port me!"
7046#endif
7047 return off;
7048}
7049
7050
7051
7052/**
7053 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7054 * @a iGprSrc, setting CPU flags accordingly.
7055 *
7056 * @note For ARM64 this only supports @a fBits values that can be expressed
7057 * using the two 6-bit immediates of the ANDS instruction. The caller
7058 * must make sure this is possible!
7059 */
7060DECL_FORCE_INLINE_THROW(uint32_t)
7061iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7062{
7063 Assert(fBits != 0);
7064
7065#ifdef RT_ARCH_AMD64
7066 /* test Eb, imm8 */
7067 if (iGprSrc >= 4)
7068 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7069 pCodeBuf[off++] = 0xf6;
7070 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7071 pCodeBuf[off++] = fBits;
7072
7073#elif defined(RT_ARCH_ARM64)
7074 /* ands xzr, src, #fBits */
7075 uint32_t uImmR = 0;
7076 uint32_t uImmNandS = 0;
7077 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7078 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7079 else
7080# ifdef IEM_WITH_THROW_CATCH
7081 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7082# else
7083 AssertReleaseFailedStmt(off = UINT32_MAX);
7084# endif
7085
7086#else
7087# error "Port me!"
7088#endif
7089 return off;
7090}
7091
7092
7093/**
7094 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7095 * @a iGprSrc, setting CPU flags accordingly.
7096 */
7097DECL_INLINE_THROW(uint32_t)
7098iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7099{
7100 Assert(fBits != 0);
7101
7102#ifdef RT_ARCH_AMD64
7103 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7104
7105#elif defined(RT_ARCH_ARM64)
7106 /* ands xzr, src, [tmp|#imm] */
7107 uint32_t uImmR = 0;
7108 uint32_t uImmNandS = 0;
7109 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7110 {
7111 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7112 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7113 }
7114 else
7115 {
7116 /* Use temporary register for the 64-bit immediate. */
7117 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7118 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7119 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7120 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7121 }
7122
7123#else
7124# error "Port me!"
7125#endif
7126 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7127 return off;
7128}
7129
7130
7131/**
7132 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
7133 * are set in @a iGprSrc.
7134 */
7135DECL_INLINE_THROW(uint32_t)
7136iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7137 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7138{
7139 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7140
7141 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7142 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7143
7144 return off;
7145}
7146
7147
7148/**
7149 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
7150 * are set in @a iGprSrc.
7151 */
7152DECL_INLINE_THROW(uint32_t)
7153iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7154 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7155{
7156 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7157
7158 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7159 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7160
7161 return off;
7162}
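
/**
 * Usage sketch (illustrative only): exiting when any of several flag bits are
 * set.  The two bit positions are made up for the example; note that the mask
 * must contain more than one bit (see the power-of-two assertions above),
 * otherwise the bit-test emitters are the better fit.
 */
#if 0 /* usage sketch, not part of the API */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleJmpIfAnyFlagSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprFlags, uint32_t idxLabelExit)
{
    /* test iGprFlags, mask ; jnz idxLabelExit. */
    return iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, iGprFlags,
                                                              RT_BIT_64(3) | RT_BIT_64(5), idxLabelExit);
}
#endif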
7163
7164
7165/**
7166 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7167 *
7168 * The operand size is given by @a f64Bit.
7169 */
7170DECL_FORCE_INLINE_THROW(uint32_t)
7171iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7172 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7173{
7174 Assert(idxLabel < pReNative->cLabels);
7175
7176#ifdef RT_ARCH_AMD64
7177 /* test reg32,reg32 / test reg64,reg64 */
7178 if (f64Bit)
7179 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7180 else if (iGprSrc >= 8)
7181 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7182 pCodeBuf[off++] = 0x85;
7183 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7184
7185 /* jnz/jz idxLabel */
7186 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7187 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7188
7189#elif defined(RT_ARCH_ARM64)
7190 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7191 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7192 iGprSrc, f64Bit);
7193 else
7194 {
7195 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7196 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7197 }
7198
7199#else
7200# error "Port me!"
7201#endif
7202 return off;
7203}
7204
7205
7206/**
7207 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7208 *
7209 * The operand size is given by @a f64Bit.
7210 */
7211DECL_FORCE_INLINE_THROW(uint32_t)
7212iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7213 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7214{
7215#ifdef RT_ARCH_AMD64
7216 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7217 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7218#elif defined(RT_ARCH_ARM64)
7219 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7220 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7221#else
7222# error "Port me!"
7223#endif
7224 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7225 return off;
7226}
7227
7228
7229/* if (Gpr1 == 0) Jmp idxLabel; */
7230
7231/**
7232 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7233 *
7234 * The operand size is given by @a f64Bit.
7235 */
7236DECL_FORCE_INLINE_THROW(uint32_t)
7237iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7238 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7239{
7240 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7241 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7242}
7243
7244
7245/**
7246 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7247 *
7248 * The operand size is given by @a f64Bit.
7249 */
7250DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7251 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7252{
7253 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7254}
7255
7256
7257/**
7258 * Emits code that jumps to a new label if @a iGprSrc is zero.
7259 *
7260 * The operand size is given by @a f64Bit.
7261 */
7262DECL_INLINE_THROW(uint32_t)
7263iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7264 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7265{
7266 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7267 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7268}
7269
7270
7271/* if (Gpr1 != 0) Jmp idxLabel; */
7272
7273/**
7274 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7275 *
7276 * The operand size is given by @a f64Bit.
7277 */
7278DECL_FORCE_INLINE_THROW(uint32_t)
7279iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7280 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7281{
7282 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7283 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7284}
7285
7286
7287/**
7288 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7289 *
7290 * The operand size is given by @a f64Bit.
7291 */
7292DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7293 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7294{
7295 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7296}
7297
7298
7299/**
7300 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7301 *
7302 * The operand size is given by @a f64Bit.
7303 */
7304DECL_INLINE_THROW(uint32_t)
7305iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7306 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7307{
7308 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7309 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7310}
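
/**
 * Usage sketch (illustrative only): a zero test and branch, which folds into a
 * single CBZ/CBNZ instruction on ARM64.  Register and label are hypothetical.
 */
#if 0 /* usage sketch, not part of the API */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleJmpIfCounterZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprCounter, uint32_t idxLabelDone)
{
    /* test reg,reg + jz on AMD64; cbz on ARM64. */
    return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprCounter, true /*f64Bit*/, idxLabelDone);
}
#endif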
7311
7312
7313/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7314
7315/**
7316 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7317 * differs.
7318 */
7319DECL_INLINE_THROW(uint32_t)
7320iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7321 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7322{
7323 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7324 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7325 return off;
7326}
7327
7328
7329/**
7330 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differs.
7331 */
7332DECL_INLINE_THROW(uint32_t)
7333iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7334 uint8_t iGprLeft, uint8_t iGprRight,
7335 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7336{
7337 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7338 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7339}
7340
7341
7342/* if (Gpr != Imm) Jmp idxLabel; */
7343
7344/**
7345 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7346 */
7347DECL_INLINE_THROW(uint32_t)
7348iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7349 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7350{
7351 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7352 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7353 return off;
7354}
7355
7356
7357/**
7358 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7359 */
7360DECL_INLINE_THROW(uint32_t)
7361iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7362 uint8_t iGprSrc, uint64_t uImm,
7363 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7364{
7365 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7366 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7367}
7368
7369
7370/**
7371 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7372 * @a uImm.
7373 */
7374DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7375 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7376{
7377 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7378 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7379 return off;
7380}
7381
7382
7383/**
7384 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7385 * @a uImm.
7386 */
7387DECL_INLINE_THROW(uint32_t)
7388iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7389 uint8_t iGprSrc, uint32_t uImm,
7390 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7391{
7392 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7393 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7394}
7395
7396
7397/**
7398 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7399 * @a uImm.
7400 */
7401DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7402 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7403{
7404 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7405 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7406 return off;
7407}
7408
7409
7410/**
7411 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7412 * @a uImm.
7413 */
7414DECL_INLINE_THROW(uint32_t)
7415iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7416 uint8_t iGprSrc, uint16_t uImm,
7417 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7418{
7419 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7420 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7421}
7422
7423
7424/* if (Gpr == Imm) Jmp idxLabel; */
7425
7426/**
7427 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7428 */
7429DECL_INLINE_THROW(uint32_t)
7430iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7431 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7432{
7433 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7434 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7435 return off;
7436}
7437
7438
7439/**
7440 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7441 */
7442DECL_INLINE_THROW(uint32_t)
7443iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7444 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7445{
7446 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7447 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7448}
7449
7450
7451/**
7452 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7453 */
7454DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7455 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7456{
7457 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7458 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7459 return off;
7460}
7461
7462
7463/**
7464 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7465 */
7466DECL_INLINE_THROW(uint32_t)
7467iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7468 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7469{
7470 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7471 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7472}
7473
7474
7475/**
7476 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7477 *
7478 * @note ARM64: Helper register is required (idxTmpReg).
7479 */
7480DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7481 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7482 uint8_t idxTmpReg = UINT8_MAX)
7483{
7484 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7485 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7486 return off;
7487}
7488
7489
7490/**
7491 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7492 *
7493 * @note ARM64: Helper register is required (idxTmpReg).
7494 */
7495DECL_INLINE_THROW(uint32_t)
7496iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7497 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7498 uint8_t idxTmpReg = UINT8_MAX)
7499{
7500 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7501 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7502}
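
/**
 * Usage sketch (illustrative only): guarding on a 16-bit selector value using
 * the compare-and-jump compositions above.  All arguments are hypothetical.
 */
#if 0 /* usage sketch, not part of the API */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleCheckSel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                         uint8_t iGprSel, uint16_t uSelExpected, uint32_t idxLabelMiss)
{
    /* cmp (16-bit) + jne: take the branch when the selector differs. */
    return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSel, uSelExpected, idxLabelMiss);
}
#endif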
7503
7504
7505/*********************************************************************************************************************************
7506* Calls. *
7507*********************************************************************************************************************************/
7508
7509/**
7510 * Emits a call to a 64-bit address.
7511 */
7512DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7513{
7514#ifdef RT_ARCH_AMD64
7515 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7516
7517 /* call rax */
7518 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7519 pbCodeBuf[off++] = 0xff;
7520 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7521
7522#elif defined(RT_ARCH_ARM64)
7523 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7524
7525 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7526 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7527
7528#else
7529# error "port me"
7530#endif
7531 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7532 return off;
7533}
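
/**
 * Usage sketch (illustrative only): calling a C helper through
 * iemNativeEmitCallImm().  The helper address is passed in by the caller, and
 * the argument registers must have been loaded beforehand (see the variable
 * loaders below).
 */
#if 0 /* usage sketch, not part of the API */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleEmitCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfnHelper)
{
    /* Loads the 64-bit address into RAX / TMP0 and emits call rax / blr. */
    return iemNativeEmitCallImm(pReNative, off, uPfnHelper);
}
#endif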
7534
7535
7536/**
7537 * Emits code to load a stack variable into an argument GPR.
7538 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7539 */
7540DECL_FORCE_INLINE_THROW(uint32_t)
7541iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7542 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7543 bool fSpilledVarsInVolatileRegs = false)
7544{
7545 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7546 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7547 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7548
7549 uint8_t const idxRegVar = pVar->idxReg;
7550 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7551 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7552 || !fSpilledVarsInVolatileRegs ))
7553 {
7554 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7555 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7556 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7557 if (!offAddend)
7558 {
7559 if (idxRegArg != idxRegVar)
7560 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7561 }
7562 else
7563 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7564 }
7565 else
7566 {
7567 uint8_t const idxStackSlot = pVar->idxStackSlot;
7568 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7569 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7570 if (offAddend)
7571 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7572 }
7573 return off;
7574}
7575
7576
7577/**
7578 * Emits code to load a stack or immediate variable value into an argument GPR,
7579 * optionally with an addend.
7580 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7581 */
7582DECL_FORCE_INLINE_THROW(uint32_t)
7583iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7584 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7585 bool fSpilledVarsInVolatileRegs = false)
7586{
7587 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7588 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7589 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7590 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7591 else
7592 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7593 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7594 return off;
7595}
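
/**
 * Usage sketch (illustrative only): marshalling a variable into a call
 * argument register ahead of a helper call.  IEMNATIVE_CALL_ARG1_GREG comes
 * from IEMN8veRecompiler.h; the variable index is hypothetical.
 */
#if 0 /* usage sketch, not part of the API */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleLoadArg1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
{
    /* Immediate variables become a mov imm64; stack variables are copied from
       their host register or loaded from the stack slot. */
    return iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVar);
}
#endif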
7596
7597
7598/**
7599 * Emits code to load the variable address into an argument GPR.
7600 *
7601 * This only works for uninitialized and stack variables.
7602 */
7603DECL_FORCE_INLINE_THROW(uint32_t)
7604iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7605 bool fFlushShadows)
7606{
7607 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7608 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7609 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7610 || pVar->enmKind == kIemNativeVarKind_Stack,
7611 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7612 AssertStmt(!pVar->fSimdReg,
7613 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7614
7615 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7616 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7617
7618 uint8_t const idxRegVar = pVar->idxReg;
7619 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7620 {
7621 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7622 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7623 Assert(pVar->idxReg == UINT8_MAX);
7624 }
7625 Assert( pVar->idxStackSlot != UINT8_MAX
7626 && pVar->idxReg == UINT8_MAX);
7627
7628 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7629}
7630
7631
7632#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7633/**
7634 * Emits code to load the variable address into an argument GPR.
7635 *
7636 * This is a special variant intended for SIMD variables only and only called
7637 * by the TLB miss path in the memory fetch/store code because there we pass
7638 * the value by reference and need both the register and stack depending on which
7639 * path is taken (TLB hit vs. miss).
7640 */
7641DECL_FORCE_INLINE_THROW(uint32_t)
7642iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7643 bool fSyncRegWithStack = true)
7644{
7645 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7646 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7647 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7648 || pVar->enmKind == kIemNativeVarKind_Stack,
7649 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7650 AssertStmt(pVar->fSimdReg,
7651 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7652 Assert( pVar->idxStackSlot != UINT8_MAX
7653 && pVar->idxReg != UINT8_MAX);
7654
7655 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7656 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7657
7658 uint8_t const idxRegVar = pVar->idxReg;
7659 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7660 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7661
7662 if (fSyncRegWithStack)
7663 {
7664 if (pVar->cbVar == sizeof(RTUINT128U))
7665 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
7666 else
7667 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
7668 }
7669
7670 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7671}
7672
7673
7674/**
7675 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
7676 *
7677 * This is a special helper and only called
7678 * by the TLB miss path in the memory fetch/store code because there we pass
7679 * the value by reference and need to sync the value on the stack with the assigned host register
7680 * after a TLB miss where the value ends up on the stack.
7681 */
7682DECL_FORCE_INLINE_THROW(uint32_t)
7683iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
7684{
7685 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7686 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7687 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7688 || pVar->enmKind == kIemNativeVarKind_Stack,
7689 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7690 AssertStmt(pVar->fSimdReg,
7691 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7692 Assert( pVar->idxStackSlot != UINT8_MAX
7693 && pVar->idxReg != UINT8_MAX);
7694
7695 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7696 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7697
7698 uint8_t const idxRegVar = pVar->idxReg;
7699 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7700 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7701
7702 if (pVar->cbVar == sizeof(RTUINT128U))
7703 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
7704 else
7705 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
7706
7707 return off;
7708}
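
/**
 * Usage sketch (illustrative only) of the TLB miss fetch pattern the two
 * helpers above exist for: pass the variable's stack slot by reference to the
 * C fallback, then resync the host SIMD register from the stack copy the
 * helper wrote.  The actual call emission is elided; IEMNATIVE_CALL_ARG2_GREG
 * comes from IEMN8veRecompiler.h.
 */
#if 0 /* usage sketch, not part of the API */
DECL_INLINE_THROW(uint32_t)
iemNativeExampleTlbMissFetch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue)
{
    off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue);
    /* ... emit the call to the fetch helper here ... */
    return iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
}
#endif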
7709
7710
7711/**
7712 * Emits a gprdst = ~gprsrc store.
7713 */
7714DECL_FORCE_INLINE_THROW(uint32_t)
7715iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7716{
7717#ifdef RT_ARCH_AMD64
7718 if (iGprDst != iGprSrc)
7719 {
7720 /* mov gprdst, gprsrc. */
7721 if (f64Bit)
7722 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7723 else
7724 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7725 }
7726
7727 /* not gprdst */
7728 if (f64Bit || iGprDst >= 8)
7729 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7730 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7731 pCodeBuf[off++] = 0xf7;
7732 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7733#elif defined(RT_ARCH_ARM64)
7734 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7735#else
7736# error "port me"
7737#endif
7738 return off;
7739}
7740
7741
7742/**
7743 * Emits a gprdst = ~gprsrc store.
7744 */
7745DECL_INLINE_THROW(uint32_t)
7746iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7747{
7748#ifdef RT_ARCH_AMD64
7749 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7750#elif defined(RT_ARCH_ARM64)
7751 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7752#else
7753# error "port me"
7754#endif
7755 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7756 return off;
7757}
7758
7759
7760/**
7761 * Emits a 128-bit vector register store to a VCpu value.
7762 */
7763DECL_FORCE_INLINE_THROW(uint32_t)
7764iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7765{
7766#ifdef RT_ARCH_AMD64
7767 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7768 pCodeBuf[off++] = 0x66;
7769 if (iVecReg >= 8)
7770 pCodeBuf[off++] = X86_OP_REX_R;
7771 pCodeBuf[off++] = 0x0f;
7772 pCodeBuf[off++] = 0x7f;
7773 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7774#elif defined(RT_ARCH_ARM64)
7775 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7776
7777#else
7778# error "port me"
7779#endif
7780 return off;
7781}
7782
7783
7784/**
7785 * Emits a 128-bit vector register store to a VCpu value.
7786 */
7787DECL_INLINE_THROW(uint32_t)
7788iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7789{
7790#ifdef RT_ARCH_AMD64
7791 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7792#elif defined(RT_ARCH_ARM64)
7793 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7794#else
7795# error "port me"
7796#endif
7797 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7798 return off;
7799}
7800
7801
7802/**
7803 * Emits a high 128-bit vector register store to a VCpu value.
7804 */
7805DECL_FORCE_INLINE_THROW(uint32_t)
7806iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7807{
7808#ifdef RT_ARCH_AMD64
7809 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7810 pCodeBuf[off++] = X86_OP_VEX3;
7811 if (iVecReg >= 8)
7812 pCodeBuf[off++] = 0x63;
7813 else
7814 pCodeBuf[off++] = 0xe3;
7815 pCodeBuf[off++] = 0x7d;
7816 pCodeBuf[off++] = 0x39;
7817 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7818 pCodeBuf[off++] = 0x01; /* Immediate */
7819#elif defined(RT_ARCH_ARM64)
7820 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7821#else
7822# error "port me"
7823#endif
7824 return off;
7825}
7826
7827
7828/**
7829 * Emits a high 128-bit vector register store to a VCpu value.
7830 */
7831DECL_INLINE_THROW(uint32_t)
7832iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7833{
7834#ifdef RT_ARCH_AMD64
7835 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7836#elif defined(RT_ARCH_ARM64)
7837 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7838 Assert(!(iVecReg & 0x1));
7839 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7840#else
7841# error "port me"
7842#endif
7843 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7844 return off;
7845}


/**
 * Emits a 128-bit vector register load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecReg >= 8)
        pCodeBuf[off++] = X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x6f;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 128-bit vector register load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a high 128-bit vector register load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
    pCodeBuf[off++] = X86_OP_VEX3;
    if (iVecReg >= 8)
        pCodeBuf[off++] = 0x63;
    else
        pCodeBuf[off++] = 0xe3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x38;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
    pCodeBuf[off++] = 0x01; /* Immediate */
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a high 128-bit vector register load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
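

/*
 * Worked example for the AMD64 path above (manually derived; verify with a
 * disassembler): for iVecReg=1 the emitter produces
 *      c4 e3 75 38 <modrm+disp> 01     vinserti128 ymm1, ymm1, mem128, 1
 * i.e. VEX.256.66.0F3A.W0 38 /r ib with ymm1 as both destination and first
 * source, so the low 128 bits are left untouched.
 */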


/**
 * Emits a vecdst = vecsrc load, 128-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
{
#ifdef RT_ARCH_AMD64
    /* movdqu vecdst, vecsrc */
    pCodeBuf[off++] = 0xf3;

    if ((iVecRegDst | iVecRegSrc) >= 8)
        pCodeBuf[off++] = iVecRegDst < 8  ? X86_OP_REX_B
                        : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                   X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x6f;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst, src;   alias for: orr dst, src, src */
    pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = vecsrc load, 128-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
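

/*
 * Worked encodings from the emitter above (manually derived; verify with a
 * disassembler):
 *      movdqu xmm1, xmm2       -> f3 0f 6f ca        (no REX needed)
 *      movdqu xmm9, xmm1       -> f3 44 0f 6f c9     (REX.R, destination)
 *      movdqu xmm1, xmm9       -> f3 41 0f 6f c9     (REX.B, source)
 */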


/**
 * Emits a vecdst = vecsrc load, 256-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
{
#ifdef RT_ARCH_AMD64
    /* vmovdqa ymm, ymm */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    if (iVecRegDst >= 8 && iVecRegSrc >= 8)
    {
        pbCodeBuf[off++] = X86_OP_VEX3;
        pbCodeBuf[off++] = 0x41;
        pbCodeBuf[off++] = 0x7d;
        pbCodeBuf[off++] = 0x6f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
    }
    else
    {
        pbCodeBuf[off++] = X86_OP_VEX2;
        pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
        pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
        pbCodeBuf[off++] = iVecRegSrc >= 8
                         ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
                         : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
    }
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst,     iVecRegSrc);
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
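

/*
 * Worked encodings from the function above (manually derived; verify with a
 * disassembler):
 *      vmovdqa ymm1, ymm2      -> c5 fd 6f ca       (VEX2, 6F load form)
 *      vmovdqa ymm1, ymm9      -> c5 7d 7f c9       (VEX2, 7F store form, R bit extends the source)
 *      vmovdqa ymm9, ymm10     -> c4 41 7d 6f ca    (VEX3, both registers extended)
 */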


/**
 * Emits a gprdst = vecsrc[x] load, 64-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
{
#ifdef RT_ARCH_AMD64
    if (iQWord >= 2)
    {
        /** @todo Currently not used. */
        AssertReleaseFailed();
    }
    else
    {
        /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        pCodeBuf[off++] = X86_OP_REX_W
                        | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
                        | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x16;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
        pCodeBuf[off++] = iQWord;
    }
#elif defined(RT_ARCH_ARM64)
    /* umov gprdst, vecsrc[iQWord] */
    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 64-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
{
    Assert(iQWord <= 3);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegSrc & 0x1));
    /* Need to access the "high" 128-bit vector register. */
    if (iQWord >= 2)
        off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
    else
        off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
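

/*
 * Illustrative sketch: splitting a 128-bit register into two GPRs.  For
 * reference, iGprDst=RAX, iVecRegSrc=XMM1, iQWord=1 encodes as
 *      66 48 0f 3a 16 c8 01    pextrq rax, xmm1, 1
 * (manually derived; verify with a disassembler).
 */
#if 0
static uint32_t iemNativeEmitExampleSplitU128(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                              uint8_t iGprLo, uint8_t iGprHi, uint8_t iVecRegSrc)
{
    off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, iGprLo, iVecRegSrc, 0 /*iQWord*/);
    return iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, iGprHi, iVecRegSrc, 1 /*iQWord*/);
}
#endif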


/**
 * Emits a gprdst = vecsrc[x] load, 32-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
{
#ifdef RT_ARCH_AMD64
    if (iDWord >= 4)
    {
        /** @todo Currently not used. */
        AssertReleaseFailed();
    }
    else
    {
        /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        if (iGprDst >= 8 || iVecRegSrc >= 8)
            pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
                            | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x16;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
        pCodeBuf[off++] = iDWord;
    }
#elif defined(RT_ARCH_ARM64)
    /* umov gprdst, vecsrc[iDWord] */
    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 32-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
{
    Assert(iDWord <= 7);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegSrc & 0x1));
    /* Need to access the "high" 128-bit vector register. */
    if (iDWord >= 4)
        off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
    else
        off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 16-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
{
#ifdef RT_ARCH_AMD64
    if (iWord >= 8)
    {
        /** @todo Currently not used. */
        AssertReleaseFailed();
    }
    else
    {
        /* pextrw gpr, vecsrc, #iWord */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        if (iGprDst >= 8 || iVecRegSrc >= 8)
            pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
                            | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0xc5;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
        pCodeBuf[off++] = iWord;
    }
#elif defined(RT_ARCH_ARM64)
    /* umov gprdst, vecsrc[iWord] */
    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 16-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
{
    Assert(iWord <= 15);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegSrc & 0x1));
    /* Need to access the "high" 128-bit vector register. */
    if (iWord >= 8)
        off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
    else
        off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 8-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
{
#ifdef RT_ARCH_AMD64
    if (iByte >= 16)
    {
        /** @todo Currently not used. */
        AssertReleaseFailed();
    }
    else
    {
        /* pextrb gpr, vecsrc, #iByte */
        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
        if (iGprDst >= 8 || iVecRegSrc >= 8)
            pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
                            | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pCodeBuf[off++] = 0x0f;
        pCodeBuf[off++] = 0x3a;
        pCodeBuf[off++] = 0x14;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
        pCodeBuf[off++] = iByte;
    }
#elif defined(RT_ARCH_ARM64)
    /* umov gprdst, vecsrc[iByte] */
    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = vecsrc[x] load, 8-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
{
    Assert(iByte <= 31);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegSrc & 0x1));
    /* Need to access the "high" 128-bit vector register. */
    if (iByte >= 16)
        off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
    else
        off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
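

/*
 * Illustrative sketch: reading byte 17 of a 256-bit value.  On ARM64 the
 * wrapper above transparently redirects to the odd register of the pair
 * (byte 1 of iVecRegSrc + 1); the AMD64 path currently asserts for byte
 * indexes >= 16.
 */
#if 0
static uint32_t iemNativeEmitExampleLoadByte17(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                               uint8_t iGprDst, uint8_t iVecRegSrc)
{
    return iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, iGprDst, iVecRegSrc, 17 /*iByte*/);
}
#endif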


/**
 * Emits a vecdst[x] = gprsrc store, 64-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
{
#ifdef RT_ARCH_AMD64
    /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = X86_OP_REX_W
                    | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                    | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = iQWord;
#elif defined(RT_ARCH_ARM64)
    /* ins vecdst[iQWord], gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[x] = gprsrc store, 64-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
{
    Assert(iQWord <= 1);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
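

/*
 * Illustrative sketch: composing a 128-bit value from two GPRs, e.g. when
 * materialising an immediate vector constant.
 */
#if 0
static uint32_t iemNativeEmitExampleComposeU128(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                uint8_t iVecRegDst, uint8_t iGprLo, uint8_t iGprHi)
{
    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, iVecRegDst, iGprLo, 0 /*iQWord*/);
    return iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, iVecRegDst, iGprHi, 1 /*iQWord*/);
}
#endif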


/**
 * Emits a vecdst[x] = gprsrc store, 32-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
{
#ifdef RT_ARCH_AMD64
    /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = iDWord;
#elif defined(RT_ARCH_ARM64)
    /* ins vecdst[iDWord], gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[x] = gprsrc store, 32-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
{
    Assert(iDWord <= 3);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst.au32[iDWord] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
{
    Assert(iDWord <= 7);

#ifdef RT_ARCH_AMD64
    /*
     * xor    tmp0, tmp0
     * pinsrd xmm, tmp0, iDWord
     */
    if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
        pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pCodeBuf[off++] = 0x33;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* ins vecreg[iDWord], wzr */
    if (iDWord >= 4)
        pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
    else
        pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst.au32[iDWord] = 0 store.
 *
 * @note The AMD64 variant clobbers IEMNATIVE_REG_FIXED_TMP0.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst[0:127] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    /* pxor xmm, xmm */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecReg >= 8)
        pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xef;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* eor vecreg, vecreg, vecreg */
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[0:127] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst[128:255] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    /* vmovdqa xmm, xmm.  This will clear the upper half of ymm */
    if (iVecReg < 8)
    {
        pCodeBuf[off++] = X86_OP_VEX2;
        pCodeBuf[off++] = 0xf9;
    }
    else
    {
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = 0x41;
        pCodeBuf[off++] = 0x79;
    }
    pCodeBuf[off++] = 0x6f;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* eor vecreg, vecreg, vecreg */
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[128:255] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
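

/*
 * Worked example for the zero-upper trick above (manually derived; verify
 * with a disassembler): for iVecReg=1 the emitter produces
 *      c5 f9 6f c9             vmovdqa xmm1, xmm1
 * which, per the VEX zero-extension rule, clears bits 255:128 of ymm1 while
 * leaving the low 128 bits untouched.
 */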


/**
 * Emits a vecdst[0:255] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    /* vpxor ymm, ymm, ymm */
    if (iVecReg < 8)
    {
        pCodeBuf[off++] = X86_OP_VEX2;
        pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
    }
    else
    {
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
    }
    pCodeBuf[off++] = 0xef;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* eor vecreg, vecreg, vecreg */
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[0:255] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 8-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /* pinsrb vecdst, gpr, #0 (ASSUMES SSE4.1) */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x20;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (   iVecRegDst >= 8
                       ? 0
                       : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x78;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 8-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
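

/*
 * Illustrative sketch: splatting a byte that is already in a GPR across a
 * full 256-bit register (pair), e.g. for building a byte mask.
 */
#if 0
static uint32_t iemNativeEmitExampleSplatByte(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                              uint8_t iVecRegDst, uint8_t iGprWithByte)
{
    /* Replicates the low byte of iGprWithByte into all 32 byte lanes. */
    return iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, iVecRegDst, iGprWithByte, true /*f256Bit*/);
}
#endif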


/**
 * Emits a vecdst = gprsrc broadcast, 16-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /* pinsrw vecdst, gpr, #0 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xc4;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (   iVecRegDst >= 8
                       ? 0
                       : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x79;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 16-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 32-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
     *        vbroadcast needs a memory operand or another xmm register to work... */

    /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (   iVecRegDst >= 8
                       ? 0
                       : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x58;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 32-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 64-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
     *        vbroadcast needs a memory operand or another xmm register to work... */

    /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = X86_OP_REX_W
                    | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                    | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (   iVecRegDst >= 8
                       ? 0
                       : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x59;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 64-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
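

/*
 * Worked example for the AMD64 path above (manually derived; verify with a
 * disassembler): for iVecRegDst=1, iGprSrc=RAX and f256Bit=true the two-step
 * sequence is
 *      66 48 0f 3a 22 c8 00    pinsrq xmm1, rax, 0
 *      c4 e2 7d 59 c9          vpbroadcastq ymm1, xmm1
 */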

#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */

/** @} */

#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
