VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@ 104155

Last change on this file since 104155 was 104155, checked in by vboxsync, 10 months ago

VMM/IEM: Implement native emitters for IEM_MC_CLEAR_EFL_BIT(), IEM_MC_FLIP_EFL_BIT() and IEM_MC_SET_EFL_BIT(), bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 327.1 KB
Line 
1/* $Id: IEMN8veRecompilerEmit.h 104155 2024-04-04 10:47:23Z vboxsync $ */
2/** @file
3 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34#include "IEMN8veRecompiler.h"
35
36
37/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
38 * @ingroup grp_iem_n8ve_re
39 * @{
40 */
41
42/**
43 * Emit a simple marker instruction to more easily tell where something starts
44 * in the disassembly.
45 */
46DECL_INLINE_THROW(uint32_t)
47iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
48{
49#ifdef RT_ARCH_AMD64
50 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
51 if (uInfo == 0)
52 {
53 /* nop */
54 pbCodeBuf[off++] = 0x90;
55 }
56 else
57 {
58 /* nop [disp32] */
59 pbCodeBuf[off++] = 0x0f;
60 pbCodeBuf[off++] = 0x1f;
61 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
62 pbCodeBuf[off++] = RT_BYTE1(uInfo);
63 pbCodeBuf[off++] = RT_BYTE2(uInfo);
64 pbCodeBuf[off++] = RT_BYTE3(uInfo);
65 pbCodeBuf[off++] = RT_BYTE4(uInfo);
66 }
67#elif defined(RT_ARCH_ARM64)
68 /* nop */
69 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
70 pu32CodeBuf[off++] = 0xd503201f;
71
72 RT_NOREF(uInfo);
73#else
74# error "port me"
75#endif
76 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
77 return off;
78}
79
80
81/**
82 * Emit a breakpoint instruction.
83 */
84DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
85{
86#ifdef RT_ARCH_AMD64
87 pCodeBuf[off++] = 0xcc;
88 RT_NOREF(uInfo); /** @todo use multibyte nop for info? */
89
90#elif defined(RT_ARCH_ARM64)
91 pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));
92
93#else
94# error "error"
95#endif
96 return off;
97}
98
99
100/**
101 * Emit a breakpoint instruction.
102 */
103DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
104{
105#ifdef RT_ARCH_AMD64
106 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
107#elif defined(RT_ARCH_ARM64)
108 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
109#else
110# error "error"
111#endif
112 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
113 return off;
114}
115
116
117/*********************************************************************************************************************************
118* Loads, Stores and Related Stuff. *
119*********************************************************************************************************************************/
120
121#ifdef RT_ARCH_AMD64
122/**
123 * Common bit of iemNativeEmitLoadGprByGpr and friends.
124 */
125DECL_FORCE_INLINE(uint32_t)
126iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
127{
128 if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
129 {
130 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
131 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
132 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
133 }
134 else if (offDisp == (int8_t)offDisp)
135 {
136 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
137 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
138 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
139 pbCodeBuf[off++] = (uint8_t)offDisp;
140 }
141 else
142 {
143 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
144 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
145 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
146 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
147 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
148 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
149 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
150 }
151 return off;
152}
153#endif /* RT_ARCH_AMD64 */
154
155/**
156 * Emits setting a GPR to zero.
157 */
158DECL_INLINE_THROW(uint32_t)
159iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
160{
161#ifdef RT_ARCH_AMD64
162 /* xor gpr32, gpr32 */
163 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
164 if (iGpr >= 8)
165 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
166 pbCodeBuf[off++] = 0x33;
167 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
168
169#elif defined(RT_ARCH_ARM64)
170 /* mov gpr, #0x0 */
171 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
172 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;
173
174#else
175# error "port me"
176#endif
177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
178 return off;
179}
180
181
182/**
183 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficent
184 * buffer space.
185 *
186 * Max buffer consumption:
187 * - AMD64: 10 instruction bytes.
188 * - ARM64: 4 instruction words (16 bytes).
189 */
190DECL_FORCE_INLINE(uint32_t)
191iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
192{
193#ifdef RT_ARCH_AMD64
194 if (uImm64 == 0)
195 {
196 /* xor gpr, gpr */
197 if (iGpr >= 8)
198 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
199 pCodeBuf[off++] = 0x33;
200 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
201 }
202 else if (uImm64 <= UINT32_MAX)
203 {
204 /* mov gpr, imm32 */
205 if (iGpr >= 8)
206 pCodeBuf[off++] = X86_OP_REX_B;
207 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
208 pCodeBuf[off++] = RT_BYTE1(uImm64);
209 pCodeBuf[off++] = RT_BYTE2(uImm64);
210 pCodeBuf[off++] = RT_BYTE3(uImm64);
211 pCodeBuf[off++] = RT_BYTE4(uImm64);
212 }
213 else if (uImm64 == (uint64_t)(int32_t)uImm64)
214 {
215 /* mov gpr, sx(imm32) */
216 if (iGpr < 8)
217 pCodeBuf[off++] = X86_OP_REX_W;
218 else
219 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
220 pCodeBuf[off++] = 0xc7;
221 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
222 pCodeBuf[off++] = RT_BYTE1(uImm64);
223 pCodeBuf[off++] = RT_BYTE2(uImm64);
224 pCodeBuf[off++] = RT_BYTE3(uImm64);
225 pCodeBuf[off++] = RT_BYTE4(uImm64);
226 }
227 else
228 {
229 /* mov gpr, imm64 */
230 if (iGpr < 8)
231 pCodeBuf[off++] = X86_OP_REX_W;
232 else
233 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
234 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
235 pCodeBuf[off++] = RT_BYTE1(uImm64);
236 pCodeBuf[off++] = RT_BYTE2(uImm64);
237 pCodeBuf[off++] = RT_BYTE3(uImm64);
238 pCodeBuf[off++] = RT_BYTE4(uImm64);
239 pCodeBuf[off++] = RT_BYTE5(uImm64);
240 pCodeBuf[off++] = RT_BYTE6(uImm64);
241 pCodeBuf[off++] = RT_BYTE7(uImm64);
242 pCodeBuf[off++] = RT_BYTE8(uImm64);
243 }
244
245#elif defined(RT_ARCH_ARM64)
246 /*
247 * We need to start this sequence with a 'mov grp, imm16, lsl #x' and
248 * supply remaining bits using 'movk grp, imm16, lsl #x'.
249 *
250 * The mov instruction is encoded 0xd2800000 + shift + imm16 + grp,
251 * while the movk is 0xf2800000 + shift + imm16 + grp, meaning the diff
252 * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
253 * after the first non-zero immediate component so we switch to movk for
254 * the remainder.
255 */
256 unsigned cZeroHalfWords = !( uImm64 & UINT16_MAX)
257 + !((uImm64 >> 16) & UINT16_MAX)
258 + !((uImm64 >> 32) & UINT16_MAX)
259 + !((uImm64 >> 48) & UINT16_MAX);
260 unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
261 : ( (uImm64 & UINT16_MAX) == UINT16_MAX)
262 + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
263 + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
264 + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
265 if (cFfffHalfWords <= cZeroHalfWords)
266 {
267 uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
268
269 /* movz gpr, imm16 */
270 uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
271 if (uImmPart || cZeroHalfWords == 4)
272 {
273 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
274 fMovBase |= RT_BIT_32(29);
275 }
276 /* mov[z/k] gpr, imm16, lsl #16 */
277 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
278 if (uImmPart)
279 {
280 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
281 fMovBase |= RT_BIT_32(29);
282 }
283 /* mov[z/k] gpr, imm16, lsl #32 */
284 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
285 if (uImmPart)
286 {
287 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
288 fMovBase |= RT_BIT_32(29);
289 }
290 /* mov[z/k] gpr, imm16, lsl #48 */
291 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
292 if (uImmPart)
293 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
294 }
295 else
296 {
297 uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
298
299 /* find the first half-word that isn't UINT16_MAX. */
300 uint32_t const iHwNotFfff = (uImm64 & UINT16_MAX) != UINT16_MAX ? 0
301 : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
302 : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
303
304 /* movn gpr, imm16, lsl #iHwNotFfff*16 */
305 uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
306 pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
307 fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
308 /* movk gpr, imm16 */
309 if (iHwNotFfff != 0)
310 {
311 uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
312 if (uImmPart != UINT32_C(0xffff))
313 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
314 }
315 /* movk gpr, imm16, lsl #16 */
316 if (iHwNotFfff != 1)
317 {
318 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
319 if (uImmPart != UINT32_C(0xffff))
320 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
321 }
322 /* movk gpr, imm16, lsl #32 */
323 if (iHwNotFfff != 2)
324 {
325 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
326 if (uImmPart != UINT32_C(0xffff))
327 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
328 }
329 /* movk gpr, imm16, lsl #48 */
330 if (iHwNotFfff != 3)
331 {
332 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
333 if (uImmPart != UINT32_C(0xffff))
334 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
335 }
336 }
337
338 /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
339 * clang 12.x does that, only to use the 'x' version for the
340 * addressing in the following ldr). */
341
342#else
343# error "port me"
344#endif
345 return off;
346}
347
348
349/**
350 * Emits loading a constant into a 64-bit GPR
351 */
352DECL_INLINE_THROW(uint32_t)
353iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
354{
355#ifdef RT_ARCH_AMD64
356 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
357#elif defined(RT_ARCH_ARM64)
358 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
359#else
360# error "port me"
361#endif
362 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
363 return off;
364}
365
366
367/**
368 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficent
369 * buffer space.
370 *
371 * Max buffer consumption:
372 * - AMD64: 6 instruction bytes.
373 * - ARM64: 2 instruction words (8 bytes).
374 *
375 * @note The top 32 bits will be cleared.
376 */
377DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
378{
379#ifdef RT_ARCH_AMD64
380 if (uImm32 == 0)
381 {
382 /* xor gpr, gpr */
383 if (iGpr >= 8)
384 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
385 pCodeBuf[off++] = 0x33;
386 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
387 }
388 else
389 {
390 /* mov gpr, imm32 */
391 if (iGpr >= 8)
392 pCodeBuf[off++] = X86_OP_REX_B;
393 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
394 pCodeBuf[off++] = RT_BYTE1(uImm32);
395 pCodeBuf[off++] = RT_BYTE2(uImm32);
396 pCodeBuf[off++] = RT_BYTE3(uImm32);
397 pCodeBuf[off++] = RT_BYTE4(uImm32);
398 }
399
400#elif defined(RT_ARCH_ARM64)
401 if ((uImm32 >> 16) == 0)
402 /* movz gpr, imm16 */
403 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
404 else if ((uImm32 & UINT32_C(0xffff)) == 0)
405 /* movz gpr, imm16, lsl #16 */
406 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
407 else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
408 /* movn gpr, imm16, lsl #16 */
409 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
410 else if ((uImm32 >> 16) == UINT32_C(0xffff))
411 /* movn gpr, imm16 */
412 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
413 else
414 {
415 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
416 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
417 }
418
419#else
420# error "port me"
421#endif
422 return off;
423}
424
425
426/**
427 * Emits loading a constant into a 32-bit GPR.
428 * @note The top 32 bits will be cleared.
429 */
430DECL_INLINE_THROW(uint32_t)
431iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
432{
433#ifdef RT_ARCH_AMD64
434 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
435#elif defined(RT_ARCH_ARM64)
436 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
437#else
438# error "port me"
439#endif
440 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
441 return off;
442}
443
444
445/**
446 * Emits loading a constant into a 8-bit GPR
447 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
448 * only the ARM64 version does that.
449 */
450DECL_INLINE_THROW(uint32_t)
451iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
452{
453#ifdef RT_ARCH_AMD64
454 /* mov gpr, imm8 */
455 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
456 if (iGpr >= 8)
457 pbCodeBuf[off++] = X86_OP_REX_B;
458 else if (iGpr >= 4)
459 pbCodeBuf[off++] = X86_OP_REX;
460 pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
461 pbCodeBuf[off++] = RT_BYTE1(uImm8);
462
463#elif defined(RT_ARCH_ARM64)
464 /* movz gpr, imm16, lsl #0 */
465 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
466 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;
467
468#else
469# error "port me"
470#endif
471 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
472 return off;
473}
474
475
476#ifdef RT_ARCH_AMD64
477/**
478 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
479 */
480DECL_FORCE_INLINE(uint32_t)
481iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
482{
483 if (offVCpu < 128)
484 {
485 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
486 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
487 }
488 else
489 {
490 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
491 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
492 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
493 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
494 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
495 }
496 return off;
497}
498
499#elif defined(RT_ARCH_ARM64)
500
501/**
502 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
503 *
504 * @note Loads can use @a iGprReg for large offsets, stores requires a temporary
505 * registers (@a iGprTmp).
506 * @note DON'T try this with prefetch.
507 */
508DECL_FORCE_INLINE_THROW(uint32_t)
509iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
510 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
511{
512 /*
513 * There are a couple of ldr variants that takes an immediate offset, so
514 * try use those if we can, otherwise we have to use the temporary register
515 * help with the addressing.
516 */
517 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
518 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
519 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
520 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
521 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
522 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
523 else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
524 {
525 /* The offset is too large, so we must load it into a register and use
526 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
527 /** @todo reduce by offVCpu by >> 3 or >> 2? if it saves instructions? */
528 if (iGprTmp == UINT8_MAX)
529 iGprTmp = iGprReg;
530 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
531 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
532 }
533 else
534# ifdef IEM_WITH_THROW_CATCH
535 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
536# else
537 AssertReleaseFailedStmt(off = UINT32_MAX);
538# endif
539
540 return off;
541}
542
543/**
544 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
545 */
546DECL_FORCE_INLINE_THROW(uint32_t)
547iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
548 uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
549{
550 /*
551 * There are a couple of ldr variants that takes an immediate offset, so
552 * try use those if we can, otherwise we have to use the temporary register
553 * help with the addressing.
554 */
555 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
556 {
557 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
558 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
559 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
560 }
561 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
562 {
563 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
564 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
565 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
566 }
567 else
568 {
569 /* The offset is too large, so we must load it into a register and use
570 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
571 /** @todo reduce by offVCpu by >> 3 or >> 2? if it saves instructions? */
572 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
573 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
574 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
575 IEMNATIVE_REG_FIXED_TMP0);
576 }
577 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
578 return off;
579}
580
581#endif /* RT_ARCH_ARM64 */
582
583
584/**
585 * Emits a 64-bit GPR load of a VCpu value.
586 */
587DECL_FORCE_INLINE_THROW(uint32_t)
588iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
589{
590#ifdef RT_ARCH_AMD64
591 /* mov reg64, mem64 */
592 if (iGpr < 8)
593 pCodeBuf[off++] = X86_OP_REX_W;
594 else
595 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
596 pCodeBuf[off++] = 0x8b;
597 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off,iGpr, offVCpu);
598
599#elif defined(RT_ARCH_ARM64)
600 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
601
602#else
603# error "port me"
604#endif
605 return off;
606}
607
608
609/**
610 * Emits a 64-bit GPR load of a VCpu value.
611 */
612DECL_INLINE_THROW(uint32_t)
613iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
614{
615#ifdef RT_ARCH_AMD64
616 off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
617 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
618
619#elif defined(RT_ARCH_ARM64)
620 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
621
622#else
623# error "port me"
624#endif
625 return off;
626}
627
628
629/**
630 * Emits a 32-bit GPR load of a VCpu value.
631 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
632 */
633DECL_INLINE_THROW(uint32_t)
634iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
635{
636#ifdef RT_ARCH_AMD64
637 /* mov reg32, mem32 */
638 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
639 if (iGpr >= 8)
640 pbCodeBuf[off++] = X86_OP_REX_R;
641 pbCodeBuf[off++] = 0x8b;
642 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
644
645#elif defined(RT_ARCH_ARM64)
646 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
647
648#else
649# error "port me"
650#endif
651 return off;
652}
653
654
655/**
656 * Emits a 16-bit GPR load of a VCpu value.
657 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
658 */
659DECL_INLINE_THROW(uint32_t)
660iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
661{
662#ifdef RT_ARCH_AMD64
663 /* movzx reg32, mem16 */
664 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
665 if (iGpr >= 8)
666 pbCodeBuf[off++] = X86_OP_REX_R;
667 pbCodeBuf[off++] = 0x0f;
668 pbCodeBuf[off++] = 0xb7;
669 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
670 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
671
672#elif defined(RT_ARCH_ARM64)
673 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
674
675#else
676# error "port me"
677#endif
678 return off;
679}
680
681
682/**
683 * Emits a 8-bit GPR load of a VCpu value.
684 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
685 */
686DECL_INLINE_THROW(uint32_t)
687iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
688{
689#ifdef RT_ARCH_AMD64
690 /* movzx reg32, mem8 */
691 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
692 if (iGpr >= 8)
693 pbCodeBuf[off++] = X86_OP_REX_R;
694 pbCodeBuf[off++] = 0x0f;
695 pbCodeBuf[off++] = 0xb6;
696 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
697 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
698
699#elif defined(RT_ARCH_ARM64)
700 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
701
702#else
703# error "port me"
704#endif
705 return off;
706}
707
708
709/**
710 * Emits a store of a GPR value to a 64-bit VCpu field.
711 */
712DECL_FORCE_INLINE_THROW(uint32_t)
713iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
714 uint8_t iGprTmp = UINT8_MAX)
715{
716#ifdef RT_ARCH_AMD64
717 /* mov mem64, reg64 */
718 if (iGpr < 8)
719 pCodeBuf[off++] = X86_OP_REX_W;
720 else
721 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
722 pCodeBuf[off++] = 0x89;
723 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
724 RT_NOREF(iGprTmp);
725
726#elif defined(RT_ARCH_ARM64)
727 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
728
729#else
730# error "port me"
731#endif
732 return off;
733}
734
735
736/**
737 * Emits a store of a GPR value to a 64-bit VCpu field.
738 */
739DECL_INLINE_THROW(uint32_t)
740iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
741{
742#ifdef RT_ARCH_AMD64
743 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
744#elif defined(RT_ARCH_ARM64)
745 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
746 IEMNATIVE_REG_FIXED_TMP0);
747#else
748# error "port me"
749#endif
750 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
751 return off;
752}
753
754
755/**
756 * Emits a store of a GPR value to a 32-bit VCpu field.
757 */
758DECL_INLINE_THROW(uint32_t)
759iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
760{
761#ifdef RT_ARCH_AMD64
762 /* mov mem32, reg32 */
763 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
764 if (iGpr >= 8)
765 pbCodeBuf[off++] = X86_OP_REX_R;
766 pbCodeBuf[off++] = 0x89;
767 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
769
770#elif defined(RT_ARCH_ARM64)
771 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
772
773#else
774# error "port me"
775#endif
776 return off;
777}
778
779
780/**
781 * Emits a store of a GPR value to a 16-bit VCpu field.
782 */
783DECL_INLINE_THROW(uint32_t)
784iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
785{
786#ifdef RT_ARCH_AMD64
787 /* mov mem16, reg16 */
788 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
789 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
790 if (iGpr >= 8)
791 pbCodeBuf[off++] = X86_OP_REX_R;
792 pbCodeBuf[off++] = 0x89;
793 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
794 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
795
796#elif defined(RT_ARCH_ARM64)
797 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));
798
799#else
800# error "port me"
801#endif
802 return off;
803}
804
805
806/**
807 * Emits a store of a GPR value to a 8-bit VCpu field.
808 */
809DECL_INLINE_THROW(uint32_t)
810iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
811{
812#ifdef RT_ARCH_AMD64
813 /* mov mem8, reg8 */
814 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
815 if (iGpr >= 8)
816 pbCodeBuf[off++] = X86_OP_REX_R;
817 pbCodeBuf[off++] = 0x88;
818 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
820
821#elif defined(RT_ARCH_ARM64)
822 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
823
824#else
825# error "port me"
826#endif
827 return off;
828}
829
830
831/**
832 * Emits a store of an immediate value to a 32-bit VCpu field.
833 *
834 * @note ARM64: Will allocate temporary registers.
835 */
836DECL_FORCE_INLINE_THROW(uint32_t)
837iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
838{
839#ifdef RT_ARCH_AMD64
840 /* mov mem32, imm32 */
841 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
842 pCodeBuf[off++] = 0xc7;
843 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
844 pCodeBuf[off++] = RT_BYTE1(uImm);
845 pCodeBuf[off++] = RT_BYTE2(uImm);
846 pCodeBuf[off++] = RT_BYTE3(uImm);
847 pCodeBuf[off++] = RT_BYTE4(uImm);
848 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
849
850#elif defined(RT_ARCH_ARM64)
851 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
852 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
853 if (idxRegImm != ARMV8_A64_REG_XZR)
854 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
855
856#else
857# error "port me"
858#endif
859 return off;
860}
861
862
863
864/**
865 * Emits a store of an immediate value to a 16-bit VCpu field.
866 *
867 * @note ARM64: A idxTmp1 is always required! The idxTmp2 depends on whehter the
868 * offset can be encoded as an immediate or not. The @a offVCpu immediate
869 * range is 0..8190 bytes from VMCPU and the same from CPUMCPU.
870 */
871DECL_FORCE_INLINE_THROW(uint32_t)
872iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
873 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
874{
875#ifdef RT_ARCH_AMD64
876 /* mov mem16, imm16 */
877 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
878 pCodeBuf[off++] = 0xc7;
879 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
880 pCodeBuf[off++] = RT_BYTE1(uImm);
881 pCodeBuf[off++] = RT_BYTE2(uImm);
882 RT_NOREF(idxTmp1, idxTmp2);
883
884#elif defined(RT_ARCH_ARM64)
885 if (idxTmp1 != UINT8_MAX)
886 {
887 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
888 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
889 sizeof(uint16_t), idxTmp2);
890 }
891 else
892# ifdef IEM_WITH_THROW_CATCH
893 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
894# else
895 AssertReleaseFailedStmt(off = UINT32_MAX);
896# endif
897
898#else
899# error "port me"
900#endif
901 return off;
902}
903
904
905/**
906 * Emits a store of an immediate value to a 8-bit VCpu field.
907 */
908DECL_INLINE_THROW(uint32_t)
909iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
910{
911#ifdef RT_ARCH_AMD64
912 /* mov mem8, imm8 */
913 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
914 pbCodeBuf[off++] = 0xc6;
915 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
916 pbCodeBuf[off++] = bImm;
917 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
918
919#elif defined(RT_ARCH_ARM64)
920 /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
921 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
922 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
923 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
924
925#else
926# error "port me"
927#endif
928 return off;
929}
930
931
932/**
933 * Emits a load effective address to a GRP of a VCpu field.
934 */
935DECL_INLINE_THROW(uint32_t)
936iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
937{
938#ifdef RT_ARCH_AMD64
939 /* lea gprdst, [rbx + offDisp] */
940 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
941 if (iGprDst < 8)
942 pbCodeBuf[off++] = X86_OP_REX_W;
943 else
944 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
945 pbCodeBuf[off++] = 0x8d;
946 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);
947
948#elif defined(RT_ARCH_ARM64)
949 if (offVCpu < (unsigned)_4K)
950 {
951 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
952 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
953 }
954 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
955 {
956 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
957 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
958 offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
959 }
960 else
961 {
962 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
963 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
964 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
965 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, iGprDst);
966 }
967
968#else
969# error "port me"
970#endif
971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
972 return off;
973}
974
975
976/** This is just as a typesafe alternative to RT_UOFFSETOF. */
977DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
978{
979 uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
980 Assert(off < sizeof(VMCPU));
981 return off;
982}
983
984
985/** This is just as a typesafe alternative to RT_UOFFSETOF. */
986DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
987{
988 uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
989 Assert(off < sizeof(VMCPU));
990 return off;
991}
992
993
994/**
995 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
996 *
997 * @note The two temp registers are not required for AMD64. ARM64 always
998 * requires the first, and the 2nd is needed if the offset cannot be
999 * encoded as an immediate.
1000 */
1001DECL_FORCE_INLINE(uint32_t)
1002iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1003{
1004#ifdef RT_ARCH_AMD64
1005 /* inc qword [pVCpu + off] */
1006 pCodeBuf[off++] = X86_OP_REX_W;
1007 pCodeBuf[off++] = 0xff;
1008 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1009 RT_NOREF(idxTmp1, idxTmp2);
1010
1011#elif defined(RT_ARCH_ARM64)
1012 /* Determine how we're to access pVCpu first. */
1013 uint32_t const cbData = sizeof(STAMCOUNTER);
1014 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
1015 {
1016 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1017 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1018 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1019 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1020 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1021 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1022 }
1023 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
1024 {
1025 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1026 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1027 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1028 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1029 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1030 }
1031 else
1032 {
1033 /* The offset is too large, so we must load it into a register and use
1034 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
1035 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1036 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1037 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1038 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1039 }
1040
1041#else
1042# error "port me"
1043#endif
1044 return off;
1045}
1046
1047
1048/**
1049 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1050 *
1051 * @note The two temp registers are not required for AMD64. ARM64 always
1052 * requires the first, and the 2nd is needed if the offset cannot be
1053 * encoded as an immediate.
1054 */
1055DECL_FORCE_INLINE(uint32_t)
1056iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1057{
1058#ifdef RT_ARCH_AMD64
1059 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
1060#elif defined(RT_ARCH_ARM64)
1061 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1062#else
1063# error "port me"
1064#endif
1065 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1066 return off;
1067}
1068
1069
1070/**
1071 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1072 *
1073 * @note The two temp registers are not required for AMD64. ARM64 always
1074 * requires the first, and the 2nd is needed if the offset cannot be
1075 * encoded as an immediate.
1076 */
1077DECL_FORCE_INLINE(uint32_t)
1078iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1079{
1080 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1081#ifdef RT_ARCH_AMD64
1082 /* inc dword [pVCpu + offVCpu] */
1083 pCodeBuf[off++] = 0xff;
1084 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1085 RT_NOREF(idxTmp1, idxTmp2);
1086
1087#elif defined(RT_ARCH_ARM64)
1088 /* Determine how we're to access pVCpu first. */
1089 uint32_t const cbData = sizeof(uint32_t);
1090 if (offVCpu < (unsigned)(_4K * cbData))
1091 {
1092 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1093 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1094 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1095 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1096 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1097 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1098 }
1099 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1100 {
1101 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1102 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1103 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1104 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1105 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1106 }
1107 else
1108 {
1109 /* The offset is too large, so we must load it into a register and use
1110 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try use the 'LSL, #2' feature
1111 of the instruction if that'll reduce the constant to 16-bits. */
1112 if (offVCpu / cbData < (unsigned)UINT16_MAX)
1113 {
1114 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
1115 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1116 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1117 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1118 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1119 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1120 }
1121 else
1122 {
1123 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1124 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1125 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1126 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1127 }
1128 }
1129
1130#else
1131# error "port me"
1132#endif
1133 return off;
1134}
1135
1136
1137/**
1138 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1139 *
1140 * @note The two temp registers are not required for AMD64. ARM64 always
1141 * requires the first, and the 2nd is needed if the offset cannot be
1142 * encoded as an immediate.
1143 */
1144DECL_FORCE_INLINE(uint32_t)
1145iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1146{
1147#ifdef RT_ARCH_AMD64
1148 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
1149#elif defined(RT_ARCH_ARM64)
1150 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1151#else
1152# error "port me"
1153#endif
1154 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1155 return off;
1156}
1157
1158
1159/**
1160 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
1161 *
1162 * @note May allocate temporary registers (not AMD64).
1163 */
1164DECL_FORCE_INLINE(uint32_t)
1165iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1166{
1167 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1168#ifdef RT_ARCH_AMD64
1169 /* or dword [pVCpu + offVCpu], imm8/32 */
1170 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1171 if (fMask < 0x80)
1172 {
1173 pCodeBuf[off++] = 0x83;
1174 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1175 pCodeBuf[off++] = (uint8_t)fMask;
1176 }
1177 else
1178 {
1179 pCodeBuf[off++] = 0x81;
1180 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1181 pCodeBuf[off++] = RT_BYTE1(fMask);
1182 pCodeBuf[off++] = RT_BYTE2(fMask);
1183 pCodeBuf[off++] = RT_BYTE3(fMask);
1184 pCodeBuf[off++] = RT_BYTE4(fMask);
1185 }
1186
1187#elif defined(RT_ARCH_ARM64)
1188 /* If the constant is unwieldy we'll need a register to hold it as well. */
1189 uint32_t uImmSizeLen, uImmRotate;
1190 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1191 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1192
1193 /* We need a temp register for holding the member value we're modifying. */
1194 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1195
1196 /* Determine how we're to access pVCpu first. */
1197 uint32_t const cbData = sizeof(uint32_t);
1198 if (offVCpu < (unsigned)(_4K * cbData))
1199 {
1200 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1201 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1202 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1203 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1204 if (idxTmpMask == UINT8_MAX)
1205 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1206 else
1207 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1208 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1209 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1210 }
1211 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1212 {
1213 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1214 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1215 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1216 if (idxTmpMask == UINT8_MAX)
1217 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1218 else
1219 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1220 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1221 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1222 }
1223 else
1224 {
1225 /* The offset is too large, so we must load it into a register and use
1226 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try use the 'LSL, #2' feature
1227 of the instruction if that'll reduce the constant to 16-bits. */
1228 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1229 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1230 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1231 if (fShifted)
1232 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1233 else
1234 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1235
1236 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1237 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1238
1239 if (idxTmpMask == UINT8_MAX)
1240 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1241 else
1242 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1243
1244 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1245 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1246 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1247 }
1248 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1249 if (idxTmpMask != UINT8_MAX)
1250 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1251
1252#else
1253# error "port me"
1254#endif
1255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1256 return off;
1257}
1258
1259
1260/**
1261 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1262 *
1263 * @note May allocate temporary registers (not AMD64).
1264 */
1265DECL_FORCE_INLINE(uint32_t)
1266iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1267{
1268 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1269#ifdef RT_ARCH_AMD64
1270 /* and dword [pVCpu + offVCpu], imm8/32 */
1271 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1272 if (fMask < 0x80)
1273 {
1274 pCodeBuf[off++] = 0x83;
1275 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1276 pCodeBuf[off++] = (uint8_t)fMask;
1277 }
1278 else
1279 {
1280 pCodeBuf[off++] = 0x81;
1281 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1282 pCodeBuf[off++] = RT_BYTE1(fMask);
1283 pCodeBuf[off++] = RT_BYTE2(fMask);
1284 pCodeBuf[off++] = RT_BYTE3(fMask);
1285 pCodeBuf[off++] = RT_BYTE4(fMask);
1286 }
1287
1288#elif defined(RT_ARCH_ARM64)
1289 /* If the constant is unwieldy we'll need a register to hold it as well. */
1290 uint32_t uImmSizeLen, uImmRotate;
1291 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1292 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1293
1294 /* We need a temp register for holding the member value we're modifying. */
1295 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1296
1297 /* Determine how we're to access pVCpu first. */
1298 uint32_t const cbData = sizeof(uint32_t);
1299 if (offVCpu < (unsigned)(_4K * cbData))
1300 {
1301 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1302 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1303 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1304 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1305 if (idxTmpMask == UINT8_MAX)
1306 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1307 else
1308 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1309 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1310 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1311 }
1312 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1313 {
1314 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1315 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1316 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1317 if (idxTmpMask == UINT8_MAX)
1318 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1319 else
1320 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1321 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1322 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1323 }
1324 else
1325 {
1326 /* The offset is too large, so we must load it into a register and use
1327 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try use the 'LSL, #2' feature
1328 of the instruction if that'll reduce the constant to 16-bits. */
1329 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1330 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1331 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1332 if (fShifted)
1333 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1334 else
1335 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1336
1337 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1338 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1339
1340 if (idxTmpMask == UINT8_MAX)
1341 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1342 else
1343 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1344
1345 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1346 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1347 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1348 }
1349 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1350 if (idxTmpMask != UINT8_MAX)
1351 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1352
1353#else
1354# error "port me"
1355#endif
1356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1357 return off;
1358}
1359
1360
1361/**
1362 * Emits a gprdst = gprsrc load.
1363 */
1364DECL_FORCE_INLINE(uint32_t)
1365iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1366{
1367#ifdef RT_ARCH_AMD64
1368 /* mov gprdst, gprsrc */
1369 if ((iGprDst | iGprSrc) >= 8)
1370 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1371 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1372 : X86_OP_REX_W | X86_OP_REX_R;
1373 else
1374 pCodeBuf[off++] = X86_OP_REX_W;
1375 pCodeBuf[off++] = 0x8b;
1376 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1377
1378#elif defined(RT_ARCH_ARM64)
1379 /* mov dst, src; alias for: orr dst, xzr, src */
1380 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1381
1382#else
1383# error "port me"
1384#endif
1385 return off;
1386}
1387
1388
1389/**
1390 * Emits a gprdst = gprsrc load.
1391 */
1392DECL_INLINE_THROW(uint32_t)
1393iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1394{
1395#ifdef RT_ARCH_AMD64
1396 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1397#elif defined(RT_ARCH_ARM64)
1398 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1399#else
1400# error "port me"
1401#endif
1402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1403 return off;
1404}
1405
1406
1407/**
1408 * Emits a gprdst = gprsrc[31:0] load.
1409 * @note Bits 63 thru 32 are cleared.
1410 */
1411DECL_FORCE_INLINE(uint32_t)
1412iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1413{
1414#ifdef RT_ARCH_AMD64
1415 /* mov gprdst, gprsrc */
1416 if ((iGprDst | iGprSrc) >= 8)
1417 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1418 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1419 : X86_OP_REX_R;
1420 pCodeBuf[off++] = 0x8b;
1421 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1422
1423#elif defined(RT_ARCH_ARM64)
1424 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1425 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1426
1427#else
1428# error "port me"
1429#endif
1430 return off;
1431}
1432
1433
1434/**
1435 * Emits a gprdst = gprsrc[31:0] load.
1436 * @note Bits 63 thru 32 are cleared.
1437 */
1438DECL_INLINE_THROW(uint32_t)
1439iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1440{
1441#ifdef RT_ARCH_AMD64
1442 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1443#elif defined(RT_ARCH_ARM64)
1444 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1445#else
1446# error "port me"
1447#endif
1448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1449 return off;
1450}
1451
1452
1453/**
1454 * Emits a gprdst = gprsrc[15:0] load.
1455 * @note Bits 63 thru 15 are cleared.
1456 */
1457DECL_INLINE_THROW(uint32_t)
1458iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1459{
1460#ifdef RT_ARCH_AMD64
1461 /* movzx Gv,Ew */
1462 if ((iGprDst | iGprSrc) >= 8)
1463 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1464 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1465 : X86_OP_REX_R;
1466 pCodeBuf[off++] = 0x0f;
1467 pCodeBuf[off++] = 0xb7;
1468 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1469
1470#elif defined(RT_ARCH_ARM64)
1471 /* and gprdst, gprsrc, #0xffff */
1472# if 1
1473 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1474 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1475# else
1476 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1477 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1478# endif
1479
1480#else
1481# error "port me"
1482#endif
1483 return off;
1484}
1485
1486
1487/**
1488 * Emits a gprdst = gprsrc[15:0] load.
1489 * @note Bits 63 thru 15 are cleared.
1490 */
1491DECL_INLINE_THROW(uint32_t)
1492iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1493{
1494#ifdef RT_ARCH_AMD64
1495 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1496#elif defined(RT_ARCH_ARM64)
1497 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1498#else
1499# error "port me"
1500#endif
1501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1502 return off;
1503}
1504
1505
1506/**
1507 * Emits a gprdst = gprsrc[7:0] load.
1508 * @note Bits 63 thru 8 are cleared.
1509 */
1510DECL_FORCE_INLINE(uint32_t)
1511iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1512{
1513#ifdef RT_ARCH_AMD64
1514 /* movzx Gv,Eb */
1515 if (iGprDst >= 8 || iGprSrc >= 8)
1516 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1517 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1518 : X86_OP_REX_R;
1519 else if (iGprSrc >= 4)
1520 pCodeBuf[off++] = X86_OP_REX;
1521 pCodeBuf[off++] = 0x0f;
1522 pCodeBuf[off++] = 0xb6;
1523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1524
1525#elif defined(RT_ARCH_ARM64)
1526 /* and gprdst, gprsrc, #0xff */
1527 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1528 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1529
1530#else
1531# error "port me"
1532#endif
1533 return off;
1534}
1535
1536
1537/**
1538 * Emits a gprdst = gprsrc[7:0] load.
1539 * @note Bits 63 thru 8 are cleared.
1540 */
1541DECL_INLINE_THROW(uint32_t)
1542iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1543{
1544#ifdef RT_ARCH_AMD64
1545 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1546#elif defined(RT_ARCH_ARM64)
1547 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1548#else
1549# error "port me"
1550#endif
1551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1552 return off;
1553}
1554
1555
1556/**
1557 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1558 * @note Bits 63 thru 8 are cleared.
1559 */
1560DECL_INLINE_THROW(uint32_t)
1561iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1562{
1563#ifdef RT_ARCH_AMD64
1564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1565
1566 /* movzx Gv,Ew */
1567 if ((iGprDst | iGprSrc) >= 8)
1568 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1569 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1570 : X86_OP_REX_R;
1571 pbCodeBuf[off++] = 0x0f;
1572 pbCodeBuf[off++] = 0xb7;
1573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1574
1575 /* shr Ev,8 */
1576 if (iGprDst >= 8)
1577 pbCodeBuf[off++] = X86_OP_REX_B;
1578 pbCodeBuf[off++] = 0xc1;
1579 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1580 pbCodeBuf[off++] = 8;
1581
1582#elif defined(RT_ARCH_ARM64)
1583 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1585 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1586
1587#else
1588# error "port me"
1589#endif
1590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1591 return off;
1592}
1593
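/*
 * Illustration (sketch): iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, 10, X86_GREG_xAX)
 * should produce 44 0F B7 D0 / 41 C1 EA 08 (movzx r10d, ax; shr r10d, 8) on AMD64
 * and a single ubfx w10, w0, #8, #8 on ARM64.
 */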
1594
1595/**
1596 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1597 */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1600{
1601#ifdef RT_ARCH_AMD64
1602 /* movsxd r64, r/m32 */
1603 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1604 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1605 pbCodeBuf[off++] = 0x63;
1606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1607
1608#elif defined(RT_ARCH_ARM64)
1609 /* sxtw dst, src */
1610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1611 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1612
1613#else
1614# error "port me"
1615#endif
1616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1617 return off;
1618}
1619
1620
1621/**
1622 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1623 */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1626{
1627#ifdef RT_ARCH_AMD64
1628 /* movsx r64, r/m16 */
1629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xbf;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635#elif defined(RT_ARCH_ARM64)
1636 /* sxth dst, src */
1637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1638 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1639
1640#else
1641# error "port me"
1642#endif
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644 return off;
1645}
1646
1647
1648/**
1649 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1650 */
1651DECL_INLINE_THROW(uint32_t)
1652iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1653{
1654#ifdef RT_ARCH_AMD64
1655 /* movsx r32, r/m16 */
1656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1657 if (iGprDst >= 8 || iGprSrc >= 8)
1658 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1659 pbCodeBuf[off++] = 0x0f;
1660 pbCodeBuf[off++] = 0xbf;
1661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 /* sxth dst32, src */
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1666 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1667
1668#else
1669# error "port me"
1670#endif
1671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1672 return off;
1673}
1674
1675
1676/**
1677 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1678 */
1679DECL_INLINE_THROW(uint32_t)
1680iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /* movsx r64, r/m8 */
1684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1686 pbCodeBuf[off++] = 0x0f;
1687 pbCodeBuf[off++] = 0xbe;
1688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1689
1690#elif defined(RT_ARCH_ARM64)
1691 /* sxtb dst, src */
1692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1693 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1694
1695#else
1696# error "port me"
1697#endif
1698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1699 return off;
1700}
1701
1702
1703/**
1704 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1705 * @note Bits 63 thru 32 are cleared.
1706 */
1707DECL_INLINE_THROW(uint32_t)
1708iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1709{
1710#ifdef RT_ARCH_AMD64
1711 /* movsx r32, r/m8 */
1712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1713 if (iGprDst >= 8 || iGprSrc >= 8)
1714 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1715 else if (iGprSrc >= 4)
1716 pbCodeBuf[off++] = X86_OP_REX;
1717 pbCodeBuf[off++] = 0x0f;
1718 pbCodeBuf[off++] = 0xbe;
1719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 /* sxtb dst32, src32 */
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1724 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1725
1726#else
1727# error "port me"
1728#endif
1729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1730 return off;
1731}
1732
1733
1734/**
1735 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1736 * @note Bits 63 thru 16 are cleared.
1737 */
1738DECL_INLINE_THROW(uint32_t)
1739iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1740{
1741#ifdef RT_ARCH_AMD64
1742 /* movsx r16, r/m8 */
1743 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1744 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1745 if (iGprDst >= 8 || iGprSrc >= 8)
1746 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1747 else if (iGprSrc >= 4)
1748 pbCodeBuf[off++] = X86_OP_REX;
1749 pbCodeBuf[off++] = 0x0f;
1750 pbCodeBuf[off++] = 0xbe;
1751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1752
1753 /* movzx r32, r/m16 */
1754 if (iGprDst >= 8)
1755 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1756 pbCodeBuf[off++] = 0x0f;
1757 pbCodeBuf[off++] = 0xb7;
1758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1759
1760#elif defined(RT_ARCH_ARM64)
1761 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1763 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1764 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1766
1767#else
1768# error "port me"
1769#endif
1770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1771 return off;
1772}
1773
1774
1775/**
1776 * Emits a gprdst = gprsrc + addend load.
1777 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1778 */
1779#ifdef RT_ARCH_AMD64
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1782 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1783{
1784 Assert(iAddend != 0);
1785
1786 /* lea gprdst, [gprsrc + iAddend] */
1787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1789 pbCodeBuf[off++] = 0x8d;
1790 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1792 return off;
1793}
1794
1795#elif defined(RT_ARCH_ARM64)
1796DECL_INLINE_THROW(uint32_t)
1797iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1798 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1799{
1800 if ((uint32_t)iAddend < 4096)
1801 {
1802 /* add dst, src, uimm12 */
1803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1805 }
1806 else if ((uint32_t)-iAddend < 4096)
1807 {
1808 /* sub dst, src, uimm12 */
1809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1811 }
1812 else
1813 {
1814 Assert(iGprSrc != iGprDst);
1815 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1818 }
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820 return off;
1821}
1822#else
1823# error "port me"
1824#endif
1825
1826/**
1827 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1828 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1829 */
1830#ifdef RT_ARCH_AMD64
1831DECL_INLINE_THROW(uint32_t)
1832iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1833 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1834#else
1835DECL_INLINE_THROW(uint32_t)
1836iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1837 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1838#endif
1839{
1840 if (iAddend != 0)
1841 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1842 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1843}
1844
1845
1846/**
1847 * Emits a gprdst = gprsrc32 + addend load.
1848 * @note Bits 63 thru 32 are cleared.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1852 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1853{
1854 Assert(iAddend != 0);
1855
1856#ifdef RT_ARCH_AMD64
1857 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1860 if ((iGprDst | iGprSrc) >= 8)
1861 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1862 pbCodeBuf[off++] = 0x8d;
1863 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1864
1865#elif defined(RT_ARCH_ARM64)
1866 if ((uint32_t)iAddend < 4096)
1867 {
1868 /* add dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1871 }
1872 else if ((uint32_t)-iAddend < 4096)
1873 {
1874 /* sub dst, src, uimm12 */
1875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1877 }
1878 else
1879 {
1880 Assert(iGprSrc != iGprDst);
1881 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1884 }
1885
1886#else
1887# error "port me"
1888#endif
1889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1890 return off;
1891}
1892
1893
1894/**
1895 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1896 */
1897DECL_INLINE_THROW(uint32_t)
1898iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1899 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1900{
1901 if (iAddend != 0)
1902 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1903 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1904}
1905
1906
1907/**
1908 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1909 * destination.
1910 */
1911DECL_FORCE_INLINE(uint32_t)
1912iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1913{
1914#ifdef RT_ARCH_AMD64
1915 /* mov reg16, r/m16 */
1916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1917 if (idxDst >= 8 || idxSrc >= 8)
1918 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1919 pCodeBuf[off++] = 0x8b;
1920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1921
1922#elif defined(RT_ARCH_ARM64)
1923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1924 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1925
1926#else
1927# error "Port me!"
1928#endif
1929 return off;
1930}
1931
1932
1933/**
1934 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1935 * destination.
1936 */
1937DECL_INLINE_THROW(uint32_t)
1938iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1939{
1940#ifdef RT_ARCH_AMD64
1941 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1942#elif defined(RT_ARCH_ARM64)
1943 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1944#else
1945# error "Port me!"
1946#endif
1947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1948 return off;
1949}
1950
1951
1952#ifdef RT_ARCH_AMD64
1953/**
1954 * Common bit of iemNativeEmitLoadGprByBp and friends.
1955 */
1956DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1957 PIEMRECOMPILERSTATE pReNativeAssert)
1958{
1959 if (offDisp < 128 && offDisp >= -128)
1960 {
1961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1962 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1963 }
1964 else
1965 {
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1967 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1968 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1969 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1970 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1971 }
1972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1973 return off;
1974}
1975#elif defined(RT_ARCH_ARM64)
1976/**
1977 * Common bit of iemNativeEmitLoadGprByBp and friends.
1978 */
1979DECL_FORCE_INLINE_THROW(uint32_t)
1980iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1981 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1982{
1983 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1984 {
1985 /* str w/ unsigned imm12 (scaled) */
1986 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1988 }
1989 else if (offDisp >= -256 && offDisp <= 256)
1990 {
1991 /* stur w/ signed imm9 (unscaled) */
1992 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1993 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1994 }
1995 else
1996 {
1997 /* Use temporary indexing register. */
1998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2001 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2002 }
2003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2004 return off;
2005}
2006#endif
2007
2008
2009/**
2010 * Emits a 64-bit GPR load instruction with a BP relative source address.
2011 */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2014{
2015#ifdef RT_ARCH_AMD64
2016 /* mov gprdst, qword [rbp + offDisp] */
2017 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2018 if (iGprDst < 8)
2019 pbCodeBuf[off++] = X86_OP_REX_W;
2020 else
2021 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2022 pbCodeBuf[off++] = 0x8b;
2023 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2024
2025#elif defined(RT_ARCH_ARM64)
2026 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2027
2028#else
2029# error "port me"
2030#endif
2031}
2032
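/*
 * Illustration (sketch): iemNativeEmitLoadGprByBp(pReNative, off, X86_GREG_xAX, -8)
 * should yield 48 8B 45 F8 (mov rax, [rbp - 8]) on AMD64, while on ARM64 the same
 * call takes the signed imm9 path of iemNativeEmitGprByBpLdSt and emits a single
 * ldur x0, [x29, #-8] (assuming ARMV8_A64_REG_BP is the x29 frame pointer).
 */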
2033
2034/**
2035 * Emits a 32-bit GPR load instruction with a BP relative source address.
2036 * @note Bits 63 thru 32 of the GPR will be cleared.
2037 */
2038DECL_INLINE_THROW(uint32_t)
2039iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2040{
2041#ifdef RT_ARCH_AMD64
2042 /* mov gprdst, dword [rbp + offDisp] */
2043 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2044 if (iGprDst >= 8)
2045 pbCodeBuf[off++] = X86_OP_REX_R;
2046 pbCodeBuf[off++] = 0x8b;
2047 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2048
2049#elif defined(RT_ARCH_ARM64)
2050 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2051
2052#else
2053# error "port me"
2054#endif
2055}
2056
2057
2058/**
2059 * Emits a 16-bit GPR load instruction with a BP relative source address.
2060 * @note Bits 63 thru 16 of the GPR will be cleared.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2064{
2065#ifdef RT_ARCH_AMD64
2066 /* movzx gprdst, word [rbp + offDisp] */
2067 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2068 if (iGprDst >= 8)
2069 pbCodeBuf[off++] = X86_OP_REX_R;
2070 pbCodeBuf[off++] = 0x0f;
2071 pbCodeBuf[off++] = 0xb7;
2072 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2073
2074#elif defined(RT_ARCH_ARM64)
2075 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2076
2077#else
2078# error "port me"
2079#endif
2080}
2081
2082
2083/**
2084 * Emits an 8-bit GPR load instruction with a BP relative source address.
2085 * @note Bits 63 thru 8 of the GPR will be cleared.
2086 */
2087DECL_INLINE_THROW(uint32_t)
2088iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2089{
2090#ifdef RT_ARCH_AMD64
2091 /* movzx gprdst, byte [rbp + offDisp] */
2092 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2093 if (iGprDst >= 8)
2094 pbCodeBuf[off++] = X86_OP_REX_R;
2095 pbCodeBuf[off++] = 0x0f;
2096 pbCodeBuf[off++] = 0xb6;
2097 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2098
2099#elif defined(RT_ARCH_ARM64)
2100 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2101
2102#else
2103# error "port me"
2104#endif
2105}
2106
2107
2108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2109/**
2110 * Emits a 128-bit vector register load instruction with a BP relative source address.
2111 */
2112DECL_FORCE_INLINE_THROW(uint32_t)
2113iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2114{
2115#ifdef RT_ARCH_AMD64
2116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2117
2118 /* movdqu reg128, mem128 */
2119 pbCodeBuf[off++] = 0xf3;
2120 if (iVecRegDst >= 8)
2121 pbCodeBuf[off++] = X86_OP_REX_R;
2122 pbCodeBuf[off++] = 0x0f;
2123 pbCodeBuf[off++] = 0x6f;
2124 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2125#elif defined(RT_ARCH_ARM64)
2126 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2127#else
2128# error "port me"
2129#endif
2130}
2131
2132
2133/**
2134 * Emits a 256-bit vector register load instruction with a BP relative source address.
2135 */
2136DECL_FORCE_INLINE_THROW(uint32_t)
2137iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2138{
2139#ifdef RT_ARCH_AMD64
2140 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2141
2142 /* vmovdqu reg256, mem256 */
2143 pbCodeBuf[off++] = X86_OP_VEX2;
2144 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2145 pbCodeBuf[off++] = 0x6f;
2146 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2147#elif defined(RT_ARCH_ARM64)
2148 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2149 Assert(!(iVecRegDst & 0x1));
2150 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2151 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2152#else
2153# error "port me"
2154#endif
2155}
2156
2157#endif
2158
2159
2160/**
2161 * Emits a load effective address to a GPR with a BP relative source address.
2162 */
2163DECL_INLINE_THROW(uint32_t)
2164iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2165{
2166#ifdef RT_ARCH_AMD64
2167 /* lea gprdst, [rbp + offDisp] */
2168 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2169 if (iGprDst < 8)
2170 pbCodeBuf[off++] = X86_OP_REX_W;
2171 else
2172 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2173 pbCodeBuf[off++] = 0x8d;
2174 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2175
2176#elif defined(RT_ARCH_ARM64)
2177 if ((uint32_t)offDisp < (unsigned)_4K)
2178 {
2179 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2181 }
2182 else if ((uint32_t)-offDisp < (unsigned)_4K)
2183 {
2184 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2185 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2186 }
2187 else
2188 {
2189 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2190 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2191 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2192 if (offDisp >= 0)
2193 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2194 else
2195 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2196 }
2197
2198#else
2199# error "port me"
2200#endif
2201
2202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2203 return off;
2204}
2205
2206
2207/**
2208 * Emits a 64-bit GPR store with a BP relative destination address.
2209 *
2210 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2211 */
2212DECL_INLINE_THROW(uint32_t)
2213iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2214{
2215#ifdef RT_ARCH_AMD64
2216 /* mov qword [rbp + offDisp], gprsrc */
2217 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2218 if (iGprSrc < 8)
2219 pbCodeBuf[off++] = X86_OP_REX_W;
2220 else
2221 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2222 pbCodeBuf[off++] = 0x89;
2223 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2224
2225#elif defined(RT_ARCH_ARM64)
2226 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2227 {
2228 /* str w/ unsigned imm12 (scaled) */
2229 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2230 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2231 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2232 }
2233 else if (offDisp >= -256 && offDisp <= 256)
2234 {
2235 /* stur w/ signed imm9 (unscaled) */
2236 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2237 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2238 }
2239 else if ((uint32_t)-offDisp < (unsigned)_4K)
2240 {
2241 /* Use temporary indexing register w/ sub uimm12. */
2242 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2243 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2244 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2245 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2246 }
2247 else
2248 {
2249 /* Use temporary indexing register. */
2250 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2251 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2252 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2253 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2254 }
2255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2256 return off;
2257
2258#else
2259# error "Port me!"
2260#endif
2261}
2262
2263
2264/**
2265 * Emits a 64-bit immediate store with a BP relative destination address.
2266 *
2267 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2268 */
2269DECL_INLINE_THROW(uint32_t)
2270iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2271{
2272#ifdef RT_ARCH_AMD64
2273 if ((int64_t)uImm64 == (int32_t)uImm64)
2274 {
2275 /* mov qword [rbp + offDisp], imm32 - sign extended */
2276 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2277 pbCodeBuf[off++] = X86_OP_REX_W;
2278 pbCodeBuf[off++] = 0xc7;
2279 if (offDisp < 128 && offDisp >= -128)
2280 {
2281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2282 pbCodeBuf[off++] = (uint8_t)offDisp;
2283 }
2284 else
2285 {
2286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2287 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2288 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2289 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2290 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2291 }
2292 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2293 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2294 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2295 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2297 return off;
2298 }
2299#endif
2300
2301 /* Load tmp0, imm64; Store tmp to bp+disp. */
2302 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2303 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2304}
2305
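/*
 * Example (sketch): iemNativeEmitStoreImm64ByBp(pReNative, off, -16, 1) fits the
 * sign-extended imm32 fast path and should emit 48 C7 45 F0 01 00 00 00
 * (mov qword [rbp - 16], 1), whereas a constant like UINT64_C(0x123456789a) does
 * not fit and is materialized in IEMNATIVE_REG_FIXED_TMP0 and stored from there.
 */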
2306
2307#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2308/**
2309 * Emits a 128-bit vector register store with a BP relative destination address.
2310 *
2311 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2312 */
2313DECL_INLINE_THROW(uint32_t)
2314iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2315{
2316#ifdef RT_ARCH_AMD64
2317 /* movdqu [rbp + offDisp], vecsrc */
2318 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2319 pbCodeBuf[off++] = 0xf3;
2320 if (iVecRegSrc >= 8)
2321 pbCodeBuf[off++] = X86_OP_REX_R;
2322 pbCodeBuf[off++] = 0x0f;
2323 pbCodeBuf[off++] = 0x7f;
2324 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2325
2326#elif defined(RT_ARCH_ARM64)
2327 if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2328 {
2329 /* str w/ unsigned imm12 (scaled) */
2330 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2331 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2332 ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2333 }
2334 else if (offDisp >= -256 && offDisp <= 256)
2335 {
2336 /* stur w/ signed imm9 (unscaled) */
2337 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2338 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2339 }
2340 else if ((uint32_t)-offDisp < (unsigned)_4K)
2341 {
2342 /* Use temporary indexing register w/ sub uimm12. */
2343 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2344 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2345 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2346 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2347 }
2348 else
2349 {
2350 /* Use temporary indexing register. */
2351 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2352 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2353 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2354 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2355 }
2356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2357 return off;
2358
2359#else
2360# error "Port me!"
2361#endif
2362}
2363
2364
2365/**
2366 * Emits a 256-bit vector register store with a BP relative destination address.
2367 *
2368 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2369 */
2370DECL_INLINE_THROW(uint32_t)
2371iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2372{
2373#ifdef RT_ARCH_AMD64
2374 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2375
2376 /* vmovdqu mem256, reg256 */
2377 pbCodeBuf[off++] = X86_OP_VEX2;
2378 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2379 pbCodeBuf[off++] = 0x7f;
2380 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2381#elif defined(RT_ARCH_ARM64)
2382 Assert(!(iVecRegSrc & 0x1));
2383 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2384 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2385#else
2386# error "Port me!"
2387#endif
2388}
2389#endif
2390
2391#if defined(RT_ARCH_ARM64)
2392
2393/**
2394 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2395 *
2396 * @note Odd and large @a offDisp values require a temporary, unless it's a
2397 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2398 * caller does not heed this.
2399 *
2400 * @note DON'T try this with prefetch.
2401 */
2402DECL_FORCE_INLINE_THROW(uint32_t)
2403iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2404 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2405{
2406 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2407 {
2408 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2409 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2410 }
2411 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2412 && iGprReg != iGprBase)
2413 || iGprTmp != UINT8_MAX)
2414 {
2415 /* The offset is too large, so we must load it into a register and use
2416 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2417 /** @todo reduce by offVCpu by >> 3 or >> 2? if it saves instructions? */
2418 if (iGprTmp == UINT8_MAX)
2419 iGprTmp = iGprReg;
2420 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2421 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2422 }
2423 else
2424# ifdef IEM_WITH_THROW_CATCH
2425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2426# else
2427 AssertReleaseFailedStmt(off = UINT32_MAX);
2428# endif
2429 return off;
2430}
2431
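/*
 * Example (sketch): a 64-bit load with offDisp = 0x12340 cannot use the scaled
 * uimm12 form (0x12340 > 0x7ff8), so with iGprTmp == UINT8_MAX and a destination
 * that differs from the base, the destination itself is used as the index:
 *      mov/movk x0, #0x12340       ; via iemNativeEmitLoadGprImmEx
 *      ldr      x0, [x1, x0]
 * A store in the same situation must be given an explicit iGprTmp.
 */
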
2432/**
2433 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2434 */
2435DECL_FORCE_INLINE_THROW(uint32_t)
2436iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2437 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2438{
2439 /*
2440 * There are a couple of ldr variants that take an immediate offset, so
2441 * try to use those if we can; otherwise we have to use the temporary
2442 * register to help with the addressing.
2443 */
2444 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2445 {
2446 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2447 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2448 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2449 }
2450 else
2451 {
2452 /* The offset is too large, so we must load it into a register and use
2453 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2454 /** @todo reduce by offVCpu by >> 3 or >> 2? if it saves instructions? */
2455 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2456
2457 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2458 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2459
2460 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2461 }
2462 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2463 return off;
2464}
2465
2466# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2467/**
2468 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2469 *
2470 * @note Odd and large @a offDisp values require a temporary (@a iGprTmp), as
2471 * a vector register cannot double as the index register. Will assert /
2472 * throw if the caller does not heed this.
2473 *
2474 * @note DON'T try this with prefetch.
2475 */
2476DECL_FORCE_INLINE_THROW(uint32_t)
2477iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2478 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2479{
2480 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2481 {
2482 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2483 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2484 }
2485 else if (iGprTmp != UINT8_MAX)
2487 {
2488 /* The offset is too large, so we must load it into a register and use
2489 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2490 /** @todo reduce by offVCpu by >> 3 or >> 2? if it saves instructions? */
2491 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2492 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2493 }
2494 else
2495# ifdef IEM_WITH_THROW_CATCH
2496 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2497# else
2498 AssertReleaseFailedStmt(off = UINT32_MAX);
2499# endif
2500 return off;
2501}
2502# endif
2503
2504
2505/**
2506 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2507 */
2508DECL_FORCE_INLINE_THROW(uint32_t)
2509iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2510 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2511{
2512 /*
2513 * There are a couple of ldr variants that take an immediate offset, so
2514 * try to use those if we can; otherwise we have to use the temporary
2515 * register to help with the addressing.
2516 */
2517 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2518 {
2519 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2520 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2521 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2522 }
2523 else
2524 {
2525 /* The offset is too large, so we must load it into a register and use
2526 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2527 /** @todo reduce by offVCpu by >> 3 or >> 2? if it saves instructions? */
2528 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2529
2530 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2531 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2532
2533 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2534 }
2535 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2536 return off;
2537}
2538#endif /* RT_ARCH_ARM64 */
2539
2540/**
2541 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2542 *
2543 * @note ARM64: Misaligned @a offDisp values and values not in the
2544 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2545 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2546 * does not heed this.
2547 */
2548DECL_FORCE_INLINE_THROW(uint32_t)
2549iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2550 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2551{
2552#ifdef RT_ARCH_AMD64
2553 /* mov reg64, mem64 */
2554 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2555 pCodeBuf[off++] = 0x8b;
2556 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2557 RT_NOREF(iGprTmp);
2558
2559#elif defined(RT_ARCH_ARM64)
2560 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2561 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2562
2563#else
2564# error "port me"
2565#endif
2566 return off;
2567}
2568
2569
2570/**
2571 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2572 */
2573DECL_INLINE_THROW(uint32_t)
2574iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2575{
2576#ifdef RT_ARCH_AMD64
2577 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2578 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2579
2580#elif defined(RT_ARCH_ARM64)
2581 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2582
2583#else
2584# error "port me"
2585#endif
2586 return off;
2587}
2588
2589
2590/**
2591 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2592 *
2593 * @note ARM64: Misaligned @a offDisp values and values not in the
2594 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2595 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2596 * caller does not heed this.
2597 *
2598 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2599 */
2600DECL_FORCE_INLINE_THROW(uint32_t)
2601iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2602 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2603{
2604#ifdef RT_ARCH_AMD64
2605 /* mov reg32, mem32 */
2606 if (iGprDst >= 8 || iGprBase >= 8)
2607 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2608 pCodeBuf[off++] = 0x8b;
2609 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2610 RT_NOREF(iGprTmp);
2611
2612#elif defined(RT_ARCH_ARM64)
2613 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2614 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2615
2616#else
2617# error "port me"
2618#endif
2619 return off;
2620}
2621
2622
2623/**
2624 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2625 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2626 */
2627DECL_INLINE_THROW(uint32_t)
2628iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2629{
2630#ifdef RT_ARCH_AMD64
2631 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2633
2634#elif defined(RT_ARCH_ARM64)
2635 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2636
2637#else
2638# error "port me"
2639#endif
2640 return off;
2641}
2642
2643
2644/**
2645 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2646 * sign-extending the value to 64 bits.
2647 *
2648 * @note ARM64: Misaligned @a offDisp values and values not in the
2649 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2650 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2651 * caller does not heed this.
2652 */
2653DECL_FORCE_INLINE_THROW(uint32_t)
2654iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2655 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2656{
2657#ifdef RT_ARCH_AMD64
2658 /* movsxd reg64, mem32 */
2659 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2660 pCodeBuf[off++] = 0x63;
2661 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2662 RT_NOREF(iGprTmp);
2663
2664#elif defined(RT_ARCH_ARM64)
2665 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2666 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2667
2668#else
2669# error "port me"
2670#endif
2671 return off;
2672}
2673
2674
2675/**
2676 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2677 *
2678 * @note ARM64: Misaligned @a offDisp values and values not in the
2679 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2680 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2681 * caller does not heed this.
2682 *
2683 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2684 */
2685DECL_FORCE_INLINE_THROW(uint32_t)
2686iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2687 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2688{
2689#ifdef RT_ARCH_AMD64
2690 /* movzx reg32, mem16 */
2691 if (iGprDst >= 8 || iGprBase >= 8)
2692 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2693 pCodeBuf[off++] = 0x0f;
2694 pCodeBuf[off++] = 0xb7;
2695 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2696 RT_NOREF(iGprTmp);
2697
2698#elif defined(RT_ARCH_ARM64)
2699 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2700 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2701
2702#else
2703# error "port me"
2704#endif
2705 return off;
2706}
2707
2708
2709/**
2710 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2711 * sign-extending the value to 64 bits.
2712 *
2713 * @note ARM64: Misaligned @a offDisp values and values not in the
2714 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2715 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2716 * caller does not heed this.
2717 */
2718DECL_FORCE_INLINE_THROW(uint32_t)
2719iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2720 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2721{
2722#ifdef RT_ARCH_AMD64
2723 /* movsx reg64, mem16 */
2724 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2725 pCodeBuf[off++] = 0x0f;
2726 pCodeBuf[off++] = 0xbf;
2727 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2728 RT_NOREF(iGprTmp);
2729
2730#elif defined(RT_ARCH_ARM64)
2731 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2732 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2733
2734#else
2735# error "port me"
2736#endif
2737 return off;
2738}
2739
2740
2741/**
2742 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2743 * sign-extending the value to 32 bits.
2744 *
2745 * @note ARM64: Misaligned @a offDisp values and values not in the
2746 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2747 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2748 * caller does not heed this.
2749 *
2750 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2751 */
2752DECL_FORCE_INLINE_THROW(uint32_t)
2753iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2754 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2755{
2756#ifdef RT_ARCH_AMD64
2757 /* movsx reg32, mem16 */
2758 if (iGprDst >= 8 || iGprBase >= 8)
2759 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2760 pCodeBuf[off++] = 0x0f;
2761 pCodeBuf[off++] = 0xbf;
2762 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2763 RT_NOREF(iGprTmp);
2764
2765#elif defined(RT_ARCH_ARM64)
2766 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2767 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2768
2769#else
2770# error "port me"
2771#endif
2772 return off;
2773}
2774
2775
2776/**
2777 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2778 *
2779 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2780 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2781 * same. Will assert / throw if caller does not heed this.
2782 *
2783 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2784 */
2785DECL_FORCE_INLINE_THROW(uint32_t)
2786iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2787 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2788{
2789#ifdef RT_ARCH_AMD64
2790 /* movzx reg32, mem8 */
2791 if (iGprDst >= 8 || iGprBase >= 8)
2792 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2793 pCodeBuf[off++] = 0x0f;
2794 pCodeBuf[off++] = 0xb6;
2795 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2796 RT_NOREF(iGprTmp);
2797
2798#elif defined(RT_ARCH_ARM64)
2799 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2800 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2801
2802#else
2803# error "port me"
2804#endif
2805 return off;
2806}
2807
2808
2809/**
2810 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2811 * sign-extending the value to 64 bits.
2812 *
2813 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2814 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2815 * same. Will assert / throw if caller does not heed this.
2816 */
2817DECL_FORCE_INLINE_THROW(uint32_t)
2818iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2819 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2820{
2821#ifdef RT_ARCH_AMD64
2822 /* movsx reg64, mem8 */
2823 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2824 pCodeBuf[off++] = 0x0f;
2825 pCodeBuf[off++] = 0xbe;
2826 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2827 RT_NOREF(iGprTmp);
2828
2829#elif defined(RT_ARCH_ARM64)
2830 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2831 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2832
2833#else
2834# error "port me"
2835#endif
2836 return off;
2837}
2838
2839
2840/**
2841 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2842 * sign-extending the value to 32 bits.
2843 *
2844 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2845 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2846 * same. Will assert / throw if caller does not heed this.
2847 *
2848 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2849 */
2850DECL_FORCE_INLINE_THROW(uint32_t)
2851iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2852 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2853{
2854#ifdef RT_ARCH_AMD64
2855 /* movsx reg32, mem8 */
2856 if (iGprDst >= 8 || iGprBase >= 8)
2857 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2858 pCodeBuf[off++] = 0x0f;
2859 pCodeBuf[off++] = 0xbe;
2860 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2861 RT_NOREF(iGprTmp);
2862
2863#elif defined(RT_ARCH_ARM64)
2864 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2865 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2866
2867#else
2868# error "port me"
2869#endif
2870 return off;
2871}
2872
2873
2874/**
2875 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2876 * sign-extending the value to 16 bits.
2877 *
2878 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2879 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2880 * same. Will assert / throw if caller does not heed this.
2881 *
2882 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2883 */
2884DECL_FORCE_INLINE_THROW(uint32_t)
2885iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2886 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2887{
2888#ifdef RT_ARCH_AMD64
2889 /* movsx reg32, mem8 */
2890 if (iGprDst >= 8 || iGprBase >= 8)
2891 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2892 pCodeBuf[off++] = 0x0f;
2893 pCodeBuf[off++] = 0xbe;
2894 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2895# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2896 /* and reg32, 0xffff */
2897 if (iGprDst >= 8)
2898 pCodeBuf[off++] = X86_OP_REX_B;
2899 pCodeBuf[off++] = 0x81;
2900 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2901 pCodeBuf[off++] = 0xff;
2902 pCodeBuf[off++] = 0xff;
2903 pCodeBuf[off++] = 0;
2904 pCodeBuf[off++] = 0;
2905# else
2906 /* movzx reg32, reg16 */
2907 if (iGprDst >= 8)
2908 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2909 pCodeBuf[off++] = 0x0f;
2910 pCodeBuf[off++] = 0xb7;
2911 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2912# endif
2913 RT_NOREF(iGprTmp);
2914
2915#elif defined(RT_ARCH_ARM64)
2916 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2917 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2918 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2919 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
2920
2921#else
2922# error "port me"
2923#endif
2924 return off;
2925}
2926
2927
2928#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2929/**
2930 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2931 *
2932 * @note ARM64: Misaligned @a offDisp values and values not in the
2933 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2934 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2935 * does not heed this.
2936 */
2937DECL_FORCE_INLINE_THROW(uint32_t)
2938iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2939 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2940{
2941#ifdef RT_ARCH_AMD64
2942 /* movdqu reg128, mem128 */
2943 pCodeBuf[off++] = 0xf3;
2944 if (iVecRegDst >= 8 || iGprBase >= 8)
2945 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2946 pCodeBuf[off++] = 0x0f;
2947 pCodeBuf[off++] = 0x6f;
2948 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
2949 RT_NOREF(iGprTmp);
2950
2951#elif defined(RT_ARCH_ARM64)
2952 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
2953 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
2954
2955#else
2956# error "port me"
2957#endif
2958 return off;
2959}
2960
2961
2962/**
2963 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2964 */
2965DECL_INLINE_THROW(uint32_t)
2966iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
2967{
2968#ifdef RT_ARCH_AMD64
2969 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
2970 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2971
2972#elif defined(RT_ARCH_ARM64)
2973 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2974
2975#else
2976# error "port me"
2977#endif
2978 return off;
2979}
2980
2981
2982/**
2983 * Emits a 256-bit vector register load via a GPR base address with a displacement.
2984 *
2985 * @note ARM64: Misaligned @a offDisp values and values not in the
2986 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2987 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2988 * does not heed this.
2989 */
2990DECL_FORCE_INLINE_THROW(uint32_t)
2991iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2992 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2993{
2994#ifdef RT_ARCH_AMD64
2995 /* vmovdqu reg256, mem256 */
2996 pCodeBuf[off++] = X86_OP_VEX3;
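 /* Note: the VEX.R/X/B bits are stored inverted relative to REX, which is why the
    R bit is set here when the destination register index is below 8. */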
2997 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
2998 | X86_OP_VEX3_BYTE1_X
2999 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3000 | UINT8_C(0x01);
3001 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3002 pCodeBuf[off++] = 0x6f;
3003 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3004 RT_NOREF(iGprTmp);
3005
3006#elif defined(RT_ARCH_ARM64)
3007 Assert(!(iVecRegDst & 0x1));
3008 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3009 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3010 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3011 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3012#else
3013# error "port me"
3014#endif
3015 return off;
3016}
3017
3018
3019/**
3020 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3021 */
3022DECL_INLINE_THROW(uint32_t)
3023iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3024{
3025#ifdef RT_ARCH_AMD64
3026 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3028
3029#elif defined(RT_ARCH_ARM64)
3030 Assert(!(iVecRegDst & 0x1));
3031 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3032 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3033 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3034 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3035
3036#else
3037# error "port me"
3038#endif
3039 return off;
3040}
3041#endif
3042
3043
3044/**
3045 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3046 *
3047 * @note ARM64: Misaligned @a offDisp values and values not in the
3048 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3049 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3050 * does not heed this.
3051 */
3052DECL_FORCE_INLINE_THROW(uint32_t)
3053iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3054 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3055{
3056#ifdef RT_ARCH_AMD64
3057 /* mov mem64, reg64 */
3058 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3059 pCodeBuf[off++] = 0x89;
3060 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3061 RT_NOREF(iGprTmp);
3062
3063#elif defined(RT_ARCH_ARM64)
3064 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3065 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3066
3067#else
3068# error "port me"
3069#endif
3070 return off;
3071}
3072
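/*
 * Illustrative sketch only (not part of the emitter API): on ARM64 a displacement
 * outside the range noted above means the caller must pass a scratch register,
 * e.g. with hypothetical register indexes idxRegValue, idxRegBase and idxRegTmp:
 *     off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegBase,
 *                                          0x12345, idxRegTmp);
 */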
3073
3074/**
3075 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3076 *
3077 * @note ARM64: Misaligned @a offDisp values and values not in the
3078 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3079 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3080 * does not heed this.
3081 */
3082DECL_FORCE_INLINE_THROW(uint32_t)
3083iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3084 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3085{
3086#ifdef RT_ARCH_AMD64
3087 /* mov mem32, reg32 */
3088 if (iGprSrc >= 8 || iGprBase >= 8)
3089 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3090 pCodeBuf[off++] = 0x89;
3091 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3092 RT_NOREF(iGprTmp);
3093
3094#elif defined(RT_ARCH_ARM64)
3095 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3096 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3097
3098#else
3099# error "port me"
3100#endif
3101 return off;
3102}
3103
3104
3105/**
3106 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3107 *
3108 * @note ARM64: Misaligned @a offDisp values and values not in the
3109 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3110 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3111 * does not heed this.
3112 */
3113DECL_FORCE_INLINE_THROW(uint32_t)
3114iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3115 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3116{
3117#ifdef RT_ARCH_AMD64
3118 /* mov mem16, reg16 */
3119 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3120 if (iGprSrc >= 8 || iGprBase >= 8)
3121 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3122 pCodeBuf[off++] = 0x89;
3123 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3124 RT_NOREF(iGprTmp);
3125
3126#elif defined(RT_ARCH_ARM64)
3127 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3128 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3129
3130#else
3131# error "port me"
3132#endif
3133 return off;
3134}
3135
3136
3137/**
3138 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3139 *
3140 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3141 * temporary register (@a iGprTmp) if @a iGprSrc and @a iGprBase are the
3142 * same. Will assert / throw if caller does not heed this.
3143 */
3144DECL_FORCE_INLINE_THROW(uint32_t)
3145iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3146 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3147{
3148#ifdef RT_ARCH_AMD64
3149 /* mov mem8, reg8 */
3150 if (iGprSrc >= 8 || iGprBase >= 8)
3151 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3152 else if (iGprSrc >= 4)
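 /* A plain REX prefix selects SPL/BPL/SIL/DIL for registers 4-7 instead of AH/CH/DH/BH. */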
3153 pCodeBuf[off++] = X86_OP_REX;
3154 pCodeBuf[off++] = 0x88;
3155 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3156 RT_NOREF(iGprTmp);
3157
3158#elif defined(RT_ARCH_ARM64)
3159 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3160 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3161
3162#else
3163# error "port me"
3164#endif
3165 return off;
3166}
3167
3168
3169/**
3170 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3171 *
3172 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0); on
3173 * AMD64 it depends on the immediate value.
3174 *
3175 * @note ARM64: Misaligned @a offDisp values and values not in the
3176 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3177 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3178 * does not heed this.
3179 */
3180DECL_FORCE_INLINE_THROW(uint32_t)
3181iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3182 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3183{
3184#ifdef RT_ARCH_AMD64
3185 if ((int32_t)uImm == (int64_t)uImm)
3186 {
3187 /* mov mem64, imm32 (sign-extended) */
3188 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3189 pCodeBuf[off++] = 0xc7;
3190 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3191 pCodeBuf[off++] = RT_BYTE1(uImm);
3192 pCodeBuf[off++] = RT_BYTE2(uImm);
3193 pCodeBuf[off++] = RT_BYTE3(uImm);
3194 pCodeBuf[off++] = RT_BYTE4(uImm);
3195 }
3196 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3197 {
3198 /* require temporary register. */
3199 if (iGprImmTmp == UINT8_MAX)
3200 iGprImmTmp = iGprTmp;
3201 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3202 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3203 }
3204 else
3205# ifdef IEM_WITH_THROW_CATCH
3206 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3207# else
3208 AssertReleaseFailedStmt(off = UINT32_MAX);
3209# endif
3210
3211#elif defined(RT_ARCH_ARM64)
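 /* For uImm == 0 the zero register can be stored directly, so no temporary is needed. */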
3212 if (uImm == 0)
3213 iGprImmTmp = ARMV8_A64_REG_XZR;
3214 else
3215 {
3216 Assert(iGprImmTmp < 31);
3217 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3218 }
3219 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3220
3221#else
3222# error "port me"
3223#endif
3224 return off;
3225}
3226
3227
3228/**
3229 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3230 *
3231 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3232 *
3233 * @note ARM64: Misaligned @a offDisp values and values not in the
3234 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3235 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3236 * does not heed this.
3237 */
3238DECL_FORCE_INLINE_THROW(uint32_t)
3239iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3240 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3241{
3242#ifdef RT_ARCH_AMD64
3243 /* mov mem32, imm32 */
3244 if (iGprBase >= 8)
3245 pCodeBuf[off++] = X86_OP_REX_B;
3246 pCodeBuf[off++] = 0xc7;
3247 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3248 pCodeBuf[off++] = RT_BYTE1(uImm);
3249 pCodeBuf[off++] = RT_BYTE2(uImm);
3250 pCodeBuf[off++] = RT_BYTE3(uImm);
3251 pCodeBuf[off++] = RT_BYTE4(uImm);
3252 RT_NOREF(iGprImmTmp, iGprTmp);
3253
3254#elif defined(RT_ARCH_ARM64)
3256 if (uImm == 0)
3257 iGprImmTmp = ARMV8_A64_REG_XZR;
3258 else
3259 {
3260 Assert(iGprImmTmp < 31);
3261 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3262 }
3263 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3264 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3265
3266#else
3267# error "port me"
3268#endif
3269 return off;
3270}
3271
3272
3273/**
3274 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3275 *
3276 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3277 *
3278 * @note ARM64: Misaligned @a offDisp values and values not in the
3279 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3280 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3281 * does not heed this.
3282 */
3283DECL_FORCE_INLINE_THROW(uint32_t)
3284iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3285 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3286{
3287#ifdef RT_ARCH_AMD64
3288 /* mov mem16, imm16 */
3289 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3290 if (iGprBase >= 8)
3291 pCodeBuf[off++] = X86_OP_REX_B;
3292 pCodeBuf[off++] = 0xc7;
3293 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3294 pCodeBuf[off++] = RT_BYTE1(uImm);
3295 pCodeBuf[off++] = RT_BYTE2(uImm);
3296 RT_NOREF(iGprImmTmp, iGprTmp);
3297
3298#elif defined(RT_ARCH_ARM64)
3299 if (uImm == 0)
3300 iGprImmTmp = ARMV8_A64_REG_XZR;
3301 else
3302 {
3303 Assert(iGprImmTmp < 31);
3304 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3305 }
3306 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3307 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3308
3309#else
3310# error "port me"
3311#endif
3312 return off;
3313}
3314
3315
3316/**
3317 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3318 *
3319 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3320 *
3321 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3322 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3323 * same. Will assert / throw if caller does not heed this.
3324 */
3325DECL_FORCE_INLINE_THROW(uint32_t)
3326iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3327 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3328{
3329#ifdef RT_ARCH_AMD64
3330 /* mov mem8, imm8 */
3331 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3332 if (iGprBase >= 8)
3333 pCodeBuf[off++] = X86_OP_REX_B;
3334 pCodeBuf[off++] = 0xc6;
3335 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3336 pCodeBuf[off++] = uImm;
3337 RT_NOREF(iGprImmTmp, iGprTmp);
3338
3339#elif defined(RT_ARCH_ARM64)
3340 if (uImm == 0)
3341 iGprImmTmp = ARMV8_A64_REG_XZR;
3342 else
3343 {
3344 Assert(iGprImmTmp < 31);
3345 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3346 }
3347 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3348 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3349
3350#else
3351# error "port me"
3352#endif
3353 return off;
3354}
3355
3356
3357#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3358/**
3359 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3360 *
3361 * @note ARM64: Misaligned @a offDisp values and values not in the
3362 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3363 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3364 * does not heed this.
3365 */
3366DECL_FORCE_INLINE_THROW(uint32_t)
3367iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3368 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3369{
3370#ifdef RT_ARCH_AMD64
3371 /* movdqu mem128, reg128 */
3372 pCodeBuf[off++] = 0xf3;
3373 if (iVecRegDst >= 8 || iGprBase >= 8)
3374 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3375 pCodeBuf[off++] = 0x0f;
3376 pCodeBuf[off++] = 0x7f;
3377 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3378 RT_NOREF(iGprTmp);
3379
3380#elif defined(RT_ARCH_ARM64)
3381 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3382 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3383
3384#else
3385# error "port me"
3386#endif
3387 return off;
3388}
3389
3390
3391/**
3392 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3393 */
3394DECL_INLINE_THROW(uint32_t)
3395iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3396{
3397#ifdef RT_ARCH_AMD64
3398 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3400
3401#elif defined(RT_ARCH_ARM64)
3402 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3403
3404#else
3405# error "port me"
3406#endif
3407 return off;
3408}
3409
3410
3411/**
3412 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3413 *
3414 * @note ARM64: Misaligned @a offDisp values and values not in the
3415 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3416 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3417 * does not heed this.
3418 */
3419DECL_FORCE_INLINE_THROW(uint32_t)
3420iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3421 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3422{
3423#ifdef RT_ARCH_AMD64
3424 /* vmovdqu mem256, reg256 */
3425 pCodeBuf[off++] = X86_OP_VEX3;
3426 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3427 | X86_OP_VEX3_BYTE1_X
3428 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3429 | UINT8_C(0x01);
3430 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3431 pCodeBuf[off++] = 0x7f;
3432 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3433 RT_NOREF(iGprTmp);
3434
3435#elif defined(RT_ARCH_ARM64)
3436 Assert(!(iVecRegDst & 0x1));
3437 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3438 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3439 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3440 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3441#else
3442# error "port me"
3443#endif
3444 return off;
3445}
3446
3447
3448/**
3449 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3450 */
3451DECL_INLINE_THROW(uint32_t)
3452iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3453{
3454#ifdef RT_ARCH_AMD64
3455 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3457
3458#elif defined(RT_ARCH_ARM64)
3459 Assert(!(iVecRegDst & 0x1));
3460 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3461 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3462 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3463 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3464
3465#else
3466# error "port me"
3467#endif
3468 return off;
3469}
3470#endif
3471
3472
3473
3474/*********************************************************************************************************************************
3475* Subtraction and Additions *
3476*********************************************************************************************************************************/
3477
3478/**
3479 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3480 * @note The AMD64 version sets flags.
3481 */
3482DECL_INLINE_THROW(uint32_t)
3483iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3484{
3485#if defined(RT_ARCH_AMD64)
3486 /* sub Gv,Ev */
3487 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3488 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3489 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3490 pbCodeBuf[off++] = 0x2b;
3491 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3492
3493#elif defined(RT_ARCH_ARM64)
3494 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3495 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3496
3497#else
3498# error "Port me"
3499#endif
3500 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3501 return off;
3502}
3503
3504
3505/**
3506 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3507 * @note The AMD64 version sets flags.
3508 */
3509DECL_FORCE_INLINE(uint32_t)
3510iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3511{
3512#if defined(RT_ARCH_AMD64)
3513 /* sub Gv,Ev */
3514 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3515 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3516 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3517 pCodeBuf[off++] = 0x2b;
3518 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3519
3520#elif defined(RT_ARCH_ARM64)
3521 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3522
3523#else
3524# error "Port me"
3525#endif
3526 return off;
3527}
3528
3529
3530/**
3531 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3532 * @note The AMD64 version sets flags.
3533 */
3534DECL_INLINE_THROW(uint32_t)
3535iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3536{
3537#if defined(RT_ARCH_AMD64)
3538 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3539#elif defined(RT_ARCH_ARM64)
3540 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3541#else
3542# error "Port me"
3543#endif
3544 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3545 return off;
3546}
3547
3548
3549/**
3550 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3551 *
3552 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3553 *
3554 * @note Larger constants will require a temporary register. Failing to specify
3555 * one when needed will trigger fatal assertion / throw.
3556 */
3557DECL_FORCE_INLINE_THROW(uint32_t)
3558iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3559 uint8_t iGprTmp = UINT8_MAX)
3560{
3561#ifdef RT_ARCH_AMD64
3562 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3563 if (iSubtrahend == 1)
3564 {
3565 /* dec r/m64 */
3566 pCodeBuf[off++] = 0xff;
3567 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3568 }
3569 else if (iSubtrahend == -1)
3570 {
3571 /* inc r/m64 */
3572 pCodeBuf[off++] = 0xff;
3573 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3574 }
3575 else if ((int8_t)iSubtrahend == iSubtrahend)
3576 {
3577 /* sub r/m64, imm8 */
3578 pCodeBuf[off++] = 0x83;
3579 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3580 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3581 }
3582 else if ((int32_t)iSubtrahend == iSubtrahend)
3583 {
3584 /* sub r/m64, imm32 */
3585 pCodeBuf[off++] = 0x81;
3586 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3587 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3588 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3589 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3590 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3591 }
3592 else if (iGprTmp != UINT8_MAX)
3593 {
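 /* Pass off - 1 so the immediate load overwrites the REX.W prefix already emitted above; this path emits its own prefixes. */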
3594 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3595 /* sub r/m64, r64 */
3596 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3597 pCodeBuf[off++] = 0x29;
3598 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3599 }
3600 else
3601# ifdef IEM_WITH_THROW_CATCH
3602 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3603# else
3604 AssertReleaseFailedStmt(off = UINT32_MAX);
3605# endif
3606
3607#elif defined(RT_ARCH_ARM64)
3608 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3609 if (uAbsSubtrahend < 4096)
3610 {
3611 if (iSubtrahend >= 0)
3612 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3613 else
3614 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3615 }
3616 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3617 {
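 /* The low 12 bits are clear, so the 12-bit immediate form shifted left by 12 can be used. */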
3618 if (iSubtrahend >= 0)
3619 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3620 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3621 else
3622 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3623 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3624 }
3625 else if (iGprTmp != UINT8_MAX)
3626 {
3627 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3628 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3629 }
3630 else
3631# ifdef IEM_WITH_THROW_CATCH
3632 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3633# else
3634 AssertReleaseFailedStmt(off = UINT32_MAX);
3635# endif
3636
3637#else
3638# error "Port me"
3639#endif
3640 return off;
3641}
3642
3643
3644/**
3645 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3646 *
3647 * @note Larger constants will require a temporary register. Failing to specify
3648 * one when needed will trigger fatal assertion / throw.
3649 */
3650DECL_INLINE_THROW(uint32_t)
3651iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3652 uint8_t iGprTmp = UINT8_MAX)
3653
3654{
3655#ifdef RT_ARCH_AMD64
3656 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3657#elif defined(RT_ARCH_ARM64)
3658 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3659#else
3660# error "Port me"
3661#endif
3662 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3663 return off;
3664}
3665
3666
3667/**
3668 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3669 *
3670 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3671 *
3672 * @note ARM64: Larger constants will require a temporary register. Failing to
3673 * specify one when needed will trigger fatal assertion / throw.
3674 */
3675DECL_FORCE_INLINE_THROW(uint32_t)
3676iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3677 uint8_t iGprTmp = UINT8_MAX)
3678{
3679#ifdef RT_ARCH_AMD64
3680 if (iGprDst >= 8)
3681 pCodeBuf[off++] = X86_OP_REX_B;
3682 if (iSubtrahend == 1)
3683 {
3684 /* dec r/m32 */
3685 pCodeBuf[off++] = 0xff;
3686 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3687 }
3688 else if (iSubtrahend == -1)
3689 {
3690 /* inc r/m32 */
3691 pCodeBuf[off++] = 0xff;
3692 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3693 }
3694 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3695 {
3696 /* sub r/m32, imm8 */
3697 pCodeBuf[off++] = 0x83;
3698 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3699 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3700 }
3701 else
3702 {
3703 /* sub r/m32, imm32 */
3704 pCodeBuf[off++] = 0x81;
3705 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3706 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3707 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3708 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3709 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3710 }
3711 RT_NOREF(iGprTmp);
3712
3713#elif defined(RT_ARCH_ARM64)
3714 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3715 if (uAbsSubtrahend < 4096)
3716 {
3717 if (iSubtrahend >= 0)
3718 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3719 else
3720 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3721 }
3722 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3723 {
3724 if (iSubtrahend >= 0)
3725 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3726 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3727 else
3728 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3729 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3730 }
3731 else if (iGprTmp != UINT8_MAX)
3732 {
3733 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3734 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3735 }
3736 else
3737# ifdef IEM_WITH_THROW_CATCH
3738 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3739# else
3740 AssertReleaseFailedStmt(off = UINT32_MAX);
3741# endif
3742
3743#else
3744# error "Port me"
3745#endif
3746 return off;
3747}
3748
3749
3750/**
3751 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3752 *
3753 * @note ARM64: Larger constants will require a temporary register. Failing to
3754 * specify one when needed will trigger fatal assertion / throw.
3755 */
3756DECL_INLINE_THROW(uint32_t)
3757iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3758 uint8_t iGprTmp = UINT8_MAX)
3759
3760{
3761#ifdef RT_ARCH_AMD64
3762 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3763#elif defined(RT_ARCH_ARM64)
3764 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3765#else
3766# error "Port me"
3767#endif
3768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3769 return off;
3770}
3771
3772
3773/**
3774 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3775 *
3776 * This will optimize using DEC/INC/whatever, and the ARM64 variant will not set
3777 * flags, so it is not suitable as a base for conditional jumps.
3778 *
3779 * @note AMD64: Will only update the lower 16 bits of the register.
3780 * @note ARM64: Will update the entire register.
3781 * @note ARM64: Larger constants will require a temporary register. Failing to
3782 * specify one when needed will trigger fatal assertion / throw.
3783 */
3784DECL_FORCE_INLINE_THROW(uint32_t)
3785iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3786 uint8_t iGprTmp = UINT8_MAX)
3787{
3788#ifdef RT_ARCH_AMD64
3789 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3790 if (iGprDst >= 8)
3791 pCodeBuf[off++] = X86_OP_REX_B;
3792 if (iSubtrahend == 1)
3793 {
3794 /* dec r/m16 */
3795 pCodeBuf[off++] = 0xff;
3796 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3797 }
3798 else if (iSubtrahend == -1)
3799 {
3800 /* inc r/m16 */
3801 pCodeBuf[off++] = 0xff;
3802 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3803 }
3804 else if ((int8_t)iSubtrahend == iSubtrahend)
3805 {
3806 /* sub r/m16, imm8 */
3807 pCodeBuf[off++] = 0x83;
3808 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3809 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3810 }
3811 else
3812 {
3813 /* sub r/m16, imm16 */
3814 pCodeBuf[off++] = 0x81;
3815 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3816 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3817 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3818 }
3819 RT_NOREF(iGprTmp);
3820
3821#elif defined(RT_ARCH_ARM64)
3822 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3823 if (uAbsSubtrahend < 4096)
3824 {
3825 if (iSubtrahend >= 0)
3826 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3827 else
3828 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3829 }
3830 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3831 {
3832 if (iSubtrahend >= 0)
3833 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3834 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3835 else
3836 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3837 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3838 }
3839 else if (iGprTmp != UINT8_MAX)
3840 {
3841 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3842 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3843 }
3844 else
3845# ifdef IEM_WITH_THROW_CATCH
3846 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3847# else
3848 AssertReleaseFailedStmt(off = UINT32_MAX);
3849# endif
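 /* Mask with 0xffff (imms=15, immr=0) so only the low 16 bits of the result are kept. */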
3850 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3851
3852#else
3853# error "Port me"
3854#endif
3855 return off;
3856}
3857
3858
3859/**
3860 * Emits adding a 64-bit GPR to another, storing the result in the first.
3861 * @note The AMD64 version sets flags.
3862 */
3863DECL_FORCE_INLINE(uint32_t)
3864iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3865{
3866#if defined(RT_ARCH_AMD64)
3867 /* add Gv,Ev */
3868 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3869 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3870 pCodeBuf[off++] = 0x03;
3871 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3872
3873#elif defined(RT_ARCH_ARM64)
3874 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3875
3876#else
3877# error "Port me"
3878#endif
3879 return off;
3880}
3881
3882
3883/**
3884 * Emits adding a 64-bit GPR to another, storing the result in the first.
3885 * @note The AMD64 version sets flags.
3886 */
3887DECL_INLINE_THROW(uint32_t)
3888iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3889{
3890#if defined(RT_ARCH_AMD64)
3891 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3892#elif defined(RT_ARCH_ARM64)
3893 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3894#else
3895# error "Port me"
3896#endif
3897 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3898 return off;
3899}
3900
3901
3902/**
3903 * Emits adding a 32-bit GPR to another, storing the result in the first.
3904 * @note The AMD64 version sets flags.
3905 */
3906DECL_FORCE_INLINE(uint32_t)
3907iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3908{
3909#if defined(RT_ARCH_AMD64)
3910 /* add Gv,Ev */
3911 if (iGprDst >= 8 || iGprAddend >= 8)
3912 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3913 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3914 pCodeBuf[off++] = 0x03;
3915 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3916
3917#elif defined(RT_ARCH_ARM64)
3918 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3919
3920#else
3921# error "Port me"
3922#endif
3923 return off;
3924}
3925
3926
3927/**
3928 * Emits adding a 32-bit GPR to another, storing the result in the first.
3929 * @note The AMD64 version sets flags.
3930 */
3931DECL_INLINE_THROW(uint32_t)
3932iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3933{
3934#if defined(RT_ARCH_AMD64)
3935 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3936#elif defined(RT_ARCH_ARM64)
3937 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3938#else
3939# error "Port me"
3940#endif
3941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3942 return off;
3943}
3944
3945
3946/**
3947 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3948 */
3949DECL_INLINE_THROW(uint32_t)
3950iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3951{
3952#if defined(RT_ARCH_AMD64)
3953 /* add or inc */
3954 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3955 if (iImm8 != 1)
3956 {
3957 pCodeBuf[off++] = 0x83;
3958 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3959 pCodeBuf[off++] = (uint8_t)iImm8;
3960 }
3961 else
3962 {
3963 pCodeBuf[off++] = 0xff;
3964 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3965 }
3966
3967#elif defined(RT_ARCH_ARM64)
3968 if (iImm8 >= 0)
3969 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3970 else
3971 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3972
3973#else
3974# error "Port me"
3975#endif
3976 return off;
3977}
3978
3979
3980/**
3981 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3982 */
3983DECL_INLINE_THROW(uint32_t)
3984iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3985{
3986#if defined(RT_ARCH_AMD64)
3987 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3988#elif defined(RT_ARCH_ARM64)
3989 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3990#else
3991# error "Port me"
3992#endif
3993 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3994 return off;
3995}
3996
3997
3998/**
3999 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4000 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4001 */
4002DECL_FORCE_INLINE(uint32_t)
4003iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4004{
4005#if defined(RT_ARCH_AMD64)
4006 /* add or inc */
4007 if (iGprDst >= 8)
4008 pCodeBuf[off++] = X86_OP_REX_B;
4009 if (iImm8 != 1)
4010 {
4011 pCodeBuf[off++] = 0x83;
4012 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4013 pCodeBuf[off++] = (uint8_t)iImm8;
4014 }
4015 else
4016 {
4017 pCodeBuf[off++] = 0xff;
4018 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4019 }
4020
4021#elif defined(RT_ARCH_ARM64)
4022 if (iImm8 >= 0)
4023 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4024 else
4025 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4026
4027#else
4028# error "Port me"
4029#endif
4030 return off;
4031}
4032
4033
4034/**
4035 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4036 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4037 */
4038DECL_INLINE_THROW(uint32_t)
4039iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4040{
4041#if defined(RT_ARCH_AMD64)
4042 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4043#elif defined(RT_ARCH_ARM64)
4044 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4045#else
4046# error "Port me"
4047#endif
4048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4049 return off;
4050}
4051
4052
4053/**
4054 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4055 *
4056 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4057 */
4058DECL_FORCE_INLINE_THROW(uint32_t)
4059iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4060{
4061#if defined(RT_ARCH_AMD64)
4062 if ((int8_t)iAddend == iAddend)
4063 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4064
4065 if ((int32_t)iAddend == iAddend)
4066 {
4067 /* add grp, imm32 */
4068 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4069 pCodeBuf[off++] = 0x81;
4070 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4071 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4072 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4073 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4074 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4075 }
4076 else if (iGprTmp != UINT8_MAX)
4077 {
4078 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4079
4080 /* add dst, tmpreg */
4081 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4082 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4083 pCodeBuf[off++] = 0x03;
4084 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4085 }
4086 else
4087# ifdef IEM_WITH_THROW_CATCH
4088 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4089# else
4090 AssertReleaseFailedStmt(off = UINT32_MAX);
4091# endif
4092
4093#elif defined(RT_ARCH_ARM64)
4094 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4095 if (uAbsAddend < 4096)
4096 {
4097 if (iAddend >= 0)
4098 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4099 else
4100 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4101 }
4102 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4103 {
4104 if (iAddend >= 0)
4105 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4106 true /*f64Bit*/, true /*fShift12*/);
4107 else
4108 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4109 true /*f64Bit*/, true /*fShift12*/);
4110 }
4111 else if (iGprTmp != UINT8_MAX)
4112 {
4113 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4114 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4115 }
4116 else
4117# ifdef IEM_WITH_THROW_CATCH
4118 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4119# else
4120 AssertReleaseFailedStmt(off = UINT32_MAX);
4121# endif
4122
4123#else
4124# error "Port me"
4125#endif
4126 return off;
4127}
4128
4129
4130/**
4131 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4132 */
4133DECL_INLINE_THROW(uint32_t)
4134iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4135{
4136#if defined(RT_ARCH_AMD64)
4137 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4138 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4139
4140 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4141 {
4142 /* add grp, imm32 */
4143 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4144 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4145 pbCodeBuf[off++] = 0x81;
4146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4147 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4148 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4149 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4150 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4151 }
4152 else
4153 {
4154 /* Best to use a temporary register to deal with this in the simplest way: */
4155 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4156
4157 /* add dst, tmpreg */
4158 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4159 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4160 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4161 pbCodeBuf[off++] = 0x03;
4162 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4163
4164 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4165 }
4166
4167#elif defined(RT_ARCH_ARM64)
4168 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4169 {
4170 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4171 if (iAddend >= 0)
4172 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
4173 else
4174 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
4175 }
4176 else
4177 {
4178 /* Use temporary register for the immediate. */
4179 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4180
4181 /* add gprdst, gprdst, tmpreg */
4182 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4183 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
4184
4185 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4186 }
4187
4188#else
4189# error "Port me"
4190#endif
4191 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4192 return off;
4193}
4194
4195
4196/**
4197 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4198 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4199 * @note For ARM64 the iAddend value must be in the range 0x000..0xfff,
4200 * or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
4201 * the lower 12 bits always zero). The negative ranges are also allowed,
4202 * making it behave like a subtraction. If the constant does not conform,
4203 * the function will assert / throw.
4204 */
4205DECL_FORCE_INLINE_THROW(uint32_t)
4206iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4207{
4208#if defined(RT_ARCH_AMD64)
4209 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4210 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4211
4212 /* add grp, imm32 */
4213 if (iGprDst >= 8)
4214 pCodeBuf[off++] = X86_OP_REX_B;
4215 pCodeBuf[off++] = 0x81;
4216 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4217 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4218 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4219 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4220 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4221
4222#elif defined(RT_ARCH_ARM64)
4223 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4224 if (uAbsAddend <= 0xfff)
4225 {
4226 if (iAddend >= 0)
4227 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4228 else
4229 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4230 }
4231 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4232 {
4233 if (iAddend >= 0)
4234 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4235 false /*f64Bit*/, true /*fShift12*/);
4236 else
4237 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4238 false /*f64Bit*/, true /*fShift12*/);
4239 }
4240 else
4241# ifdef IEM_WITH_THROW_CATCH
4242 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4243# else
4244 AssertReleaseFailedStmt(off = UINT32_MAX);
4245# endif
4246
4247#else
4248# error "Port me"
4249#endif
4250 return off;
4251}
4252
4253
4254/**
4255 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4256 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4257 */
4258DECL_INLINE_THROW(uint32_t)
4259iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4260{
4261#if defined(RT_ARCH_AMD64)
4262 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4263
4264#elif defined(RT_ARCH_ARM64)
4265 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4266 {
4267 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4268 if (iAddend >= 0)
4269 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
4270 else
4271 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
4272 }
4273 else
4274 {
4275 /* Use temporary register for the immediate. */
4276 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
4277
4278 /* add gprdst, gprdst, tmpreg */
4279 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4280 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4281
4282 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4283 }
4284
4285#else
4286# error "Port me"
4287#endif
4288 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4289 return off;
4290}
4291
4292
4293/**
4294 * Emits a 16-bit GPR add with a signed immediate addend.
4295 *
4296 * This will optimize using INC/DEC/whatever, and the ARM64 variant will not set
4297 * flags, so it is not suitable as a base for conditional jumps.
4298 *
4299 * @note AMD64: Will only update the lower 16 bits of the register.
4300 * @note ARM64: Will update the entire register.
4301 * @note ARM64: Larger constants will require a temporary register. Failing to
4302 * specify one when needed will trigger fatal assertion / throw.
4303 * @sa iemNativeEmitSubGpr16ImmEx
4304 */
4305DECL_FORCE_INLINE_THROW(uint32_t)
4306iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
4307 uint8_t iGprTmp = UINT8_MAX)
4308{
4309#ifdef RT_ARCH_AMD64
4310 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4311 if (iGprDst >= 8)
4312 pCodeBuf[off++] = X86_OP_REX_B;
4313 if (iAddend == 1)
4314 {
4315 /* inc r/m16 */
4316 pCodeBuf[off++] = 0xff;
4317 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4318 }
4319 else if (iAddend == -1)
4320 {
4321 /* dec r/m16 */
4322 pCodeBuf[off++] = 0xff;
4323 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4324 }
4325 else if ((int8_t)iAddend == iAddend)
4326 {
4327 /* add r/m16, imm8 */
4328 pCodeBuf[off++] = 0x83;
4329 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4330 pCodeBuf[off++] = (uint8_t)iAddend;
4331 }
4332 else
4333 {
4334 /* add r/m16, imm16 */
4335 pCodeBuf[off++] = 0x81;
4336 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4337 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4338 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4339 }
4340 RT_NOREF(iGprTmp);
4341
4342#elif defined(RT_ARCH_ARM64)
4343 uint32_t uAbsAddend = RT_ABS(iAddend);
4344 if (uAbsAddend < 4096)
4345 {
4346 if (iAddend >= 0)
4347 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4348 else
4349 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4350 }
4351 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4352 {
4353 if (iAddend >= 0)
4354 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4355 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4356 else
4357 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4358 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4359 }
4360 else if (iGprTmp != UINT8_MAX)
4361 {
4362 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
4363 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4364 }
4365 else
4366# ifdef IEM_WITH_THROW_CATCH
4367 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4368# else
4369 AssertReleaseFailedStmt(off = UINT32_MAX);
4370# endif
4371 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4372
4373#else
4374# error "Port me"
4375#endif
4376 return off;
4377}
4378
4379
4380
4381/**
4382 * Adds two 64-bit GPRs together, storing the result in a third register.
4383 */
4384DECL_FORCE_INLINE(uint32_t)
4385iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4386{
4387#ifdef RT_ARCH_AMD64
4388 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4389 {
4390 /** @todo consider LEA */
4391 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4392 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4393 }
4394 else
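 /* iGprDst overlaps one of the addends, so just add the other addend to it. */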
4395 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4396
4397#elif defined(RT_ARCH_ARM64)
4398 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4399
4400#else
4401# error "Port me!"
4402#endif
4403 return off;
4404}
4405
4406
4407
4408/**
4409 * Adds two 32-bit GPRs together, storing the result in a third register.
4410 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4411 */
4412DECL_FORCE_INLINE(uint32_t)
4413iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4414{
4415#ifdef RT_ARCH_AMD64
4416 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4417 {
4418 /** @todo consider LEA */
4419 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4420 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4421 }
4422 else
4423 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4424
4425#elif defined(RT_ARCH_ARM64)
4426 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4427
4428#else
4429# error "Port me!"
4430#endif
4431 return off;
4432}
4433
4434
4435/**
4436 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4437 * third register.
4438 *
4439 * @note The ARM64 version does not work for non-trivial constants if the
4440 * two registers are the same. Will assert / throw exception.
4441 */
4442DECL_FORCE_INLINE_THROW(uint32_t)
4443iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4444{
4445#ifdef RT_ARCH_AMD64
4446 /** @todo consider LEA */
4447 if ((int8_t)iImmAddend == iImmAddend)
4448 {
4449 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4450 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4451 }
4452 else
4453 {
4454 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4455 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4456 }
4457
4458#elif defined(RT_ARCH_ARM64)
4459 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4460 if (uAbsImmAddend < 4096)
4461 {
4462 if (iImmAddend >= 0)
4463 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4464 else
4465 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4466 }
4467 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4468 {
4469 if (iImmAddend >= 0)
4470 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4471 else
4472 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4473 }
4474 else if (iGprDst != iGprAddend)
4475 {
4476 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4477 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4478 }
4479 else
4480# ifdef IEM_WITH_THROW_CATCH
4481 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4482# else
4483 AssertReleaseFailedStmt(off = UINT32_MAX);
4484# endif
4485
4486#else
4487# error "Port me!"
4488#endif
4489 return off;
4490}
4491
4492
4493/**
4494 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4495 * third register.
4496 *
4497 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4498 *
4499 * @note The ARM64 version does not work for non-trivial constants if the
4500 * two registers are the same. Will assert / throw exception.
4501 */
4502DECL_FORCE_INLINE_THROW(uint32_t)
4503iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4504{
4505#ifdef RT_ARCH_AMD64
4506 /** @todo consider LEA */
4507 if ((int8_t)iImmAddend == iImmAddend)
4508 {
4509 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4510 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4511 }
4512 else
4513 {
4514 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4515 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4516 }
4517
4518#elif defined(RT_ARCH_ARM64)
4519 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4520 if (uAbsImmAddend < 4096)
4521 {
4522 if (iImmAddend >= 0)
4523 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4524 else
4525 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4526 }
4527 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4528 {
4529 if (iImmAddend >= 0)
4530 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4531 else
4532 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4533 }
4534 else if (iGprDst != iGprAddend)
4535 {
4536 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4537 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4538 }
4539 else
4540# ifdef IEM_WITH_THROW_CATCH
4541 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4542# else
4543 AssertReleaseFailedStmt(off = UINT32_MAX);
4544# endif
4545
4546#else
4547# error "Port me!"
4548#endif
4549 return off;
4550}
4551
4552
4553/*********************************************************************************************************************************
4554* Unary Operations *
4555*********************************************************************************************************************************/
4556
4557/**
4558 * Emits code for two's complement negation of a 64-bit GPR.
4559 */
4560DECL_FORCE_INLINE_THROW(uint32_t)
4561iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4562{
4563#if defined(RT_ARCH_AMD64)
4564 /* neg Ev */
4565 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4566 pCodeBuf[off++] = 0xf7;
4567 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4568
4569#elif defined(RT_ARCH_ARM64)
4570 /* sub dst, xzr, dst */
4571 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4572
4573#else
4574# error "Port me"
4575#endif
4576 return off;
4577}
4578
4579
4580/**
4581 * Emits code for two's complement negation of a 64-bit GPR.
4582 */
4583DECL_INLINE_THROW(uint32_t)
4584iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4585{
4586#if defined(RT_ARCH_AMD64)
4587 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4588#elif defined(RT_ARCH_ARM64)
4589 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4590#else
4591# error "Port me"
4592#endif
4593 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4594 return off;
4595}
4596
4597
4598/**
4599 * Emits code for two's complement negation of a 32-bit GPR.
4600 * @note Bits 32 thru 63 are set to zero.
4601 */
4602DECL_FORCE_INLINE_THROW(uint32_t)
4603iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4604{
4605#if defined(RT_ARCH_AMD64)
4606 /* neg Ev */
4607 if (iGprDst >= 8)
4608 pCodeBuf[off++] = X86_OP_REX_B;
4609 pCodeBuf[off++] = 0xf7;
4610 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4611
4612#elif defined(RT_ARCH_ARM64)
4613 /* sub dst, xzr, dst */
4614 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4615
4616#else
4617# error "Port me"
4618#endif
4619 return off;
4620}
4621
4622
4623/**
4624 * Emits code for two's complement negation of a 32-bit GPR.
4625 * @note Bits 32 thru 63 are set to zero.
4626 */
4627DECL_INLINE_THROW(uint32_t)
4628iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4629{
4630#if defined(RT_ARCH_AMD64)
4631 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4632#elif defined(RT_ARCH_ARM64)
4633 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4634#else
4635# error "Port me"
4636#endif
4637 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4638 return off;
4639}
4640
4641
4642
4643/*********************************************************************************************************************************
4644* Bit Operations *
4645*********************************************************************************************************************************/
4646
4647/**
4648 * Emits code for clearing bits 16 thru 63 in the GPR.
4649 */
4650DECL_INLINE_THROW(uint32_t)
4651iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4652{
4653#if defined(RT_ARCH_AMD64)
4654 /* movzx Gv,Ew */
4655 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4656 if (iGprDst >= 8)
4657 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4658 pbCodeBuf[off++] = 0x0f;
4659 pbCodeBuf[off++] = 0xb7;
4660 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4661
4662#elif defined(RT_ARCH_ARM64)
4663 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4664# if 1
4665 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4666# else
4667 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4668 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4669# endif
4670#else
4671# error "Port me"
4672#endif
4673 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4674 return off;
4675}
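/* Example: a register holding 0x0000000012345678 holds 0x0000000000005678
 * afterwards; both the MOVZX form (AMD64) and the UXTH form (ARM64) above
 * zero everything from bit 16 upwards. */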
4676
4677
4678/**
4679 * Emits code for AND'ing two 64-bit GPRs.
4680 *
4681 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4682 * and ARM64 hosts.
4683 */
4684DECL_FORCE_INLINE(uint32_t)
4685iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4686{
4687#if defined(RT_ARCH_AMD64)
4688 /* and Gv, Ev */
4689 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4690 pCodeBuf[off++] = 0x23;
4691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4692 RT_NOREF(fSetFlags);
4693
4694#elif defined(RT_ARCH_ARM64)
4695 if (!fSetFlags)
4696 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4697 else
4698 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4699
4700#else
4701# error "Port me"
4702#endif
4703 return off;
4704}
4705
4706
4707/**
4708 * Emits code for AND'ing two 64-bit GPRs.
4709 *
4710 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4711 * and ARM64 hosts.
4712 */
4713DECL_INLINE_THROW(uint32_t)
4714iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4715{
4716#if defined(RT_ARCH_AMD64)
4717 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4718#elif defined(RT_ARCH_ARM64)
4719 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4720#else
4721# error "Port me"
4722#endif
4723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4724 return off;
4725}
4726
4727
4728/**
4729 * Emits code for AND'ing two 32-bit GPRs.
4730 */
4731DECL_FORCE_INLINE(uint32_t)
4732iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4733{
4734#if defined(RT_ARCH_AMD64)
4735 /* and Gv, Ev */
4736 if (iGprDst >= 8 || iGprSrc >= 8)
4737 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4738 pCodeBuf[off++] = 0x23;
4739 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4740 RT_NOREF(fSetFlags);
4741
4742#elif defined(RT_ARCH_ARM64)
4743 if (!fSetFlags)
4744 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4745 else
4746 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4747
4748#else
4749# error "Port me"
4750#endif
4751 return off;
4752}
4753
4754
4755/**
4756 * Emits code for AND'ing two 32-bit GPRs.
4757 */
4758DECL_INLINE_THROW(uint32_t)
4759iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4760{
4761#if defined(RT_ARCH_AMD64)
4762 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4763#elif defined(RT_ARCH_ARM64)
4764 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4765#else
4766# error "Port me"
4767#endif
4768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4769 return off;
4770}
4771
4772
4773/**
4774 * Emits code for AND'ing a 64-bit GPR with a constant.
4775 *
4776 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4777 * and ARM64 hosts.
4778 */
4779DECL_INLINE_THROW(uint32_t)
4780iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4781{
4782#if defined(RT_ARCH_AMD64)
4783 if ((int64_t)uImm == (int8_t)uImm)
4784 {
4785 /* and Ev, imm8 */
4786 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4787 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4788 pbCodeBuf[off++] = 0x83;
4789 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4790 pbCodeBuf[off++] = (uint8_t)uImm;
4791 }
4792 else if ((int64_t)uImm == (int32_t)uImm)
4793 {
4794 /* and Ev, imm32 */
4795 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4796 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4797 pbCodeBuf[off++] = 0x81;
4798 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4799 pbCodeBuf[off++] = RT_BYTE1(uImm);
4800 pbCodeBuf[off++] = RT_BYTE2(uImm);
4801 pbCodeBuf[off++] = RT_BYTE3(uImm);
4802 pbCodeBuf[off++] = RT_BYTE4(uImm);
4803 }
4804 else
4805 {
4806 /* Use temporary register for the 64-bit immediate. */
4807 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4808 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4809 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4810 }
4811 RT_NOREF(fSetFlags);
4812
4813#elif defined(RT_ARCH_ARM64)
4814 uint32_t uImmR = 0;
4815 uint32_t uImmNandS = 0;
4816 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4817 {
4818 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4819 if (!fSetFlags)
4820 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4821 else
4822 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4823 }
4824 else
4825 {
4826 /* Use temporary register for the 64-bit immediate. */
4827 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4828 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4829 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4830 }
4831
4832#else
4833# error "Port me"
4834#endif
4835 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4836 return off;
4837}
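/* Note on the ARM64 paths above: AND/ANDS immediates must be "logical
 * immediates", i.e. a rotated pattern of contiguous ones repeated across the
 * register.  For instance 0x000000000000ffff or 0x0f0f0f0f0f0f0f0f should
 * encode fine, while an arbitrary value like 0x0123456789abcdef does not and
 * falls back to loading the mask into a temporary register (illustrative
 * examples, not an exhaustive rule). */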
4838
4839
4840/**
4841 * Emits code for AND'ing a 32-bit GPR with a constant.
4842 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4843 * @note For ARM64 this only supports @a uImm values that can be expressed using
4844 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4845 * make sure this is possible!
4846 */
4847DECL_FORCE_INLINE_THROW(uint32_t)
4848iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4849{
4850#if defined(RT_ARCH_AMD64)
4851 /* and Ev, imm */
4852 if (iGprDst >= 8)
4853 pCodeBuf[off++] = X86_OP_REX_B;
4854 if ((int32_t)uImm == (int8_t)uImm)
4855 {
4856 pCodeBuf[off++] = 0x83;
4857 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4858 pCodeBuf[off++] = (uint8_t)uImm;
4859 }
4860 else
4861 {
4862 pCodeBuf[off++] = 0x81;
4863 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4864 pCodeBuf[off++] = RT_BYTE1(uImm);
4865 pCodeBuf[off++] = RT_BYTE2(uImm);
4866 pCodeBuf[off++] = RT_BYTE3(uImm);
4867 pCodeBuf[off++] = RT_BYTE4(uImm);
4868 }
4869 RT_NOREF(fSetFlags);
4870
4871#elif defined(RT_ARCH_ARM64)
4872 uint32_t uImmR = 0;
4873 uint32_t uImmNandS = 0;
4874 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4875 {
4876 if (!fSetFlags)
4877 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4878 else
4879 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4880 }
4881 else
4882# ifdef IEM_WITH_THROW_CATCH
4883 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4884# else
4885 AssertReleaseFailedStmt(off = UINT32_MAX);
4886# endif
4887
4888#else
4889# error "Port me"
4890#endif
4891 return off;
4892}
4893
4894
4895/**
4896 * Emits code for AND'ing a 32-bit GPR with a constant.
4897 *
4898 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4899 */
4900DECL_INLINE_THROW(uint32_t)
4901iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4902{
4903#if defined(RT_ARCH_AMD64)
4904 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4905
4906#elif defined(RT_ARCH_ARM64)
4907 uint32_t uImmR = 0;
4908 uint32_t uImmNandS = 0;
4909 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4910 {
4911 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4912 if (!fSetFlags)
4913 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4914 else
4915 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4916 }
4917 else
4918 {
4919 /* Use temporary register for the 64-bit immediate. */
4920 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4921 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4922 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4923 }
4924
4925#else
4926# error "Port me"
4927#endif
4928 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4929 return off;
4930}
4931
4932
4933/**
4934 * Emits code for AND'ing a 64-bit GPR with a constant.
4935 *
4936 * @note For ARM64, complicated immediates without an AND/ANDS compatible
4937 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
4938 * are the same.
4939 */
4940DECL_FORCE_INLINE_THROW(uint32_t)
4941iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4942 bool fSetFlags = false)
4943{
4944#if defined(RT_ARCH_AMD64)
4945 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4946 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4947 RT_NOREF(fSetFlags);
4948
4949#elif defined(RT_ARCH_ARM64)
4950 uint32_t uImmR = 0;
4951 uint32_t uImmNandS = 0;
4952 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4953 {
4954 if (!fSetFlags)
4955 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4956 else
4957 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4958 }
4959 else if (iGprDst != iGprSrc)
4960 {
4961 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4962 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4963 }
4964 else
4965# ifdef IEM_WITH_THROW_CATCH
4966 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4967# else
4968 AssertReleaseFailedStmt(off = UINT32_MAX);
4969# endif
4970
4971#else
4972# error "Port me"
4973#endif
4974 return off;
4975}
4976
4977/**
4978 * Emits code for AND'ing a 32-bit GPR with a constant.
4979 *
4980 * @note For ARM64, complicated immediates without an AND/ANDS compatible
4981 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
4982 * are the same.
4983 *
4984 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4985 */
4986DECL_FORCE_INLINE_THROW(uint32_t)
4987iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4988 bool fSetFlags = false)
4989{
4990#if defined(RT_ARCH_AMD64)
4991 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4992 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4993 RT_NOREF(fSetFlags);
4994
4995#elif defined(RT_ARCH_ARM64)
4996 uint32_t uImmR = 0;
4997 uint32_t uImmNandS = 0;
4998 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4999 {
5000 if (!fSetFlags)
5001 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5002 else
5003 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5004 }
5005 else if (iGprDst != iGprSrc)
5006 {
5007 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5008 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5009 }
5010 else
5011# ifdef IEM_WITH_THROW_CATCH
5012 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5013# else
5014 AssertReleaseFailedStmt(off = UINT32_MAX);
5015# endif
5016
5017#else
5018# error "Port me"
5019#endif
5020 return off;
5021}
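/* Note: in the two emitters above, the iGprDst == iGprSrc case with an
 * immediate that has no AND/ANDS encoding has to assert / longjmp on ARM64,
 * as there is no scratch register available to materialize the immediate in.
 * Callers that cannot guarantee an encodable mask should therefore pass
 * distinct destination and source registers. */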
5022
5023
5024/**
5025 * Emits code for OR'ing two 64-bit GPRs.
5026 */
5027DECL_FORCE_INLINE(uint32_t)
5028iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5029{
5030#if defined(RT_ARCH_AMD64)
5031 /* or Gv, Ev */
5032 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5033 pCodeBuf[off++] = 0x0b;
5034 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5035
5036#elif defined(RT_ARCH_ARM64)
5037 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5038
5039#else
5040# error "Port me"
5041#endif
5042 return off;
5043}
5044
5045
5046/**
5047 * Emits code for OR'ing two 64-bit GPRs.
5048 */
5049DECL_INLINE_THROW(uint32_t)
5050iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5051{
5052#if defined(RT_ARCH_AMD64)
5053 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5054#elif defined(RT_ARCH_ARM64)
5055 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5056#else
5057# error "Port me"
5058#endif
5059 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5060 return off;
5061}
5062
5063
5064/**
5065 * Emits code for OR'ing two 32-bit GPRs.
5066 * @note Bits 63:32 of the destination GPR will be cleared.
5067 */
5068DECL_FORCE_INLINE(uint32_t)
5069iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5070{
5071#if defined(RT_ARCH_AMD64)
5072 /* or Gv, Ev */
5073 if (iGprDst >= 8 || iGprSrc >= 8)
5074 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5075 pCodeBuf[off++] = 0x0b;
5076 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5077
5078#elif defined(RT_ARCH_ARM64)
5079 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5080
5081#else
5082# error "Port me"
5083#endif
5084 return off;
5085}
5086
5087
5088/**
5089 * Emits code for OR'ing two 32-bit GPRs.
5090 * @note Bits 63:32 of the destination GPR will be cleared.
5091 */
5092DECL_INLINE_THROW(uint32_t)
5093iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5094{
5095#if defined(RT_ARCH_AMD64)
5096 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5097#elif defined(RT_ARCH_ARM64)
5098 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5099#else
5100# error "Port me"
5101#endif
5102 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5103 return off;
5104}
5105
5106
5107/**
5108 * Emits code for OR'ing a 64-bit GPR with a constant.
5109 */
5110DECL_INLINE_THROW(uint32_t)
5111iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5112{
5113#if defined(RT_ARCH_AMD64)
5114 if ((int64_t)uImm == (int8_t)uImm)
5115 {
5116 /* or Ev, imm8 */
5117 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5118 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5119 pbCodeBuf[off++] = 0x83;
5120 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5121 pbCodeBuf[off++] = (uint8_t)uImm;
5122 }
5123 else if ((int64_t)uImm == (int32_t)uImm)
5124 {
5125 /* or Ev, imm32 */
5126 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5127 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5128 pbCodeBuf[off++] = 0x81;
5129 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5130 pbCodeBuf[off++] = RT_BYTE1(uImm);
5131 pbCodeBuf[off++] = RT_BYTE2(uImm);
5132 pbCodeBuf[off++] = RT_BYTE3(uImm);
5133 pbCodeBuf[off++] = RT_BYTE4(uImm);
5134 }
5135 else
5136 {
5137 /* Use temporary register for the 64-bit immediate. */
5138 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5139 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5140 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5141 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5142 }
5143
5144#elif defined(RT_ARCH_ARM64)
5145 uint32_t uImmR = 0;
5146 uint32_t uImmNandS = 0;
5147 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5148 {
5149 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5150 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5151 }
5152 else
5153 {
5154 /* Use temporary register for the 64-bit immediate. */
5155 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5156 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5157 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5158 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5159 }
5160
5161#else
5162# error "Port me"
5163#endif
5164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5165 return off;
5166}
5167
5168
5169/**
5170 * Emits code for OR'ing a 32-bit GPR with a constant.
5171 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5172 * @note For ARM64 this only supports @a uImm values that can be expressed using
5173 * the two 6-bit immediates of the OR instructions. The caller must make
5174 * sure this is possible!
5175 */
5176DECL_FORCE_INLINE_THROW(uint32_t)
5177iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5178{
5179#if defined(RT_ARCH_AMD64)
5180 /* or Ev, imm */
5181 if (iGprDst >= 8)
5182 pCodeBuf[off++] = X86_OP_REX_B;
5183 if ((int32_t)uImm == (int8_t)uImm)
5184 {
5185 pCodeBuf[off++] = 0x83;
5186 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5187 pCodeBuf[off++] = (uint8_t)uImm;
5188 }
5189 else
5190 {
5191 pCodeBuf[off++] = 0x81;
5192 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5193 pCodeBuf[off++] = RT_BYTE1(uImm);
5194 pCodeBuf[off++] = RT_BYTE2(uImm);
5195 pCodeBuf[off++] = RT_BYTE3(uImm);
5196 pCodeBuf[off++] = RT_BYTE4(uImm);
5197 }
5198
5199#elif defined(RT_ARCH_ARM64)
5200 uint32_t uImmR = 0;
5201 uint32_t uImmNandS = 0;
5202 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5203 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5204 else
5205# ifdef IEM_WITH_THROW_CATCH
5206 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5207# else
5208 AssertReleaseFailedStmt(off = UINT32_MAX);
5209# endif
5210
5211#else
5212# error "Port me"
5213#endif
5214 return off;
5215}
5216
5217
5218/**
5219 * Emits code for OR'ing a 32-bit GPR with a constant.
5220 *
5221 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5222 */
5223DECL_INLINE_THROW(uint32_t)
5224iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5225{
5226#if defined(RT_ARCH_AMD64)
5227 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5228
5229#elif defined(RT_ARCH_ARM64)
5230 uint32_t uImmR = 0;
5231 uint32_t uImmNandS = 0;
5232 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5233 {
5234 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5235 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5236 }
5237 else
5238 {
5239 /* Use temporary register for the 64-bit immediate. */
5240 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5241 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5242 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5243 }
5244
5245#else
5246# error "Port me"
5247#endif
5248 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5249 return off;
5250}
5251
5252
5253
5254/**
5255 * ORs two 64-bit GPRs together, storing the result in a third register.
5256 */
5257DECL_FORCE_INLINE(uint32_t)
5258iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5259{
5260#ifdef RT_ARCH_AMD64
5261 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5262 {
5263 /** @todo consider LEA */
5264 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5265 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5266 }
5267 else
5268 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5269
5270#elif defined(RT_ARCH_ARM64)
5271 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5272
5273#else
5274# error "Port me!"
5275#endif
5276 return off;
5277}
5278
5279
5280
5281/**
5282 * ORs two 32-bit GPRs together, storing the result in a third register.
5283 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5284 */
5285DECL_FORCE_INLINE(uint32_t)
5286iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5287{
5288#ifdef RT_ARCH_AMD64
5289 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5290 {
5291 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5292 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5293 }
5294 else
5295 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5296
5297#elif defined(RT_ARCH_ARM64)
5298 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5299
5300#else
5301# error "Port me!"
5302#endif
5303 return off;
5304}
5305
5306
5307/**
5308 * Emits code for XOR'ing two 64-bit GPRs.
5309 */
5310DECL_INLINE_THROW(uint32_t)
5311iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5312{
5313#if defined(RT_ARCH_AMD64)
5314 /* xor Gv, Ev */
5315 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5316 pCodeBuf[off++] = 0x33;
5317 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5318
5319#elif defined(RT_ARCH_ARM64)
5320 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5321
5322#else
5323# error "Port me"
5324#endif
5325 return off;
5326}
5327
5328
5329/**
5330 * Emits code for XOR'ing two 64-bit GPRs.
5331 */
5332DECL_INLINE_THROW(uint32_t)
5333iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5334{
5335#if defined(RT_ARCH_AMD64)
5336 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5337#elif defined(RT_ARCH_ARM64)
5338 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5339#else
5340# error "Port me"
5341#endif
5342 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5343 return off;
5344}
5345
5346
5347/**
5348 * Emits code for XOR'ing two 32-bit GPRs.
5349 */
5350DECL_INLINE_THROW(uint32_t)
5351iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5352{
5353#if defined(RT_ARCH_AMD64)
5354 /* xor Gv, Ev */
5355 if (iGprDst >= 8 || iGprSrc >= 8)
5356 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5357 pCodeBuf[off++] = 0x33;
5358 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5359
5360#elif defined(RT_ARCH_ARM64)
5361 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5362
5363#else
5364# error "Port me"
5365#endif
5366 return off;
5367}
5368
5369
5370/**
5371 * Emits code for XOR'ing two 32-bit GPRs.
5372 */
5373DECL_INLINE_THROW(uint32_t)
5374iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5375{
5376#if defined(RT_ARCH_AMD64)
5377 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5378#elif defined(RT_ARCH_ARM64)
5379 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5380#else
5381# error "Port me"
5382#endif
5383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5384 return off;
5385}
5386
5387
5388/**
5389 * Emits code for XOR'ing a 32-bit GPR with a constant.
5390 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5391 * @note For ARM64 this only supports @a uImm values that can be expressed using
5392 * the two 6-bit immediates of the EOR instructions. The caller must make
5393 * sure this is possible!
5394 */
5395DECL_FORCE_INLINE_THROW(uint32_t)
5396iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5397{
5398#if defined(RT_ARCH_AMD64)
5399 /* xor Ev, imm */
5400 if (iGprDst >= 8)
5401 pCodeBuf[off++] = X86_OP_REX_B;
5402 if ((int32_t)uImm == (int8_t)uImm)
5403 {
5404 pCodeBuf[off++] = 0x83;
5405 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5406 pCodeBuf[off++] = (uint8_t)uImm;
5407 }
5408 else
5409 {
5410 pCodeBuf[off++] = 0x81;
5411 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5412 pCodeBuf[off++] = RT_BYTE1(uImm);
5413 pCodeBuf[off++] = RT_BYTE2(uImm);
5414 pCodeBuf[off++] = RT_BYTE3(uImm);
5415 pCodeBuf[off++] = RT_BYTE4(uImm);
5416 }
5417
5418#elif defined(RT_ARCH_ARM64)
5419 uint32_t uImmR = 0;
5420 uint32_t uImmNandS = 0;
5421 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5422 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5423 else
5424# ifdef IEM_WITH_THROW_CATCH
5425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5426# else
5427 AssertReleaseFailedStmt(off = UINT32_MAX);
5428# endif
5429
5430#else
5431# error "Port me"
5432#endif
5433 return off;
5434}
5435
5436
5437/**
5438 * Emits code for XOR'ing a 32-bit GPR with a constant.
5439 */
5440DECL_INLINE_THROW(uint32_t)
5441iemNativeEmitXorGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5442{
5443#if defined(RT_ARCH_AMD64)
5444 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5445#elif defined(RT_ARCH_ARM64)
5446 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, uImm);
5447#else
5448# error "Port me"
5449#endif
5450 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5451 return off;
5452}
5453
5454
5455/*********************************************************************************************************************************
5456* Shifting *
5457*********************************************************************************************************************************/
5458
5459/**
5460 * Emits code for shifting a GPR a fixed number of bits to the left.
5461 */
5462DECL_FORCE_INLINE(uint32_t)
5463iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5464{
5465 Assert(cShift > 0 && cShift < 64);
5466
5467#if defined(RT_ARCH_AMD64)
5468 /* shl dst, cShift */
5469 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5470 if (cShift != 1)
5471 {
5472 pCodeBuf[off++] = 0xc1;
5473 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5474 pCodeBuf[off++] = cShift;
5475 }
5476 else
5477 {
5478 pCodeBuf[off++] = 0xd1;
5479 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5480 }
5481
5482#elif defined(RT_ARCH_ARM64)
5483 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5484
5485#else
5486# error "Port me"
5487#endif
5488 return off;
5489}
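/* Encoding note for the AMD64 path above: 0xC1 /4 is the shift-by-imm8 form
 * and 0xD1 /4 the shift-by-1 form; picking the latter for cShift == 1 simply
 * saves the immediate byte.  The same pattern is used by the other shift and
 * rotate emitters below. */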
5490
5491
5492/**
5493 * Emits code for shifting a GPR a fixed number of bits to the left.
5494 */
5495DECL_INLINE_THROW(uint32_t)
5496iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5497{
5498#if defined(RT_ARCH_AMD64)
5499 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5500#elif defined(RT_ARCH_ARM64)
5501 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5502#else
5503# error "Port me"
5504#endif
5505 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5506 return off;
5507}
5508
5509
5510/**
5511 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5512 */
5513DECL_FORCE_INLINE(uint32_t)
5514iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5515{
5516 Assert(cShift > 0 && cShift < 32);
5517
5518#if defined(RT_ARCH_AMD64)
5519 /* shl dst, cShift */
5520 if (iGprDst >= 8)
5521 pCodeBuf[off++] = X86_OP_REX_B;
5522 if (cShift != 1)
5523 {
5524 pCodeBuf[off++] = 0xc1;
5525 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5526 pCodeBuf[off++] = cShift;
5527 }
5528 else
5529 {
5530 pCodeBuf[off++] = 0xd1;
5531 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5532 }
5533
5534#elif defined(RT_ARCH_ARM64)
5535 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5536
5537#else
5538# error "Port me"
5539#endif
5540 return off;
5541}
5542
5543
5544/**
5545 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5546 */
5547DECL_INLINE_THROW(uint32_t)
5548iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5549{
5550#if defined(RT_ARCH_AMD64)
5551 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5552#elif defined(RT_ARCH_ARM64)
5553 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5554#else
5555# error "Port me"
5556#endif
5557 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5558 return off;
5559}
5560
5561
5562/**
5563 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5564 */
5565DECL_FORCE_INLINE(uint32_t)
5566iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5567{
5568 Assert(cShift > 0 && cShift < 64);
5569
5570#if defined(RT_ARCH_AMD64)
5571 /* shr dst, cShift */
5572 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5573 if (cShift != 1)
5574 {
5575 pCodeBuf[off++] = 0xc1;
5576 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5577 pCodeBuf[off++] = cShift;
5578 }
5579 else
5580 {
5581 pCodeBuf[off++] = 0xd1;
5582 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5583 }
5584
5585#elif defined(RT_ARCH_ARM64)
5586 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5587
5588#else
5589# error "Port me"
5590#endif
5591 return off;
5592}
5593
5594
5595/**
5596 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5597 */
5598DECL_INLINE_THROW(uint32_t)
5599iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5600{
5601#if defined(RT_ARCH_AMD64)
5602 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5603#elif defined(RT_ARCH_ARM64)
5604 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5605#else
5606# error "Port me"
5607#endif
5608 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5609 return off;
5610}
5611
5612
5613/**
5614 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5615 * right.
5616 */
5617DECL_FORCE_INLINE(uint32_t)
5618iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5619{
5620 Assert(cShift > 0 && cShift < 32);
5621
5622#if defined(RT_ARCH_AMD64)
5623 /* shr dst, cShift */
5624 if (iGprDst >= 8)
5625 pCodeBuf[off++] = X86_OP_REX_B;
5626 if (cShift != 1)
5627 {
5628 pCodeBuf[off++] = 0xc1;
5629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5630 pCodeBuf[off++] = cShift;
5631 }
5632 else
5633 {
5634 pCodeBuf[off++] = 0xd1;
5635 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5636 }
5637
5638#elif defined(RT_ARCH_ARM64)
5639 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5640
5641#else
5642# error "Port me"
5643#endif
5644 return off;
5645}
5646
5647
5648/**
5649 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5650 * right.
5651 */
5652DECL_INLINE_THROW(uint32_t)
5653iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5654{
5655#if defined(RT_ARCH_AMD64)
5656 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5657#elif defined(RT_ARCH_ARM64)
5658 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5659#else
5660# error "Port me"
5661#endif
5662 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5663 return off;
5664}
5665
5666
5667/**
5668 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5669 * right and assigning it to a different GPR.
5670 */
5671DECL_INLINE_THROW(uint32_t)
5672iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5673{
5674 Assert(cShift > 0); Assert(cShift < 32);
5675#if defined(RT_ARCH_AMD64)
5676 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5677 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5678
5679#elif defined(RT_ARCH_ARM64)
5680 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5681
5682#else
5683# error "Port me"
5684#endif
5685 return off;
5686}
5687
5688
5689/**
5690 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5691 */
5692DECL_FORCE_INLINE(uint32_t)
5693iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5694{
5695 Assert(cShift > 0 && cShift < 64);
5696
5697#if defined(RT_ARCH_AMD64)
5698 /* sar dst, cShift */
5699 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5700 if (cShift != 1)
5701 {
5702 pCodeBuf[off++] = 0xc1;
5703 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5704 pCodeBuf[off++] = cShift;
5705 }
5706 else
5707 {
5708 pCodeBuf[off++] = 0xd1;
5709 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5710 }
5711
5712#elif defined(RT_ARCH_ARM64)
5713 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);
5714
5715#else
5716# error "Port me"
5717#endif
5718 return off;
5719}
5720
5721
5722/**
5723 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5724 */
5725DECL_INLINE_THROW(uint32_t)
5726iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5727{
5728#if defined(RT_ARCH_AMD64)
5729 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5730#elif defined(RT_ARCH_ARM64)
5731 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5732#else
5733# error "Port me"
5734#endif
5735 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5736 return off;
5737}
5738
5739
5740/**
5741 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5742 */
5743DECL_FORCE_INLINE(uint32_t)
5744iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5745{
5746 Assert(cShift > 0 && cShift < 32);
5747
5748#if defined(RT_ARCH_AMD64)
5749 /* sar dst, cShift */
5750 if (iGprDst >= 8)
5751 pCodeBuf[off++] = X86_OP_REX_B;
5752 if (cShift != 1)
5753 {
5754 pCodeBuf[off++] = 0xc1;
5755 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5756 pCodeBuf[off++] = cShift;
5757 }
5758 else
5759 {
5760 pCodeBuf[off++] = 0xd1;
5761 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5762 }
5763
5764#elif defined(RT_ARCH_ARM64)
5765 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5766
5767#else
5768# error "Port me"
5769#endif
5770 return off;
5771}
5772
5773
5774/**
5775 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5776 */
5777DECL_INLINE_THROW(uint32_t)
5778iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5779{
5780#if defined(RT_ARCH_AMD64)
5781 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5782#elif defined(RT_ARCH_ARM64)
5783 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5784#else
5785# error "Port me"
5786#endif
5787 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5788 return off;
5789}
5790
5791
5792/**
5793 * Emits code for rotating a GPR a fixed number of bits to the left.
5794 */
5795DECL_FORCE_INLINE(uint32_t)
5796iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5797{
5798 Assert(cShift > 0 && cShift < 64);
5799
5800#if defined(RT_ARCH_AMD64)
5801 /* rol dst, cShift */
5802 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5803 if (cShift != 1)
5804 {
5805 pCodeBuf[off++] = 0xc1;
5806 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5807 pCodeBuf[off++] = cShift;
5808 }
5809 else
5810 {
5811 pCodeBuf[off++] = 0xd1;
5812 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5813 }
5814
5815#elif defined(RT_ARCH_ARM64)
5816 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5817
5818#else
5819# error "Port me"
5820#endif
5821 return off;
5822}
5823
5824
5825#if defined(RT_ARCH_AMD64)
5826/**
5827 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5828 */
5829DECL_FORCE_INLINE(uint32_t)
5830iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5831{
5832 Assert(cShift > 0 && cShift < 32);
5833
5834 /* rcl dst, cShift */
5835 if (iGprDst >= 8)
5836 pCodeBuf[off++] = X86_OP_REX_B;
5837 if (cShift != 1)
5838 {
5839 pCodeBuf[off++] = 0xc1;
5840 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5841 pCodeBuf[off++] = cShift;
5842 }
5843 else
5844 {
5845 pCodeBuf[off++] = 0xd1;
5846 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5847 }
5848
5849 return off;
5850}
5851#endif /* RT_ARCH_AMD64 */
5852
5853
5854
5855/**
5856 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5857 * @note Bits 63:32 of the destination GPR will be cleared.
5858 */
5859DECL_FORCE_INLINE(uint32_t)
5860iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5861{
5862#if defined(RT_ARCH_AMD64)
5863 /*
5864 * There is no bswap r16 on x86 (the encoding exists but does not work).
5865 * So just use a rol (gcc -O2 is doing that).
5866 *
5867 * rol r16, 0x8
5868 */
5869 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5870 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5871 if (iGpr >= 8)
5872 pbCodeBuf[off++] = X86_OP_REX_B;
5873 pbCodeBuf[off++] = 0xc1;
5874 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5875 pbCodeBuf[off++] = 0x08;
5876#elif defined(RT_ARCH_ARM64)
5877 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5878
5879 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5880#else
5881# error "Port me"
5882#endif
5883
5884 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5885 return off;
5886}
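/* Example: with 0x1234 in the low 16 bits the result holds 0x3412 there; the
 * rotate-by-8 of the low word (AMD64) and REV16 (ARM64) swap the two bytes. */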
5887
5888
5889/**
5890 * Emits code for reversing the byte order in a 32-bit GPR.
5891 * @note Bits 63:32 of the destination GPR will be cleared.
5892 */
5893DECL_FORCE_INLINE(uint32_t)
5894iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5895{
5896#if defined(RT_ARCH_AMD64)
5897 /* bswap r32 */
5898 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5899
5900 if (iGpr >= 8)
5901 pbCodeBuf[off++] = X86_OP_REX_B;
5902 pbCodeBuf[off++] = 0x0f;
5903 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5904#elif defined(RT_ARCH_ARM64)
5905 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5906
5907 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5908#else
5909# error "Port me"
5910#endif
5911
5912 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5913 return off;
5914}
5915
5916
5917/**
5918 * Emits code for reversing the byte order in a 64-bit GPR.
5919 */
5920DECL_FORCE_INLINE(uint32_t)
5921iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5922{
5923#if defined(RT_ARCH_AMD64)
5924 /* bswap r64 */
5925 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5926
5927 if (iGpr >= 8)
5928 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5929 else
5930 pbCodeBuf[off++] = X86_OP_REX_W;
5931 pbCodeBuf[off++] = 0x0f;
5932 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5933#elif defined(RT_ARCH_ARM64)
5934 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5935
5936 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5937#else
5938# error "Port me"
5939#endif
5940
5941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5942 return off;
5943}
5944
5945
5946/*********************************************************************************************************************************
5947* Compare and Testing *
5948*********************************************************************************************************************************/
5949
5950
5951#ifdef RT_ARCH_ARM64
5952/**
5953 * Emits an ARM64 compare instruction.
5954 */
5955DECL_INLINE_THROW(uint32_t)
5956iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5957 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5958{
5959 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5960 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5961 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5962 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5963 return off;
5964}
5965#endif
5966
5967
5968/**
5969 * Emits a compare of two 64-bit GPRs, setting the status flags for use
5970 * with conditional instructions.
5971 */
5972DECL_FORCE_INLINE(uint32_t)
5973iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5974{
5975#ifdef RT_ARCH_AMD64
5976 /* cmp Gv, Ev */
5977 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5978 pCodeBuf[off++] = 0x3b;
5979 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5980
5981#elif defined(RT_ARCH_ARM64)
5982 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
5983
5984#else
5985# error "Port me!"
5986#endif
5987 return off;
5988}
5989
5990
5991/**
5992 * Emits a compare of two 64-bit GPRs, setting the status flags for use
5993 * with conditional instructions.
5994 */
5995DECL_INLINE_THROW(uint32_t)
5996iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5997{
5998#ifdef RT_ARCH_AMD64
5999 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6000#elif defined(RT_ARCH_ARM64)
6001 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6002#else
6003# error "Port me!"
6004#endif
6005 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6006 return off;
6007}
6008
6009
6010/**
6011 * Emits a compare of two 32-bit GPRs, setting the status flags for use
6012 * with conditional instructions.
6013 */
6014DECL_FORCE_INLINE(uint32_t)
6015iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6016{
6017#ifdef RT_ARCH_AMD64
6018 /* cmp Gv, Ev */
6019 if (iGprLeft >= 8 || iGprRight >= 8)
6020 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6021 pCodeBuf[off++] = 0x3b;
6022 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6023
6024#elif defined(RT_ARCH_ARM64)
6025 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
6026
6027#else
6028# error "Port me!"
6029#endif
6030 return off;
6031}
6032
6033
6034/**
6035 * Emits a compare of two 32-bit GPRs, setting the status flags for use
6036 * with conditional instructions.
6037 */
6038DECL_INLINE_THROW(uint32_t)
6039iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6040{
6041#ifdef RT_ARCH_AMD64
6042 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6043#elif defined(RT_ARCH_ARM64)
6044 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6045#else
6046# error "Port me!"
6047#endif
6048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6049 return off;
6050}
6051
6052
6053/**
6054 * Emits a compare of a 64-bit GPR with a constant value, setting the
6055 * status flags for use with conditional instructions.
6056 */
6057DECL_INLINE_THROW(uint32_t)
6058iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
6059{
6060#ifdef RT_ARCH_AMD64
6061 if (uImm <= UINT32_C(0x7f))
6062 {
6063 /* cmp Ev, Ib */
6064 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6065 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6066 pbCodeBuf[off++] = 0x83;
6067 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6068 pbCodeBuf[off++] = (uint8_t)uImm;
6069 }
6070 else if ((int64_t)uImm == (int32_t)uImm)
6071 {
6072 /* cmp Ev, imm */
6073 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6074 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6075 pbCodeBuf[off++] = 0x81;
6076 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6077 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6078 pbCodeBuf[off++] = RT_BYTE1(uImm);
6079 pbCodeBuf[off++] = RT_BYTE2(uImm);
6080 pbCodeBuf[off++] = RT_BYTE3(uImm);
6081 pbCodeBuf[off++] = RT_BYTE4(uImm);
6082 }
6083 else
6084 {
6085 /* Use temporary register for the immediate. */
6086 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6087 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6088 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6089 }
6090
6091#elif defined(RT_ARCH_ARM64)
6092 /** @todo guess there are cleverer things we can do here... */
6093 if (uImm < _4K)
6094 {
6095 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6096 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6097 true /*64Bit*/, true /*fSetFlags*/);
6098 }
6099 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6100 {
6101 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6102 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6103 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6104 }
6105 else
6106 {
6107 /* Use temporary register for the immediate. */
6108 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6109 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6110 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6111 }
6112
6113#else
6114# error "Port me!"
6115#endif
6116
6117 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6118 return off;
6119}
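/* Worked example for the ARM64 cases above (values are illustrative only):
 *   uImm = 0x123    -> CMP (SUBS XZR) with #0x123
 *   uImm = 0x45000  -> CMP with #0x45, LSL #12 (low 12 bits zero)
 *   uImm = 0x12345  -> immediate goes through a temporary register. */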
6120
6121
6122/**
6123 * Emits a compare of a 32-bit GPR with a constant value, setting the
6124 * status flags for use with conditional instructions.
6125 *
6126 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6127 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
6128 * bits all zero). Will release assert or throw exception if the caller
6129 * violates this restriction.
6130 */
6131DECL_FORCE_INLINE_THROW(uint32_t)
6132iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6133{
6134#ifdef RT_ARCH_AMD64
6135 if (iGprLeft >= 8)
6136 pCodeBuf[off++] = X86_OP_REX_B;
6137 if (uImm <= UINT32_C(0x7f))
6138 {
6139 /* cmp Ev, Ib */
6140 pCodeBuf[off++] = 0x83;
6141 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6142 pCodeBuf[off++] = (uint8_t)uImm;
6143 }
6144 else
6145 {
6146 /* cmp Ev, imm */
6147 pCodeBuf[off++] = 0x81;
6148 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6149 pCodeBuf[off++] = RT_BYTE1(uImm);
6150 pCodeBuf[off++] = RT_BYTE2(uImm);
6151 pCodeBuf[off++] = RT_BYTE3(uImm);
6152 pCodeBuf[off++] = RT_BYTE4(uImm);
6153 }
6154
6155#elif defined(RT_ARCH_ARM64)
6156 /** @todo guess there are cleverer things we can do here... */
6157 if (uImm < _4K)
6158 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6159 false /*64Bit*/, true /*fSetFlags*/);
6160 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6161 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6162 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6163 else
6164# ifdef IEM_WITH_THROW_CATCH
6165 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6166# else
6167 AssertReleaseFailedStmt(off = UINT32_MAX);
6168# endif
6169
6170#else
6171# error "Port me!"
6172#endif
6173 return off;
6174}
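/* Reminder: the Ex variant above cannot fall back to a temporary register, so
 * on ARM64 only immediates like 0xfff or 0x123000 (a 12-bit value, optionally
 * shifted left by 12) are acceptable; something like 0x1234 will hit the
 * assert / longjmp path.  Use iemNativeEmitCmpGpr32WithImm below for
 * arbitrary values. */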
6175
6176
6177/**
6178 * Emits a compare of a 32-bit GPR with a constant value, setting the
6179 * status flags for use with conditional instructions.
6180 */
6181DECL_INLINE_THROW(uint32_t)
6182iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6183{
6184#ifdef RT_ARCH_AMD64
6185 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6186
6187#elif defined(RT_ARCH_ARM64)
6188 /** @todo guess there are cleverer things we can do here... */
6189 if (uImm < _4K)
6190 {
6191 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6192 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6193 false /*64Bit*/, true /*fSetFlags*/);
6194 }
6195 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6196 {
6197 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6198 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6199 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6200 }
6201 else
6202 {
6203 /* Use temporary register for the immediate. */
6204 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6205 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6206 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6207 }
6208
6209#else
6210# error "Port me!"
6211#endif
6212
6213 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6214 return off;
6215}
6216
6217
6218/**
6219 * Emits a compare of a 16-bit GPR with a constant value, setting the
6220 * status flags for use with conditional instructions.
6221 *
6222 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
6223 * 16-bit value from @a iGprLeft.
6224 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6225 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
6226 * bits all zero). Will release assert or throw exception if the caller
6227 * violates this restriction.
6228 */
6229DECL_FORCE_INLINE_THROW(uint32_t)
6230iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6231 uint8_t idxTmpReg = UINT8_MAX)
6232{
6233#ifdef RT_ARCH_AMD64
6234 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6235 if (iGprLeft >= 8)
6236 pCodeBuf[off++] = X86_OP_REX_B;
6237 if (uImm <= UINT32_C(0x7f))
6238 {
6239 /* cmp Ev, Ib */
6240 pCodeBuf[off++] = 0x83;
6241 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6242 pCodeBuf[off++] = (uint8_t)uImm;
6243 }
6244 else
6245 {
6246 /* cmp Ev, imm */
6247 pCodeBuf[off++] = 0x81;
6248 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6249 pCodeBuf[off++] = RT_BYTE1(uImm);
6250 pCodeBuf[off++] = RT_BYTE2(uImm);
6251 }
6252 RT_NOREF(idxTmpReg);
6253
6254#elif defined(RT_ARCH_ARM64)
6255# ifdef IEM_WITH_THROW_CATCH
6256 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6257# else
6258 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6259# endif
6260 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6261 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6262 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6263
6264#else
6265# error "Port me!"
6266#endif
6267 return off;
6268}
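/* Note: ARM64 has no 16-bit compare, so the emitter above first masks the low
 * 16 bits of iGprLeft into idxTmpReg (an AND with 0xffff, see the immediate
 * assertion) and then performs a 32-bit compare on that; AMD64 gets by with
 * the operand-size prefix and never touches idxTmpReg. */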
6269
6270
6271/**
6272 * Emits a compare of a 16-bit GPR with a constant value, setting the
6273 * status flags for use with conditional instructions.
6274 *
6275 * @note ARM64: Helper register is required (idxTmpReg).
6276 */
6277DECL_INLINE_THROW(uint32_t)
6278iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6279 uint8_t idxTmpReg = UINT8_MAX)
6280{
6281#ifdef RT_ARCH_AMD64
6282 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6283#elif defined(RT_ARCH_ARM64)
6284 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6285#else
6286# error "Port me!"
6287#endif
6288 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6289 return off;
6290}
6291
6292
6293
6294/*********************************************************************************************************************************
6295* Branching *
6296*********************************************************************************************************************************/
6297
6298/**
6299 * Emits a JMP rel32 / B imm26 to the given label.
6300 */
6301DECL_FORCE_INLINE_THROW(uint32_t)
6302iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6303{
6304 Assert(idxLabel < pReNative->cLabels);
6305
6306#ifdef RT_ARCH_AMD64
6307 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6308 {
6309 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6310 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6311 {
6312 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6313 pCodeBuf[off++] = (uint8_t)offRel;
6314 }
6315 else
6316 {
6317 offRel -= 3;
6318 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6319 pCodeBuf[off++] = RT_BYTE1(offRel);
6320 pCodeBuf[off++] = RT_BYTE2(offRel);
6321 pCodeBuf[off++] = RT_BYTE3(offRel);
6322 pCodeBuf[off++] = RT_BYTE4(offRel);
6323 }
6324 }
6325 else
6326 {
6327 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6328 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6329 pCodeBuf[off++] = 0xfe;
6330 pCodeBuf[off++] = 0xff;
6331 pCodeBuf[off++] = 0xff;
6332 pCodeBuf[off++] = 0xff;
6333 }
6334 pCodeBuf[off++] = 0xcc; /* int3 poison */
6335
6336#elif defined(RT_ARCH_ARM64)
6337 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6338 {
6339 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6340 off++;
6341 }
6342 else
6343 {
6344 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6345 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6346 }
6347
6348#else
6349# error "Port me!"
6350#endif
6351 return off;
6352}
6353
6354
6355/**
6356 * Emits a JMP rel32 / B imm19 to the given label.
6357 */
6358DECL_INLINE_THROW(uint32_t)
6359iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6360{
6361#ifdef RT_ARCH_AMD64
6362 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6363#elif defined(RT_ARCH_ARM64)
6364 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6365#else
6366# error "Port me!"
6367#endif
6368 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6369 return off;
6370}
6371
6372
6373/**
6374 * Emits a JMP rel32 / B imm19 to a new undefined label.
6375 */
6376DECL_INLINE_THROW(uint32_t)
6377iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6378{
6379 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6380 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6381}
6382
6383/** Condition type. */
6384#ifdef RT_ARCH_AMD64
6385typedef enum IEMNATIVEINSTRCOND : uint8_t
6386{
6387 kIemNativeInstrCond_o = 0,
6388 kIemNativeInstrCond_no,
6389 kIemNativeInstrCond_c,
6390 kIemNativeInstrCond_nc,
6391 kIemNativeInstrCond_e,
6392 kIemNativeInstrCond_z = kIemNativeInstrCond_e,
6393 kIemNativeInstrCond_ne,
6394 kIemNativeInstrCond_nz = kIemNativeInstrCond_ne,
6395 kIemNativeInstrCond_be,
6396 kIemNativeInstrCond_nbe,
6397 kIemNativeInstrCond_s,
6398 kIemNativeInstrCond_ns,
6399 kIemNativeInstrCond_p,
6400 kIemNativeInstrCond_np,
6401 kIemNativeInstrCond_l,
6402 kIemNativeInstrCond_nl,
6403 kIemNativeInstrCond_le,
6404 kIemNativeInstrCond_nle
6405} IEMNATIVEINSTRCOND;
6406#elif defined(RT_ARCH_ARM64)
6407typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6408# define kIemNativeInstrCond_o todo_conditional_codes
6409# define kIemNativeInstrCond_no todo_conditional_codes
6410# define kIemNativeInstrCond_c todo_conditional_codes
6411# define kIemNativeInstrCond_nc todo_conditional_codes
6412# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6413# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6414# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6415# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6416# define kIemNativeInstrCond_s todo_conditional_codes
6417# define kIemNativeInstrCond_ns todo_conditional_codes
6418# define kIemNativeInstrCond_p todo_conditional_codes
6419# define kIemNativeInstrCond_np todo_conditional_codes
6420# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6421# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6422# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6423# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6424#else
6425# error "Port me!"
6426#endif
6427
6428
6429/**
6430 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6431 */
6432DECL_FORCE_INLINE_THROW(uint32_t)
6433iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6434 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6435{
6436 Assert(idxLabel < pReNative->cLabels);
6437
6438 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6439#ifdef RT_ARCH_AMD64
6440 if (offLabel >= off)
6441 {
6442 /* jcc rel32 */
6443 pCodeBuf[off++] = 0x0f;
6444 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6445 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6446 pCodeBuf[off++] = 0x00;
6447 pCodeBuf[off++] = 0x00;
6448 pCodeBuf[off++] = 0x00;
6449 pCodeBuf[off++] = 0x00;
6450 }
6451 else
6452 {
6453 int32_t offDisp = offLabel - (off + 2);
6454 if ((int8_t)offDisp == offDisp)
6455 {
6456 /* jcc rel8 */
6457 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6458 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6459 }
6460 else
6461 {
6462 /* jcc rel32 */
6463 offDisp -= 4;
6464 pCodeBuf[off++] = 0x0f;
6465 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6466 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6467 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6468 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6469 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6470 }
6471 }
6472
6473#elif defined(RT_ARCH_ARM64)
6474 if (offLabel >= off)
6475 {
6476 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6477 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6478 }
6479 else
6480 {
6481 Assert(off - offLabel <= 0x3ffffU);
6482 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6483 off++;
6484 }
6485
6486#else
6487# error "Port me!"
6488#endif
6489 return off;
6490}
6491
6492
6493/**
6494 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6495 */
6496DECL_INLINE_THROW(uint32_t)
6497iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6498{
6499#ifdef RT_ARCH_AMD64
6500 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6501#elif defined(RT_ARCH_ARM64)
6502 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6503#else
6504# error "Port me!"
6505#endif
6506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6507 return off;
6508}
6509
6510
6511/**
6512 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6513 */
6514DECL_INLINE_THROW(uint32_t)
6515iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6516 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6517{
6518 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6519 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6520}
6521
6522
6523/**
6524 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6525 */
6526DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6527{
6528#ifdef RT_ARCH_AMD64
6529 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6530#elif defined(RT_ARCH_ARM64)
6531 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6532#else
6533# error "Port me!"
6534#endif
6535}
6536
6537/**
6538 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6539 */
6540DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6541 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6542{
6543#ifdef RT_ARCH_AMD64
6544 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6545#elif defined(RT_ARCH_ARM64)
6546 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6547#else
6548# error "Port me!"
6549#endif
6550}
6551
6552
6553/**
6554 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6555 */
6556DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6557{
6558#ifdef RT_ARCH_AMD64
6559 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6560#elif defined(RT_ARCH_ARM64)
6561 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6562#else
6563# error "Port me!"
6564#endif
6565}
6566
6567/**
6568 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6569 */
6570DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6571 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6572{
6573#ifdef RT_ARCH_AMD64
6574 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6575#elif defined(RT_ARCH_ARM64)
6576 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6577#else
6578# error "Port me!"
6579#endif
6580}
6581
6582
6583/**
6584 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6585 */
6586DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6587{
6588#ifdef RT_ARCH_AMD64
6589 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6590#elif defined(RT_ARCH_ARM64)
6591 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6592#else
6593# error "Port me!"
6594#endif
6595}
6596
6597/**
6598 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6599 */
6600DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6601 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6602{
6603#ifdef RT_ARCH_AMD64
6604 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6605#elif defined(RT_ARCH_ARM64)
6606 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6607#else
6608# error "Port me!"
6609#endif
6610}
6611
6612
6613/**
6614 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6615 */
6616DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6617{
6618#ifdef RT_ARCH_AMD64
6619 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6620#elif defined(RT_ARCH_ARM64)
6621 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6622#else
6623# error "Port me!"
6624#endif
6625}
6626
6627/**
6628 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6629 */
6630DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6631 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6632{
6633#ifdef RT_ARCH_AMD64
6634 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6635#elif defined(RT_ARCH_ARM64)
6636 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6637#else
6638# error "Port me!"
6639#endif
6640}
6641
6642
6643/**
6644 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6645 */
6646DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6647{
6648#ifdef RT_ARCH_AMD64
6649 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6650#elif defined(RT_ARCH_ARM64)
6651 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6652#else
6653# error "Port me!"
6654#endif
6655}
6656
6657/**
6658 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6659 */
6660DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6661 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6662{
6663#ifdef RT_ARCH_AMD64
6664 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6665#elif defined(RT_ARCH_ARM64)
6666 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6667#else
6668# error "Port me!"
6669#endif
6670}
6671
6672
6673/**
6674 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6675 *
6676 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6677 *
6678 * Only use hardcoded jumps forward when emitting for exactly one
6679 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6680 * the right target address on all platforms!
6681 *
6682 * Please also note that on x86 it is necessary to pass off + 256 or higher
6683 * for @a offTarget if one believes the intervening code is more than 127
6684 * bytes long.
6685 */
6686DECL_FORCE_INLINE(uint32_t)
6687iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6688{
6689#ifdef RT_ARCH_AMD64
6690 /* jcc rel8 / rel32 */
6691 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6692 if (offDisp < 128 && offDisp >= -128)
6693 {
6694 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6695 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6696 }
6697 else
6698 {
6699 offDisp -= 4;
6700 pCodeBuf[off++] = 0x0f;
6701 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6702 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6703 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6704 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6705 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6706 }
6707
6708#elif defined(RT_ARCH_ARM64)
6709 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6710 off++;
6711#else
6712# error "Port me!"
6713#endif
6714 return off;
6715}
6716
6717
6718/**
6719 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6720 *
6721 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6722 *
6723 * Only use hardcoded jumps forward when emitting for exactly one
6724 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6725 * the right target address on all platforms!
6726 *
6727 * Please also note that on x86 it is necessary to pass off + 256 or higher
6728 * for @a offTarget if one believes the intervening code is more than 127
6729 * bytes long.
6730 */
6731DECL_INLINE_THROW(uint32_t)
6732iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6733{
6734#ifdef RT_ARCH_AMD64
6735 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6736#elif defined(RT_ARCH_ARM64)
6737 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6738#else
6739# error "Port me!"
6740#endif
6741 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6742 return off;
6743}
6744
6745
6746/**
6747 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6748 *
6749 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6750 */
6751DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6752{
6753#ifdef RT_ARCH_AMD64
6754 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6755#elif defined(RT_ARCH_ARM64)
6756 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6757#else
6758# error "Port me!"
6759#endif
6760}
6761
6762
6763/**
6764 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6765 *
6766 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6767 */
6768DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6769{
6770#ifdef RT_ARCH_AMD64
6771 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6772#elif defined(RT_ARCH_ARM64)
6773 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6774#else
6775# error "Port me!"
6776#endif
6777}
6778
6779
6780/**
6781 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6782 *
6783 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6784 */
6785DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6786{
6787#ifdef RT_ARCH_AMD64
6788 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6789#elif defined(RT_ARCH_ARM64)
6790 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6791#else
6792# error "Port me!"
6793#endif
6794}
6795
6796
6797/**
6798 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6799 *
6800 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6801 */
6802DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6803{
6804#ifdef RT_ARCH_AMD64
6805 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6806#elif defined(RT_ARCH_ARM64)
6807 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6808#else
6809# error "Port me!"
6810#endif
6811}
6812
6813
6814/**
6815 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6816 *
6817 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6818 */
6819DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6820{
6821#ifdef RT_ARCH_AMD64
6822 /* jmp rel8 or rel32 */
6823 int32_t offDisp = offTarget - (off + 2);
6824 if (offDisp < 128 && offDisp >= -128)
6825 {
6826 pCodeBuf[off++] = 0xeb;
6827 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6828 }
6829 else
6830 {
6831 offDisp -= 3;
6832 pCodeBuf[off++] = 0xe9;
6833 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6834 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6835 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6836 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6837 }
6838
6839#elif defined(RT_ARCH_ARM64)
6840 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6841 off++;
6842
6843#else
6844# error "Port me!"
6845#endif
6846 return off;
6847}
6848
6849
6850/**
6851 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6852 *
6853 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6854 */
6855DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6856{
6857#ifdef RT_ARCH_AMD64
6858 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6859#elif defined(RT_ARCH_ARM64)
6860 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6861#else
6862# error "Port me!"
6863#endif
6864 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6865 return off;
6866}
6867
6868
6869/**
6870 * Fixes up a conditional jump to a fixed label.
6871 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6872 * iemNativeEmitJzToFixed, ...
6873 */
6874DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6875{
6876#ifdef RT_ARCH_AMD64
6877 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6878 uint8_t const bOpcode = pbCodeBuf[offFixup];
6879 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6880 {
6881 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6882 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6883 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6884 }
6885 else
6886 {
6887 if (bOpcode != 0x0f)
6888 Assert(bOpcode == 0xe9);
6889 else
6890 {
6891 offFixup += 1;
6892 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
6893 }
6894 uint32_t const offRel32 = offTarget - (offFixup + 5);
6895 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6896 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6897 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6898 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6899 }
6900
6901#elif defined(RT_ARCH_ARM64)
6902 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6903 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6904 {
6905 /* B.COND + BC.COND */
6906 int32_t const offDisp = offTarget - offFixup;
6907 Assert(offDisp >= -262144 && offDisp < 262144);
6908 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6909 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6910 }
6911 else
6912 {
6913 /* B imm26 */
6914 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6915 int32_t const offDisp = offTarget - offFixup;
6916 Assert(offDisp >= -33554432 && offDisp < 33554432);
6917 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6918 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6919 }
6920
6921#else
6922# error "Port me!"
6923#endif
6924}
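/*
 * Illustrative sketch of the fixed-jump plus fixup pattern referred to above
 * (idxGprTmp is a placeholder).  Passing off + 256 as the provisional target
 * forces the rel32 form on x86 in case the skipped code exceeds 127 bytes;
 * iemNativeFixupFixedJump() then retargets the jump to the real location on
 * both platforms.
 *
 *      uint32_t const offFixup = off;
 *      off = iemNativeEmitJzToFixed(pReNative, off, off + 256);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxGprTmp, 0); // executed only when ZF is clear
 *      iemNativeFixupFixedJump(pReNative, offFixup, off);
 */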
6925
6926
6927#ifdef RT_ARCH_AMD64
6928/**
6929 * For doing bt on a register.
6930 */
6931DECL_INLINE_THROW(uint32_t)
6932iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6933{
6934 Assert(iBitNo < 64);
6935 /* bt Ev, imm8 */
6936 if (iBitNo >= 32)
6937 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6938 else if (iGprSrc >= 8)
6939 pCodeBuf[off++] = X86_OP_REX_B;
6940 pCodeBuf[off++] = 0x0f;
6941 pCodeBuf[off++] = 0xba;
6942 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6943 pCodeBuf[off++] = iBitNo;
6944 return off;
6945}
6946#endif /* RT_ARCH_AMD64 */
6947
6948
6949/**
6950 * Internal helper, don't call directly.
6951 */
6952DECL_INLINE_THROW(uint32_t)
6953iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6954 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6955{
6956 Assert(iBitNo < 64);
6957#ifdef RT_ARCH_AMD64
6958 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6959 if (iBitNo < 8)
6960 {
6961 /* test Eb, imm8 */
6962 if (iGprSrc >= 4)
6963 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6964 pbCodeBuf[off++] = 0xf6;
6965 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6966 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6967 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6968 }
6969 else
6970 {
6971 /* bt Ev, imm8 */
6972 if (iBitNo >= 32)
6973 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6974 else if (iGprSrc >= 8)
6975 pbCodeBuf[off++] = X86_OP_REX_B;
6976 pbCodeBuf[off++] = 0x0f;
6977 pbCodeBuf[off++] = 0xba;
6978 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6979 pbCodeBuf[off++] = iBitNo;
6980 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6981 }
6982
6983#elif defined(RT_ARCH_ARM64)
6984 /* Use the TBNZ instruction here. */
6985 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6986 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
6987 {
6988 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
6989 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
6990 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
6991 //if (offLabel == UINT32_MAX)
6992 {
6993 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6994 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6995 }
6996 //else
6997 //{
6998 // RT_BREAKPOINT();
6999 // Assert(off - offLabel <= 0x1fffU);
7000 // pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
7001 //
7002 //}
7003 }
7004 else
7005 {
7006 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
7007 pu32CodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
7008 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7009 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
7010 }
7011
7012#else
7013# error "Port me!"
7014#endif
7015 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7016 return off;
7017}
7018
7019
7020/**
7021 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7022 * @a iGprSrc.
7023 *
7024 * @note On ARM64 the range is only +/-8191 instructions.
7025 */
7026DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7027 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7028{
7029 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7030}
7031
7032
7033/**
7034 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7035 * _set_ in @a iGprSrc.
7036 *
7037 * @note On ARM64 the range is only +/-8191 instructions.
7038 */
7039DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7040 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7041{
7042 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7043}
7044
7045
7046/**
7047 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7048 * flags accordingly.
7049 */
7050DECL_INLINE_THROW(uint32_t)
7051iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7052{
7053 Assert(fBits != 0);
7054#ifdef RT_ARCH_AMD64
7055
7056 if (fBits >= UINT32_MAX)
7057 {
7058 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7059
7060 /* test Ev,Gv */
7061 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7062 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7063 pbCodeBuf[off++] = 0x85;
7064 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7065
7066 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7067 }
7068 else if (fBits <= UINT32_MAX)
7069 {
7070 /* test Eb, imm8 or test Ev, imm32 */
7071 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7072 if (fBits <= UINT8_MAX)
7073 {
7074 if (iGprSrc >= 4)
7075 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7076 pbCodeBuf[off++] = 0xf6;
7077 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7078 pbCodeBuf[off++] = (uint8_t)fBits;
7079 }
7080 else
7081 {
7082 if (iGprSrc >= 8)
7083 pbCodeBuf[off++] = X86_OP_REX_B;
7084 pbCodeBuf[off++] = 0xf7;
7085 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7086 pbCodeBuf[off++] = RT_BYTE1(fBits);
7087 pbCodeBuf[off++] = RT_BYTE2(fBits);
7088 pbCodeBuf[off++] = RT_BYTE3(fBits);
7089 pbCodeBuf[off++] = RT_BYTE4(fBits);
7090 }
7091 }
7092 /** @todo implement me. */
7093 else
7094 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7095
7096#elif defined(RT_ARCH_ARM64)
7097 uint32_t uImmR = 0;
7098 uint32_t uImmNandS = 0;
7099 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7100 {
7101 /* ands xzr, iGprSrc, #fBits */
7102 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7103 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7104 }
7105 else
7106 {
7107 /* ands xzr, iGprSrc, iTmpReg */
7108 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7109 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7110 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7111 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7112 }
7113
7114#else
7115# error "Port me!"
7116#endif
7117 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7118 return off;
7119}
7120
7121
7122/**
7123 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7124 * @a iGprSrc, setting CPU flags accordingly.
7125 *
7126 * @note For ARM64 this only supports @a fBits values that can be expressed
7127 * using the two 6-bit immediates of the ANDS instruction. The caller
7128 * must make sure this is possible!
7129 */
7130DECL_FORCE_INLINE_THROW(uint32_t)
7131iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
7132{
7133 Assert(fBits != 0);
7134
7135#ifdef RT_ARCH_AMD64
7136 if (fBits <= UINT8_MAX)
7137 {
7138 /* test Eb, imm8 */
7139 if (iGprSrc >= 4)
7140 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7141 pCodeBuf[off++] = 0xf6;
7142 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7143 pCodeBuf[off++] = (uint8_t)fBits;
7144 }
7145 else
7146 {
7147 /* test Ev, imm32 */
7148 if (iGprSrc >= 8)
7149 pCodeBuf[off++] = X86_OP_REX_B;
7150 pCodeBuf[off++] = 0xf7;
7151 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7152 pCodeBuf[off++] = RT_BYTE1(fBits);
7153 pCodeBuf[off++] = RT_BYTE2(fBits);
7154 pCodeBuf[off++] = RT_BYTE3(fBits);
7155 pCodeBuf[off++] = RT_BYTE4(fBits);
7156 }
7157
7158#elif defined(RT_ARCH_ARM64)
7159 /* ands xzr, src, #fBits */
7160 uint32_t uImmR = 0;
7161 uint32_t uImmNandS = 0;
7162 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7163 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7164 else
7165# ifdef IEM_WITH_THROW_CATCH
7166 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7167# else
7168 AssertReleaseFailedStmt(off = UINT32_MAX);
7169# endif
7170
7171#else
7172# error "Port me!"
7173#endif
7174 return off;
7175}
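/*
 * Illustrative caller-side sketch for the ARM64 restriction noted above (this
 * is not an existing helper): when @a fBits isn't a compile time constant, the
 * caller can check that the mask is expressible as an ANDS immediate before
 * using the Ex variant.
 *
 *      uint32_t uImmNandS = 0, uImmR = 0;
 *      Assert(Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR));
 *      off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, iGprSrc, fBits);
 */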
7176
7177
7178
7179/**
7180 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7181 * @a iGprSrc, setting CPU flags accordingly.
7182 *
7183 * @note For ARM64 this only supports @a fBits values that can be expressed
7184 * using the two 6-bit immediates of the ANDS instruction. The caller
7185 * must make sure this is possible!
7186 */
7187DECL_FORCE_INLINE_THROW(uint32_t)
7188iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7189{
7190 Assert(fBits != 0);
7191
7192#ifdef RT_ARCH_AMD64
7193 /* test Eb, imm8 */
7194 if (iGprSrc >= 4)
7195 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7196 pCodeBuf[off++] = 0xf6;
7197 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7198 pCodeBuf[off++] = fBits;
7199
7200#elif defined(RT_ARCH_ARM64)
7201 /* ands xzr, src, #fBits */
7202 uint32_t uImmR = 0;
7203 uint32_t uImmNandS = 0;
7204 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7205 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7206 else
7207# ifdef IEM_WITH_THROW_CATCH
7208 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7209# else
7210 AssertReleaseFailedStmt(off = UINT32_MAX);
7211# endif
7212
7213#else
7214# error "Port me!"
7215#endif
7216 return off;
7217}
7218
7219
7220/**
7221 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7222 * @a iGprSrc, setting CPU flags accordingly.
7223 */
7224DECL_INLINE_THROW(uint32_t)
7225iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7226{
7227 Assert(fBits != 0);
7228
7229#ifdef RT_ARCH_AMD64
7230 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7231
7232#elif defined(RT_ARCH_ARM64)
7233 /* ands xzr, src, [tmp|#imm] */
7234 uint32_t uImmR = 0;
7235 uint32_t uImmNandS = 0;
7236 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7237 {
7238 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7239 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7240 }
7241 else
7242 {
7243 /* Use temporary register for the 64-bit immediate. */
7244 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7245 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7246 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7247 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7248 }
7249
7250#else
7251# error "Port me!"
7252#endif
7253 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7254 return off;
7255}
7256
7257
7258/**
7259 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
7260 * are set in @a iGprSrc.
7261 */
7262DECL_INLINE_THROW(uint32_t)
7263iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7264 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7265{
7266 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7267
7268 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7269 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7270
7271 return off;
7272}
7273
7274
7275/**
7276 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
7277 * are set in @a iGprSrc.
7278 */
7279DECL_INLINE_THROW(uint32_t)
7280iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7281 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7282{
7283 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7284
7285 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7286 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7287
7288 return off;
7289}
7290
7291
7292/**
7293 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7294 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as specified by @a fJmpIfNotZero.
7295 * The operand size is given by @a f64Bit.
7296 */
7297DECL_FORCE_INLINE_THROW(uint32_t)
7298iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7299 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7300{
7301 Assert(idxLabel < pReNative->cLabels);
7302
7303#ifdef RT_ARCH_AMD64
7304 /* test reg32,reg32 / test reg64,reg64 */
7305 if (f64Bit)
7306 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7307 else if (iGprSrc >= 8)
7308 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7309 pCodeBuf[off++] = 0x85;
7310 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7311
7312 /* jnz idxLabel */
7313 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7314 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7315
7316#elif defined(RT_ARCH_ARM64)
7317 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7318 {
7319 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7320 iGprSrc, f64Bit);
7321 off++;
7322 }
7323 else
7324 {
7325 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7326 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7327 }
7328
7329#else
7330# error "Port me!"
7331#endif
7332 return off;
7333}
7334
7335
7336/**
7337 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as specified by @a fJmpIfNotZero.
7338 *
7339 * The operand size is given by @a f64Bit.
7340 */
7341DECL_FORCE_INLINE_THROW(uint32_t)
7342iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7343 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7344{
7345#ifdef RT_ARCH_AMD64
7346 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7347 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7348#elif defined(RT_ARCH_ARM64)
7349 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7350 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7351#else
7352# error "Port me!"
7353#endif
7354 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7355 return off;
7356}
7357
7358
7359/* if (Gpr1 == 0) Jmp idxLabel; */
7360
7361/**
7362 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7363 *
7364 * The operand size is given by @a f64Bit.
7365 */
7366DECL_FORCE_INLINE_THROW(uint32_t)
7367iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7368 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7369{
7370 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7371 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7372}
7373
7374
7375/**
7376 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7377 *
7378 * The operand size is given by @a f64Bit.
7379 */
7380DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7381 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7382{
7383 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7384}
7385
7386
7387/**
7388 * Emits code that jumps to a new label if @a iGprSrc is zero.
7389 *
7390 * The operand size is given by @a f64Bit.
7391 */
7392DECL_INLINE_THROW(uint32_t)
7393iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7394 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7395{
7396 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7397 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7398}
7399
7400
7401/* if (Gpr1 != 0) Jmp idxLabel; */
7402
7403/**
7404 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7405 *
7406 * The operand size is given by @a f64Bit.
7407 */
7408DECL_FORCE_INLINE_THROW(uint32_t)
7409iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7410 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7411{
7412 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7413 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7414}
7415
7416
7417/**
7418 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7419 *
7420 * The operand size is given by @a f64Bit.
7421 */
7422DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7423 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7424{
7425 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7426}
7427
7428
7429/**
7430 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7431 *
7432 * The operand size is given by @a f64Bit.
7433 */
7434DECL_INLINE_THROW(uint32_t)
7435iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7436 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7437{
7438 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7439 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7440}
7441
7442
7443/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7444
7445/**
7446 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7447 * differs.
7448 */
7449DECL_INLINE_THROW(uint32_t)
7450iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7451 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7452{
7453 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7454 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7455 return off;
7456}
7457
7458
7459/**
7460 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differs.
7461 */
7462DECL_INLINE_THROW(uint32_t)
7463iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7464 uint8_t iGprLeft, uint8_t iGprRight,
7465 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7466{
7467 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7468 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7469}
7470
7471
7472/* if (Gpr != Imm) Jmp idxLabel; */
7473
7474/**
7475 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7476 */
7477DECL_INLINE_THROW(uint32_t)
7478iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7479 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7480{
7481 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7482 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7483 return off;
7484}
7485
7486
7487/**
7488 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7489 */
7490DECL_INLINE_THROW(uint32_t)
7491iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7492 uint8_t iGprSrc, uint64_t uImm,
7493 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7494{
7495 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7496 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7497}
7498
7499
7500/**
7501 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7502 * @a uImm.
7503 */
7504DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7505 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7506{
7507 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7508 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7509 return off;
7510}
7511
7512
7513/**
7514 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7515 * @a uImm.
7516 */
7517DECL_INLINE_THROW(uint32_t)
7518iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7519 uint8_t iGprSrc, uint32_t uImm,
7520 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7521{
7522 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7523 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7524}
7525
7526
7527/**
7528 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7529 * @a uImm.
7530 */
7531DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7532 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7533{
7534 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7535 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7536 return off;
7537}
7538
7539
7540/**
7541 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7542 * @a uImm.
7543 */
7544DECL_INLINE_THROW(uint32_t)
7545iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7546 uint8_t iGprSrc, uint16_t uImm,
7547 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7548{
7549 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7550 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7551}
7552
7553
7554/* if (Gpr == Imm) Jmp idxLabel; */
7555
7556/**
7557 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7558 */
7559DECL_INLINE_THROW(uint32_t)
7560iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7561 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7562{
7563 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7564 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7565 return off;
7566}
7567
7568
7569/**
7570 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7571 */
7572DECL_INLINE_THROW(uint32_t)
7573iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7574 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7575{
7576 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7577 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7578}
7579
7580
7581/**
7582 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7583 */
7584DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7585 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7586{
7587 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7588 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7589 return off;
7590}
7591
7592
7593/**
7594 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7595 */
7596DECL_INLINE_THROW(uint32_t)
7597iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7598 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7599{
7600 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7601 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7602}
7603
7604
7605/**
7606 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7607 *
7608 * @note ARM64: Helper register is required (idxTmpReg).
7609 */
7610DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7611 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7612 uint8_t idxTmpReg = UINT8_MAX)
7613{
7614 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7615 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7616 return off;
7617}
7618
7619
7620/**
7621 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7622 *
7623 * @note ARM64: Helper register is required (idxTmpReg).
7624 */
7625DECL_INLINE_THROW(uint32_t)
7626iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7627 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7628 uint8_t idxTmpReg = UINT8_MAX)
7629{
7630 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7631 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7632}
7633
7634
7635/*********************************************************************************************************************************
7636* Calls. *
7637*********************************************************************************************************************************/
7638
7639/**
7640 * Emits a call to a 64-bit address.
7641 */
7642DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7643{
7644#ifdef RT_ARCH_AMD64
7645 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7646
7647 /* call rax */
7648 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7649 pbCodeBuf[off++] = 0xff;
7650 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7651
7652#elif defined(RT_ARCH_ARM64)
7653 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7654
7655 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7656 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7657
7658#else
7659# error "port me"
7660#endif
7661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7662 return off;
7663}
7664
7665
7666/**
7667 * Emits code to load a stack variable into an argument GPR.
7668 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7669 */
7670DECL_FORCE_INLINE_THROW(uint32_t)
7671iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7672 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7673 bool fSpilledVarsInVolatileRegs = false)
7674{
7675 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7676 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7677 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7678
7679 uint8_t const idxRegVar = pVar->idxReg;
7680 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7681 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7682 || !fSpilledVarsInVolatileRegs ))
7683 {
7684 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7685 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7686 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7687 if (!offAddend)
7688 {
7689 if (idxRegArg != idxRegVar)
7690 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7691 }
7692 else
7693 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7694 }
7695 else
7696 {
7697 uint8_t const idxStackSlot = pVar->idxStackSlot;
7698 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7699 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7700 if (offAddend)
7701 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7702 }
7703 return off;
7704}
7705
7706
7707/**
7708 * Emits code to load a stack or immediate variable value into an argument GPR,
7709 * optionally with an addend.
7710 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7711 */
7712DECL_FORCE_INLINE_THROW(uint32_t)
7713iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7714 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7715 bool fSpilledVarsInVolatileRegs = false)
7716{
7717 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7718 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7719 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7720 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7721 else
7722 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7723 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7724 return off;
7725}
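/*
 * Illustrative sketch of preparing a helper call (idxVarValue and
 * pfnSomeHelper are placeholders): pVCpu typically goes into the first call
 * argument register, the variable value into the second, followed by the
 * actual call emission.  The IEMNATIVE_CALL_ARGx_GREG and
 * IEMNATIVE_REG_FIXED_PVMCPU macros are assumed to be the usual fixed
 * register assignments of this recompiler.
 *
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarValue);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);
 */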
7726
7727
7728/**
7729 * Emits code to load the variable address into an argument GPR.
7730 *
7731 * This only works for uninitialized and stack variables.
7732 */
7733DECL_FORCE_INLINE_THROW(uint32_t)
7734iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7735 bool fFlushShadows)
7736{
7737 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7738 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7739 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7740 || pVar->enmKind == kIemNativeVarKind_Stack,
7741 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7742 AssertStmt(!pVar->fSimdReg,
7743 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7744
7745 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7746 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7747
7748 uint8_t const idxRegVar = pVar->idxReg;
7749 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7750 {
7751 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7752 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7753 Assert(pVar->idxReg == UINT8_MAX);
7754 }
7755 Assert( pVar->idxStackSlot != UINT8_MAX
7756 && pVar->idxReg == UINT8_MAX);
7757
7758 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7759}
7760
7761
7762#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7763/**
7764 * Emits code to load the variable address into an argument GPR.
7765 *
7766 * This is a special variant intended for SIMD variables only and only called
7767 * by the TLB miss path in the memory fetch/store code because there we pass
7768 * the value by reference and need both the register and stack depending on which
7769 * path is taken (TLB hit vs. miss).
7770 */
7771DECL_FORCE_INLINE_THROW(uint32_t)
7772iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7773 bool fSyncRegWithStack = true)
7774{
7775 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7776 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7777 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7778 || pVar->enmKind == kIemNativeVarKind_Stack,
7779 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7780 AssertStmt(pVar->fSimdReg,
7781 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7782 Assert( pVar->idxStackSlot != UINT8_MAX
7783 && pVar->idxReg != UINT8_MAX);
7784
7785 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7786 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7787
7788 uint8_t const idxRegVar = pVar->idxReg;
7789 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7790 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7791
7792 if (fSyncRegWithStack)
7793 {
7794 if (pVar->cbVar == sizeof(RTUINT128U))
7795 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
7796 else
7797 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
7798 }
7799
7800 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7801}
7802
7803
7804/**
7805 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
7806 *
7807 * This is a special helper and only called
7808 * by the TLB miss path in the memory fetch/store code because there we pass
7809 * the value by reference and need to sync the value on the stack with the assigned host register
7810 * after a TLB miss where the value ends up on the stack.
7811 */
7812DECL_FORCE_INLINE_THROW(uint32_t)
7813iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
7814{
7815 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7816 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7817 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7818 || pVar->enmKind == kIemNativeVarKind_Stack,
7819 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7820 AssertStmt(pVar->fSimdReg,
7821 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7822 Assert( pVar->idxStackSlot != UINT8_MAX
7823 && pVar->idxReg != UINT8_MAX);
7824
7825 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7826 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7827
7828 uint8_t const idxRegVar = pVar->idxReg;
7829 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7830 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7831
7832 if (pVar->cbVar == sizeof(RTUINT128U))
7833 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
7834 else
7835 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
7836
7837 return off;
7838}
7839
7840
7841/**
7842 * Emits a gprdst = ~gprsrc operation.
7843 */
7844DECL_FORCE_INLINE_THROW(uint32_t)
7845iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7846{
7847#ifdef RT_ARCH_AMD64
7848 if (iGprDst != iGprSrc)
7849 {
7850 /* mov gprdst, gprsrc. */
7851 if (f64Bit)
7852 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7853 else
7854 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7855 }
7856
7857 /* not gprdst */
7858 if (f64Bit || iGprDst >= 8)
7859 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7860 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7861 pCodeBuf[off++] = 0xf7;
7862 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7863#elif defined(RT_ARCH_ARM64)
7864 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7865#else
7866# error "port me"
7867#endif
7868 return off;
7869}
7870
7871
7872/**
7873 * Emits a gprdst = ~gprsrc operation.
7874 */
7875DECL_INLINE_THROW(uint32_t)
7876iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7877{
7878#ifdef RT_ARCH_AMD64
7879 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7880#elif defined(RT_ARCH_ARM64)
7881 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7882#else
7883# error "port me"
7884#endif
7885 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7886 return off;
7887}
7888
7889
7890/**
7891 * Emits a 128-bit vector register store to a VCpu value.
7892 */
7893DECL_FORCE_INLINE_THROW(uint32_t)
7894iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7895{
7896#ifdef RT_ARCH_AMD64
7897 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
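    /* Note: 66h 0Fh 7Fh is the store direction of movdqa (register to memory);
       the corresponding load emitter further down uses the 0Fh 6Fh load form. */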
7898 pCodeBuf[off++] = 0x66;
7899 if (iVecReg >= 8)
7900 pCodeBuf[off++] = X86_OP_REX_R;
7901 pCodeBuf[off++] = 0x0f;
7902 pCodeBuf[off++] = 0x7f;
7903 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7904#elif defined(RT_ARCH_ARM64)
7905 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7906
7907#else
7908# error "port me"
7909#endif
7910 return off;
7911}
7912
7913
7914/**
7915 * Emits a 128-bit vector register store to a VCpu value.
7916 */
7917DECL_INLINE_THROW(uint32_t)
7918iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7919{
7920#ifdef RT_ARCH_AMD64
7921 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7922#elif defined(RT_ARCH_ARM64)
7923 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7924#else
7925# error "port me"
7926#endif
7927 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7928 return off;
7929}
7930
7931
7932/**
7933 * Emits a high 128-bit vector register store to a VCpu value.
7934 */
7935DECL_FORCE_INLINE_THROW(uint32_t)
7936iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7937{
7938#ifdef RT_ARCH_AMD64
7939 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
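    /* The VEX3 prefix is assembled by hand: byte 1 selects the 0F3A opcode map and is
       0xe3, or 0x63 (inverted R bit cleared) when iVecReg >= 8; byte 2 (0x7d) encodes
       W=0, no vvvv, L=1 (256-bit) and the 66h prefix; 0x39 is the vextracti128 opcode. */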
7940 pCodeBuf[off++] = X86_OP_VEX3;
7941 if (iVecReg >= 8)
7942 pCodeBuf[off++] = 0x63;
7943 else
7944 pCodeBuf[off++] = 0xe3;
7945 pCodeBuf[off++] = 0x7d;
7946 pCodeBuf[off++] = 0x39;
7947 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7948 pCodeBuf[off++] = 0x01; /* Immediate */
7949#elif defined(RT_ARCH_ARM64)
7950 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7951#else
7952# error "port me"
7953#endif
7954 return off;
7955}
7956
7957
7958/**
7959 * Emits a high 128-bit vector register store to a VCpu value.
7960 */
7961DECL_INLINE_THROW(uint32_t)
7962iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7963{
7964#ifdef RT_ARCH_AMD64
7965 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7966#elif defined(RT_ARCH_ARM64)
7967 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7968 Assert(!(iVecReg & 0x1));
7969 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7970#else
7971# error "port me"
7972#endif
7973 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7974 return off;
7975}
7976
7977
7978/**
7979 * Emits a 128-bit vector register load of a VCpu value.
7980 */
7981DECL_FORCE_INLINE_THROW(uint32_t)
7982iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7983{
7984#ifdef RT_ARCH_AMD64
7985 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
7986 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7987 if (iVecReg >= 8)
7988 pCodeBuf[off++] = X86_OP_REX_R;
7989 pCodeBuf[off++] = 0x0f;
7990 pCodeBuf[off++] = 0x6f;
7991 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7992#elif defined(RT_ARCH_ARM64)
7993 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7994
7995#else
7996# error "port me"
7997#endif
7998 return off;
7999}
8000
8001
8002/**
8003 * Emits a 128-bit vector register load of a VCpu value.
8004 */
8005DECL_INLINE_THROW(uint32_t)
8006iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8007{
8008#ifdef RT_ARCH_AMD64
8009 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
8010#elif defined(RT_ARCH_ARM64)
8011 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
8012#else
8013# error "port me"
8014#endif
8015 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8016 return off;
8017}
8018
8019
8020/**
8021 * Emits a 128-bit vector register load of a VCpu value.
8022 * Emits a high 128-bit vector register load of a VCpu value.
8023DECL_FORCE_INLINE_THROW(uint32_t)
8024iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8025{
8026#ifdef RT_ARCH_AMD64
8027 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
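    /* Hand-assembled VEX3 prefix again: same byte 1 selection as for the store above,
       but byte 2 also encodes the destination register in vvvv (it doubles as the first
       source operand of vinserti128); 0x38 is the vinserti128 opcode in the 0F3A map. */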
8028 pCodeBuf[off++] = X86_OP_VEX3;
8029 if (iVecReg >= 8)
8030 pCodeBuf[off++] = 0x63;
8031 else
8032 pCodeBuf[off++] = 0xe3;
8033 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8034 pCodeBuf[off++] = 0x38;
8035 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8036 pCodeBuf[off++] = 0x01; /* Immediate */
8037#elif defined(RT_ARCH_ARM64)
8038 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8039#else
8040# error "port me"
8041#endif
8042 return off;
8043}
8044
8045
8046/**
8047 * Emits a high 128-bit vector register load of a VCpu value.
8048 */
8049DECL_INLINE_THROW(uint32_t)
8050iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8051{
8052#ifdef RT_ARCH_AMD64
8053 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8054#elif defined(RT_ARCH_ARM64)
8055 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8056 Assert(!(iVecReg & 0x1));
8057 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8058#else
8059# error "port me"
8060#endif
8061 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8062 return off;
8063}
8064
8065
8066/**
8067 * Emits a vecdst = vecsrc load.
8068 */
8069DECL_FORCE_INLINE(uint32_t)
8070iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8071{
8072#ifdef RT_ARCH_AMD64
8073 /* movdqu vecdst, vecsrc */
8074 pCodeBuf[off++] = 0xf3;
8075
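    /* REX.R extends the destination (ModRM.reg field), REX.B the source (ModRM.rm). */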
8076 if ((iVecRegDst | iVecRegSrc) >= 8)
8077 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
8078 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
8079 : X86_OP_REX_R;
8080 pCodeBuf[off++] = 0x0f;
8081 pCodeBuf[off++] = 0x6f;
8082 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8083
8084#elif defined(RT_ARCH_ARM64)
8085 /* mov dst, src; alias for: orr dst, src, src */
8086 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
8087
8088#else
8089# error "port me"
8090#endif
8091 return off;
8092}
8093
8094
8095/**
8096 * Emits a vecdst = vecsrc load, 128-bit.
8097 */
8098DECL_INLINE_THROW(uint32_t)
8099iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8100{
8101#ifdef RT_ARCH_AMD64
8102 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8103#elif defined(RT_ARCH_ARM64)
8104 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8105#else
8106# error "port me"
8107#endif
8108 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8109 return off;
8110}
8111
8112
8113/**
8114 * Emits a vecdst[128:255] = vecsrc[128:255] load.
8115 */
8116DECL_FORCE_INLINE_THROW(uint32_t)
8117iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8118{
8119#ifdef RT_ARCH_AMD64
8120 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
8121 pCodeBuf[off++] = X86_OP_VEX3;
8122 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8123 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8124 pCodeBuf[off++] = 0x46;
8125 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8126 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
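    /* In general imm[1:0] selects the result's low lane and imm[5:4] the high lane;
       values 0/1 take the low/high lane of the first source and 2/3 of the second. */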
8127
8128#elif defined(RT_ARCH_ARM64)
8129 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8130
8131 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
8132# ifdef IEM_WITH_THROW_CATCH
8133 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8134# else
8135 AssertReleaseFailedStmt(off = UINT32_MAX);
8136# endif
8137#else
8138# error "port me"
8139#endif
8140 return off;
8141}
8142
8143
8144/**
8145 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
8146 */
8147DECL_INLINE_THROW(uint32_t)
8148iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8149{
8150#ifdef RT_ARCH_AMD64
8151 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8152#elif defined(RT_ARCH_ARM64)
8153 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8154 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
8155#else
8156# error "port me"
8157#endif
8158 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8159 return off;
8160}
8161
8162
8163/**
8164 * Emits a vecdst[0:127] = vecsrc[128:255] load.
8165 */
8166DECL_FORCE_INLINE_THROW(uint32_t)
8167iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8168{
8169#ifdef RT_ARCH_AMD64
8170 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
8171 pCodeBuf[off++] = X86_OP_VEX3;
8172 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
8173 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8174 pCodeBuf[off++] = 0x39;
8175 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
8176 pCodeBuf[off++] = 0x1;
8177
8178#elif defined(RT_ARCH_ARM64)
8179 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8180
8181 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
8182# ifdef IEM_WITH_THROW_CATCH
8183 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8184# else
8185 AssertReleaseFailedStmt(off = UINT32_MAX);
8186# endif
8187#else
8188# error "port me"
8189#endif
8190 return off;
8191}
8192
8193
8194/**
8195 * Emits a vecdst[0:127] = vecsrc[128:255] load, high 128-bit.
8196 */
8197DECL_INLINE_THROW(uint32_t)
8198iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8199{
8200#ifdef RT_ARCH_AMD64
8201 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8202#elif defined(RT_ARCH_ARM64)
8203 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8204 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
8205#else
8206# error "port me"
8207#endif
8208 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8209 return off;
8210}
8211
8212
8213/**
8214 * Emits a vecdst = vecsrc load, 256-bit.
8215 */
8216DECL_INLINE_THROW(uint32_t)
8217iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8218{
8219#ifdef RT_ARCH_AMD64
8220 /* vmovdqa ymm, ymm */
8221 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8222 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
8223 {
8224 pbCodeBuf[off++] = X86_OP_VEX3;
8225 pbCodeBuf[off++] = 0x41;
8226 pbCodeBuf[off++] = 0x7d;
8227 pbCodeBuf[off++] = 0x6f;
8228 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8229 }
8230 else
8231 {
8232 pbCodeBuf[off++] = X86_OP_VEX2;
8233 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
8234 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
8235 pbCodeBuf[off++] = iVecRegSrc >= 8
8236 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
8237 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8238 }
8239#elif defined(RT_ARCH_ARM64)
8240 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8241 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
8242 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
8243 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
8244#else
8245# error "port me"
8246#endif
8247 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8248 return off;
8249}
8250
8251
8252/**
8253 * Emits a vecdst = vecsrc load.
8254 * Emits a vecdst[128:255] = vecsrc[0:127] load.
8255DECL_FORCE_INLINE(uint32_t)
8256iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8257{
8258#ifdef RT_ARCH_AMD64
8259 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
8260 pCodeBuf[off++] = X86_OP_VEX3;
8261 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8262 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8263 pCodeBuf[off++] = 0x38;
8264 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8265 pCodeBuf[off++] = 0x01; /* Immediate */
8266
8267#elif defined(RT_ARCH_ARM64)
8268 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8269 /* mov dst, src; alias for: orr dst, src, src */
8270 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
8271
8272#else
8273# error "port me"
8274#endif
8275 return off;
8276}
8277
8278
8279/**
8280 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
8281 */
8282DECL_INLINE_THROW(uint32_t)
8283iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8284{
8285#ifdef RT_ARCH_AMD64
8286 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8287#elif defined(RT_ARCH_ARM64)
8288 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8289#else
8290# error "port me"
8291#endif
8292 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8293 return off;
8294}
8295
8296
8297/**
8298 * Emits a gprdst = vecsrc[x] load, 64-bit.
8299 */
8300DECL_FORCE_INLINE(uint32_t)
8301iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8302{
8303#ifdef RT_ARCH_AMD64
8304 if (iQWord >= 2)
8305 {
8306 /*
8307 * vpextrq doesn't work on the upper 128-bits.
8308 * So we use the following sequence:
8309 * vextracti128 vectmp0, vecsrc, 1
8310 * pextrq gpr, vectmp0, #(iQWord - 2)
8311 */
8312 /* vextracti128 */
8313 pCodeBuf[off++] = X86_OP_VEX3;
8314 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
8315 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8316 pCodeBuf[off++] = 0x39;
8317 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8318 pCodeBuf[off++] = 0x1;
8319
8320 /* pextrq gpr, vectmp0, #(iQWord - 2) (ASSUMES SSE4.1). */
8321 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8322 pCodeBuf[off++] = X86_OP_REX_W
8323 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8324 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8325 pCodeBuf[off++] = 0x0f;
8326 pCodeBuf[off++] = 0x3a;
8327 pCodeBuf[off++] = 0x16;
8328 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
8329 pCodeBuf[off++] = iQWord - 2;
8330 }
8331 else
8332 {
8333 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
8334 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8335 pCodeBuf[off++] = X86_OP_REX_W
8336 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8337 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8338 pCodeBuf[off++] = 0x0f;
8339 pCodeBuf[off++] = 0x3a;
8340 pCodeBuf[off++] = 0x16;
8341 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8342 pCodeBuf[off++] = iQWord;
8343 }
8344#elif defined(RT_ARCH_ARM64)
8345 /* umov gprdst, vecsrc[iQWord] */
8346 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8347#else
8348# error "port me"
8349#endif
8350 return off;
8351}
8352
8353
8354/**
8355 * Emits a gprdst = vecsrc[x] load, 64-bit.
8356 */
8357DECL_INLINE_THROW(uint32_t)
8358iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8359{
8360 Assert(iQWord <= 3);
8361
8362#ifdef RT_ARCH_AMD64
8363 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
8364#elif defined(RT_ARCH_ARM64)
8365 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8366 Assert(!(iVecRegSrc & 0x1));
8367 /* Need to access the "high" 128-bit vector register. */
8368 if (iQWord >= 2)
8369 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
8370 else
8371 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
8372#else
8373# error "port me"
8374#endif
8375 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8376 return off;
8377}
8378
8379
8380/**
8381 * Emits a gprdst = vecsrc[x] load, 32-bit.
8382 */
8383DECL_FORCE_INLINE(uint32_t)
8384iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8385{
8386#ifdef RT_ARCH_AMD64
8387 if (iDWord >= 4)
8388 {
8389 /*
8390 * vpextrd doesn't work on the upper 128-bits.
8391 * So we use the following sequence:
8392 * vextracti128 vectmp0, vecsrc, 1
8393 * pextrd gpr, vectmp0, #(iDWord - 4)
8394 */
8395 /* vextracti128 */
8396 pCodeBuf[off++] = X86_OP_VEX3;
8397 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
8398 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8399 pCodeBuf[off++] = 0x39;
8400 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8401 pCodeBuf[off++] = 0x1;
8402
8403 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
8404 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8405 if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
8406 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8407 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8408 pCodeBuf[off++] = 0x0f;
8409 pCodeBuf[off++] = 0x3a;
8410 pCodeBuf[off++] = 0x16;
8411 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
8412 pCodeBuf[off++] = iDWord - 4;
8413 }
8414 else
8415 {
8416 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
8417 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8418 if (iGprDst >= 8 || iVecRegSrc >= 8)
8419 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8420 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8421 pCodeBuf[off++] = 0x0f;
8422 pCodeBuf[off++] = 0x3a;
8423 pCodeBuf[off++] = 0x16;
8424 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8425 pCodeBuf[off++] = iDWord;
8426 }
8427#elif defined(RT_ARCH_ARM64)
8428 Assert(iDWord < 4);
8429
8430 /* umov gprdst, vecsrc[iDWord] */
8431 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
8432#else
8433# error "port me"
8434#endif
8435 return off;
8436}
8437
8438
8439/**
8440 * Emits a gprdst = vecsrc[x] load, 32-bit.
8441 */
8442DECL_INLINE_THROW(uint32_t)
8443iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8444{
8445 Assert(iDWord <= 7);
8446
8447#ifdef RT_ARCH_AMD64
8448 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
8449#elif defined(RT_ARCH_ARM64)
8450 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8451 Assert(!(iVecRegSrc & 0x1));
8452 /* Need to access the "high" 128-bit vector register. */
8453 if (iDWord >= 4)
8454 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
8455 else
8456 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
8457#else
8458# error "port me"
8459#endif
8460 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8461 return off;
8462}
8463
8464
8465/**
8466 * Emits a gprdst = vecsrc[x] load, 16-bit.
8467 */
8468DECL_FORCE_INLINE(uint32_t)
8469iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8470{
8471#ifdef RT_ARCH_AMD64
8472 if (iWord >= 8)
8473 {
8474 /** @todo Currently not used. */
8475 AssertReleaseFailed();
8476 }
8477 else
8478 {
8479 /* pextrw gpr, vecsrc, #iWord */
8480 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8481 if (iGprDst >= 8 || iVecRegSrc >= 8)
8482 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
8483 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
8484 pCodeBuf[off++] = 0x0f;
8485 pCodeBuf[off++] = 0xc5;
8486 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
8487 pCodeBuf[off++] = iWord;
8488 }
8489#elif defined(RT_ARCH_ARM64)
8490 /* umov gprdst, vecsrc[iWord] */
8491 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
8492#else
8493# error "port me"
8494#endif
8495 return off;
8496}
8497
8498
8499/**
8500 * Emits a gprdst = vecsrc[x] load, 16-bit.
8501 */
8502DECL_INLINE_THROW(uint32_t)
8503iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8504{
8505 Assert(iWord <= 15);
8506
8507#ifdef RT_ARCH_AMD64
8508 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
8509#elif defined(RT_ARCH_ARM64)
8510 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8511 Assert(!(iVecRegSrc & 0x1));
8512 /* Need to access the "high" 128-bit vector register. */
8513 if (iWord >= 8)
8514 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
8515 else
8516 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
8517#else
8518# error "port me"
8519#endif
8520 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8521 return off;
8522}
8523
8524
8525/**
8526 * Emits a gprdst = vecsrc[x] load, 8-bit.
8527 */
8528DECL_FORCE_INLINE(uint32_t)
8529iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8530{
8531#ifdef RT_ARCH_AMD64
8532 if (iByte >= 16)
8533 {
8534 /** @todo Currently not used. */
8535 AssertReleaseFailed();
8536 }
8537 else
8538 {
8539 /* pextrb gpr, vecsrc, #iByte */
8540 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8541 if (iGprDst >= 8 || iVecRegSrc >= 8)
8542 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8543 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8544 pCodeBuf[off++] = 0x0f;
8545 pCodeBuf[off++] = 0x3a;
8546 pCodeBuf[off++] = 0x14;
8547 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8548 pCodeBuf[off++] = iByte;
8549 }
8550#elif defined(RT_ARCH_ARM64)
8551 /* umov gprdst, vecsrc[iByte] */
8552 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
8553#else
8554# error "port me"
8555#endif
8556 return off;
8557}
8558
8559
8560/**
8561 * Emits a gprdst = vecsrc[x] load, 8-bit.
8562 */
8563DECL_INLINE_THROW(uint32_t)
8564iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8565{
8566 Assert(iByte <= 31);
8567
8568#ifdef RT_ARCH_AMD64
8569 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
8570#elif defined(RT_ARCH_ARM64)
8571 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8572 Assert(!(iVecRegSrc & 0x1));
8573 /* Need to access the "high" 128-bit vector register. */
8574 if (iByte >= 16)
8575 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
8576 else
8577 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
8578#else
8579# error "port me"
8580#endif
8581 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8582 return off;
8583}
8584
8585
8586/**
8587 * Emits a vecdst[x] = gprsrc store, 64-bit.
8588 */
8589DECL_FORCE_INLINE(uint32_t)
8590iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8591{
8592#ifdef RT_ARCH_AMD64
8593 if (iQWord >= 2)
8594 {
8595 /*
8596 * vpinsrq doesn't work on the upper 128-bits.
8597 * So we use the following sequence:
8598 * vextracti128 vectmp0, vecdst, 1
8599 * pinsrq vectmp0, gpr, #(iQWord - 2)
8600 * vinserti128 vecdst, vectmp0, 1
8601 */
8602 /* vextracti128 */
8603 pCodeBuf[off++] = X86_OP_VEX3;
8604 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8605 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8606 pCodeBuf[off++] = 0x39;
8607 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8608 pCodeBuf[off++] = 0x1;
8609
8610 /* pinsrq */
8611 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8612 pCodeBuf[off++] = X86_OP_REX_W
8613 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8614 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8615 pCodeBuf[off++] = 0x0f;
8616 pCodeBuf[off++] = 0x3a;
8617 pCodeBuf[off++] = 0x22;
8618 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
8619 pCodeBuf[off++] = iQWord - 2;
8620
8621 /* vinserti128 */
8622 pCodeBuf[off++] = X86_OP_VEX3;
8623 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8624 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8625 pCodeBuf[off++] = 0x38;
8626 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8627 pCodeBuf[off++] = 0x01; /* Immediate */
8628 }
8629 else
8630 {
8631 /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */
8632 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8633 pCodeBuf[off++] = X86_OP_REX_W
8634 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8635 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8636 pCodeBuf[off++] = 0x0f;
8637 pCodeBuf[off++] = 0x3a;
8638 pCodeBuf[off++] = 0x22;
8639 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8640 pCodeBuf[off++] = iQWord;
8641 }
8642#elif defined(RT_ARCH_ARM64)
8643 /* ins vecsrc[iQWord], gpr */
8644 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8645#else
8646# error "port me"
8647#endif
8648 return off;
8649}
8650
8651
8652/**
8653 * Emits a vecdst[x] = gprsrc store, 64-bit.
8654 */
8655DECL_INLINE_THROW(uint32_t)
8656iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8657{
8658 Assert(iQWord <= 3);
8659
8660#ifdef RT_ARCH_AMD64
8661 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
8662#elif defined(RT_ARCH_ARM64)
8663 Assert(!(iVecRegDst & 0x1));
8664 if (iQWord >= 2)
8665 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
8666 else
8667 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
8668#else
8669# error "port me"
8670#endif
8671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8672 return off;
8673}
8674
8675
8676/**
8677 * Emits a vecdst[x] = gprsrc store, 32-bit.
8678 */
8679DECL_FORCE_INLINE(uint32_t)
8680iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8681{
8682#ifdef RT_ARCH_AMD64
8683 if (iDWord >= 4)
8684 {
8685 /*
8686 * vpinsrd doesn't work on the upper 128-bits.
8687 * So we use the following sequence:
8688 * vextracti128 vectmp0, vecdst, 1
8689 * pinsrd vectmp0, gpr, #(iDWord - 4)
8690 * vinserti128 vecdst, vectmp0, 1
8691 */
8692 /* vextracti128 */
8693 pCodeBuf[off++] = X86_OP_VEX3;
8694 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8695 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8696 pCodeBuf[off++] = 0x39;
8697 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8698 pCodeBuf[off++] = 0x1;
8699
8700 /* pinsrd */
8701 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8702 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
8703 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8704 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8705 pCodeBuf[off++] = 0x0f;
8706 pCodeBuf[off++] = 0x3a;
8707 pCodeBuf[off++] = 0x22;
8708 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
8709 pCodeBuf[off++] = iDWord - 4;
8710
8711 /* vinserti128 */
8712 pCodeBuf[off++] = X86_OP_VEX3;
8713 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8714 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8715 pCodeBuf[off++] = 0x38;
8716 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8717 pCodeBuf[off++] = 0x01; /* Immediate */
8718 }
8719 else
8720 {
8721 /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */
8722 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8723 if (iVecRegDst >= 8 || iGprSrc >= 8)
8724 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8725 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8726 pCodeBuf[off++] = 0x0f;
8727 pCodeBuf[off++] = 0x3a;
8728 pCodeBuf[off++] = 0x22;
8729 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8730 pCodeBuf[off++] = iDWord;
8731 }
8732#elif defined(RT_ARCH_ARM64)
8733 /* ins vecsrc[iDWord], gpr */
8734 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
8735#else
8736# error "port me"
8737#endif
8738 return off;
8739}
8740
8741
8742/**
8743 * Emits a vecdst[x] = gprsrc store, 32-bit.
8744 */
8745DECL_INLINE_THROW(uint32_t)
8746iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8747{
8748 Assert(iDWord <= 7);
8749
8750#ifdef RT_ARCH_AMD64
8751 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
8752#elif defined(RT_ARCH_ARM64)
8753 Assert(!(iVecRegDst & 0x1));
8754 if (iDWord >= 4)
8755 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
8756 else
8757 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
8758#else
8759# error "port me"
8760#endif
8761 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8762 return off;
8763}
8764
8765
8766/**
8767 * Emits a vecdst[x] = gprsrc store, 16-bit.
8768 */
8769DECL_FORCE_INLINE(uint32_t)
8770iemNativeEmitSimdStoreGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
8771{
8772#ifdef RT_ARCH_AMD64
8773 /* pinsrw vecsrc, gpr, #iWord. */
8774 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8775 if (iVecRegDst >= 8 || iGprSrc >= 8)
8776 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8777 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8778 pCodeBuf[off++] = 0x0f;
8779 pCodeBuf[off++] = 0xc4;
8780 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8781 pCodeBuf[off++] = iWord;
8782#elif defined(RT_ARCH_ARM64)
8783 /* ins vecsrc[iWord], gpr */
8784 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iWord, kArmv8InstrUmovInsSz_U16);
8785#else
8786# error "port me"
8787#endif
8788 return off;
8789}
8790
8791
8792/**
8793 * Emits a vecdst[x] = gprsrc store, 16-bit.
8794 */
8795DECL_INLINE_THROW(uint32_t)
8796iemNativeEmitSimdStoreGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
8797{
8798 Assert(iWord <= 15);
8799
8800#ifdef RT_ARCH_AMD64
8801 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iGprSrc, iWord);
8802#elif defined(RT_ARCH_ARM64)
8803 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iWord);
8804#else
8805# error "port me"
8806#endif
8807 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8808 return off;
8809}
8810
8811
8812/**
8813 * Emits a vecdst[x] = gprsrc store, 8-bit.
8814 */
8815DECL_FORCE_INLINE(uint32_t)
8816iemNativeEmitSimdStoreGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
8817{
8818#ifdef RT_ARCH_AMD64
8819 /* pinsrb vecsrc, gpr, #iByte (ASSUMES SSE4.1). */
8820 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8821 if (iVecRegDst >= 8 || iGprSrc >= 8)
8822 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8823 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8824 pCodeBuf[off++] = 0x0f;
8825 pCodeBuf[off++] = 0x3a;
8826 pCodeBuf[off++] = 0x20;
8827 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8828 pCodeBuf[off++] = iByte;
8829#elif defined(RT_ARCH_ARM64)
8830 /* ins vecsrc[iByte], gpr */
8831 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iByte, kArmv8InstrUmovInsSz_U8);
8832#else
8833# error "port me"
8834#endif
8835 return off;
8836}
8837
8838
8839/**
8840 * Emits a vecdst[x] = gprsrc store, 8-bit.
8841 */
8842DECL_INLINE_THROW(uint32_t)
8843iemNativeEmitSimdStoreGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
8844{
8845 Assert(iByte <= 15);
8846
8847#ifdef RT_ARCH_AMD64
8848 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iByte);
8849#elif defined(RT_ARCH_ARM64)
8850 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iByte);
8851#else
8852# error "port me"
8853#endif
8854 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8855 return off;
8856}
8857
8858
8859/**
8860 * Emits a vecdst.au32[iDWord] = 0 store.
8861 */
8862DECL_FORCE_INLINE(uint32_t)
8863iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8864{
8865 Assert(iDWord <= 7);
8866
8867#ifdef RT_ARCH_AMD64
8868 /*
8869 * xor tmp0, tmp0
8870 * pinsrd xmm, tmp0, iDword
8871 */
8872 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
8873 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8874 pCodeBuf[off++] = 0x33;
8875 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
8876 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(&pCodeBuf[off], off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
8877#elif defined(RT_ARCH_ARM64)
8878 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8879 Assert(!(iVecReg & 0x1));
8880 /* ins vecsrc[iDWord], wzr */
8881 if (iDWord >= 4)
8882 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
8883 else
8884 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
8885#else
8886# error "port me"
8887#endif
8888 return off;
8889}
8890
8891
8892/**
8893 * Emits a vecdst.au32[iDWord] = 0 store.
8894 */
8895DECL_INLINE_THROW(uint32_t)
8896iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8897{
8898
8899#ifdef RT_ARCH_AMD64
8900 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
8901#elif defined(RT_ARCH_ARM64)
8902 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
8903#else
8904# error "port me"
8905#endif
8906 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8907 return off;
8908}
8909
8910
8911/**
8912 * Emits a vecdst[0:127] = 0 store.
8913 */
8914DECL_FORCE_INLINE(uint32_t)
8915iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8916{
8917#ifdef RT_ARCH_AMD64
8918 /* pxor xmm, xmm */
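    /* 66h 0Fh EFh is pxor; XORing a register with itself yields all zero bits. */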
8919 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8920 if (iVecReg >= 8)
8921 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
8922 pCodeBuf[off++] = 0x0f;
8923 pCodeBuf[off++] = 0xef;
8924 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8925#elif defined(RT_ARCH_ARM64)
8926 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8927 Assert(!(iVecReg & 0x1));
8928 /* eor vecreg, vecreg, vecreg */
8929 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8930#else
8931# error "port me"
8932#endif
8933 return off;
8934}
8935
8936
8937/**
8938 * Emits a vecdst[0:127] = 0 store.
8939 */
8940DECL_INLINE_THROW(uint32_t)
8941iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8942{
8943#ifdef RT_ARCH_AMD64
8944 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8945#elif defined(RT_ARCH_ARM64)
8946 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8947#else
8948# error "port me"
8949#endif
8950 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8951 return off;
8952}
8953
8954
8955/**
8956 * Emits a vecdst[128:255] = 0 store.
8957 */
8958DECL_FORCE_INLINE(uint32_t)
8959iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8960{
8961#ifdef RT_ARCH_AMD64
8962 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
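    /* Any VEX.128 encoded instruction that writes an XMM register zeroes bits 255:128
       of the corresponding YMM register, which is what clears the upper half here. */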
8963 if (iVecReg < 8)
8964 {
8965 pCodeBuf[off++] = X86_OP_VEX2;
8966 pCodeBuf[off++] = 0xf9;
8967 }
8968 else
8969 {
8970 pCodeBuf[off++] = X86_OP_VEX3;
8971 pCodeBuf[off++] = 0x41;
8972 pCodeBuf[off++] = 0x79;
8973 }
8974 pCodeBuf[off++] = 0x6f;
8975 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8976#elif defined(RT_ARCH_ARM64)
8977 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8978 Assert(!(iVecReg & 0x1));
8979 /* eor vecreg, vecreg, vecreg */
8980 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8981#else
8982# error "port me"
8983#endif
8984 return off;
8985}
8986
8987
8988/**
8989 * Emits a vecdst[128:255] = 0 store.
8990 */
8991DECL_INLINE_THROW(uint32_t)
8992iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8993{
8994#ifdef RT_ARCH_AMD64
8995 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
8996#elif defined(RT_ARCH_ARM64)
8997 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8998#else
8999# error "port me"
9000#endif
9001 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9002 return off;
9003}
9004
9005
9006/**
9007 * Emits a vecdst[0:255] = 0 store.
9008 */
9009DECL_FORCE_INLINE(uint32_t)
9010iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9011{
9012#ifdef RT_ARCH_AMD64
9013 /* vpxor ymm, ymm, ymm */
9014 if (iVecReg < 8)
9015 {
9016 pCodeBuf[off++] = X86_OP_VEX2;
9017 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9018 }
9019 else
9020 {
9021 pCodeBuf[off++] = X86_OP_VEX3;
9022 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
9023 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9024 }
9025 pCodeBuf[off++] = 0xef;
9026 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9027#elif defined(RT_ARCH_ARM64)
9028 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9029 Assert(!(iVecReg & 0x1));
9030 /* eor vecreg, vecreg, vecreg */
9031 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
9032 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9033#else
9034# error "port me"
9035#endif
9036 return off;
9037}
9038
9039
9040/**
9041 * Emits a vecdst[0:255] = 0 store.
9042 */
9043DECL_INLINE_THROW(uint32_t)
9044iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9045{
9046#ifdef RT_ARCH_AMD64
9047 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
9048#elif defined(RT_ARCH_ARM64)
9049 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
9050#else
9051# error "port me"
9052#endif
9053 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9054 return off;
9055}
9056
9057
9058/**
9059 * Emits a vecdst = gprsrc broadcast, 8-bit.
9060 */
9061DECL_FORCE_INLINE(uint32_t)
9062iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9063{
9064#ifdef RT_ARCH_AMD64
9065 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE 4.1) */
9066 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9067 if (iVecRegDst >= 8 || iGprSrc >= 8)
9068 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9069 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9070 pCodeBuf[off++] = 0x0f;
9071 pCodeBuf[off++] = 0x3a;
9072 pCodeBuf[off++] = 0x20;
9073 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9074 pCodeBuf[off++] = 0x00;
9075
9076 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
9077 pCodeBuf[off++] = X86_OP_VEX3;
9078 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9079 | 0x02 /* opcode map. */
9080 | ( iVecRegDst >= 8
9081 ? 0
9082 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9083 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9084 pCodeBuf[off++] = 0x78;
9085 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9086#elif defined(RT_ARCH_ARM64)
9087 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9088 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9089
9090 /* dup vecsrc, gpr */
9091 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
9092 if (f256Bit)
9093 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
9094#else
9095# error "port me"
9096#endif
9097 return off;
9098}
9099
9100
9101/**
9102 * Emits a vecdst = gprsrc broadcast, 8-bit.
9103 */
9104DECL_INLINE_THROW(uint32_t)
9105iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9106{
9107#ifdef RT_ARCH_AMD64
9108 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9109#elif defined(RT_ARCH_ARM64)
9110 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9111#else
9112# error "port me"
9113#endif
9114 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9115 return off;
9116}
9117
9118
9119/**
9120 * Emits a vecdst = gprsrc broadcast, 16-bit.
9121 */
9122DECL_FORCE_INLINE(uint32_t)
9123iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9124{
9125#ifdef RT_ARCH_AMD64
9126 /* pinsrw vecdst, gpr, #0 */
9127 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9128 if (iVecRegDst >= 8 || iGprSrc >= 8)
9129 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9130 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9131 pCodeBuf[off++] = 0x0f;
9132 pCodeBuf[off++] = 0xc4;
9133 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9134 pCodeBuf[off++] = 0x00;
9135
9136 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
9137 pCodeBuf[off++] = X86_OP_VEX3;
9138 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9139 | 0x02 /* opcode map. */
9140 | ( iVecRegDst >= 8
9141 ? 0
9142 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9143 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9144 pCodeBuf[off++] = 0x79;
9145 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9146#elif defined(RT_ARCH_ARM64)
9147 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9148 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9149
9150 /* dup vecsrc, gpr */
9151 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
9152 if (f256Bit)
9153 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
9154#else
9155# error "port me"
9156#endif
9157 return off;
9158}
9159
9160
9161/**
9162 * Emits a vecdst = gprsrc broadcast, 16-bit.
9163 */
9164DECL_INLINE_THROW(uint32_t)
9165iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9166{
9167#ifdef RT_ARCH_AMD64
9168 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9169#elif defined(RT_ARCH_ARM64)
9170 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9171#else
9172# error "port me"
9173#endif
9174 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9175 return off;
9176}
9177
9178
9179/**
9180 * Emits a vecdst = gprsrc broadcast, 32-bit.
9181 */
9182DECL_FORCE_INLINE(uint32_t)
9183iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9184{
9185#ifdef RT_ARCH_AMD64
9186 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
9187 * vbroadcast needs a memory operand or another xmm register to work... */
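    /* Hence the two-step sequence: first insert the GPR into element 0 of the destination
       XMM register, then broadcast that register to all elements with vpbroadcastd. */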
9188
9189 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
9190 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9191 if (iVecRegDst >= 8 || iGprSrc >= 8)
9192 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9193 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9194 pCodeBuf[off++] = 0x0f;
9195 pCodeBuf[off++] = 0x3a;
9196 pCodeBuf[off++] = 0x22;
9197 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9198 pCodeBuf[off++] = 0x00;
9199
9200 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
9201 pCodeBuf[off++] = X86_OP_VEX3;
9202 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9203 | 0x02 /* opcode map. */
9204 | ( iVecRegDst >= 8
9205 ? 0
9206 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9207 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9208 pCodeBuf[off++] = 0x58;
9209 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9210#elif defined(RT_ARCH_ARM64)
9211 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9212 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9213
9214 /* dup vecsrc, gpr */
9215 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
9216 if (f256Bit)
9217 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
9218#else
9219# error "port me"
9220#endif
9221 return off;
9222}
9223
9224
9225/**
9226 * Emits a vecdst = gprsrc broadcast, 32-bit.
9227 */
9228DECL_INLINE_THROW(uint32_t)
9229iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9230{
9231#ifdef RT_ARCH_AMD64
9232 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9233#elif defined(RT_ARCH_ARM64)
9234 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9235#else
9236# error "port me"
9237#endif
9238 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9239 return off;
9240}
9241
9242
9243/**
9244 * Emits a vecdst = gprsrc broadcast, 64-bit.
9245 */
9246DECL_FORCE_INLINE(uint32_t)
9247iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9248{
9249#ifdef RT_ARCH_AMD64
9250 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
9251 * vbroadcast needs a memory operand or another xmm register to work... */
9252
9253 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
9254 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9255 pCodeBuf[off++] = X86_OP_REX_W
9256 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9257 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9258 pCodeBuf[off++] = 0x0f;
9259 pCodeBuf[off++] = 0x3a;
9260 pCodeBuf[off++] = 0x22;
9261 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9262 pCodeBuf[off++] = 0x00;
9263
9264 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
9265 pCodeBuf[off++] = X86_OP_VEX3;
9266 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9267 | 0x02 /* opcode map. */
9268 | ( iVecRegDst >= 8
9269 ? 0
9270 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9271 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9272 pCodeBuf[off++] = 0x59;
9273 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9274#elif defined(RT_ARCH_ARM64)
9275 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9276 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9277
9278 /* dup vecsrc, gpr */
9279 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
9280 if (f256Bit)
9281 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
9282#else
9283# error "port me"
9284#endif
9285 return off;
9286}
9287
9288
9289/**
9290 * Emits a vecdst = gprsrc broadcast, 64-bit.
9291 */
9292DECL_INLINE_THROW(uint32_t)
9293iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9294{
9295#ifdef RT_ARCH_AMD64
9296 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
9297#elif defined(RT_ARCH_ARM64)
9298 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9299#else
9300# error "port me"
9301#endif
9302 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9303 return off;
9304}
9305
9306
9307/**
9308 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9309 */
9310DECL_FORCE_INLINE(uint32_t)
9311iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9312{
9313#ifdef RT_ARCH_AMD64
9314 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
9315
9316 /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
9317 pCodeBuf[off++] = X86_OP_VEX3;
9318 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9319 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9320 pCodeBuf[off++] = 0x38;
9321 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9322 pCodeBuf[off++] = 0x01; /* Immediate */
9323#elif defined(RT_ARCH_ARM64)
9324 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9325 Assert(!(iVecRegDst & 0x1));
9326
9327 /* mov dst, src; alias for: orr dst, src, src */
9328 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9329 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
9330#else
9331# error "port me"
9332#endif
9333 return off;
9334}
9335
9336
9337/**
9338 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9339 */
9340DECL_INLINE_THROW(uint32_t)
9341iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9342{
9343#ifdef RT_ARCH_AMD64
9344 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
9345#elif defined(RT_ARCH_ARM64)
9346 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
9347#else
9348# error "port me"
9349#endif
9350 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9351 return off;
9352}
9353
9354#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
9355
9356/** @} */
9357
9358#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
9359