VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h @ 103892

Last change on this file since 103892 was 103892, checked in by vboxsync, 11 months ago

VMM/IEM: Fix instruction length for amd64, bugref:10614

/* $Id: IEMN8veRecompilerEmit.h 103892 2024-03-18 11:59:39Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline    Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = 0xd503201f;

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
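
/*
 * Usage sketch (illustrative, not from this file): a block emitter might drop
 * a marker in front of each recompiled guest instruction so the spot is easy
 * to find in the disassembly, with uInfo as a free-form payload, e.g.:
 *
 *     off = iemNativeEmitMarker(pReNative, off, idxInstr);
 *
 * Here idxInstr is a hypothetical instruction counter.  With uInfo == 0 this
 * costs a single-byte NOP on AMD64, otherwise a 7-byte 'nop [disp32]'
 * carrying uInfo in the displacement field.
 */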


/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */
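
/*
 * Illustration of the encodings chosen above (ModRM/SIB tail bytes only, not
 * emitted anywhere in this file):  with iGprReg=0 (RAX) and offDisp=0 the
 * helper produces
 *
 *     base RCX: 01        mod=0            -> [rcx]
 *     base RSP: 04 24     mod=0 + SIB      -> [rsp]   (rm=4 escapes to SIB)
 *     base RBP: 45 00     mod=1 + disp8=0  -> [rbp]   (mod=0/rm=5 would mean [rip+disp32])
 *
 * which is why the xSP and xBP low-register aliases are special-cased.
 */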

/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 10 instruction bytes.
 *      - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply the remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29).  So, we keep this bit in a variable and set it
     * after emitting the first non-zero immediate component, switching to
     * movk for the remainder.
     */
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = ( uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

    /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
     *        clang 12.x does that, only to use the 'x' version for the
     *        addressing in the following ldr. */

#else
# error "port me"
#endif
    return off;
}
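
/*
 * Worked examples for the ARM64 strategy above (illustrative, not from this
 * file):
 *
 *     uImm64 = 0x0000000100020003:  one zero half-word, none are 0xffff, so
 *         movz x0, #3; movk x0, #2, lsl #16; movk x0, #1, lsl #32  (3 insns)
 *
 *     uImm64 = 0xffffffffffff0000:  three 0xffff half-words, so the movn
 *         path: movn x0, #0xffff                                   (1 insn,
 *         since ~0xffff == 0xffffffffffff0000 and the remaining half-words
 *         are already all ones).
 */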


/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 6 instruction bytes.
 *      - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32,                    0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16,             1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32,                   0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}
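
/*
 * For instance (illustrative, not from this file): uImm32 = 0xffff1234 hits
 * the '(uImm32 >> 16) == 0xffff' case and becomes a single
 * 'movn w0, #0xedcb' (~0xedcb == 0xffff1234 in 32 bits), while
 * uImm32 = 0x12345678 needs the two-instruction movz+movk fallback.
 */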


/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use a temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}
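
/*
 * To make the three-way selection above concrete (offsets illustrative, not
 * the real VMCPU layout): for an 8-byte access, offVCpu = 0x100 encodes
 * directly as 'ldr Xt, [PVMCPU, #0x100]' (scaled unsigned imm12, limit
 * 4K*8 bytes); an offset past that limit but within 4K*8 of cpum.GstCtx is
 * rebased on PCPUMCTX instead; anything else costs a mov/movk sequence into
 * iGprTmp plus 'ldr Xt, [PVMCPU, Xtmp]'.
 */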

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use a temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: idxTmp1 is always required!  idxTmp2 is needed when the offset
 *       cannot be encoded as an immediate.  The @a offVCpu immediate range is
 *       0..8190 bytes from VMCPU and the same from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}
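
/*
 * Usage sketch (register indexes are hypothetical, just to illustrate the
 * temp-register contract): on ARM64 a call like
 *
 *     off = iemNativeEmitStoreImmToVCpuU16Ex(pCodeBuf, off, uImm, offVCpu, idxTmp1, idxTmp2);
 *
 * emits 'movz idxTmp1, #uImm' followed by the store, so idxTmp1 must always
 * be supplied there and idxTmp2 only when offVCpu needs a register; on AMD64
 * both temps are ignored.
 */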


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load effective address to a GPR of a VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                         offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}
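
/*
 * For example (field name purely illustrative): instead of
 * RT_UOFFSETOF(VMCPU, iem.s.SomeStatCounter) one can write
 *
 *     iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.SomeStatCounter)
 *
 * and the compiler will reject a field that isn't actually a STAMCOUNTER
 * instead of silently producing an offset to the wrong kind of member.
 */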


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to 16-bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
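
/*
 * A note on the ARM64 mask handling above: Armv8A64ConvertMask32ToImmRImmS
 * only succeeds for masks expressible as ARM64 logical immediates, i.e. a
 * single (rotated) run of set bits.  For example (values illustrative):
 * 0x00000ff0 encodes directly into the orr/and instruction, whereas
 * 0x00000005 (bits 0 and 2) does not and falls back to
 * iemNativeRegAllocTmpImm plus a register-form ORR/AND.
 */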


/**
 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* and dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_W | X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_W | X86_OP_REX_R;
    else
        pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst, src;   alias for: orr dst, xzr, src */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);

#else
# error "port me"
#endif
    return off;
}
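
/*
 * Encoding illustration (not from this file): iGprDst=8 (r8), iGprSrc=1 (rcx)
 * yields the bytes 4C 8B C1, i.e. 'mov r8, rcx' with REX.W+REX.R, since for
 * opcode 8Bh the destination sits in ModRM.reg (extended via REX.R) and the
 * source in ModRM.rm (extended via REX.B).
 */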


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst32, src32;   alias for: orr dst32, wzr, src32 */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movzx Gv,Ew */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb7;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* and gprdst, gprsrc, #0xffff */
# if 1
    Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
# else
    Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[7:0] load.
 * @note Bits 63 thru 8 are cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movzx Gv,Eb */
    if (iGprDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    else if (iGprSrc >= 4)
        pCodeBuf[off++] = X86_OP_REX;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb6;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* and gprdst, gprsrc, #0xff */
    Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[7:0] load.
 * @note Bits 63 thru 8 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
1554
1555
1556/**
1557 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1558 * @note Bits 63 thru 8 are cleared.
1559 */
1560DECL_INLINE_THROW(uint32_t)
1561iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1562{
1563#ifdef RT_ARCH_AMD64
1564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1565
1566 /* movzx Gv,Ew */
1567 if ((iGprDst | iGprSrc) >= 8)
1568 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1569 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1570 : X86_OP_REX_R;
1571 pbCodeBuf[off++] = 0x0f;
1572 pbCodeBuf[off++] = 0xb7;
1573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1574
1575 /* shr Ev,8 */
1576 if (iGprDst >= 8)
1577 pbCodeBuf[off++] = X86_OP_REX_B;
1578 pbCodeBuf[off++] = 0xc1;
1579 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1580 pbCodeBuf[off++] = 8;
1581
1582#elif defined(RT_ARCH_ARM64)
1583 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1585 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1586
1587#else
1588# error "port me"
1589#endif
1590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1591 return off;
1592}
1593
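/*
 * Worked example (illustrative): if iGprSrc holds 0x1234, the code above
 * leaves 0x12 in iGprDst, the AH/CH/DH/BH style high byte of the low word.
 */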
1594
1595/**
1596 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1597 */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1600{
1601#ifdef RT_ARCH_AMD64
1602 /* movsxd r64, r/m32 */
1603 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1604 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1605 pbCodeBuf[off++] = 0x63;
1606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1607
1608#elif defined(RT_ARCH_ARM64)
1609 /* sxtw dst, src */
1610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1611 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1612
1613#else
1614# error "port me"
1615#endif
1616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1617 return off;
1618}
1619
1620
1621/**
1622 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1623 */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1626{
1627#ifdef RT_ARCH_AMD64
1628 /* movsx r64, r/m16 */
1629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xbf;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635#elif defined(RT_ARCH_ARM64)
1636 /* sxth dst, src */
1637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1638 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1639
1640#else
1641# error "port me"
1642#endif
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644 return off;
1645}
1646
1647
1648/**
1649 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1650 */
1651DECL_INLINE_THROW(uint32_t)
1652iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1653{
1654#ifdef RT_ARCH_AMD64
1655 /* movsx r32, r/m16 */
1656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1657 if (iGprDst >= 8 || iGprSrc >= 8)
1658 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1659 pbCodeBuf[off++] = 0x0f;
1660 pbCodeBuf[off++] = 0xbf;
1661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 /* sxth dst32, src */
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1666 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1667
1668#else
1669# error "port me"
1670#endif
1671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1672 return off;
1673}
1674
1675
1676/**
1677 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1678 */
1679DECL_INLINE_THROW(uint32_t)
1680iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /* movsx r64, r/m8 */
1684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1686 pbCodeBuf[off++] = 0x0f;
1687 pbCodeBuf[off++] = 0xbe;
1688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1689
1690#elif defined(RT_ARCH_ARM64)
1691 /* sxtb dst, src */
1692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1693 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1694
1695#else
1696# error "port me"
1697#endif
1698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1699 return off;
1700}
1701
1702
1703/**
1704 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1705 * @note Bits 63 thru 32 are cleared.
1706 */
1707DECL_INLINE_THROW(uint32_t)
1708iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1709{
1710#ifdef RT_ARCH_AMD64
1711 /* movsx r32, r/m8 */
1712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1713 if (iGprDst >= 8 || iGprSrc >= 8)
1714 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1715 else if (iGprSrc >= 4)
1716 pbCodeBuf[off++] = X86_OP_REX;
1717 pbCodeBuf[off++] = 0x0f;
1718 pbCodeBuf[off++] = 0xbe;
1719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 /* sxtb dst32, src32 */
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1724 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1725
1726#else
1727# error "port me"
1728#endif
1729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1730 return off;
1731}
1732
1733
1734/**
1735 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1736 * @note Bits 63 thru 16 are cleared.
1737 */
1738DECL_INLINE_THROW(uint32_t)
1739iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1740{
1741#ifdef RT_ARCH_AMD64
1742 /* movsx r16, r/m8 */
1743 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1744 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1745 if (iGprDst >= 8 || iGprSrc >= 8)
1746 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1747 else if (iGprSrc >= 4)
1748 pbCodeBuf[off++] = X86_OP_REX;
1749 pbCodeBuf[off++] = 0x0f;
1750 pbCodeBuf[off++] = 0xbe;
1751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1752
1753 /* movzx r32, r/m16 */
1754 if (iGprDst >= 8)
1755 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1756 pbCodeBuf[off++] = 0x0f;
1757 pbCodeBuf[off++] = 0xb7;
1758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1759
1760#elif defined(RT_ARCH_ARM64)
1761 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1763 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1764 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1766
1767#else
1768# error "port me"
1769#endif
1770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1771 return off;
1772}
1773
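/*
 * Worked example (illustrative): for an iGprSrc byte value of 0x80 (-128)
 * the emitted code first sign-extends to 0xffffff80 and then masks with
 * 0xffff, leaving 0x0000ff80 in iGprDst, i.e. the 16-bit encoding of -128.
 */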
1774
1775/**
1776 * Emits a gprdst = gprsrc + addend load.
1777 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1778 */
1779#ifdef RT_ARCH_AMD64
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1782 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1783{
1784 Assert(iAddend != 0);
1785
1786 /* lea gprdst, [gprsrc + iAddend] */
1787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1789 pbCodeBuf[off++] = 0x8d;
1790 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1792 return off;
1793}
1794
1795#elif defined(RT_ARCH_ARM64)
1796DECL_INLINE_THROW(uint32_t)
1797iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1798 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1799{
1800 if ((uint32_t)iAddend < 4096)
1801 {
1802 /* add dst, src, uimm12 */
1803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1805 }
1806 else if ((uint32_t)-iAddend < 4096)
1807 {
1808 /* sub dst, src, uimm12 */
1809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1811 }
1812 else
1813 {
1814 Assert(iGprSrc != iGprDst);
1815 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1818 }
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820 return off;
1821}
1822#else
1823# error "port me"
1824#endif
1825
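/*
 * Usage sketch (illustrative; the register indices and addend are made up):
 *
 *      off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, 3, 6, 0x20);
 *
 * On AMD64 this emits a single 'lea', on ARM64 a single 'add x3, x6, #0x20',
 * since the addend fits in an unsigned imm12.
 */
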
1826/**
1827 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1828 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1829 */
1830#ifdef RT_ARCH_AMD64
1831DECL_INLINE_THROW(uint32_t)
1832iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1833 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1834#else
1835DECL_INLINE_THROW(uint32_t)
1836iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1837 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1838#endif
1839{
1840 if (iAddend != 0)
1841 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1842 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1843}
1844
1845
1846/**
1847 * Emits a gprdst = gprsrc32 + addend load.
1848 * @note Bits 63 thru 32 are cleared.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1852 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1853{
1854 Assert(iAddend != 0);
1855
1856#ifdef RT_ARCH_AMD64
1857 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1860 if ((iGprDst | iGprSrc) >= 8)
1861 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1862 pbCodeBuf[off++] = 0x8d;
1863 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1864
1865#elif defined(RT_ARCH_ARM64)
1866 if ((uint32_t)iAddend < 4096)
1867 {
1868 /* add dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1871 }
1872 else if ((uint32_t)-iAddend < 4096)
1873 {
1874 /* sub dst, src, uimm12 */
1875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1877 }
1878 else
1879 {
1880 Assert(iGprSrc != iGprDst);
1881 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1884 }
1885
1886#else
1887# error "port me"
1888#endif
1889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1890 return off;
1891}
1892
1893
1894/**
1895 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1896 */
1897DECL_INLINE_THROW(uint32_t)
1898iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1899 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1900{
1901 if (iAddend != 0)
1902 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1903 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1904}
1905
1906
1907/**
1908 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1909 * destination.
1910 */
1911DECL_FORCE_INLINE(uint32_t)
1912iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1913{
1914#ifdef RT_ARCH_AMD64
1915 /* mov reg16, r/m16 */
1916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1917 if (idxDst >= 8 || idxSrc >= 8)
1918 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1919 pCodeBuf[off++] = 0x8b;
1920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1921
1922#elif defined(RT_ARCH_ARM64)
1923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1924 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1925
1926#else
1927# error "Port me!"
1928#endif
1929 return off;
1930}
1931
1932
1933/**
1934 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1935 * destination.
1936 */
1937DECL_INLINE_THROW(uint32_t)
1938iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1939{
1940#ifdef RT_ARCH_AMD64
1941 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1942#elif defined(RT_ARCH_ARM64)
1943 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1944#else
1945# error "Port me!"
1946#endif
1947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1948 return off;
1949}
1950
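/*
 * Usage sketch (illustrative; idxRegDst and idxRegSrc are hypothetical host
 * register indices): give 'mov ax, cx' its x86 semantics by updating only
 * bits 15:0 of the destination while bits 63:16 stay untouched:
 *
 *      off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegSrc);
 */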
1951
1952#ifdef RT_ARCH_AMD64
1953/**
1954 * Common bit of iemNativeEmitLoadGprByBp and friends.
1955 */
1956DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1957 PIEMRECOMPILERSTATE pReNativeAssert)
1958{
1959 if (offDisp < 128 && offDisp >= -128)
1960 {
1961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1962 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1963 }
1964 else
1965 {
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1967 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1968 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1969 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1970 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1971 }
1972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1973 return off;
1974}
1975#elif defined(RT_ARCH_ARM64)
1976/**
1977 * Common bit of iemNativeEmitLoadGprByBp and friends.
1978 */
1979DECL_FORCE_INLINE_THROW(uint32_t)
1980iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1981 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1982{
1983 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1984 {
1985 /* str w/ unsigned imm12 (scaled) */
1986 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1988 }
1989 else if (offDisp >= -256 && offDisp <= 255)
1990 {
1991 /* stur w/ signed imm9 (unscaled) */
1992 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1993 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1994 }
1995 else
1996 {
1997 /* Use temporary indexing register. */
1998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2001 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2002 }
2003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2004 return off;
2005}
2006#endif
2007
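/*
 * Encoding note with a worked example (illustrative): on AMD64 the helper
 * above picks the short ModR/M form whenever the displacement fits in a
 * signed byte, so 'mov rax, [rbp - 8]' encodes as 48 8B 45 F8 (4 bytes)
 * while 'mov rax, [rbp - 0x100]' needs the disp32 form 48 8B 85 00 FF FF FF.
 */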
2008
2009/**
2010 * Emits a 64-bit GPR load instruction with a BP relative source address.
2011 */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2014{
2015#ifdef RT_ARCH_AMD64
2016 /* mov gprdst, qword [rbp + offDisp] */
2017 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2018 if (iGprDst < 8)
2019 pbCodeBuf[off++] = X86_OP_REX_W;
2020 else
2021 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2022 pbCodeBuf[off++] = 0x8b;
2023 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2024
2025#elif defined(RT_ARCH_ARM64)
2026 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2027
2028#else
2029# error "port me"
2030#endif
2031}
2032
2033
2034/**
2035 * Emits a 32-bit GPR load instruction with a BP relative source address.
2036 * @note Bits 63 thru 32 of the GPR will be cleared.
2037 */
2038DECL_INLINE_THROW(uint32_t)
2039iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2040{
2041#ifdef RT_ARCH_AMD64
2042 /* mov gprdst, dword [rbp + offDisp] */
2043 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2044 if (iGprDst >= 8)
2045 pbCodeBuf[off++] = X86_OP_REX_R;
2046 pbCodeBuf[off++] = 0x8b;
2047 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2048
2049#elif defined(RT_ARCH_ARM64)
2050 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2051
2052#else
2053# error "port me"
2054#endif
2055}
2056
2057
2058/**
2059 * Emits a 16-bit GPR load instruction with a BP relative source address.
2060 * @note Bits 63 thru 16 of the GPR will be cleared.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2064{
2065#ifdef RT_ARCH_AMD64
2066 /* movzx gprdst, word [rbp + offDisp] */
2067 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2068 if (iGprDst >= 8)
2069 pbCodeBuf[off++] = X86_OP_REX_R;
2070 pbCodeBuf[off++] = 0x0f;
2071 pbCodeBuf[off++] = 0xb7;
2072 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2073
2074#elif defined(RT_ARCH_ARM64)
2075 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2076
2077#else
2078# error "port me"
2079#endif
2080}
2081
2082
2083/**
2084 * Emits an 8-bit GPR load instruction with a BP relative source address.
2085 * @note Bits 63 thru 8 of the GPR will be cleared.
2086 */
2087DECL_INLINE_THROW(uint32_t)
2088iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2089{
2090#ifdef RT_ARCH_AMD64
2091 /* movzx gprdst, byte [rbp + offDisp] */
2092 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2093 if (iGprDst >= 8)
2094 pbCodeBuf[off++] = X86_OP_REX_R;
2095 pbCodeBuf[off++] = 0x0f;
2096 pbCodeBuf[off++] = 0xb6;
2097 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2098
2099#elif defined(RT_ARCH_ARM64)
2100 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2101
2102#else
2103# error "port me"
2104#endif
2105}
2106
2107
2108/**
2109 * Emits a load effective address to a GPR with a BP relative source address.
2110 */
2111DECL_INLINE_THROW(uint32_t)
2112iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2113{
2114#ifdef RT_ARCH_AMD64
2115 /* lea gprdst, [rbp + offDisp] */
2116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2117 if (iGprDst < 8)
2118 pbCodeBuf[off++] = X86_OP_REX_W;
2119 else
2120 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2121 pbCodeBuf[off++] = 0x8d;
2122 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2123
2124#elif defined(RT_ARCH_ARM64)
2125 if ((uint32_t)offDisp < (unsigned)_4K)
2126 {
2127 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2128 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2129 }
2130 else if ((uint32_t)-offDisp < (unsigned)_4K)
2131 {
2132 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2133 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2134 }
2135 else
2136 {
2137 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2138 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2139 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2140 if (offDisp >= 0)
2141 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2142 else
2143 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2144 }
2145
2146#else
2147# error "port me"
2148#endif
2149
2150 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2151 return off;
2152}
2153
2154
2155/**
2156 * Emits a 64-bit GPR store with a BP relative destination address.
2157 *
2158 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2159 */
2160DECL_INLINE_THROW(uint32_t)
2161iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2162{
2163#ifdef RT_ARCH_AMD64
2164 /* mov qword [rbp + offDisp], gprsrc */
2165 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2166 if (iGprSrc < 8)
2167 pbCodeBuf[off++] = X86_OP_REX_W;
2168 else
2169 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2170 pbCodeBuf[off++] = 0x89;
2171 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2172
2173#elif defined(RT_ARCH_ARM64)
2174 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2175 {
2176 /* str w/ unsigned imm12 (scaled) */
2177 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2178 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2179 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2180 }
2181 else if (offDisp >= -256 && offDisp <= 255)
2182 {
2183 /* stur w/ signed imm9 (unscaled) */
2184 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2185 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2186 }
2187 else if ((uint32_t)-offDisp < (unsigned)_4K)
2188 {
2189 /* Use temporary indexing register w/ sub uimm12. */
2190 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2191 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2192 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2193 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2194 }
2195 else
2196 {
2197 /* Use temporary indexing register. */
2198 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2199 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2200 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2201 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2202 }
2203 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2204 return off;
2205
2206#else
2207# error "Port me!"
2208#endif
2209}
2210
2211
2212/**
2213 * Emits a 64-bit immediate store with a BP relative destination address.
2214 *
2215 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2216 */
2217DECL_INLINE_THROW(uint32_t)
2218iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2219{
2220#ifdef RT_ARCH_AMD64
2221 if ((int64_t)uImm64 == (int32_t)uImm64)
2222 {
2223 /* mov qword [rbp + offDisp], imm32 - sign extended */
2224 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2225 pbCodeBuf[off++] = X86_OP_REX_W;
2226 pbCodeBuf[off++] = 0xc7;
2227 if (offDisp < 128 && offDisp >= -128)
2228 {
2229 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2230 pbCodeBuf[off++] = (uint8_t)offDisp;
2231 }
2232 else
2233 {
2234 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2235 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2236 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2237 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2238 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2239 }
2240 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2241 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2242 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2243 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2244 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2245 return off;
2246 }
2247#endif
2248
2249 /* Load tmp0, imm64; Store tmp to bp+disp. */
2250 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2251 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2252}
2253
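/*
 * Usage sketch (illustrative; the stack slot offset is made up): store a
 * constant to a 64-bit stack slot. A value like this does not fit the
 * sign-extended imm32 form, so on both hosts it goes via
 * IEMNATIVE_REG_FIXED_TMP0:
 *
 *      off = iemNativeEmitStoreImm64ByBp(pReNative, off, -0x28, UINT64_C(0xdeadbeefcafe));
 */
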
2254#if defined(RT_ARCH_ARM64)
2255
2256/**
2257 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2258 *
2259 * @note Odd and large @a offDisp values require a temporary, unless it's a
2260 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2261 * caller does not heed this.
2262 *
2263 * @note DON'T try this with prefetch.
2264 */
2265DECL_FORCE_INLINE_THROW(uint32_t)
2266iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2267 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2268{
2269 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2270 {
2271 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2272 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2273 }
2274 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2275 && iGprReg != iGprBase)
2276 || iGprTmp != UINT8_MAX)
2277 {
2278 /* The offset is too large, so we must load it into a register and use
2279 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2280 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2281 if (iGprTmp == UINT8_MAX)
2282 iGprTmp = iGprReg;
2283 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2284 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2285 }
2286 else
2287# ifdef IEM_WITH_THROW_CATCH
2288 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2289# else
2290 AssertReleaseFailedStmt(off = UINT32_MAX);
2291# endif
2292 return off;
2293}
2294
2295/**
2296 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2297 */
2298DECL_FORCE_INLINE_THROW(uint32_t)
2299iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2300 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2301{
2302 /*
2303 * There are a couple of ldr variants that take an immediate offset, so
2304 * try to use those if we can; otherwise we have to use the temporary
2305 * register to help with the addressing.
2306 */
2307 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2308 {
2309 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2310 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2311 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2312 }
2313 else
2314 {
2315 /* The offset is too large, so we must load it into a register and use
2316 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2317 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2318 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2319
2320 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2321 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2322
2323 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2324 }
2325 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2326 return off;
2327}
2328
2329#endif /* RT_ARCH_ARM64 */
2330
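/*
 * Scaling note with a worked example (illustrative): the helpers above prefer
 * the scaled unsigned imm12 form, so a 64-bit load at offset 0x28 encodes the
 * immediate as 0x28 / 8 = 5 and offset 0x7ff8 as the maximum 4095; larger,
 * negative or misaligned displacements take the register index path instead.
 */
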
2331/**
2332 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2333 *
2334 * @note ARM64: Misaligned @a offDisp values and values not in the
2335 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp) if
2336 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2337 * does not heed this.
2338 */
2339DECL_FORCE_INLINE_THROW(uint32_t)
2340iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2341 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2342{
2343#ifdef RT_ARCH_AMD64
2344 /* mov reg64, mem64 */
2345 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2346 pCodeBuf[off++] = 0x8b;
2347 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2348 RT_NOREF(iGprTmp);
2349
2350#elif defined(RT_ARCH_ARM64)
2351 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2352 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2353
2354#else
2355# error "port me"
2356#endif
2357 return off;
2358}
2359
2360
2361/**
2362 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2363 */
2364DECL_INLINE_THROW(uint32_t)
2365iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2366{
2367#ifdef RT_ARCH_AMD64
2368 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2369 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2370
2371#elif defined(RT_ARCH_ARM64)
2372 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2373
2374#else
2375# error "port me"
2376#endif
2377 return off;
2378}
2379
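/*
 * Usage sketch (illustrative; the register indices and field offset are
 * hypothetical): load the 64-bit field at offset 0x18 off the base pointer
 * in host register 8 into host register 0:
 *
 *      off = iemNativeEmitLoadGprByGprU64(pReNative, off, 0, 8, 0x18);
 */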
2380
2381/**
2382 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2383 *
2384 * @note ARM64: Misaligned @a offDisp values and values not in the
2385 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp)
2386 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2387 * caller does not heed this.
2388 *
2389 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2390 */
2391DECL_FORCE_INLINE_THROW(uint32_t)
2392iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2393 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2394{
2395#ifdef RT_ARCH_AMD64
2396 /* mov reg32, mem32 */
2397 if (iGprDst >= 8 || iGprBase >= 8)
2398 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2399 pCodeBuf[off++] = 0x8b;
2400 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2401 RT_NOREF(iGprTmp);
2402
2403#elif defined(RT_ARCH_ARM64)
2404 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2405 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2406
2407#else
2408# error "port me"
2409#endif
2410 return off;
2411}
2412
2413
2414/**
2415 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2416 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2417 */
2418DECL_INLINE_THROW(uint32_t)
2419iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2420{
2421#ifdef RT_ARCH_AMD64
2422 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2423 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2424
2425#elif defined(RT_ARCH_ARM64)
2426 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2427
2428#else
2429# error "port me"
2430#endif
2431 return off;
2432}
2433
2434
2435/**
2436 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2437 * sign-extending the value to 64 bits.
2438 *
2439 * @note ARM64: Misaligned @a offDisp values and values not in the
2440 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp)
2441 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2442 * caller does not heed this.
2443 */
2444DECL_FORCE_INLINE_THROW(uint32_t)
2445iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2446 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2447{
2448#ifdef RT_ARCH_AMD64
2449 /* movsxd reg64, mem32 */
2450 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2451 pCodeBuf[off++] = 0x63;
2452 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2453 RT_NOREF(iGprTmp);
2454
2455#elif defined(RT_ARCH_ARM64)
2456 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2457 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2458
2459#else
2460# error "port me"
2461#endif
2462 return off;
2463}
2464
2465
2466/**
2467 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2468 *
2469 * @note ARM64: Misaligned @a offDisp values and values not in the
2470 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2471 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2472 * caller does not heed this.
2473 *
2474 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2475 */
2476DECL_FORCE_INLINE_THROW(uint32_t)
2477iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2478 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2479{
2480#ifdef RT_ARCH_AMD64
2481 /* movzx reg32, mem16 */
2482 if (iGprDst >= 8 || iGprBase >= 8)
2483 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2484 pCodeBuf[off++] = 0x0f;
2485 pCodeBuf[off++] = 0xb7;
2486 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2487 RT_NOREF(iGprTmp);
2488
2489#elif defined(RT_ARCH_ARM64)
2490 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2491 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2492
2493#else
2494# error "port me"
2495#endif
2496 return off;
2497}
2498
2499
2500/**
2501 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2502 * sign-extending the value to 64 bits.
2503 *
2504 * @note ARM64: Misaligned @a offDisp values and values not in the
2505 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2506 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2507 * caller does not heed this.
2508 */
2509DECL_FORCE_INLINE_THROW(uint32_t)
2510iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2511 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2512{
2513#ifdef RT_ARCH_AMD64
2514 /* movsx reg64, mem16 */
2515 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2516 pCodeBuf[off++] = 0x0f;
2517 pCodeBuf[off++] = 0xbf;
2518 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2519 RT_NOREF(iGprTmp);
2520
2521#elif defined(RT_ARCH_ARM64)
2522 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2523 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2524
2525#else
2526# error "port me"
2527#endif
2528 return off;
2529}
2530
2531
2532/**
2533 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2534 * sign-extending the value to 32 bits.
2535 *
2536 * @note ARM64: Misaligned @a offDisp values and values not in the
2537 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2538 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2539 * caller does not heed this.
2540 *
2541 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2542 */
2543DECL_FORCE_INLINE_THROW(uint32_t)
2544iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2545 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2546{
2547#ifdef RT_ARCH_AMD64
2548 /* movsx reg32, mem16 */
2549 if (iGprDst >= 8 || iGprBase >= 8)
2550 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2551 pCodeBuf[off++] = 0x0f;
2552 pCodeBuf[off++] = 0xbf;
2553 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2554 RT_NOREF(iGprTmp);
2555
2556#elif defined(RT_ARCH_ARM64)
2557 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2558 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2559
2560#else
2561# error "port me"
2562#endif
2563 return off;
2564}
2565
2566
2567/**
2568 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2569 *
2570 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2571 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2572 * same. Will assert / throw if caller does not heed this.
2573 *
2574 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2575 */
2576DECL_FORCE_INLINE_THROW(uint32_t)
2577iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2578 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2579{
2580#ifdef RT_ARCH_AMD64
2581 /* movzx reg32, mem8 */
2582 if (iGprDst >= 8 || iGprBase >= 8)
2583 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2584 pCodeBuf[off++] = 0x0f;
2585 pCodeBuf[off++] = 0xb6;
2586 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2587 RT_NOREF(iGprTmp);
2588
2589#elif defined(RT_ARCH_ARM64)
2590 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2591 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2592
2593#else
2594# error "port me"
2595#endif
2596 return off;
2597}
2598
2599
2600/**
2601 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2602 * sign-extending the value to 64 bits.
2603 *
2604 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2605 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2606 * same. Will assert / throw if caller does not heed this.
2607 */
2608DECL_FORCE_INLINE_THROW(uint32_t)
2609iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2610 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2611{
2612#ifdef RT_ARCH_AMD64
2613 /* movsx reg64, mem8 */
2614 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2615 pCodeBuf[off++] = 0x0f;
2616 pCodeBuf[off++] = 0xbe;
2617 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2618 RT_NOREF(iGprTmp);
2619
2620#elif defined(RT_ARCH_ARM64)
2621 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2622 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2623
2624#else
2625# error "port me"
2626#endif
2627 return off;
2628}
2629
2630
2631/**
2632 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2633 * sign-extending the value to 32 bits.
2634 *
2635 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2636 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2637 * same. Will assert / throw if caller does not heed this.
2638 *
2639 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2640 */
2641DECL_FORCE_INLINE_THROW(uint32_t)
2642iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2643 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2644{
2645#ifdef RT_ARCH_AMD64
2646 /* movsx reg32, mem8 */
2647 if (iGprDst >= 8 || iGprBase >= 8)
2648 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2649 pCodeBuf[off++] = 0x0f;
2650 pCodeBuf[off++] = 0xbe;
2651 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2652 RT_NOREF(iGprTmp);
2653
2654#elif defined(RT_ARCH_ARM64)
2655 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2656 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2657
2658#else
2659# error "port me"
2660#endif
2661 return off;
2662}
2663
2664
2665/**
2666 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2667 * sign-extending the value to 16 bits.
2668 *
2669 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2670 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2671 * same. Will assert / throw if caller does not heed this.
2672 *
2673 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2674 */
2675DECL_FORCE_INLINE_THROW(uint32_t)
2676iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2677 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2678{
2679#ifdef RT_ARCH_AMD64
2680 /* movsx reg32, mem8 */
2681 if (iGprDst >= 8 || iGprBase >= 8)
2682 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2683 pCodeBuf[off++] = 0x0f;
2684 pCodeBuf[off++] = 0xbe;
2685 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2686# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2687 /* and reg32, 0xffff */
2688 if (iGprDst >= 8)
2689 pCodeBuf[off++] = X86_OP_REX_B;
2690 pCodeBuf[off++] = 0x81;
2691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2692 pCodeBuf[off++] = 0xff;
2693 pCodeBuf[off++] = 0xff;
2694 pCodeBuf[off++] = 0;
2695 pCodeBuf[off++] = 0;
2696# else
2697 /* movzx reg32, reg16 */
2698 if (iGprDst >= 8)
2699 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2700 pCodeBuf[off++] = 0x0f;
2701 pCodeBuf[off++] = 0xb7;
2702 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2703# endif
2704 RT_NOREF(iGprTmp);
2705
2706#elif defined(RT_ARCH_ARM64)
2707 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2708 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2709 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2710 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2711
2712#else
2713# error "port me"
2714#endif
2715 return off;
2716}
2717
2718
2719/**
2720 * Emits a 64-bit GPR store via a GPR base address with a displacement.
2721 *
2722 * @note ARM64: Misaligned @a offDisp values and values not in the
2723 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp) if
2724 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2725 * does not heed this.
2726 */
2727DECL_FORCE_INLINE_THROW(uint32_t)
2728iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2729 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2730{
2731#ifdef RT_ARCH_AMD64
2732 /* mov mem64, reg64 */
2733 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2734 pCodeBuf[off++] = 0x89;
2735 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2736 RT_NOREF(iGprTmp);
2737
2738#elif defined(RT_ARCH_ARM64)
2739 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2740 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
2741
2742#else
2743# error "port me"
2744#endif
2745 return off;
2746}
2747
2748
2749/**
2750 * Emits a 32-bit GPR store via a GPR base address with a displacement.
2751 *
2752 * @note ARM64: Misaligned @a offDisp values and values not in the
2753 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp) if
2754 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2755 * does not heed this.
2756 */
2757DECL_FORCE_INLINE_THROW(uint32_t)
2758iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2759 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2760{
2761#ifdef RT_ARCH_AMD64
2762 /* mov mem32, reg32 */
2763 if (iGprSrc >= 8 || iGprBase >= 8)
2764 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2765 pCodeBuf[off++] = 0x89;
2766 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2767 RT_NOREF(iGprTmp);
2768
2769#elif defined(RT_ARCH_ARM64)
2770 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2771 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
2772
2773#else
2774# error "port me"
2775#endif
2776 return off;
2777}
2778
2779
2780/**
2781 * Emits a 16-bit GPR store via a GPR base address with a displacement.
2782 *
2783 * @note ARM64: Misaligned @a offDisp values and values not in the
2784 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp) if
2785 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2786 * does not heed this.
2787 */
2788DECL_FORCE_INLINE_THROW(uint32_t)
2789iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2790 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2791{
2792#ifdef RT_ARCH_AMD64
2793 /* mov mem16, reg16 */
2794 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2795 if (iGprSrc >= 8 || iGprBase >= 8)
2796 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2797 pCodeBuf[off++] = 0x89;
2798 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2799 RT_NOREF(iGprTmp);
2800
2801#elif defined(RT_ARCH_ARM64)
2802 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2803 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
2804
2805#else
2806# error "port me"
2807#endif
2808 return off;
2809}
2810
2811
2812/**
2813 * Emits an 8-bit GPR store via a GPR base address with a displacement.
2814 *
2815 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2816 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2817 * same. Will assert / throw if caller does not heed this.
2818 */
2819DECL_FORCE_INLINE_THROW(uint32_t)
2820iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2821 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2822{
2823#ifdef RT_ARCH_AMD64
2824 /* mov mem8, reg8 */
2825 if (iGprSrc >= 8 || iGprBase >= 8)
2826 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2827 else if (iGprSrc >= 4)
2828 pCodeBuf[off++] = X86_OP_REX;
2829 pCodeBuf[off++] = 0x88;
2830 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2831 RT_NOREF(iGprTmp);
2832
2833#elif defined(RT_ARCH_ARM64)
2834 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2835 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
2836
2837#else
2838# error "port me"
2839#endif
2840 return off;
2841}
2842
2843
2844/**
2845 * Emits a 64-bit immediate store via a GPR base address with a displacement.
2846 *
2847 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0), on
2848 * AMD64 it depends on the immediate value.
2849 *
2850 * @note ARM64: Misaligned @a offDisp values and values not in the
2851 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp) if
2852 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2853 * does not heed this.
2854 */
2855DECL_FORCE_INLINE_THROW(uint32_t)
2856iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
2857 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2858{
2859#ifdef RT_ARCH_AMD64
2860 if ((int32_t)uImm == (int64_t)uImm)
2861 {
2862 /* mov mem64, imm32 (sign-extended) */
2863 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2864 pCodeBuf[off++] = 0xc7;
2865 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2866 pCodeBuf[off++] = RT_BYTE1(uImm);
2867 pCodeBuf[off++] = RT_BYTE2(uImm);
2868 pCodeBuf[off++] = RT_BYTE3(uImm);
2869 pCodeBuf[off++] = RT_BYTE4(uImm);
2870 }
2871 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
2872 {
2873 /* require temporary register. */
2874 if (iGprImmTmp == UINT8_MAX)
2875 iGprImmTmp = iGprTmp;
2876 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2877 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
2878 }
2879 else
2880# ifdef IEM_WITH_THROW_CATCH
2881 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2882# else
2883 AssertReleaseFailedStmt(off = UINT32_MAX);
2884# endif
2885
2886#elif defined(RT_ARCH_ARM64)
2887 if (uImm == 0)
2888 iGprImmTmp = ARMV8_A64_REG_XZR;
2889 else
2890 {
2891 Assert(iGprImmTmp < 31);
2892 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2893 }
2894 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
2895
2896#else
2897# error "port me"
2898#endif
2899 return off;
2900}
2901
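/*
 * Usage sketch (illustrative; the instruction buffer size and base register
 * are made up): storing zero needs no immediate register on ARM64 because
 * the helper substitutes XZR; here uImm=0 and iGprBase=8:
 *
 *      off = iemNativeEmitStoreImm64ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 12),
 *                                           off, 0, 8);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 */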
2902
2903/**
2904 * Emits a 32-bit immediate store via a GPR base address with a displacement.
2905 *
2906 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2907 *
2908 * @note ARM64: Misaligned @a offDisp values and values not in the
2909 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp) if
2910 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2911 * does not heed this.
2912 */
2913DECL_FORCE_INLINE_THROW(uint32_t)
2914iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
2915 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2916{
2917#ifdef RT_ARCH_AMD64
2918 /* mov mem32, imm32 */
2919 if (iGprBase >= 8)
2920 pCodeBuf[off++] = X86_OP_REX_B;
2921 pCodeBuf[off++] = 0xc7;
2922 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2923 pCodeBuf[off++] = RT_BYTE1(uImm);
2924 pCodeBuf[off++] = RT_BYTE2(uImm);
2925 pCodeBuf[off++] = RT_BYTE3(uImm);
2926 pCodeBuf[off++] = RT_BYTE4(uImm);
2927 RT_NOREF(iGprImmTmp, iGprTmp);
2928
2929#elif defined(RT_ARCH_ARM64)
2931 if (uImm == 0)
2932 iGprImmTmp = ARMV8_A64_REG_XZR;
2933 else
2934 {
2935 Assert(iGprImmTmp < 31);
2936 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2937 }
2938 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
2939 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
2940
2941#else
2942# error "port me"
2943#endif
2944 return off;
2945}
2946
2947
2948/**
2949 * Emits a 16-bit immediate store via a GPR base address with a displacement.
2950 *
2951 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2952 *
2953 * @note ARM64: Misaligned @a offDisp values and values not in the
2954 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp) if
2955 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2956 * does not heed this.
2957 */
2958DECL_FORCE_INLINE_THROW(uint32_t)
2959iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
2960 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2961{
2962#ifdef RT_ARCH_AMD64
2963 /* mov mem16, imm16 */
2964 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2965 if (iGprBase >= 8)
2966 pCodeBuf[off++] = X86_OP_REX_B;
2967 pCodeBuf[off++] = 0xc7;
2968 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2969 pCodeBuf[off++] = RT_BYTE1(uImm);
2970 pCodeBuf[off++] = RT_BYTE2(uImm);
2971 RT_NOREF(iGprImmTmp, iGprTmp);
2972
2973#elif defined(RT_ARCH_ARM64)
2974 if (uImm == 0)
2975 iGprImmTmp = ARMV8_A64_REG_XZR;
2976 else
2977 {
2978 Assert(iGprImmTmp < 31);
2979 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
2980 }
2981 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
2982 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
2983
2984#else
2985# error "port me"
2986#endif
2987 return off;
2988}
2989
2990
2991/**
2992 * Emits an 8-bit immediate store via a GPR base address with a displacement.
2993 *
2994 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2995 *
2996 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2997 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2998 * same. Will assert / throw if caller does not heed this.
2999 */
3000DECL_FORCE_INLINE_THROW(uint32_t)
3001iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3002 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3003{
3004#ifdef RT_ARCH_AMD64
3005 /* mov mem8, imm8 */
3007 if (iGprBase >= 8)
3008 pCodeBuf[off++] = X86_OP_REX_B;
3009 pCodeBuf[off++] = 0xc6;
3010 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3011 pCodeBuf[off++] = uImm;
3012 RT_NOREF(iGprImmTmp, iGprTmp);
3013
3014#elif defined(RT_ARCH_ARM64)
3015 if (uImm == 0)
3016 iGprImmTmp = ARMV8_A64_REG_XZR;
3017 else
3018 {
3019 Assert(iGprImmTmp < 31);
3020 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3021 }
3022 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3023 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3024
3025#else
3026# error "port me"
3027#endif
3028 return off;
3029}
3030
3031
3032
3033/*********************************************************************************************************************************
3034* Subtraction and Additions *
3035*********************************************************************************************************************************/
3036
3037/**
3038 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3039 * @note The AMD64 version sets flags.
3040 */
3041DECL_INLINE_THROW(uint32_t)
3042iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3043{
3044#if defined(RT_ARCH_AMD64)
3045 /* sub Gv,Ev */
3046 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3047 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3048 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3049 pbCodeBuf[off++] = 0x2b;
3050 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3051
3052#elif defined(RT_ARCH_ARM64)
3053 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3054 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3055
3056#else
3057# error "Port me"
3058#endif
3059 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3060 return off;
3061}
3062
3063
3064/**
3065 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3066 * @note The AMD64 version sets flags.
3067 */
3068DECL_FORCE_INLINE(uint32_t)
3069iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3070{
3071#if defined(RT_ARCH_AMD64)
3072 /* sub Gv,Ev */
3073 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3074 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3075 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3076 pCodeBuf[off++] = 0x2b;
3077 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3078
3079#elif defined(RT_ARCH_ARM64)
3080 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3081
3082#else
3083# error "Port me"
3084#endif
3085 return off;
3086}
3087
3088
3089/**
3090 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3091 * @note The AMD64 version sets flags.
3092 */
3093DECL_INLINE_THROW(uint32_t)
3094iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3095{
3096#if defined(RT_ARCH_AMD64)
3097 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3098#elif defined(RT_ARCH_ARM64)
3099 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3100#else
3101# error "Port me"
3102#endif
3103 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3104 return off;
3105}
3106
3107
3108/**
3109 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3110 *
3111 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3112 *
3113 * @note Larger constants will require a temporary register. Failing to specify
3114 * one when needed will trigger fatal assertion / throw.
3115 */
3116DECL_FORCE_INLINE_THROW(uint32_t)
3117iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3118 uint8_t iGprTmp = UINT8_MAX)
3119{
3120#ifdef RT_ARCH_AMD64
3121 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3122 if (iSubtrahend == 1)
3123 {
3124 /* dec r/m64 */
3125 pCodeBuf[off++] = 0xff;
3126 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3127 }
3128 else if (iSubtrahend == -1)
3129 {
3130 /* inc r/m64 */
3131 pCodeBuf[off++] = 0xff;
3132 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3133 }
3134 else if ((int8_t)iSubtrahend == iSubtrahend)
3135 {
3136 /* sub r/m64, imm8 */
3137 pCodeBuf[off++] = 0x83;
3138 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3139 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3140 }
3141 else if ((int32_t)iSubtrahend == iSubtrahend)
3142 {
3143 /* sub r/m64, imm32 */
3144 pCodeBuf[off++] = 0x81;
3145 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3146 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3147 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3148 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3149 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3150 }
3151 else if (iGprTmp != UINT8_MAX)
3152 {
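        /* Note: the 'off - 1' backs up over the REX.W prefix emitted at the top
           of the function; the instructions emitted below carry their own. */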
3153 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3154 /* sub r/m64, r64 */
3155 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3156 pCodeBuf[off++] = 0x29;
3157 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3158 }
3159 else
3160# ifdef IEM_WITH_THROW_CATCH
3161 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3162# else
3163 AssertReleaseFailedStmt(off = UINT32_MAX);
3164# endif
3165
3166#elif defined(RT_ARCH_ARM64)
3167 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3168 if (uAbsSubtrahend < 4096)
3169 {
3170 if (iSubtrahend >= 0)
3171 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3172 else
3173 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3174 }
3175 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3176 {
3177 if (iSubtrahend >= 0)
3178 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3179 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3180 else
3181 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3182 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3183 }
3184 else if (iGprTmp != UINT8_MAX)
3185 {
3186 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3187 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3188 }
3189 else
3190# ifdef IEM_WITH_THROW_CATCH
3191 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3192# else
3193 AssertReleaseFailedStmt(off = UINT32_MAX);
3194# endif
3195
3196#else
3197# error "Port me"
3198#endif
3199 return off;
3200}
3201
3202
3203/**
3204 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3205 *
3206 * @note Larger constants will require a temporary register. Failing to specify
3207 * one when needed will trigger fatal assertion / throw.
3208 */
3209DECL_INLINE_THROW(uint32_t)
3210iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3211 uint8_t iGprTmp = UINT8_MAX)
3212
3213{
3214#ifdef RT_ARCH_AMD64
3215 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3216#elif defined(RT_ARCH_ARM64)
3217 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3218#else
3219# error "Port me"
3220#endif
3221 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3222 return off;
3223}
3224
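/*
 * Usage sketch (illustrative only; idxRegCounter and idxRegTmp are
 * hypothetical register indexes): small subtrahends need no temporary
 * register, and the emitter picks DEC/INC (AMD64) or SUB/ADD #imm (ARM64)
 * by itself:
 *
 *      off = iemNativeEmitSubGprImm(pReNative, off, idxRegCounter, 1);
 *
 * Constants outside the imm32 (AMD64) / imm12-with-optional-shift (ARM64)
 * ranges require the iGprTmp argument, otherwise the emitter asserts:
 *
 *      off = iemNativeEmitSubGprImm(pReNative, off, idxRegCounter,
 *                                   INT64_C(0x123456789), idxRegTmp);
 */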
3225
3226/**
3227 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3228 *
3229 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3230 *
3231 * @note ARM64: Larger constants will require a temporary register. Failing to
3232 * specify one when needed will trigger fatal assertion / throw.
3233 */
3234DECL_FORCE_INLINE_THROW(uint32_t)
3235iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3236 uint8_t iGprTmp = UINT8_MAX)
3237{
3238#ifdef RT_ARCH_AMD64
3239 if (iGprDst >= 8)
3240 pCodeBuf[off++] = X86_OP_REX_B;
3241 if (iSubtrahend == 1)
3242 {
3243 /* dec r/m32 */
3244 pCodeBuf[off++] = 0xff;
3245 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3246 }
3247 else if (iSubtrahend == -1)
3248 {
3249 /* inc r/m32 */
3250 pCodeBuf[off++] = 0xff;
3251 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3252 }
3253 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3254 {
3255 /* sub r/m32, imm8 */
3256 pCodeBuf[off++] = 0x83;
3257 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3258 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3259 }
3260 else
3261 {
3262 /* sub r/m32, imm32 */
3263 pCodeBuf[off++] = 0x81;
3264 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3265 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3266 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3267 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3268 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3269 }
3270 RT_NOREF(iGprTmp);
3271
3272#elif defined(RT_ARCH_ARM64)
3273 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3274 if (uAbsSubtrahend < 4096)
3275 {
3276 if (iSubtrahend >= 0)
3277 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3278 else
3279 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3280 }
3281 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3282 {
3283 if (iSubtrahend >= 0)
3284 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3285 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3286 else
3287 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3288 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3289 }
3290 else if (iGprTmp != UINT8_MAX)
3291 {
3292 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3293 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3294 }
3295 else
3296# ifdef IEM_WITH_THROW_CATCH
3297 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3298# else
3299 AssertReleaseFailedStmt(off = UINT32_MAX);
3300# endif
3301
3302#else
3303# error "Port me"
3304#endif
3305 return off;
3306}
3307
3308
3309/**
3310 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3311 *
3312 * @note ARM64: Larger constants will require a temporary register. Failing to
3313 * specify one when needed will trigger fatal assertion / throw.
3314 */
3315DECL_INLINE_THROW(uint32_t)
3316iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3317 uint8_t iGprTmp = UINT8_MAX)
3318
3319{
3320#ifdef RT_ARCH_AMD64
3321    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3322#elif defined(RT_ARCH_ARM64)
3323    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3324#else
3325# error "Port me"
3326#endif
3327 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3328 return off;
3329}
3330
3331
3332/**
3333 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3334 *
3335 * This will optimize using DEC/INC/whatever, and the ARM64 version will not
3336 * set flags, so it is not suitable as a base for conditional jumps.
3337 *
3338 * @note AMD64: Will only update the lower 16 bits of the register.
3339 * @note ARM64: Will update the entire register.
3340 * @note ARM64: Larger constants will require a temporary register. Failing to
3341 * specify one when needed will trigger fatal assertion / throw.
3342 */
3343DECL_FORCE_INLINE_THROW(uint32_t)
3344iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3345 uint8_t iGprTmp = UINT8_MAX)
3346{
3347#ifdef RT_ARCH_AMD64
3348 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3349 if (iGprDst >= 8)
3350 pCodeBuf[off++] = X86_OP_REX_B;
3351 if (iSubtrahend == 1)
3352 {
3353 /* dec r/m16 */
3354 pCodeBuf[off++] = 0xff;
3355 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3356 }
3357 else if (iSubtrahend == -1)
3358 {
3359 /* inc r/m16 */
3360 pCodeBuf[off++] = 0xff;
3361 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3362 }
3363 else if ((int8_t)iSubtrahend == iSubtrahend)
3364 {
3365 /* sub r/m16, imm8 */
3366 pCodeBuf[off++] = 0x83;
3367 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3368 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3369 }
3370 else
3371 {
3372 /* sub r/m16, imm16 */
3373 pCodeBuf[off++] = 0x81;
3374 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3375 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3376 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3377 }
3378 RT_NOREF(iGprTmp);
3379
3380#elif defined(RT_ARCH_ARM64)
3381 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3382 if (uAbsSubtrahend < 4096)
3383 {
3384 if (iSubtrahend >= 0)
3385 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3386 else
3387 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3388 }
3389 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3390 {
3391 if (iSubtrahend >= 0)
3392 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3393 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3394 else
3395 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3396 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3397 }
3398 else if (iGprTmp != UINT8_MAX)
3399 {
3400 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3401 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3402 }
3403 else
3404# ifdef IEM_WITH_THROW_CATCH
3405 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3406# else
3407 AssertReleaseFailedStmt(off = UINT32_MAX);
3408# endif
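    /* Mask the result back down to 16 bits (imms=15, immr=0 yields 0xffff). */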
3409 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3410
3411#else
3412# error "Port me"
3413#endif
3414 return off;
3415}
3416
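/*
 * Usage sketch (illustrative only; idxRegCx is a hypothetical register
 * index): a 16-bit decrement, e.g. for recompiling 'sub cx, 1'.  Mind the
 * differing upper-bit behaviour documented above: AMD64 leaves bits 16:63
 * untouched, while the trailing ARM64 AND masks the result to 16 bits:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
 *      off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegCx, 1);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 */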
3417
3418/**
3419 * Emits adding a 64-bit GPR to another, storing the result in the first.
3420 * @note The AMD64 version sets flags.
3421 */
3422DECL_FORCE_INLINE(uint32_t)
3423iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3424{
3425#if defined(RT_ARCH_AMD64)
3426 /* add Gv,Ev */
3427 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3428 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3429 pCodeBuf[off++] = 0x03;
3430 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3431
3432#elif defined(RT_ARCH_ARM64)
3433 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3434
3435#else
3436# error "Port me"
3437#endif
3438 return off;
3439}
3440
3441
3442/**
3443 * Emits adding a 64-bit GPR to another, storing the result in the first.
3444 * @note The AMD64 version sets flags.
3445 */
3446DECL_INLINE_THROW(uint32_t)
3447iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3448{
3449#if defined(RT_ARCH_AMD64)
3450 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3451#elif defined(RT_ARCH_ARM64)
3452 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3453#else
3454# error "Port me"
3455#endif
3456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3457 return off;
3458}
3459
3460
3461/**
3462 * Emits adding a 32-bit GPR to another, storing the result in the first.
3463 * @note The AMD64 version sets flags. Bits 32 thru 63 will be zero afterwards.
3464 */
3465DECL_FORCE_INLINE(uint32_t)
3466iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3467{
3468#if defined(RT_ARCH_AMD64)
3469 /* add Gv,Ev */
3470 if (iGprDst >= 8 || iGprAddend >= 8)
3471 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3472 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3473 pCodeBuf[off++] = 0x03;
3474 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3475
3476#elif defined(RT_ARCH_ARM64)
3477 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3478
3479#else
3480# error "Port me"
3481#endif
3482 return off;
3483}
3484
3485
3486/**
3487 * Emits adding a 32-bit GPR to another, storing the result in the first.
3488 * @note The AMD64 version sets flags. Bits 32 thru 63 will be zero afterwards.
3489 */
3490DECL_INLINE_THROW(uint32_t)
3491iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3492{
3493#if defined(RT_ARCH_AMD64)
3494 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3495#elif defined(RT_ARCH_ARM64)
3496 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3497#else
3498# error "Port me"
3499#endif
3500 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3501 return off;
3502}
3503
3504
3505/**
3506 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3507 */
3508DECL_INLINE_THROW(uint32_t)
3509iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3510{
3511#if defined(RT_ARCH_AMD64)
3512 /* add or inc */
3513 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3514 if (iImm8 != 1)
3515 {
3516 pCodeBuf[off++] = 0x83;
3517 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3518 pCodeBuf[off++] = (uint8_t)iImm8;
3519 }
3520 else
3521 {
3522 pCodeBuf[off++] = 0xff;
3523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3524 }
3525
3526#elif defined(RT_ARCH_ARM64)
3527 if (iImm8 >= 0)
3528 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3529 else
3530 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3531
3532#else
3533# error "Port me"
3534#endif
3535 return off;
3536}
3537
3538
3539/**
3540 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3541 */
3542DECL_INLINE_THROW(uint32_t)
3543iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3544{
3545#if defined(RT_ARCH_AMD64)
3546 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3547#elif defined(RT_ARCH_ARM64)
3548 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3549#else
3550# error "Port me"
3551#endif
3552 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3553 return off;
3554}
3555
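/*
 * Usage sketch (illustrative only; idxRegPtr is a hypothetical register
 * index): advancing a pointer by one 8-byte entry:
 *
 *      off = iemNativeEmitAddGprImm8(pReNative, off, idxRegPtr, 8);
 *
 * On AMD64 this encodes as 'add r64, imm8' (or INC when the immediate is 1);
 * on ARM64 as an ADD/SUB with a small unsigned 12-bit immediate.
 */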
3556
3557/**
3558 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3559 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3560 */
3561DECL_FORCE_INLINE(uint32_t)
3562iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3563{
3564#if defined(RT_ARCH_AMD64)
3565 /* add or inc */
3566 if (iGprDst >= 8)
3567 pCodeBuf[off++] = X86_OP_REX_B;
3568 if (iImm8 != 1)
3569 {
3570 pCodeBuf[off++] = 0x83;
3571 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3572 pCodeBuf[off++] = (uint8_t)iImm8;
3573 }
3574 else
3575 {
3576 pCodeBuf[off++] = 0xff;
3577 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3578 }
3579
3580#elif defined(RT_ARCH_ARM64)
3581 if (iImm8 >= 0)
3582 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
3583 else
3584 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
3585
3586#else
3587# error "Port me"
3588#endif
3589 return off;
3590}
3591
3592
3593/**
3594 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3595 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3596 */
3597DECL_INLINE_THROW(uint32_t)
3598iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3599{
3600#if defined(RT_ARCH_AMD64)
3601 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3602#elif defined(RT_ARCH_ARM64)
3603 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3604#else
3605# error "Port me"
3606#endif
3607 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3608 return off;
3609}
3610
3611
3612/**
3613 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3614 *
3615 * @note Will assert / throw if @a iGprTmp is not specified when needed.
3616 */
3617DECL_FORCE_INLINE_THROW(uint32_t)
3618iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
3619{
3620#if defined(RT_ARCH_AMD64)
3621 if ((int8_t)iAddend == iAddend)
3622 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3623
3624 if ((int32_t)iAddend == iAddend)
3625 {
3626        /* add gpr, imm32 */
3627 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3628 pCodeBuf[off++] = 0x81;
3629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3630 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3631 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3632 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3633 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3634 }
3635 else if (iGprTmp != UINT8_MAX)
3636 {
3637 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3638
3639 /* add dst, tmpreg */
3640 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3641 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
3642 pCodeBuf[off++] = 0x03;
3643 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
3644 }
3645 else
3646# ifdef IEM_WITH_THROW_CATCH
3647 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3648# else
3649 AssertReleaseFailedStmt(off = UINT32_MAX);
3650# endif
3651
3652#elif defined(RT_ARCH_ARM64)
3653 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
3654 if (uAbsAddend < 4096)
3655 {
3656 if (iAddend >= 0)
3657 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3658 else
3659 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3660 }
3661 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3662 {
3663 if (iAddend >= 0)
3664 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3665 true /*f64Bit*/, true /*fShift12*/);
3666 else
3667 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3668 true /*f64Bit*/, true /*fShift12*/);
3669 }
3670 else if (iGprTmp != UINT8_MAX)
3671 {
3672 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3673 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
3674 }
3675 else
3676# ifdef IEM_WITH_THROW_CATCH
3677 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3678# else
3679 AssertReleaseFailedStmt(off = UINT32_MAX);
3680# endif
3681
3682#else
3683# error "Port me"
3684#endif
3685 return off;
3686}
3687
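/*
 * Usage sketch (illustrative only; idxRegDst and idxRegTmp are hypothetical
 * register indexes): iemNativeEmitAddGprImmEx picks the cheapest encoding
 * itself, i.e. imm8, imm32, or a register addition via iGprTmp for anything
 * larger, so the caller only needs to size the buffer for the worst case:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
 *      off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxRegDst,
 *                                     INT64_C(0x100000000), idxRegTmp);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 */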
3688
3689/**
3690 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3691 */
3692DECL_INLINE_THROW(uint32_t)
3693iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
3694{
3695#if defined(RT_ARCH_AMD64)
3696 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3697 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
3698
3699 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
3700 {
3701        /* add gpr, imm32 */
3702 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3703 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3704 pbCodeBuf[off++] = 0x81;
3705 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3706 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3707 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3708 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3709 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3710 }
3711 else
3712 {
3713 /* Best to use a temporary register to deal with this in the simplest way: */
3714 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3715
3716 /* add dst, tmpreg */
3717 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3718 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3719 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
3720 pbCodeBuf[off++] = 0x03;
3721 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
3722
3723 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3724 }
3725
3726#elif defined(RT_ARCH_ARM64)
3727 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
3728 {
3729 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3730 if (iAddend >= 0)
3731 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
3732 else
3733 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
3734 }
3735 else
3736 {
3737 /* Use temporary register for the immediate. */
3738 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3739
3740 /* add gprdst, gprdst, tmpreg */
3741 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3742 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
3743
3744 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3745 }
3746
3747#else
3748# error "Port me"
3749#endif
3750 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3751 return off;
3752}
3753
3754
3755/**
3756 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
3757 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3758 * @note For ARM64 the iAddend value must be in the range 0x000..0xfff,
3759 * or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
3760 * the lower 12 bits always zero). The negative ranges are also allowed,
3761 *       making it behave like a subtraction. If the constant does not
3762 *       conform, this will assert / throw.
3763 */
3764DECL_FORCE_INLINE_THROW(uint32_t)
3765iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
3766{
3767#if defined(RT_ARCH_AMD64)
3768 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3769 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3770
3771    /* add gpr, imm32 */
3772 if (iGprDst >= 8)
3773 pCodeBuf[off++] = X86_OP_REX_B;
3774 pCodeBuf[off++] = 0x81;
3775 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3776 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3777 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3778 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3779 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3780
3781#elif defined(RT_ARCH_ARM64)
3782 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
3783 if (uAbsAddend <= 0xfff)
3784 {
3785 if (iAddend >= 0)
3786 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3787 else
3788 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3789 }
3790 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3791 {
3792 if (iAddend >= 0)
3793 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3794 false /*f64Bit*/, true /*fShift12*/);
3795 else
3796 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3797 false /*f64Bit*/, true /*fShift12*/);
3798 }
3799 else
3800# ifdef IEM_WITH_THROW_CATCH
3801 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3802# else
3803 AssertReleaseFailedStmt(off = UINT32_MAX);
3804# endif
3805
3806#else
3807# error "Port me"
3808#endif
3809 return off;
3810}
3811
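/*
 * ARM64 conformance examples for iemNativeEmitAddGpr32ImmEx (illustrative
 * only): 0xfff and 0x123000 encode directly (plain and LSL-12 imm12 forms),
 * while 0x1001 matches neither pattern and will assert / throw.  Callers
 * that cannot guarantee a conforming constant should use the
 * iemNativeEmitAddGpr32Imm wrapper below, which falls back to a temporary
 * register instead.
 */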
3812
3813/**
3814 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
3815 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3816 */
3817DECL_INLINE_THROW(uint32_t)
3818iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
3819{
3820#if defined(RT_ARCH_AMD64)
3821 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
3822
3823#elif defined(RT_ARCH_ARM64)
3824 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
3825 {
3826 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3827 if (iAddend >= 0)
3828 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
3829 else
3830 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
3831 }
3832 else
3833 {
3834 /* Use temporary register for the immediate. */
3835 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
3836
3837 /* add gprdst, gprdst, tmpreg */
3838 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3839 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
3840
3841 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3842 }
3843
3844#else
3845# error "Port me"
3846#endif
3847 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3848 return off;
3849}
3850
3851
3852/**
3853 * Emits a 16-bit GPR add with a signed immediate addend.
3854 *
3855 * This will optimize using INC/DEC/whatever, and the ARM64 version will not
3856 * set flags, so it is not suitable as a base for conditional jumps.
3857 *
3858 * @note AMD64: Will only update the lower 16 bits of the register.
3859 * @note ARM64: Will update the entire register.
3860 * @note ARM64: Larger constants will require a temporary register. Failing to
3861 * specify one when needed will trigger fatal assertion / throw.
3862 * @sa iemNativeEmitSubGpr16ImmEx
3863 */
3864DECL_FORCE_INLINE_THROW(uint32_t)
3865iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
3866 uint8_t iGprTmp = UINT8_MAX)
3867{
3868#ifdef RT_ARCH_AMD64
3869 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3870 if (iGprDst >= 8)
3871 pCodeBuf[off++] = X86_OP_REX_B;
3872 if (iAddend == 1)
3873 {
3874 /* inc r/m16 */
3875 pCodeBuf[off++] = 0xff;
3876 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3877 }
3878 else if (iAddend == -1)
3879 {
3880 /* dec r/m16 */
3881 pCodeBuf[off++] = 0xff;
3882 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3883 }
3884 else if ((int8_t)iAddend == iAddend)
3885 {
3886 /* add r/m16, imm8 */
3887 pCodeBuf[off++] = 0x83;
3888 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3889 pCodeBuf[off++] = (uint8_t)iAddend;
3890 }
3891 else
3892 {
3893 /* add r/m16, imm16 */
3894 pCodeBuf[off++] = 0x81;
3895 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3896 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
3897 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
3898 }
3899 RT_NOREF(iGprTmp);
3900
3901#elif defined(RT_ARCH_ARM64)
3902 uint32_t uAbsAddend = RT_ABS(iAddend);
3903 if (uAbsAddend < 4096)
3904 {
3905 if (iAddend >= 0)
3906 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3907 else
3908 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3909 }
3910 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3911 {
3912 if (iAddend >= 0)
3913 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
3914 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3915 else
3916 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
3917 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3918 }
3919 else if (iGprTmp != UINT8_MAX)
3920 {
3921 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
3922 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3923 }
3924 else
3925# ifdef IEM_WITH_THROW_CATCH
3926 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3927# else
3928 AssertReleaseFailedStmt(off = UINT32_MAX);
3929# endif
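    /* Mask the result back down to 16 bits (imms=15, immr=0 yields 0xffff). */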
3930 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3931
3932#else
3933# error "Port me"
3934#endif
3935 return off;
3936}
3937
3938
3939
3940/**
3941 * Adds two 64-bit GPRs together, storing the result in a third register.
3942 */
3943DECL_FORCE_INLINE(uint32_t)
3944iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
3945{
3946#ifdef RT_ARCH_AMD64
3947 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
3948 {
3949 /** @todo consider LEA */
3950 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
3951 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
3952 }
3953 else
3954 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
3955
3956#elif defined(RT_ARCH_ARM64)
3957 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
3958
3959#else
3960# error "Port me!"
3961#endif
3962 return off;
3963}
3964
3965
3966
3967/**
3968 * Adds two 32-bit GPRs together, storing the result in a third register.
3969 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
3970 */
3971DECL_FORCE_INLINE(uint32_t)
3972iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
3973{
3974#ifdef RT_ARCH_AMD64
3975 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
3976 {
3977 /** @todo consider LEA */
3978 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
3979 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
3980 }
3981 else
3982 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
3983
3984#elif defined(RT_ARCH_ARM64)
3985 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
3986
3987#else
3988# error "Port me!"
3989#endif
3990 return off;
3991}
3992
3993
3994/**
3995 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
3996 * third register.
3997 *
3998 * @note The ARM64 version does not work for non-trivial constants if the
3999 * two registers are the same. Will assert / throw exception.
4000 */
4001DECL_FORCE_INLINE_THROW(uint32_t)
4002iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4003{
4004#ifdef RT_ARCH_AMD64
4005 /** @todo consider LEA */
4006 if ((int8_t)iImmAddend == iImmAddend)
4007 {
4008 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4009 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4010 }
4011 else
4012 {
4013 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4014 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4015 }
4016
4017#elif defined(RT_ARCH_ARM64)
4018 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4019 if (uAbsImmAddend < 4096)
4020 {
4021 if (iImmAddend >= 0)
4022 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4023 else
4024 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4025 }
4026 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4027 {
4028 if (iImmAddend >= 0)
4029            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4030        else
4031            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4032 }
4033 else if (iGprDst != iGprAddend)
4034 {
4035 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4036 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4037 }
4038 else
4039# ifdef IEM_WITH_THROW_CATCH
4040 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4041# else
4042 AssertReleaseFailedStmt(off = UINT32_MAX);
4043# endif
4044
4045#else
4046# error "Port me!"
4047#endif
4048 return off;
4049}
4050
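/*
 * Usage sketch (illustrative only; idxRegDst and idxRegSrc are hypothetical
 * register indexes): computing dst = src + 0x20 without modifying src, e.g.
 * as part of an effective address calculation:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
 *      off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegDst, idxRegSrc, 0x20);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 *
 * Mind the ARM64 restriction above: with a constant that does not fit the
 * imm12 forms, the destination must differ from the addend register.
 */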
4051
4052/**
4053 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4054 * third register.
4055 *
4056 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4057 *
4058 * @note The ARM64 version does not work for non-trivial constants if the
4059 * two registers are the same. Will assert / throw exception.
4060 */
4061DECL_FORCE_INLINE_THROW(uint32_t)
4062iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4063{
4064#ifdef RT_ARCH_AMD64
4065 /** @todo consider LEA */
4066 if ((int8_t)iImmAddend == iImmAddend)
4067 {
4068 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4069 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4070 }
4071 else
4072 {
4073 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4074        off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4075 }
4076
4077#elif defined(RT_ARCH_ARM64)
4078 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4079 if (uAbsImmAddend < 4096)
4080 {
4081 if (iImmAddend >= 0)
4082 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4083 else
4084 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4085 }
4086 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4087 {
4088 if (iImmAddend >= 0)
4089            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4090        else
4091            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4092 }
4093 else if (iGprDst != iGprAddend)
4094 {
4095 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4096 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4097 }
4098 else
4099# ifdef IEM_WITH_THROW_CATCH
4100 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4101# else
4102 AssertReleaseFailedStmt(off = UINT32_MAX);
4103# endif
4104
4105#else
4106# error "Port me!"
4107#endif
4108 return off;
4109}
4110
4111
4112/*********************************************************************************************************************************
4113* Unary Operations *
4114*********************************************************************************************************************************/
4115
4116/**
4117 * Emits code for two's complement negation of a 64-bit GPR.
4118 */
4119DECL_FORCE_INLINE_THROW(uint32_t)
4120iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4121{
4122#if defined(RT_ARCH_AMD64)
4123 /* neg Ev */
4124 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4125 pCodeBuf[off++] = 0xf7;
4126 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4127
4128#elif defined(RT_ARCH_ARM64)
4129 /* sub dst, xzr, dst */
4130 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4131
4132#else
4133# error "Port me"
4134#endif
4135 return off;
4136}
4137
4138
4139/**
4140 * Emits code for two's complement negation of a 64-bit GPR.
4141 */
4142DECL_INLINE_THROW(uint32_t)
4143iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4144{
4145#if defined(RT_ARCH_AMD64)
4146 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4147#elif defined(RT_ARCH_ARM64)
4148 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4149#else
4150# error "Port me"
4151#endif
4152 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4153 return off;
4154}
4155
4156
4157/**
4158 * Emits code for two's complement negation of a 32-bit GPR.
4159 * @note Bits 32 thru 63 are set to zero.
4160 */
4161DECL_FORCE_INLINE_THROW(uint32_t)
4162iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4163{
4164#if defined(RT_ARCH_AMD64)
4165 /* neg Ev */
4166 if (iGprDst >= 8)
4167 pCodeBuf[off++] = X86_OP_REX_B;
4168 pCodeBuf[off++] = 0xf7;
4169 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4170
4171#elif defined(RT_ARCH_ARM64)
4172 /* sub dst, xzr, dst */
4173 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4174
4175#else
4176# error "Port me"
4177#endif
4178 return off;
4179}
4180
4181
4182/**
4183 * Emits code for two's complement negation of a 32-bit GPR.
4184 * @note Bits 32 thru 63 are set to zero.
4185 */
4186DECL_INLINE_THROW(uint32_t)
4187iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4188{
4189#if defined(RT_ARCH_AMD64)
4190 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4191#elif defined(RT_ARCH_ARM64)
4192 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4193#else
4194# error "Port me"
4195#endif
4196 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4197 return off;
4198}
4199
4200
4201
4202/*********************************************************************************************************************************
4203* Bit Operations *
4204*********************************************************************************************************************************/
4205
4206/**
4207 * Emits code for clearing bits 16 thru 63 in the GPR.
4208 */
4209DECL_INLINE_THROW(uint32_t)
4210iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4211{
4212#if defined(RT_ARCH_AMD64)
4213 /* movzx Gv,Ew */
4214 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4215 if (iGprDst >= 8)
4216 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4217 pbCodeBuf[off++] = 0x0f;
4218 pbCodeBuf[off++] = 0xb7;
4219 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4220
4221#elif defined(RT_ARCH_ARM64)
4222 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4223# if 1
4224 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4225# else
4226 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4227 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4228# endif
4229#else
4230# error "Port me"
4231#endif
4232 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4233 return off;
4234}
4235
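/*
 * Encoding note (illustrative): for register 0 the function above produces
 * 'movzx eax, ax' (0f b7 c0) on AMD64 and 'uxth w0, w0' on ARM64; both
 * zero-extend the low 16 bits across the full 64-bit register.
 */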
4236
4237/**
4238 * Emits code for AND'ing two 64-bit GPRs.
4239 *
4240 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4241 * and ARM64 hosts.
4242 */
4243DECL_FORCE_INLINE(uint32_t)
4244iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4245{
4246#if defined(RT_ARCH_AMD64)
4247 /* and Gv, Ev */
4248 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4249 pCodeBuf[off++] = 0x23;
4250 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4251 RT_NOREF(fSetFlags);
4252
4253#elif defined(RT_ARCH_ARM64)
4254 if (!fSetFlags)
4255 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4256 else
4257 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4258
4259#else
4260# error "Port me"
4261#endif
4262 return off;
4263}
4264
4265
4266/**
4267 * Emits code for AND'ing two 64-bit GPRs.
4268 *
4269 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4270 * and ARM64 hosts.
4271 */
4272DECL_INLINE_THROW(uint32_t)
4273iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4274{
4275#if defined(RT_ARCH_AMD64)
4276 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4277#elif defined(RT_ARCH_ARM64)
4278 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4279#else
4280# error "Port me"
4281#endif
4282 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4283 return off;
4284}
4285
4286
4287/**
4288 * Emits code for AND'ing two 32-bit GPRs.
4289 */
4290DECL_FORCE_INLINE(uint32_t)
4291iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4292{
4293#if defined(RT_ARCH_AMD64)
4294 /* and Gv, Ev */
4295 if (iGprDst >= 8 || iGprSrc >= 8)
4296 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4297 pCodeBuf[off++] = 0x23;
4298 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4299 RT_NOREF(fSetFlags);
4300
4301#elif defined(RT_ARCH_ARM64)
4302 if (!fSetFlags)
4303 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4304 else
4305 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4306
4307#else
4308# error "Port me"
4309#endif
4310 return off;
4311}
4312
4313
4314/**
4315 * Emits code for AND'ing two 32-bit GPRs.
4316 */
4317DECL_INLINE_THROW(uint32_t)
4318iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4319{
4320#if defined(RT_ARCH_AMD64)
4321 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4322#elif defined(RT_ARCH_ARM64)
4323 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4324#else
4325# error "Port me"
4326#endif
4327 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4328 return off;
4329}
4330
4331
4332/**
4333 * Emits code for AND'ing a 64-bit GPR with a constant.
4334 *
4335 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4336 * and ARM64 hosts.
4337 */
4338DECL_INLINE_THROW(uint32_t)
4339iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4340{
4341#if defined(RT_ARCH_AMD64)
4342 if ((int64_t)uImm == (int8_t)uImm)
4343 {
4344 /* and Ev, imm8 */
4345 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4346 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4347 pbCodeBuf[off++] = 0x83;
4348 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4349 pbCodeBuf[off++] = (uint8_t)uImm;
4350 }
4351 else if ((int64_t)uImm == (int32_t)uImm)
4352 {
4353 /* and Ev, imm32 */
4354 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4355 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4356 pbCodeBuf[off++] = 0x81;
4357 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4358 pbCodeBuf[off++] = RT_BYTE1(uImm);
4359 pbCodeBuf[off++] = RT_BYTE2(uImm);
4360 pbCodeBuf[off++] = RT_BYTE3(uImm);
4361 pbCodeBuf[off++] = RT_BYTE4(uImm);
4362 }
4363 else
4364 {
4365 /* Use temporary register for the 64-bit immediate. */
4366 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4367 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4368 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4369 }
4370 RT_NOREF(fSetFlags);
4371
4372#elif defined(RT_ARCH_ARM64)
4373 uint32_t uImmR = 0;
4374 uint32_t uImmNandS = 0;
4375 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4376 {
4377 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4378 if (!fSetFlags)
4379 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4380 else
4381 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4382 }
4383 else
4384 {
4385 /* Use temporary register for the 64-bit immediate. */
4386 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4387 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4388 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4389 }
4390
4391#else
4392# error "Port me"
4393#endif
4394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4395 return off;
4396}
4397
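/*
 * Worked ARM64 examples for the immediate AND above (illustrative only):
 * masks that are a repeated and/or rotated run of contiguous ones, such as
 * 0xffff0000 or 0x00ff00ff00ff00ff, pass Armv8A64ConvertMask64ToImmRImmS and
 * encode as a single AND(S) immediate; an irregular value like
 * 0x123456789abcdef0 does not and is routed through a temporary register.
 */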
4398
4399/**
4400 * Emits code for AND'ing a 32-bit GPR with a constant.
4401 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4402 * @note For ARM64 this only supports @a uImm values that can be expressed using
4403 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4404 * make sure this is possible!
4405 */
4406DECL_FORCE_INLINE_THROW(uint32_t)
4407iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4408{
4409#if defined(RT_ARCH_AMD64)
4410 /* and Ev, imm */
4411 if (iGprDst >= 8)
4412 pCodeBuf[off++] = X86_OP_REX_B;
4413 if ((int32_t)uImm == (int8_t)uImm)
4414 {
4415 pCodeBuf[off++] = 0x83;
4416 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4417 pCodeBuf[off++] = (uint8_t)uImm;
4418 }
4419 else
4420 {
4421 pCodeBuf[off++] = 0x81;
4422 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4423 pCodeBuf[off++] = RT_BYTE1(uImm);
4424 pCodeBuf[off++] = RT_BYTE2(uImm);
4425 pCodeBuf[off++] = RT_BYTE3(uImm);
4426 pCodeBuf[off++] = RT_BYTE4(uImm);
4427 }
4428 RT_NOREF(fSetFlags);
4429
4430#elif defined(RT_ARCH_ARM64)
4431 uint32_t uImmR = 0;
4432 uint32_t uImmNandS = 0;
4433 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4434 {
4435 if (!fSetFlags)
4436 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4437 else
4438 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4439 }
4440 else
4441# ifdef IEM_WITH_THROW_CATCH
4442 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4443# else
4444 AssertReleaseFailedStmt(off = UINT32_MAX);
4445# endif
4446
4447#else
4448# error "Port me"
4449#endif
4450 return off;
4451}
4452
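/*
 * Caller-side sketch for the Ex variant above (ARM64, illustrative only;
 * fMask and idxRegDst are hypothetical): probe the mask first and fall back
 * to the non-Ex emitter, which can use a temporary register, when it is not
 * expressible as a logical immediate:
 *
 *      uint32_t uImmNandS = 0, uImmR = 0;
 *      if (Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmNandS, &uImmR))
 *          off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegDst, fMask);
 *      else
 *          off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, fMask);
 */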
4453
4454/**
4455 * Emits code for AND'ing a 32-bit GPR with a constant.
4456 *
4457 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4458 */
4459DECL_INLINE_THROW(uint32_t)
4460iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4461{
4462#if defined(RT_ARCH_AMD64)
4463 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4464
4465#elif defined(RT_ARCH_ARM64)
4466 uint32_t uImmR = 0;
4467 uint32_t uImmNandS = 0;
4468 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4469 {
4470 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4471 if (!fSetFlags)
4472 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4473 else
4474 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4475 }
4476 else
4477 {
4478 /* Use temporary register for the 64-bit immediate. */
4479 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4480 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4481 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4482 }
4483
4484#else
4485# error "Port me"
4486#endif
4487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4488 return off;
4489}
4490
4491
4492/**
4493 * Emits code for AND'ing a 64-bit GPR with a constant.
4494 *
4495 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
4496 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
4497 * the same.
4498 */
4499DECL_FORCE_INLINE_THROW(uint32_t)
4500iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4501 bool fSetFlags = false)
4502{
4503#if defined(RT_ARCH_AMD64)
4504 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4505 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4506 RT_NOREF(fSetFlags);
4507
4508#elif defined(RT_ARCH_ARM64)
4509 uint32_t uImmR = 0;
4510 uint32_t uImmNandS = 0;
4511 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4512 {
4513 if (!fSetFlags)
4514 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4515 else
4516 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4517 }
4518 else if (iGprDst != iGprSrc)
4519 {
4520 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4521 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4522 }
4523 else
4524# ifdef IEM_WITH_THROW_CATCH
4525 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4526# else
4527 AssertReleaseFailedStmt(off = UINT32_MAX);
4528# endif
4529
4530#else
4531# error "Port me"
4532#endif
4533 return off;
4534}
4535
4536/**
4537 * Emits code for AND'ing a 32-bit GPR with a constant.
4538 *
4539 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
4540 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
4541 * the same.
4542 *
4543 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4544 */
4545DECL_FORCE_INLINE_THROW(uint32_t)
4546iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4547 bool fSetFlags = false)
4548{
4549#if defined(RT_ARCH_AMD64)
4550 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4551 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4552 RT_NOREF(fSetFlags);
4553
4554#elif defined(RT_ARCH_ARM64)
4555 uint32_t uImmR = 0;
4556 uint32_t uImmNandS = 0;
4557 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4558 {
4559 if (!fSetFlags)
4560 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4561 else
4562 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4563 }
4564 else if (iGprDst != iGprSrc)
4565 {
4566 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4567 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4568 }
4569 else
4570# ifdef IEM_WITH_THROW_CATCH
4571 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4572# else
4573 AssertReleaseFailedStmt(off = UINT32_MAX);
4574# endif
4575
4576#else
4577# error "Port me"
4578#endif
4579 return off;
4580}
4581
4582
4583/**
4584 * Emits code for OR'ing two 64-bit GPRs.
4585 */
4586DECL_FORCE_INLINE(uint32_t)
4587iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4588{
4589#if defined(RT_ARCH_AMD64)
4590 /* or Gv, Ev */
4591 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4592 pCodeBuf[off++] = 0x0b;
4593 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4594
4595#elif defined(RT_ARCH_ARM64)
4596 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
4597
4598#else
4599# error "Port me"
4600#endif
4601 return off;
4602}
4603
4604
4605/**
4606 * Emits code for OR'ing two 64-bit GPRs.
4607 */
4608DECL_INLINE_THROW(uint32_t)
4609iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4610{
4611#if defined(RT_ARCH_AMD64)
4612 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4613#elif defined(RT_ARCH_ARM64)
4614 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4615#else
4616# error "Port me"
4617#endif
4618 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4619 return off;
4620}
4621
4622
4623/**
4624 * Emits code for OR'ing two 32-bit GPRs.
4625 * @note Bits 63:32 of the destination GPR will be cleared.
4626 */
4627DECL_FORCE_INLINE(uint32_t)
4628iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4629{
4630#if defined(RT_ARCH_AMD64)
4631 /* or Gv, Ev */
4632 if (iGprDst >= 8 || iGprSrc >= 8)
4633 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4634 pCodeBuf[off++] = 0x0b;
4635 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4636
4637#elif defined(RT_ARCH_ARM64)
4638 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4639
4640#else
4641# error "Port me"
4642#endif
4643 return off;
4644}
4645
4646
4647/**
4648 * Emits code for OR'ing two 32-bit GPRs.
4649 * @note Bits 63:32 of the destination GPR will be cleared.
4650 */
4651DECL_INLINE_THROW(uint32_t)
4652iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4653{
4654#if defined(RT_ARCH_AMD64)
4655 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4656#elif defined(RT_ARCH_ARM64)
4657 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4658#else
4659# error "Port me"
4660#endif
4661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4662 return off;
4663}
4664
4665
4666/**
4667 * Emits code for OR'ing a 64-bit GPRs with a constant.
4668 */
4669DECL_INLINE_THROW(uint32_t)
4670iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
4671{
4672#if defined(RT_ARCH_AMD64)
4673 if ((int64_t)uImm == (int8_t)uImm)
4674 {
4675 /* or Ev, imm8 */
4676 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4677 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4678 pbCodeBuf[off++] = 0x83;
4679 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4680 pbCodeBuf[off++] = (uint8_t)uImm;
4681 }
4682 else if ((int64_t)uImm == (int32_t)uImm)
4683 {
4684 /* or Ev, imm32 */
4685 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4686 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4687 pbCodeBuf[off++] = 0x81;
4688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4689 pbCodeBuf[off++] = RT_BYTE1(uImm);
4690 pbCodeBuf[off++] = RT_BYTE2(uImm);
4691 pbCodeBuf[off++] = RT_BYTE3(uImm);
4692 pbCodeBuf[off++] = RT_BYTE4(uImm);
4693 }
4694 else
4695 {
4696 /* Use temporary register for the 64-bit immediate. */
4697 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4698 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
4699 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4700 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4701 }
4702
4703#elif defined(RT_ARCH_ARM64)
4704 uint32_t uImmR = 0;
4705 uint32_t uImmNandS = 0;
4706 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4707 {
4708 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4709 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
4710 }
4711 else
4712 {
4713 /* Use temporary register for the 64-bit immediate. */
4714 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4715 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
4716 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4717 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4718 }
4719
4720#else
4721# error "Port me"
4722#endif
4723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4724 return off;
4725}
4726
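/*
 * Sidebar (illustrative, not part of the original emitter): the imm8 form
 * (opcode 0x83) is sign-extended by the CPU, so it is only safe when the
 * value survives a round-trip through int8_t, which is exactly what the
 * check above tests:
 *
 * @code
 *  uint64_t const uImmNeg = UINT64_C(0xfffffffffffffff0);  // -16: fits imm8
 *  Assert((int64_t)uImmNeg == (int8_t)uImmNeg);
 *  uint64_t const uImmPos = UINT64_C(0x0000000000000080);  // +128: needs imm32
 *  Assert((int64_t)uImmPos != (int8_t)uImmPos);
 * @endcode
 */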
4727
4728/**
4729 * Emits code for OR'ing a 32-bit GPR with a constant.
4730 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4731 * @note For ARM64 this only supports @a uImm values that can be expressed using
4732 *       the two 6-bit immediates of the ORR instruction. The caller must make
4733 * sure this is possible!
4734 */
4735DECL_FORCE_INLINE_THROW(uint32_t)
4736iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4737{
4738#if defined(RT_ARCH_AMD64)
4739 /* or Ev, imm */
4740 if (iGprDst >= 8)
4741 pCodeBuf[off++] = X86_OP_REX_B;
4742 if ((int32_t)uImm == (int8_t)uImm)
4743 {
4744 pCodeBuf[off++] = 0x83;
4745 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4746 pCodeBuf[off++] = (uint8_t)uImm;
4747 }
4748 else
4749 {
4750 pCodeBuf[off++] = 0x81;
4751 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4752 pCodeBuf[off++] = RT_BYTE1(uImm);
4753 pCodeBuf[off++] = RT_BYTE2(uImm);
4754 pCodeBuf[off++] = RT_BYTE3(uImm);
4755 pCodeBuf[off++] = RT_BYTE4(uImm);
4756 }
4757
4758#elif defined(RT_ARCH_ARM64)
4759 uint32_t uImmR = 0;
4760 uint32_t uImmNandS = 0;
4761 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4762 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4763 else
4764# ifdef IEM_WITH_THROW_CATCH
4765 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4766# else
4767 AssertReleaseFailedStmt(off = UINT32_MAX);
4768# endif
4769
4770#else
4771# error "Port me"
4772#endif
4773 return off;
4774}
4775
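/*
 * A minimal caller-side sketch (not original code, and assuming pCodeBuf was
 * obtained via iemNativeInstrBufEnsure): pre-check that the constant is
 * encodable as an ARM64 logical immediate before using the Ex variant, and
 * otherwise fall back to the non-Ex one below, which spills to a temporary
 * register:
 *
 * @code
 *  uint32_t uImmNandS = 0, uImmR = 0;
 *  if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
 *      off = iemNativeEmitOrGpr32ByImmEx(pCodeBuf, off, iGprDst, uImm); // e.g. 0x00ff00ff is encodable
 *  else
 *      off = iemNativeEmitOrGpr32ByImm(pReNative, off, iGprDst, uImm);  // temp register fallback
 * @endcode
 */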
4776
4777/**
4778 * Emits code for OR'ing a 32-bit GPR with a constant.
4779 *
4780 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4781 */
4782DECL_INLINE_THROW(uint32_t)
4783iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4784{
4785#if defined(RT_ARCH_AMD64)
4786 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
4787
4788#elif defined(RT_ARCH_ARM64)
4789 uint32_t uImmR = 0;
4790 uint32_t uImmNandS = 0;
4791 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4792 {
4793 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4794 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4795 }
4796 else
4797 {
4798        /* Use temporary register for the immediate. */
4799 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4800 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
4801 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4802 }
4803
4804#else
4805# error "Port me"
4806#endif
4807 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4808 return off;
4809}
4810
4811
4812
4813/**
4814 * ORs two 64-bit GPRs together, storing the result in a third register.
4815 */
4816DECL_FORCE_INLINE(uint32_t)
4817iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
4818{
4819#ifdef RT_ARCH_AMD64
4820 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
4821 {
4822 /** @todo consider LEA */
4823 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
4824 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
4825 }
4826 else
4827 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
4828
4829#elif defined(RT_ARCH_ARM64)
4830 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
4831
4832#else
4833# error "Port me!"
4834#endif
4835 return off;
4836}
4837
4838
4839
4840/**
4841 * ORs two 32-bit GPRs together, storing the result in a third register.
4842 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4843 */
4844DECL_FORCE_INLINE(uint32_t)
4845iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
4846{
4847#ifdef RT_ARCH_AMD64
4848 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
4849 {
4850 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
4851 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
4852 }
4853 else
4854 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
4855
4856#elif defined(RT_ARCH_ARM64)
4857 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
4858
4859#else
4860# error "Port me!"
4861#endif
4862 return off;
4863}
4864
4865
4866/**
4867 * Emits code for XOR'ing two 64-bit GPRs.
4868 */
4869DECL_INLINE_THROW(uint32_t)
4870iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4871{
4872#if defined(RT_ARCH_AMD64)
4873    /* xor Gv, Ev */
4874 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4875 pCodeBuf[off++] = 0x33;
4876 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4877
4878#elif defined(RT_ARCH_ARM64)
4879 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
4880
4881#else
4882# error "Port me"
4883#endif
4884 return off;
4885}
4886
4887
4888/**
4889 * Emits code for XOR'ing two 64-bit GPRs.
4890 */
4891DECL_INLINE_THROW(uint32_t)
4892iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4893{
4894#if defined(RT_ARCH_AMD64)
4895 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4896#elif defined(RT_ARCH_ARM64)
4897 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4898#else
4899# error "Port me"
4900#endif
4901 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4902 return off;
4903}
4904
4905
4906/**
4907 * Emits code for XOR'ing two 32-bit GPRs.
4908 */
4909DECL_INLINE_THROW(uint32_t)
4910iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4911{
4912#if defined(RT_ARCH_AMD64)
4913    /* xor Gv, Ev */
4914 if (iGprDst >= 8 || iGprSrc >= 8)
4915 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4916 pCodeBuf[off++] = 0x33;
4917 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4918
4919#elif defined(RT_ARCH_ARM64)
4920 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4921
4922#else
4923# error "Port me"
4924#endif
4925 return off;
4926}
4927
4928
4929/**
4930 * Emits code for XOR'ing two 32-bit GPRs.
4931 */
4932DECL_INLINE_THROW(uint32_t)
4933iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4934{
4935#if defined(RT_ARCH_AMD64)
4936 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4937#elif defined(RT_ARCH_ARM64)
4938 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4939#else
4940# error "Port me"
4941#endif
4942 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4943 return off;
4944}
4945
4946
4947/**
4948 * Emits code for XOR'ing a 32-bit GPR with a constant.
4949 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4950 * @note For ARM64 this only supports @a uImm values that can be expressed using
4951 *       the two 6-bit immediates of the EOR instruction. The caller must make
4952 * sure this is possible!
4953 */
4954DECL_FORCE_INLINE_THROW(uint32_t)
4955iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4956{
4957#if defined(RT_ARCH_AMD64)
4958    /* xor Ev, imm */
4959 if (iGprDst >= 8)
4960 pCodeBuf[off++] = X86_OP_REX_B;
4961 if ((int32_t)uImm == (int8_t)uImm)
4962 {
4963 pCodeBuf[off++] = 0x83;
4964 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
4965 pCodeBuf[off++] = (uint8_t)uImm;
4966 }
4967 else
4968 {
4969 pCodeBuf[off++] = 0x81;
4970 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
4971 pCodeBuf[off++] = RT_BYTE1(uImm);
4972 pCodeBuf[off++] = RT_BYTE2(uImm);
4973 pCodeBuf[off++] = RT_BYTE3(uImm);
4974 pCodeBuf[off++] = RT_BYTE4(uImm);
4975 }
4976
4977#elif defined(RT_ARCH_ARM64)
4978 uint32_t uImmR = 0;
4979 uint32_t uImmNandS = 0;
4980 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4981 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4982 else
4983# ifdef IEM_WITH_THROW_CATCH
4984 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4985# else
4986 AssertReleaseFailedStmt(off = UINT32_MAX);
4987# endif
4988
4989#else
4990# error "Port me"
4991#endif
4992 return off;
4993}
4994
4995
4996/*********************************************************************************************************************************
4997* Shifting *
4998*********************************************************************************************************************************/
4999
5000/**
5001 * Emits code for shifting a GPR a fixed number of bits to the left.
5002 */
5003DECL_FORCE_INLINE(uint32_t)
5004iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5005{
5006 Assert(cShift > 0 && cShift < 64);
5007
5008#if defined(RT_ARCH_AMD64)
5009 /* shl dst, cShift */
5010 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5011 if (cShift != 1)
5012 {
5013 pCodeBuf[off++] = 0xc1;
5014 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5015 pCodeBuf[off++] = cShift;
5016 }
5017 else
5018 {
5019 pCodeBuf[off++] = 0xd1;
5020 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5021 }
5022
5023#elif defined(RT_ARCH_ARM64)
5024 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5025
5026#else
5027# error "Port me"
5028#endif
5029 return off;
5030}
5031
5032
5033/**
5034 * Emits code for shifting a GPR a fixed number of bits to the left.
5035 */
5036DECL_INLINE_THROW(uint32_t)
5037iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5038{
5039#if defined(RT_ARCH_AMD64)
5040 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5041#elif defined(RT_ARCH_ARM64)
5042 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5043#else
5044# error "Port me"
5045#endif
5046 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5047 return off;
5048}
5049
5050
5051/**
5052 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5053 */
5054DECL_FORCE_INLINE(uint32_t)
5055iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5056{
5057 Assert(cShift > 0 && cShift < 32);
5058
5059#if defined(RT_ARCH_AMD64)
5060 /* shl dst, cShift */
5061 if (iGprDst >= 8)
5062 pCodeBuf[off++] = X86_OP_REX_B;
5063 if (cShift != 1)
5064 {
5065 pCodeBuf[off++] = 0xc1;
5066 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5067 pCodeBuf[off++] = cShift;
5068 }
5069 else
5070 {
5071 pCodeBuf[off++] = 0xd1;
5072 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5073 }
5074
5075#elif defined(RT_ARCH_ARM64)
5076 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5077
5078#else
5079# error "Port me"
5080#endif
5081 return off;
5082}
5083
5084
5085/**
5086 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5087 */
5088DECL_INLINE_THROW(uint32_t)
5089iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5090{
5091#if defined(RT_ARCH_AMD64)
5092 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5093#elif defined(RT_ARCH_ARM64)
5094 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5095#else
5096# error "Port me"
5097#endif
5098 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5099 return off;
5100}
5101
5102
5103/**
5104 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5105 */
5106DECL_FORCE_INLINE(uint32_t)
5107iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5108{
5109 Assert(cShift > 0 && cShift < 64);
5110
5111#if defined(RT_ARCH_AMD64)
5112 /* shr dst, cShift */
5113 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5114 if (cShift != 1)
5115 {
5116 pCodeBuf[off++] = 0xc1;
5117 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5118 pCodeBuf[off++] = cShift;
5119 }
5120 else
5121 {
5122 pCodeBuf[off++] = 0xd1;
5123 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5124 }
5125
5126#elif defined(RT_ARCH_ARM64)
5127 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5128
5129#else
5130# error "Port me"
5131#endif
5132 return off;
5133}
5134
5135
5136/**
5137 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5138 */
5139DECL_INLINE_THROW(uint32_t)
5140iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5141{
5142#if defined(RT_ARCH_AMD64)
5143 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5144#elif defined(RT_ARCH_ARM64)
5145 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5146#else
5147# error "Port me"
5148#endif
5149 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5150 return off;
5151}
5152
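/*
 * Illustrative idiom (an assumption about usage, not original code): pairing
 * a left and a right shift by the same count zero-extends the low bits of a
 * register, e.g. keeping only bits 15:0 of iGprDst:
 *
 * @code
 *  off = iemNativeEmitShiftGprLeft(pReNative, off, iGprDst, 48);
 *  off = iemNativeEmitShiftGprRight(pReNative, off, iGprDst, 48);
 * @endcode
 */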
5153
5154/**
5155 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5156 * right.
5157 */
5158DECL_FORCE_INLINE(uint32_t)
5159iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5160{
5161 Assert(cShift > 0 && cShift < 32);
5162
5163#if defined(RT_ARCH_AMD64)
5164 /* shr dst, cShift */
5165 if (iGprDst >= 8)
5166 pCodeBuf[off++] = X86_OP_REX_B;
5167 if (cShift != 1)
5168 {
5169 pCodeBuf[off++] = 0xc1;
5170 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5171 pCodeBuf[off++] = cShift;
5172 }
5173 else
5174 {
5175 pCodeBuf[off++] = 0xd1;
5176 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5177 }
5178
5179#elif defined(RT_ARCH_ARM64)
5180 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5181
5182#else
5183# error "Port me"
5184#endif
5185 return off;
5186}
5187
5188
5189/**
5190 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5191 * right.
5192 */
5193DECL_INLINE_THROW(uint32_t)
5194iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5195{
5196#if defined(RT_ARCH_AMD64)
5197 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5198#elif defined(RT_ARCH_ARM64)
5199 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5200#else
5201# error "Port me"
5202#endif
5203 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5204 return off;
5205}
5206
5207
5208/**
5209 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5210 * right and assigning it to a different GPR.
5211 */
5212DECL_INLINE_THROW(uint32_t)
5213iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5214{
5215 Assert(cShift > 0); Assert(cShift < 32);
5216#if defined(RT_ARCH_AMD64)
5217 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5218 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5219
5220#elif defined(RT_ARCH_ARM64)
5221 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5222
5223#else
5224# error "Port me"
5225#endif
5226 return off;
5227}
5228
5229
5230/**
5231 * Emits code for rotating a GPR a fixed number of bits to the left.
5232 */
5233DECL_FORCE_INLINE(uint32_t)
5234iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5235{
5236 Assert(cShift > 0 && cShift < 64);
5237
5238#if defined(RT_ARCH_AMD64)
5239 /* rol dst, cShift */
5240 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5241 if (cShift != 1)
5242 {
5243 pCodeBuf[off++] = 0xc1;
5244 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5245 pCodeBuf[off++] = cShift;
5246 }
5247 else
5248 {
5249 pCodeBuf[off++] = 0xd1;
5250 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5251 }
5252
5253#elif defined(RT_ARCH_ARM64)
5254 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5255
5256#else
5257# error "Port me"
5258#endif
5259 return off;
5260}
5261
5262
5263#if defined(RT_ARCH_AMD64)
5264/**
5265 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5266 */
5267DECL_FORCE_INLINE(uint32_t)
5268iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5269{
5270 Assert(cShift > 0 && cShift < 32);
5271
5272 /* rcl dst, cShift */
5273 if (iGprDst >= 8)
5274 pCodeBuf[off++] = X86_OP_REX_B;
5275 if (cShift != 1)
5276 {
5277 pCodeBuf[off++] = 0xc1;
5278 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5279 pCodeBuf[off++] = cShift;
5280 }
5281 else
5282 {
5283 pCodeBuf[off++] = 0xd1;
5284 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5285 }
5286
5287 return off;
5288}
5289#endif /* RT_ARCH_AMD64 */
5290
5291
5292
5293/**
5294 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5295 * @note Bits 63:32 of the destination GPR will be cleared.
5296 */
5297DECL_FORCE_INLINE(uint32_t)
5298iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5299{
5300#if defined(RT_ARCH_AMD64)
5301 /*
5302 * There is no bswap r16 on x86 (the encoding exists but does not work).
5303 * So just use a rol (gcc -O2 is doing that).
5304 *
5305 * rol r16, 0x8
5306 */
5307 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5308 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5309 if (iGpr >= 8)
5310 pbCodeBuf[off++] = X86_OP_REX_B;
5311 pbCodeBuf[off++] = 0xc1;
5312 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5313 pbCodeBuf[off++] = 0x08;
5314#elif defined(RT_ARCH_ARM64)
5315 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5316
5317 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5318#else
5319# error "Port me"
5320#endif
5321
5322 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5323 return off;
5324}
5325
5326
5327/**
5328 * Emits code for reversing the byte order in a 32-bit GPR.
5329 * @note Bits 63:32 of the destination GPR will be cleared.
5330 */
5331DECL_FORCE_INLINE(uint32_t)
5332iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5333{
5334#if defined(RT_ARCH_AMD64)
5335 /* bswap r32 */
5336 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5337
5338 if (iGpr >= 8)
5339 pbCodeBuf[off++] = X86_OP_REX_B;
5340 pbCodeBuf[off++] = 0x0f;
5341 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5342#elif defined(RT_ARCH_ARM64)
5343 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5344
5345 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5346#else
5347# error "Port me"
5348#endif
5349
5350 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5351 return off;
5352}
5353
5354
5355/**
5356 * Emits code for reversing the byte order in a 64-bit GPR.
5357 */
5358DECL_FORCE_INLINE(uint32_t)
5359iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5360{
5361#if defined(RT_ARCH_AMD64)
5362 /* bswap r64 */
5363 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5364
5365 if (iGpr >= 8)
5366 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5367 else
5368 pbCodeBuf[off++] = X86_OP_REX_W;
5369 pbCodeBuf[off++] = 0x0f;
5370 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5371#elif defined(RT_ARCH_ARM64)
5372 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5373
5374 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5375#else
5376# error "Port me"
5377#endif
5378
5379 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5380 return off;
5381}
5382
5383
5384/*********************************************************************************************************************************
5385* Compare and Testing *
5386*********************************************************************************************************************************/
5387
5388
5389#ifdef RT_ARCH_ARM64
5390/**
5391 * Emits an ARM64 compare instruction.
5392 */
5393DECL_INLINE_THROW(uint32_t)
5394iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5395 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5396{
5397 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5398 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5399 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5400 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5401 return off;
5402}
5403#endif
5404
5405
5406/**
5407 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5408 * with conditional instructions.
5409 */
5410DECL_FORCE_INLINE(uint32_t)
5411iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5412{
5413#ifdef RT_ARCH_AMD64
5414 /* cmp Gv, Ev */
5415 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5416 pCodeBuf[off++] = 0x3b;
5417 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5418
5419#elif defined(RT_ARCH_ARM64)
5420 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
5421
5422#else
5423# error "Port me!"
5424#endif
5425 return off;
5426}
5427
5428
5429/**
5430 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5431 * with conditional instructions.
5432 */
5433DECL_INLINE_THROW(uint32_t)
5434iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5435{
5436#ifdef RT_ARCH_AMD64
5437 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5438#elif defined(RT_ARCH_ARM64)
5439 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5440#else
5441# error "Port me!"
5442#endif
5443 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5444 return off;
5445}
5446
5447
5448/**
5449 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5450 * with conditional instructions.
5451 */
5452DECL_FORCE_INLINE(uint32_t)
5453iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5454{
5455#ifdef RT_ARCH_AMD64
5456 /* cmp Gv, Ev */
5457 if (iGprLeft >= 8 || iGprRight >= 8)
5458 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5459 pCodeBuf[off++] = 0x3b;
5460 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5461
5462#elif defined(RT_ARCH_ARM64)
5463 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
5464
5465#else
5466# error "Port me!"
5467#endif
5468 return off;
5469}
5470
5471
5472/**
5473 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5474 * with conditional instructions.
5475 */
5476DECL_INLINE_THROW(uint32_t)
5477iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5478{
5479#ifdef RT_ARCH_AMD64
5480 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5481#elif defined(RT_ARCH_ARM64)
5482 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5483#else
5484# error "Port me!"
5485#endif
5486 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5487 return off;
5488}
5489
5490
5491/**
5492 * Emits a compare of a 64-bit GPR with a constant value, setting status
5493 * flags/whatever for use with conditional instructions.
5494 */
5495DECL_INLINE_THROW(uint32_t)
5496iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
5497{
5498#ifdef RT_ARCH_AMD64
5499    if (uImm <= UINT32_C(0x7f))
5500 {
5501 /* cmp Ev, Ib */
5502 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5503 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5504 pbCodeBuf[off++] = 0x83;
5505 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5506 pbCodeBuf[off++] = (uint8_t)uImm;
5507 }
5508 else if ((int64_t)uImm == (int32_t)uImm)
5509 {
5510 /* cmp Ev, imm */
5511 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5512 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5513 pbCodeBuf[off++] = 0x81;
5514 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5515 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5516 pbCodeBuf[off++] = RT_BYTE1(uImm);
5517 pbCodeBuf[off++] = RT_BYTE2(uImm);
5518 pbCodeBuf[off++] = RT_BYTE3(uImm);
5519 pbCodeBuf[off++] = RT_BYTE4(uImm);
5520 }
5521 else
5522 {
5523 /* Use temporary register for the immediate. */
5524 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5525 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5526 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5527 }
5528
5529#elif defined(RT_ARCH_ARM64)
5530    /** @todo guess there are cleverer things we can do here... */
5531 if (uImm < _4K)
5532 {
5533 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5534 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5535 true /*64Bit*/, true /*fSetFlags*/);
5536 }
5537 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5538 {
5539 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5540 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5541 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5542 }
5543 else
5544 {
5545 /* Use temporary register for the immediate. */
5546 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5547 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5548 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5549 }
5550
5551#else
5552# error "Port me!"
5553#endif
5554
5555 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5556 return off;
5557}
5558
5559
5560/**
5561 * Emits a compare of a 32-bit GPR with a constant value, setting status
5562 * flags/whatever for use with conditional instructions.
5563 *
5564 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5565 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
5566 * bits all zero). Will release assert or throw exception if the caller
5567 * violates this restriction.
5568 */
5569DECL_FORCE_INLINE_THROW(uint32_t)
5570iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5571{
5572#ifdef RT_ARCH_AMD64
5573 if (iGprLeft >= 8)
5574 pCodeBuf[off++] = X86_OP_REX_B;
5575 if (uImm <= UINT32_C(0x7f))
5576 {
5577 /* cmp Ev, Ib */
5578 pCodeBuf[off++] = 0x83;
5579 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5580 pCodeBuf[off++] = (uint8_t)uImm;
5581 }
5582 else
5583 {
5584 /* cmp Ev, imm */
5585 pCodeBuf[off++] = 0x81;
5586 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5587 pCodeBuf[off++] = RT_BYTE1(uImm);
5588 pCodeBuf[off++] = RT_BYTE2(uImm);
5589 pCodeBuf[off++] = RT_BYTE3(uImm);
5590 pCodeBuf[off++] = RT_BYTE4(uImm);
5591 }
5592
5593#elif defined(RT_ARCH_ARM64)
5594    /** @todo guess there are cleverer things we can do here... */
5595 if (uImm < _4K)
5596 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5597 false /*64Bit*/, true /*fSetFlags*/);
5598 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5599        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5600 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5601 else
5602# ifdef IEM_WITH_THROW_CATCH
5603 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5604# else
5605 AssertReleaseFailedStmt(off = UINT32_MAX);
5606# endif
5607
5608#else
5609# error "Port me!"
5610#endif
5611 return off;
5612}
5613
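/*
 * A small sketch (not original code, assuming pCodeBuf came from
 * iemNativeInstrBufEnsure) of which immediates the Ex variant accepts on
 * ARM64 and which force the non-Ex fallback:
 *
 * @code
 *  off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, iGprLeft, UINT32_C(0xfff));    // imm12
 *  off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, iGprLeft, UINT32_C(0xabc000)); // imm12 << 12
 *  off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprLeft, UINT32_C(0x12345));   // temp register
 * @endcode
 */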
5614
5615/**
5616 * Emits a compare of a 32-bit GPR with a constant value, setting status
5617 * flags/whatever for use with conditional instructions.
5618 */
5619DECL_INLINE_THROW(uint32_t)
5620iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5621{
5622#ifdef RT_ARCH_AMD64
5623 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
5624
5625#elif defined(RT_ARCH_ARM64)
5626    /** @todo guess there are cleverer things we can do here... */
5627 if (uImm < _4K)
5628 {
5629 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5630 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5631 false /*64Bit*/, true /*fSetFlags*/);
5632 }
5633 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5634 {
5635 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5636        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5637 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5638 }
5639 else
5640 {
5641 /* Use temporary register for the immediate. */
5642 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5643 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
5644 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5645 }
5646
5647#else
5648# error "Port me!"
5649#endif
5650
5651 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5652 return off;
5653}
5654
5655
5656/**
5657 * Emits a compare of a 16-bit GPR with a constant value, setting status
5658 * flags/whatever for use with conditional instructions.
5659 *
5660 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
5661 *       16-bit value from @a iGprLeft.
5662 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5663 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
5664 * bits all zero). Will release assert or throw exception if the caller
5665 * violates this restriction.
5666 */
5667DECL_FORCE_INLINE_THROW(uint32_t)
5668iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
5669 uint8_t idxTmpReg = UINT8_MAX)
5670{
5671#ifdef RT_ARCH_AMD64
5672 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5673 if (iGprLeft >= 8)
5674 pCodeBuf[off++] = X86_OP_REX_B;
5675 if (uImm <= UINT32_C(0x7f))
5676 {
5677 /* cmp Ev, Ib */
5678 pCodeBuf[off++] = 0x83;
5679 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5680 pCodeBuf[off++] = (uint8_t)uImm;
5681 }
5682 else
5683 {
5684 /* cmp Ev, imm */
5685 pCodeBuf[off++] = 0x81;
5686 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5687 pCodeBuf[off++] = RT_BYTE1(uImm);
5688 pCodeBuf[off++] = RT_BYTE2(uImm);
5689 }
5690 RT_NOREF(idxTmpReg);
5691
5692#elif defined(RT_ARCH_ARM64)
5693# ifdef IEM_WITH_THROW_CATCH
5694 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5695# else
5696 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
5697# endif
5698 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
5699 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
5700 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
5701
5702#else
5703# error "Port me!"
5704#endif
5705 return off;
5706}
5707
5708
5709/**
5710 * Emits a compare of a 16-bit GPR with a constant value, setting status
5711 * flags/whatever for use with conditional instructions.
5712 *
5713 * @note ARM64: Helper register is required (idxTmpReg).
5714 */
5715DECL_INLINE_THROW(uint32_t)
5716iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
5717 uint8_t idxTmpReg = UINT8_MAX)
5718{
5719#ifdef RT_ARCH_AMD64
5720 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
5721#elif defined(RT_ARCH_ARM64)
5722 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
5723#else
5724# error "Port me!"
5725#endif
5726 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5727 return off;
5728}
5729
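/*
 * A minimal usage sketch (assuming the temporary register helpers declared in
 * IEMN8veRecompiler.h): ARM64 needs the scratch register for masking out
 * bits 15:0 before comparing, while AMD64 ignores it:
 *
 * @code
 *  uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *  off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprLeft, UINT16_C(0x1234), idxTmpReg);
 *  iemNativeRegFreeTmp(pReNative, idxTmpReg);
 * @endcode
 */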
5730
5731
5732/*********************************************************************************************************************************
5733* Branching *
5734*********************************************************************************************************************************/
5735
5736/**
5737 * Emits a JMP rel32 / B imm26 to the given label.
5738 */
5739DECL_FORCE_INLINE_THROW(uint32_t)
5740iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
5741{
5742 Assert(idxLabel < pReNative->cLabels);
5743
5744#ifdef RT_ARCH_AMD64
5745 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
5746 {
5747 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
5748 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
5749 {
5750 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
5751 pCodeBuf[off++] = (uint8_t)offRel;
5752 }
5753 else
5754 {
5755 offRel -= 3;
5756 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
5757 pCodeBuf[off++] = RT_BYTE1(offRel);
5758 pCodeBuf[off++] = RT_BYTE2(offRel);
5759 pCodeBuf[off++] = RT_BYTE3(offRel);
5760 pCodeBuf[off++] = RT_BYTE4(offRel);
5761 }
5762 }
5763 else
5764 {
5765 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
5766 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
5767 pCodeBuf[off++] = 0xfe;
5768 pCodeBuf[off++] = 0xff;
5769 pCodeBuf[off++] = 0xff;
5770 pCodeBuf[off++] = 0xff;
5771 }
5772 pCodeBuf[off++] = 0xcc; /* int3 poison */
5773
5774#elif defined(RT_ARCH_ARM64)
5775 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
5776 pCodeBuf[off++] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
5777 else
5778 {
5779 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
5780 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
5781 }
5782
5783#else
5784# error "Port me!"
5785#endif
5786 return off;
5787}
5788
5789
5790/**
5791 * Emits a JMP rel32 / B imm26 to the given label.
5792 */
5793DECL_INLINE_THROW(uint32_t)
5794iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5795{
5796#ifdef RT_ARCH_AMD64
5797 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
5798#elif defined(RT_ARCH_ARM64)
5799 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
5800#else
5801# error "Port me!"
5802#endif
5803 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5804 return off;
5805}
5806
5807
5808/**
5809 * Emits a JMP rel32 / B imm26 to a new undefined label.
5810 */
5811DECL_INLINE_THROW(uint32_t)
5812iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
5813{
5814 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
5815 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
5816}
5817
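/*
 * A minimal flow sketch (assuming iemNativeLabelDefine from
 * IEMN8veRecompiler.h): branch to a yet-undefined label and define it once
 * the target offset is known, letting the recorded fixup patch the branch:
 *
 * @code
 *  uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, uData);
 *  off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
 *  // ... emit the code being skipped ...
 *  iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode
 */
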
5818/** Condition type. */
5819#ifdef RT_ARCH_AMD64
5820typedef enum IEMNATIVEINSTRCOND : uint8_t
5821{
5822 kIemNativeInstrCond_o = 0,
5823 kIemNativeInstrCond_no,
5824 kIemNativeInstrCond_c,
5825 kIemNativeInstrCond_nc,
5826 kIemNativeInstrCond_e,
5827 kIemNativeInstrCond_ne,
5828 kIemNativeInstrCond_be,
5829 kIemNativeInstrCond_nbe,
5830 kIemNativeInstrCond_s,
5831 kIemNativeInstrCond_ns,
5832 kIemNativeInstrCond_p,
5833 kIemNativeInstrCond_np,
5834 kIemNativeInstrCond_l,
5835 kIemNativeInstrCond_nl,
5836 kIemNativeInstrCond_le,
5837 kIemNativeInstrCond_nle
5838} IEMNATIVEINSTRCOND;
5839#elif defined(RT_ARCH_ARM64)
5840typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
5841# define kIemNativeInstrCond_o todo_conditional_codes
5842# define kIemNativeInstrCond_no todo_conditional_codes
5843# define kIemNativeInstrCond_c todo_conditional_codes
5844# define kIemNativeInstrCond_nc todo_conditional_codes
5845# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
5846# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
5847# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
5848# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
5849# define kIemNativeInstrCond_s todo_conditional_codes
5850# define kIemNativeInstrCond_ns todo_conditional_codes
5851# define kIemNativeInstrCond_p todo_conditional_codes
5852# define kIemNativeInstrCond_np todo_conditional_codes
5853# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
5854# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
5855# define kIemNativeInstrCond_le kArmv8InstrCond_Le
5856# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
5857#else
5858# error "Port me!"
5859#endif
5860
5861
5862/**
5863 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
5864 */
5865DECL_FORCE_INLINE_THROW(uint32_t)
5866iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
5867 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
5868{
5869 Assert(idxLabel < pReNative->cLabels);
5870
5871 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
5872#ifdef RT_ARCH_AMD64
5873 if (offLabel >= off)
5874 {
5875 /* jcc rel32 */
5876 pCodeBuf[off++] = 0x0f;
5877 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
5878 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
5879 pCodeBuf[off++] = 0x00;
5880 pCodeBuf[off++] = 0x00;
5881 pCodeBuf[off++] = 0x00;
5882 pCodeBuf[off++] = 0x00;
5883 }
5884 else
5885 {
5886 int32_t offDisp = offLabel - (off + 2);
5887 if ((int8_t)offDisp == offDisp)
5888 {
5889 /* jcc rel8 */
5890 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
5891 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
5892 }
5893 else
5894 {
5895 /* jcc rel32 */
5896 offDisp -= 4;
5897 pCodeBuf[off++] = 0x0f;
5898 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
5899 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
5900 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
5901 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
5902 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
5903 }
5904 }
5905
5906#elif defined(RT_ARCH_ARM64)
5907 if (offLabel >= off)
5908 {
5909 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5910 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
5911 }
5912 else
5913 {
5914 Assert(off - offLabel <= 0x3ffffU);
5915 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
5916 }
5917
5918#else
5919# error "Port me!"
5920#endif
5921 return off;
5922}
5923
5924
5925/**
5926 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
5927 */
5928DECL_INLINE_THROW(uint32_t)
5929iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
5930{
5931#ifdef RT_ARCH_AMD64
5932 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
5933#elif defined(RT_ARCH_ARM64)
5934 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
5935#else
5936# error "Port me!"
5937#endif
5938 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5939 return off;
5940}
5941
5942
5943/**
5944 * Emits a Jcc rel32 / B.cc imm19 to a new label.
5945 */
5946DECL_INLINE_THROW(uint32_t)
5947iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5948 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
5949{
5950 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
5951 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
5952}
5953
5954
5955/**
5956 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
5957 */
5958DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5959{
5960#ifdef RT_ARCH_AMD64
5961 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
5962#elif defined(RT_ARCH_ARM64)
5963 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
5964#else
5965# error "Port me!"
5966#endif
5967}
5968
5969/**
5970 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
5971 */
5972DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5973 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
5974{
5975#ifdef RT_ARCH_AMD64
5976 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
5977#elif defined(RT_ARCH_ARM64)
5978 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
5979#else
5980# error "Port me!"
5981#endif
5982}
5983
5984
5985/**
5986 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
5987 */
5988DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5989{
5990#ifdef RT_ARCH_AMD64
5991 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
5992#elif defined(RT_ARCH_ARM64)
5993 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
5994#else
5995# error "Port me!"
5996#endif
5997}
5998
5999/**
6000 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6001 */
6002DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6003 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6004{
6005#ifdef RT_ARCH_AMD64
6006 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6007#elif defined(RT_ARCH_ARM64)
6008 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6009#else
6010# error "Port me!"
6011#endif
6012}
6013
6014
6015/**
6016 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6017 */
6018DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6019{
6020#ifdef RT_ARCH_AMD64
6021 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6022#elif defined(RT_ARCH_ARM64)
6023 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6024#else
6025# error "Port me!"
6026#endif
6027}
6028
6029/**
6030 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6031 */
6032DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6033 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6034{
6035#ifdef RT_ARCH_AMD64
6036 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6037#elif defined(RT_ARCH_ARM64)
6038 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6039#else
6040# error "Port me!"
6041#endif
6042}
6043
6044
6045/**
6046 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6047 */
6048DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6049{
6050#ifdef RT_ARCH_AMD64
6051 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6052#elif defined(RT_ARCH_ARM64)
6053 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6054#else
6055# error "Port me!"
6056#endif
6057}
6058
6059/**
6060 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6061 */
6062DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6063 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6064{
6065#ifdef RT_ARCH_AMD64
6066 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6067#elif defined(RT_ARCH_ARM64)
6068 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6069#else
6070# error "Port me!"
6071#endif
6072}
6073
6074
6075/**
6076 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6077 */
6078DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6079{
6080#ifdef RT_ARCH_AMD64
6081 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6082#elif defined(RT_ARCH_ARM64)
6083 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6084#else
6085# error "Port me!"
6086#endif
6087}
6088
6089/**
6090 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6091 */
6092DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6093 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6094{
6095#ifdef RT_ARCH_AMD64
6096 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6097#elif defined(RT_ARCH_ARM64)
6098 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6099#else
6100# error "Port me!"
6101#endif
6102}
6103
6104
6105/**
6106 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6107 *
6108 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6109 *
6110 * Only use hardcoded jumps forward when emitting for exactly one
6111 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6112 * the right target address on all platforms!
6113 *
6114 *       Please also note that on x86 it is necessary to pass off + 256 or higher
6115 *       for @a offTarget if one believes the intervening code is more than 127
6116 * bytes long.
6117 */
6118DECL_FORCE_INLINE(uint32_t)
6119iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6120{
6121#ifdef RT_ARCH_AMD64
6122 /* jcc rel8 / rel32 */
6123 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6124 if (offDisp < 128 && offDisp >= -128)
6125 {
6126 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6127 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6128 }
6129 else
6130 {
6131 offDisp -= 4;
6132 pCodeBuf[off++] = 0x0f;
6133 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6134 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6135 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6136 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6137 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6138 }
6139
6140#elif defined(RT_ARCH_ARM64)
6141 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6142
6143#else
6144# error "Port me!"
6145#endif
6146 return off;
6147}
6148
6149
6150/**
6151 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6152 *
6153 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6154 *
6155 * Only use hardcoded jumps forward when emitting for exactly one
6156 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6157 * the right target address on all platforms!
6158 *
6159 *       Please also note that on x86 it is necessary to pass off + 256 or higher
6160 *       for @a offTarget if one believes the intervening code is more than 127
6161 * bytes long.
6162 */
6163DECL_INLINE_THROW(uint32_t)
6164iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6165{
6166#ifdef RT_ARCH_AMD64
6167 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6168#elif defined(RT_ARCH_ARM64)
6169 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6170#else
6171# error "Port me!"
6172#endif
6173 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6174 return off;
6175}
6176
6177
6178/**
6179 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6180 *
6181 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6182 */
6183DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6184{
6185#ifdef RT_ARCH_AMD64
6186 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6187#elif defined(RT_ARCH_ARM64)
6188 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6189#else
6190# error "Port me!"
6191#endif
6192}
6193
6194
6195/**
6196 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6197 *
6198 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6199 */
6200DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6201{
6202#ifdef RT_ARCH_AMD64
6203 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6204#elif defined(RT_ARCH_ARM64)
6205 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6206#else
6207# error "Port me!"
6208#endif
6209}
6210
6211
6212/**
6213 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6214 *
6215 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6216 */
6217DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6218{
6219#ifdef RT_ARCH_AMD64
6220 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6221#elif defined(RT_ARCH_ARM64)
6222 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6223#else
6224# error "Port me!"
6225#endif
6226}
6227
6228
6229/**
6230 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6231 *
6232 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6233 */
6234DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6235{
6236#ifdef RT_ARCH_AMD64
6237 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6238#elif defined(RT_ARCH_ARM64)
6239 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6240#else
6241# error "Port me!"
6242#endif
6243}
6244
6245
6246/**
6247 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6248 *
6249 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6250 */
6251DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6252{
6253#ifdef RT_ARCH_AMD64
6254 /* jmp rel8 or rel32 */
6255 int32_t offDisp = offTarget - (off + 2);
6256 if (offDisp < 128 && offDisp >= -128)
6257 {
6258 pCodeBuf[off++] = 0xeb;
6259 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6260 }
6261 else
6262 {
6263 offDisp -= 3;
6264 pCodeBuf[off++] = 0xe9;
6265 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6266 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6267 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6268 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6269 }
6270
6271#elif defined(RT_ARCH_ARM64)
6272 pCodeBuf[off++] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6273
6274#else
6275# error "Port me!"
6276#endif
6277 return off;
6278}
6279
6280
6281/**
6282 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6283 *
6284 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6285 */
6286DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6287{
6288#ifdef RT_ARCH_AMD64
6289 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6290#elif defined(RT_ARCH_ARM64)
6291 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6292#else
6293# error "Port me!"
6294#endif
6295 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6296 return off;
6297}
6298
6299
6300/**
6301 * Fixes up a conditional jump to a fixed label.
6302 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6303 * iemNativeEmitJzToFixed, ...
6304 */
6305DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6306{
6307#ifdef RT_ARCH_AMD64
6308 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6309 uint8_t const bOpcode = pbCodeBuf[offFixup];
6310 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6311 {
6312 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6313 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6314 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6315 }
6316 else
6317 {
6318 if (bOpcode != 0x0f)
6319 Assert(bOpcode == 0xe9);
6320 else
6321 {
6322 offFixup += 1;
6323 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
6324 }
6325 uint32_t const offRel32 = offTarget - (offFixup + 5);
6326 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6327 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6328 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6329 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6330 }
6331
6332#elif defined(RT_ARCH_ARM64)
6333 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6334 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6335 {
6336 /* B.COND + BC.COND */
6337 int32_t const offDisp = offTarget - offFixup;
6338 Assert(offDisp >= -262144 && offDisp < 262144);
6339 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6340 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6341 }
6342 else
6343 {
6344 /* B imm26 */
6345 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6346 int32_t const offDisp = offTarget - offFixup;
6347 Assert(offDisp >= -33554432 && offDisp < 33554432);
6348 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6349 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6350 }
6351
6352#else
6353# error "Port me!"
6354#endif
6355}
6356
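/*
 * A minimal pairing sketch (not original code): emit a forward Jcc with a
 * placeholder target far enough ahead to force the rel32 form on x86 (see
 * the off + 256 note above), then patch it once the real target is known:
 *
 * @code
 *  uint32_t const offFixup = off;
 *  off = iemNativeEmitJccToFixed(pReNative, off, off + 256, kIemNativeInstrCond_e);
 *  // ... emit the code being jumped over ...
 *  iemNativeFixupFixedJump(pReNative, offFixup, off);
 * @endcode
 */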
6357
6358#ifdef RT_ARCH_AMD64
6359/**
6360 * For doing bt on a register.
6361 */
6362DECL_INLINE_THROW(uint32_t)
6363iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6364{
6365 Assert(iBitNo < 64);
6366 /* bt Ev, imm8 */
6367 if (iBitNo >= 32)
6368 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6369 else if (iGprSrc >= 8)
6370 pCodeBuf[off++] = X86_OP_REX_B;
6371 pCodeBuf[off++] = 0x0f;
6372 pCodeBuf[off++] = 0xba;
6373 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6374 pCodeBuf[off++] = iBitNo;
6375 return off;
6376}
6377#endif /* RT_ARCH_AMD64 */
6378
6379
6380/**
6381 * Internal helper, don't call directly.
6382 */
6383DECL_INLINE_THROW(uint32_t)
6384iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6385 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6386{
6387 Assert(iBitNo < 64);
6388#ifdef RT_ARCH_AMD64
6389 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6390 if (iBitNo < 8)
6391 {
6392 /* test Eb, imm8 */
6393 if (iGprSrc >= 4)
6394 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6395 pbCodeBuf[off++] = 0xf6;
6396 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6397 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6398 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6399 }
6400 else
6401 {
6402 /* bt Ev, imm8 */
6403 if (iBitNo >= 32)
6404 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6405 else if (iGprSrc >= 8)
6406 pbCodeBuf[off++] = X86_OP_REX_B;
6407 pbCodeBuf[off++] = 0x0f;
6408 pbCodeBuf[off++] = 0xba;
6409 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6410 pbCodeBuf[off++] = iBitNo;
6411 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6412 }
6413
6414#elif defined(RT_ARCH_ARM64)
6415 /* Use the TBZ/TBNZ instructions here. */
6416 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6417 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
6418 {
6419 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
6420 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
6421 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
6422 //if (offLabel == UINT32_MAX)
6423 {
6424 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6425 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6426 }
6427 //else
6428 //{
6429 // RT_BREAKPOINT();
6430 // Assert(off - offLabel <= 0x1fffU);
6431 // pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
6432 //
6433 //}
6434 }
6435 else
6436 {
6437 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
6438 pu32CodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
6439 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6440 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
6441 }
6442
6443#else
6444# error "Port me!"
6445#endif
6446 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6447 return off;
6448}
6449
6450
6451/**
6452 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
6453 * @a iGprSrc.
6454 *
6455 * @note On ARM64 the range is only +/-8191 instructions.
6456 */
6457DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6458 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6459{
6460 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
6461}
6462
6463
6464/**
6465 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
6466 * _set_ in @a iGprSrc.
6467 *
6468 * @note On ARM64 the range is only +/-8191 instructions.
6469 */
6470DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6471 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6472{
6473 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
6474}
6475
6476
6477/**
6478 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
6479 * flags accordingly.
6480 */
6481DECL_INLINE_THROW(uint32_t)
6482iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
6483{
6484 Assert(fBits != 0);
6485#ifdef RT_ARCH_AMD64
6486
6487 if (fBits >= UINT32_MAX)
6488 {
6489 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6490
6491 /* test Ev,Gv */
6492 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6493 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
6494 pbCodeBuf[off++] = 0x85;
6495 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
6496
6497 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6498 }
6499 else if (fBits <= UINT32_MAX)
6500 {
6501 /* test Eb, imm8 or test Ev, imm32 */
6502 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6503 if (fBits <= UINT8_MAX)
6504 {
6505 if (iGprSrc >= 4)
6506 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6507 pbCodeBuf[off++] = 0xf6;
6508 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6509 pbCodeBuf[off++] = (uint8_t)fBits;
6510 }
6511 else
6512 {
6513 if (iGprSrc >= 8)
6514 pbCodeBuf[off++] = X86_OP_REX_B;
6515 pbCodeBuf[off++] = 0xf7;
6516 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6517 pbCodeBuf[off++] = RT_BYTE1(fBits);
6518 pbCodeBuf[off++] = RT_BYTE2(fBits);
6519 pbCodeBuf[off++] = RT_BYTE3(fBits);
6520 pbCodeBuf[off++] = RT_BYTE4(fBits);
6521 }
6522 }
6523 /** @todo implement me. */
6524 else
6525 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
6526
6527#elif defined(RT_ARCH_ARM64)
6528 uint32_t uImmR = 0;
6529 uint32_t uImmNandS = 0;
6530 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
6531 {
6532 /* ands xzr, iGprSrc, #fBits */
6533 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6534 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
6535 }
6536 else
6537 {
6538 /* ands xzr, iGprSrc, iTmpReg */
6539 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6540 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6541 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
6542 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6543 }
6544
6545#else
6546# error "Port me!"
6547#endif
6548 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6549 return off;
6550}
6551
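/*
 * Note on the ARM64 path above: Armv8A64ConvertMask64ToImmRImmS() only
 * succeeds for masks expressible as an ARM64 logical immediate, i.e. a rotated
 * run of contiguous ones, optionally replicated.  A mask like
 * UINT64_C(0x0000000000ff0000) encodes directly, whereas a sparse one like
 * UINT64_C(0x0000000000000005) takes the temporary register fallback.
 */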
6552
6553/**
6554 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
6555 * @a iGprSrc, setting CPU flags accordingly.
6556 *
6557 * @note For ARM64 this only supports @a fBits values that can be expressed
6558 * using the two 6-bit immediates of the ANDS instruction. The caller
6559 * must make sure this is possible!
6560 */
6561DECL_FORCE_INLINE_THROW(uint32_t)
6562iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
6563{
6564 Assert(fBits != 0);
6565
6566#ifdef RT_ARCH_AMD64
6567 if (fBits <= UINT8_MAX)
6568 {
6569 /* test Eb, imm8 */
6570 if (iGprSrc >= 4)
6571 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6572 pCodeBuf[off++] = 0xf6;
6573 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6574 pCodeBuf[off++] = (uint8_t)fBits;
6575 }
6576 else
6577 {
6578 /* test Ev, imm32 */
6579 if (iGprSrc >= 8)
6580 pCodeBuf[off++] = X86_OP_REX_B;
6581 pCodeBuf[off++] = 0xf7;
6582 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6583 pCodeBuf[off++] = RT_BYTE1(fBits);
6584 pCodeBuf[off++] = RT_BYTE2(fBits);
6585 pCodeBuf[off++] = RT_BYTE3(fBits);
6586 pCodeBuf[off++] = RT_BYTE4(fBits);
6587 }
6588
6589#elif defined(RT_ARCH_ARM64)
6590 /* ands wzr, src, #fBits (32-bit) */
6591 uint32_t uImmR = 0;
6592 uint32_t uImmNandS = 0;
6593 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6594 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6595 else
6596# ifdef IEM_WITH_THROW_CATCH
6597 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6598# else
6599 AssertReleaseFailedStmt(off = UINT32_MAX);
6600# endif
6601
6602#else
6603# error "Port me!"
6604#endif
6605 return off;
6606}
6607
6608
6609
6610/**
6611 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6612 * @a iGprSrc, setting CPU flags accordingly.
6613 *
6614 * @note For ARM64 this only supports @a fBits values that can be expressed
6615 * using the two 6-bit immediates of the ANDS instruction. The caller
6616 * must make sure this is possible!
6617 */
6618DECL_FORCE_INLINE_THROW(uint32_t)
6619iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6620{
6621 Assert(fBits != 0);
6622
6623#ifdef RT_ARCH_AMD64
6624 /* test Eb, imm8 */
6625 if (iGprSrc >= 4)
6626 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6627 pCodeBuf[off++] = 0xf6;
6628 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6629 pCodeBuf[off++] = fBits;
6630
6631#elif defined(RT_ARCH_ARM64)
6632 /* ands wzr, src, #fBits (32-bit) */
6633 uint32_t uImmR = 0;
6634 uint32_t uImmNandS = 0;
6635 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6636 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6637 else
6638# ifdef IEM_WITH_THROW_CATCH
6639 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6640# else
6641 AssertReleaseFailedStmt(off = UINT32_MAX);
6642# endif
6643
6644#else
6645# error "Port me!"
6646#endif
6647 return off;
6648}
6649
6650
6651/**
6652 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6653 * @a iGprSrc, setting CPU flags accordingly.
6654 */
6655DECL_INLINE_THROW(uint32_t)
6656iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6657{
6658 Assert(fBits != 0);
6659
6660#ifdef RT_ARCH_AMD64
6661 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
6662
6663#elif defined(RT_ARCH_ARM64)
6664 /* ands wzr, src, [tmp|#imm] (32-bit) */
6665 uint32_t uImmR = 0;
6666 uint32_t uImmNandS = 0;
6667 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6668 {
6669 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6670 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6671 }
6672 else
6673 {
6674 /* Use a temporary register when the mask can't be encoded as a logical immediate. */
6675 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6676 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6677 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
6678 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6679 }
6680
6681#else
6682# error "Port me!"
6683#endif
6684 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6685 return off;
6686}
6687
6688
6689/**
6690 * Emits a jump to @a idxLabel on the condition that _any_ of the bits in
6691 * @a fBits are set in @a iGprSrc.
6692 */
6693DECL_INLINE_THROW(uint32_t)
6694iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6695 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
6696{
6697 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
6698
6699 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
6700 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6701
6702 return off;
6703}
6704
6705
6706/**
6707 * Emits a jump to @a idxLabel on the condition that _none_ of the bits in
6708 * @a fBits are set in @a iGprSrc.
6709 */
6710DECL_INLINE_THROW(uint32_t)
6711iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6712 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
6713{
6714 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
6715
6716 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
6717 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6718
6719 return off;
6720}
6721
6722
6723/**
6724 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as specified by @a fJmpIfNotZero.
6725 *
6726 * The operand size is given by @a f64Bit.
6727 */
6728DECL_FORCE_INLINE_THROW(uint32_t)
6729iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6730 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
6731{
6732 Assert(idxLabel < pReNative->cLabels);
6733
6734#ifdef RT_ARCH_AMD64
6735 /* test reg32,reg32 / test reg64,reg64 */
6736 if (f64Bit)
6737 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
6738 else if (iGprSrc >= 8)
6739 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
6740 pCodeBuf[off++] = 0x85;
6741 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
6742
6743 /* jz/jnz idxLabel */
6744 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
6745 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6746
6747#elif defined(RT_ARCH_ARM64)
6748 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6749 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
6750 iGprSrc, f64Bit);
6751 else
6752 {
6753 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6754 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
6755 }
6756
6757#else
6758# error "Port me!"
6759#endif
6760 return off;
6761}
6762
6763
6764/**
6765 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as specified by @a fJmpIfNotZero.
6766 *
6767 * The operand size is given by @a f64Bit.
6768 */
6769DECL_FORCE_INLINE_THROW(uint32_t)
6770iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6771 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
6772{
6773#ifdef RT_ARCH_AMD64
6774 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
6775 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
6776#elif defined(RT_ARCH_ARM64)
6777 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
6778 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
6779#else
6780# error "Port me!"
6781#endif
6782 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6783 return off;
6784}
6785
6786
6787/* if (Gpr1 == 0) Jmp idxLabel; */
6788
6789/**
6790 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6791 *
6792 * The operand size is given by @a f64Bit.
6793 */
6794DECL_FORCE_INLINE_THROW(uint32_t)
6795iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6796 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6797{
6798 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
6799 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
6800}
6801
6802
6803/**
6804 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6805 *
6806 * The operand size is given by @a f64Bit.
6807 */
6808DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6809 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6810{
6811 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
6812}
6813
6814
6815/**
6816 * Emits code that jumps to a new label if @a iGprSrc is zero.
6817 *
6818 * The operand size is given by @a f64Bit.
6819 */
6820DECL_INLINE_THROW(uint32_t)
6821iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
6822 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6823{
6824 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6825 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
6826}
6827
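/*
 * Illustrative usage only (a minimal sketch): guard a division against a zero
 * divisor by branching to a raise-#DE label.  The idxRegDivisor register and
 * the kIemNativeLabelType_RaiseDe label type are assumptions for illustration:
 * @code
 *     // 32-bit test (f64Bit=false), raising #DE if the divisor is zero:
 *     off = iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(pReNative, off, idxRegDivisor,
 *                                                        false, kIemNativeLabelType_RaiseDe);
 * @endcode
 */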
6828
6829/* if (Gpr1 != 0) Jmp idxLabel; */
6830
6831/**
6832 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6833 *
6834 * The operand size is given by @a f64Bit.
6835 */
6836DECL_FORCE_INLINE_THROW(uint32_t)
6837iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6838 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6839{
6840 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
6841 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
6842}
6843
6844
6845/**
6846 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6847 *
6848 * The operand size is given by @a f64Bit.
6849 */
6850DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6851 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6852{
6853 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
6854}
6855
6856
6857/**
6858 * Emits code that jumps to a new label if @a iGprSrc is not zero.
6859 *
6860 * The operand size is given by @a f64Bit.
6861 */
6862DECL_INLINE_THROW(uint32_t)
6863iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
6864 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6865{
6866 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6867 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
6868}
6869
6870
6871/* if (Gpr1 != Gpr2) Jmp idxLabel; */
6872
6873/**
6874 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
6875 * differ.
6876 */
6877DECL_INLINE_THROW(uint32_t)
6878iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6879 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
6880{
6881 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
6882 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6883 return off;
6884}
6885
6886
6887/**
6888 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
6889 */
6890DECL_INLINE_THROW(uint32_t)
6891iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6892 uint8_t iGprLeft, uint8_t iGprRight,
6893 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6894{
6895 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6896 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
6897}
6898
6899
6900/* if (Gpr != Imm) Jmp idxLabel; */
6901
6902/**
6903 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
6904 */
6905DECL_INLINE_THROW(uint32_t)
6906iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6907 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
6908{
6909 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
6910 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6911 return off;
6912}
6913
6914
6915/**
6916 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
6917 */
6918DECL_INLINE_THROW(uint32_t)
6919iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6920 uint8_t iGprSrc, uint64_t uImm,
6921 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6922{
6923 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6924 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6925}
6926
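/*
 * Illustrative usage only (a minimal sketch): branch to a TB-obsoletion label
 * when a loaded dword no longer matches the expected opcode bytes.  The
 * register index, expected value and label index are assumptions for
 * illustration:
 * @code
 *     off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegOpcodeDw,
 *                                                          uExpectedDw, idxLabelObsoleteTb);
 * @endcode
 */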
6927
6928/**
6929 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
6930 * @a uImm.
6931 */
6932DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6933 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
6934{
6935 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
6936 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6937 return off;
6938}
6939
6940
6941/**
6942 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
6943 * @a uImm.
6944 */
6945DECL_INLINE_THROW(uint32_t)
6946iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6947 uint8_t iGprSrc, uint32_t uImm,
6948 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6949{
6950 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6951 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6952}
6953
6954
6955/**
6956 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
6957 * @a uImm.
6958 */
6959DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6960 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
6961{
6962 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
6963 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6964 return off;
6965}
6966
6967
6968/**
6969 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
6970 * @a uImm.
6971 */
6972DECL_INLINE_THROW(uint32_t)
6973iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6974 uint8_t iGprSrc, uint16_t uImm,
6975 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6976{
6977 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6978 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6979}
6980
6981
6982/* if (Gpr == Imm) Jmp idxLabel; */
6983
6984/**
6985 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
6986 */
6987DECL_INLINE_THROW(uint32_t)
6988iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6989 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
6990{
6991 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
6992 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6993 return off;
6994}
6995
6996
6997/**
6998 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
6999 */
7000DECL_INLINE_THROW(uint32_t)
7001iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7002 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7003{
7004 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7005 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7006}
7007
7008
7009/**
7010 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7011 */
7012DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7013 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7014{
7015 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7016 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7017 return off;
7018}
7019
7020
7021/**
7022 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7023 */
7024DECL_INLINE_THROW(uint32_t)
7025iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7026 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7027{
7028 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7029 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7030}
7031
7032
7033/**
7034 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7035 *
7036 * @note ARM64: Helper register is required (idxTmpReg).
7037 */
7038DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7039 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7040 uint8_t idxTmpReg = UINT8_MAX)
7041{
7042 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7043 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7044 return off;
7045}
7046
7047
7048/**
7049 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7050 *
7051 * @note ARM64: Helper register is required (idxTmpReg).
7052 */
7053DECL_INLINE_THROW(uint32_t)
7054iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7055 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7056 uint8_t idxTmpReg = UINT8_MAX)
7057{
7058 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7059 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7060}
7061
7062
7063/*********************************************************************************************************************************
7064* Calls. *
7065*********************************************************************************************************************************/
7066
7067/**
7068 * Emits a call to a 64-bit address.
7069 */
7070DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7071{
7072#ifdef RT_ARCH_AMD64
7073 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7074
7075 /* call rax */
7076 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7077 pbCodeBuf[off++] = 0xff;
7078 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7079
7080#elif defined(RT_ARCH_ARM64)
7081 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7082
7083 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7084 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7085
7086#else
7087# error "port me"
7088#endif
7089 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7090 return off;
7091}
7092
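/*
 * Illustrative usage only (a minimal sketch): arguments are loaded into the
 * ABI argument registers before the call; the IEMNATIVE_CALL_ARG0_GREG
 * constant and the helper function are assumptions for illustration:
 * @code
 *     off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, uSomeArg);
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemSomeHelperFunction);
 * @endcode
 */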
7093
7094/**
7095 * Emits code to load a stack variable into an argument GPR.
7096 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7097 */
7098DECL_FORCE_INLINE_THROW(uint32_t)
7099iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7100 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7101 bool fSpilledVarsInVolatileRegs = false)
7102{
7103 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7104 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7105 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7106
7107 uint8_t const idxRegVar = pVar->idxReg;
7108 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7109 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7110 || !fSpilledVarsInVolatileRegs ))
7111 {
7112 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7113 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7114 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7115 if (!offAddend)
7116 {
7117 if (idxRegArg != idxRegVar)
7118 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7119 }
7120 else
7121 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7122 }
7123 else
7124 {
7125 uint8_t const idxStackSlot = pVar->idxStackSlot;
7126 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7127 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7128 if (offAddend)
7129 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7130 }
7131 return off;
7132}
7133
7134
7135/**
7136 * Emits code to load a stack or immediate variable value into an argument GPR,
7137 * optionally with an addend.
7138 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7139 */
7140DECL_FORCE_INLINE_THROW(uint32_t)
7141iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7142 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7143 bool fSpilledVarsInVolatileRegs = false)
7144{
7145 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7146 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7147 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7148 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7149 else
7150 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7151 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7152 return off;
7153}
7154
7155
7156/**
7157 * Emits code to load the variable address into an argument GPR.
7158 *
7159 * This only works for uninitialized and stack variables.
7160 */
7161DECL_FORCE_INLINE_THROW(uint32_t)
7162iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7163 bool fFlushShadows)
7164{
7165 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7166 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7167 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7168 || pVar->enmKind == kIemNativeVarKind_Stack,
7169 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7170
7171 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7172 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7173
7174 uint8_t const idxRegVar = pVar->idxReg;
7175 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7176 {
7177 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7178 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7179 Assert(pVar->idxReg == UINT8_MAX);
7180 }
7181 Assert( pVar->idxStackSlot != UINT8_MAX
7182 && pVar->idxReg == UINT8_MAX);
7183
7184 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7185}
7186
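/*
 * Illustrative usage only (a minimal sketch): passing a stack variable by
 * reference to a helper.  The argument register constant and idxVarMem are
 * assumptions for illustration:
 * @code
 *     off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
 *                                               idxVarMem, true); // fFlushShadows=true
 * @endcode
 */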
7187
7188#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7189
7190/**
7191 * Emits a gprdst = ~gprsrc store.
7192 */
7193DECL_FORCE_INLINE_THROW(uint32_t)
7194iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7195{
7196#ifdef RT_ARCH_AMD64
7197 if (iGprDst != iGprSrc)
7198 {
7199 /* mov gprdst, gprsrc. */
7200 if (f64Bit)
7201 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7202 else
7203 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7204 }
7205
7206 /* not gprdst */
7207 if (f64Bit || iGprDst >= 8)
7208 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7209 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7210 pCodeBuf[off++] = 0xf7;
7211 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7212#elif defined(RT_ARCH_ARM64)
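 /* orn gprdst, xzr, gprsrc ; i.e. gprdst = xzr | ~gprsrc = ~gprsrc */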
7213 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7214#else
7215# error "port me"
7216#endif
7217 return off;
7218}
7219
7220
7221/**
7222 * Emits a gprdst = ~gprsrc store.
7223 */
7224DECL_INLINE_THROW(uint32_t)
7225iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7226{
7227#ifdef RT_ARCH_AMD64
7228 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7229#elif defined(RT_ARCH_ARM64)
7230 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7231#else
7232# error "port me"
7233#endif
7234 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7235 return off;
7236}
7237
7238
7239/**
7240 * Emits a 128-bit vector register store to a VCpu value.
7241 */
7242DECL_FORCE_INLINE_THROW(uint32_t)
7243iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7244{
7245#ifdef RT_ARCH_AMD64
7246 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7247 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7248 if (iVecReg >= 8)
7249 pCodeBuf[off++] = X86_OP_REX_R;
7250 pCodeBuf[off++] = 0x0f;
7251 pCodeBuf[off++] = 0x7f;
7252 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7253#elif defined(RT_ARCH_ARM64)
7254 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7255
7256#else
7257# error "port me"
7258#endif
7259 return off;
7260}
7261
7262
7263/**
7264 * Emits a 128-bit vector register store to a VCpu value.
7265 */
7266DECL_INLINE_THROW(uint32_t)
7267iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7268{
7269#ifdef RT_ARCH_AMD64
7270 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7271#elif defined(RT_ARCH_ARM64)
7272 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7273#else
7274# error "port me"
7275#endif
7276 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7277 return off;
7278}
7279
7280
7281/**
7282 * Emits a high 128-bit vector register store to a VCpu value.
7283 */
7284DECL_FORCE_INLINE_THROW(uint32_t)
7285iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7286{
7287#ifdef RT_ARCH_AMD64
7288 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7289 pCodeBuf[off++] = X86_OP_VEX3;
7290 if (iVecReg >= 8)
7291 pCodeBuf[off++] = 0x63;
7292 else
7293 pCodeBuf[off++] = 0xe3;
7294 pCodeBuf[off++] = 0x7d;
7295 pCodeBuf[off++] = 0x39;
7296 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7297 pCodeBuf[off++] = 0x01; /* Immediate */
7298#elif defined(RT_ARCH_ARM64)
7299 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7300#else
7301# error "port me"
7302#endif
7303 return off;
7304}
7305
7306
7307/**
7308 * Emits a high 128-bit vector register store to a VCpu value.
7309 */
7310DECL_INLINE_THROW(uint32_t)
7311iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7312{
7313#ifdef RT_ARCH_AMD64
7314 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7315#elif defined(RT_ARCH_ARM64)
7316 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7317 Assert(!(iVecReg & 0x1));
7318 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7319#else
7320# error "port me"
7321#endif
7322 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7323 return off;
7324}
7325
7326
7327/**
7328 * Emits a 128-bit vector register load of a VCpu value.
7329 */
7330DECL_FORCE_INLINE_THROW(uint32_t)
7331iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7332{
7333#ifdef RT_ARCH_AMD64
7334 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
7335 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7336 if (iVecReg >= 8)
7337 pCodeBuf[off++] = X86_OP_REX_R;
7338 pCodeBuf[off++] = 0x0f;
7339 pCodeBuf[off++] = 0x6f;
7340 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7341#elif defined(RT_ARCH_ARM64)
7342 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7343
7344#else
7345# error "port me"
7346#endif
7347 return off;
7348}
7349
7350
7351/**
7352 * Emits a 128-bit vector register load of a VCpu value.
7353 */
7354DECL_INLINE_THROW(uint32_t)
7355iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7356{
7357#ifdef RT_ARCH_AMD64
7358 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7359#elif defined(RT_ARCH_ARM64)
7360 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7361#else
7362# error "port me"
7363#endif
7364 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7365 return off;
7366}
7367
7368
7369/**
7370 * Emits a high 128-bit vector register load of a VCpu value.
7371 */
7372DECL_FORCE_INLINE_THROW(uint32_t)
7373iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7374{
7375#ifdef RT_ARCH_AMD64
7376 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
7377 pCodeBuf[off++] = X86_OP_VEX3;
7378 if (iVecReg >= 8)
7379 pCodeBuf[off++] = 0x63;
7380 else
7381 pCodeBuf[off++] = 0xe3;
7382 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
7383 pCodeBuf[off++] = 0x38;
7384 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7385 pCodeBuf[off++] = 0x01; /* Immediate */
7386#elif defined(RT_ARCH_ARM64)
7387 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7388#else
7389# error "port me"
7390#endif
7391 return off;
7392}
7393
7394
7395/**
7396 * Emits a high 128-bit vector register load of a VCpu value.
7397 */
7398DECL_INLINE_THROW(uint32_t)
7399iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7400{
7401#ifdef RT_ARCH_AMD64
7402 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7403#elif defined(RT_ARCH_ARM64)
7404 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7405 Assert(!(iVecReg & 0x1));
7406 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7407#else
7408# error "port me"
7409#endif
7410 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7411 return off;
7412}
7413
7414
7415/**
7416 * Emits a vecdst = vecsrc load.
7417 */
7418DECL_FORCE_INLINE(uint32_t)
7419iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7420{
7421#ifdef RT_ARCH_AMD64
7422 /* movdqu vecdst, vecsrc */
7423 pCodeBuf[off++] = 0xf3;
7424
7425 if ((iVecRegDst | iVecRegSrc) >= 8)
7426 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
7427 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
7428 : X86_OP_REX_R;
7429 pCodeBuf[off++] = 0x0f;
7430 pCodeBuf[off++] = 0x6f;
7431 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7432
7433#elif defined(RT_ARCH_ARM64)
7434 /* mov dst, src; alias for: orr dst, src, src */
7435 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
7436
7437#else
7438# error "port me"
7439#endif
7440 return off;
7441}
7442
7443
7444/**
7445 * Emits a vecdst = vecsrc load, 128-bit.
7446 */
7447DECL_INLINE_THROW(uint32_t)
7448iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7449{
7450#ifdef RT_ARCH_AMD64
7451 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
7452#elif defined(RT_ARCH_ARM64)
7453 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
7454#else
7455# error "port me"
7456#endif
7457 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7458 return off;
7459}
7460
7461
7462/**
7463 * Emits a vecdst = vecsrc load, 256-bit.
7464 */
7465DECL_INLINE_THROW(uint32_t)
7466iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7467{
7468#ifdef RT_ARCH_AMD64
7469 /* vmovdqa ymm, ymm */
7470 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7471 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
7472 {
7473 pbCodeBuf[off++] = X86_OP_VEX3;
7474 pbCodeBuf[off++] = 0x41;
7475 pbCodeBuf[off++] = 0x7d;
7476 pbCodeBuf[off++] = 0x6f;
7477 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7478 }
7479 else
7480 {
7481 pbCodeBuf[off++] = X86_OP_VEX2;
7482 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
7483 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
7484 pbCodeBuf[off++] = iVecRegSrc >= 8
7485 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
7486 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7487 }
7488#elif defined(RT_ARCH_ARM64)
7489 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7490 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
7491 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
7492 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
7493#else
7494# error "port me"
7495#endif
7496 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7497 return off;
7498}
7499
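/*
 * Note on the ARM64 path above: a 256-bit value is assumed to live in an
 * adjacent, even-aligned 128-bit register pair, so e.g. iVecRegDst=2 with
 * iVecRegSrc=4 copies q4 into q2 and q5 into q3 using two 128-bit moves.
 */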
7500
7501/**
7502 * Emits a gprdst = vecsrc[x] load, 64-bit.
7503 */
7504DECL_FORCE_INLINE(uint32_t)
7505iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7506{
7507#ifdef RT_ARCH_AMD64
7508 if (iQWord >= 2)
7509 {
7510 /** @todo Currently not used. */
7511 AssertReleaseFailed();
7512 }
7513 else
7514 {
7515 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
7516 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7517 pCodeBuf[off++] = X86_OP_REX_W
7518 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7519 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7520 pCodeBuf[off++] = 0x0f;
7521 pCodeBuf[off++] = 0x3a;
7522 pCodeBuf[off++] = 0x16;
7523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7524 pCodeBuf[off++] = iQWord;
7525 }
7526#elif defined(RT_ARCH_ARM64)
7527 /* umov gprdst, vecsrc[iQWord] */
7528 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
7529#else
7530# error "port me"
7531#endif
7532 return off;
7533}
7534
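/*
 * Worked example for the encoding above: iGprDst=X86_GREG_xAX, iVecRegSrc=0
 * (xmm0) and iQWord=1 emits 66 48 0F 3A 16 C0 01, i.e. 'pextrq rax, xmm0, 1'.
 */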
7535
7536/**
7537 * Emits a gprdst = vecsrc[x] load, 64-bit.
7538 */
7539DECL_INLINE_THROW(uint32_t)
7540iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7541{
7542 Assert(iQWord <= 3);
7543
7544#ifdef RT_ARCH_AMD64
7545 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
7546#elif defined(RT_ARCH_ARM64)
7547 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7548 Assert(!(iVecRegSrc & 0x1));
7549 /* Need to access the "high" 128-bit vector register. */
7550 if (iQWord >= 2)
7551 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
7552 else
7553 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
7554#else
7555# error "port me"
7556#endif
7557 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7558 return off;
7559}
7560
7561
7562/**
7563 * Emits a gprdst = vecsrc[x] load, 32-bit.
7564 */
7565DECL_FORCE_INLINE(uint32_t)
7566iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7567{
7568#ifdef RT_ARCH_AMD64
7569 if (iDWord >= 4)
7570 {
7571 /** @todo Currently not used. */
7572 AssertReleaseFailed();
7573 }
7574 else
7575 {
7576 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
7577 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7578 if (iGprDst >= 8 || iVecRegSrc >= 8)
7579 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7580 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7581 pCodeBuf[off++] = 0x0f;
7582 pCodeBuf[off++] = 0x3a;
7583 pCodeBuf[off++] = 0x16;
7584 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7585 pCodeBuf[off++] = iDWord;
7586 }
7587#elif defined(RT_ARCH_ARM64)
7588 /* umov gprdst, vecsrc[iDWord] */
7589 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
7590#else
7591# error "port me"
7592#endif
7593 return off;
7594}
7595
7596
7597/**
7598 * Emits a gprdst = vecsrc[x] load, 32-bit.
7599 */
7600DECL_INLINE_THROW(uint32_t)
7601iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7602{
7603 Assert(iDWord <= 7);
7604
7605#ifdef RT_ARCH_AMD64
7606 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
7607#elif defined(RT_ARCH_ARM64)
7608 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7609 Assert(!(iVecRegSrc & 0x1));
7610 /* Need to access the "high" 128-bit vector register. */
7611 if (iDWord >= 4)
7612 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
7613 else
7614 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
7615#else
7616# error "port me"
7617#endif
7618 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7619 return off;
7620}
7621
7622
7623/**
7624 * Emits a gprdst = vecsrc[x] load, 16-bit.
7625 */
7626DECL_FORCE_INLINE(uint32_t)
7627iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
7628{
7629#ifdef RT_ARCH_AMD64
7630 if (iWord >= 8)
7631 {
7632 /** @todo Currently not used. */
7633 AssertReleaseFailed();
7634 }
7635 else
7636 {
7637 /* pextrw gpr, vecsrc, #iWord */
7638 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7639 if (iGprDst >= 8 || iVecRegSrc >= 8)
7640 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
7641 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
7642 pCodeBuf[off++] = 0x0f;
7643 pCodeBuf[off++] = 0xc5;
7644 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
7645 pCodeBuf[off++] = iWord;
7646 }
7647#elif defined(RT_ARCH_ARM64)
7648 /* umov gprdst, vecsrc[iWord] */
7649 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
7650#else
7651# error "port me"
7652#endif
7653 return off;
7654}
7655
7656
7657/**
7658 * Emits a gprdst = vecsrc[x] load, 16-bit.
7659 */
7660DECL_INLINE_THROW(uint32_t)
7661iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
7662{
7663 Assert(iWord <= 15);
7664
7665#ifdef RT_ARCH_AMD64
7666 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
7667#elif defined(RT_ARCH_ARM64)
7668 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7669 Assert(!(iVecRegSrc & 0x1));
7670 /* Need to access the "high" 128-bit vector register. */
7671 if (iWord >= 8)
7672 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
7673 else
7674 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
7675#else
7676# error "port me"
7677#endif
7678 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7679 return off;
7680}
7681
7682
7683/**
7684 * Emits a gprdst = vecsrc[x] load, 8-bit.
7685 */
7686DECL_FORCE_INLINE(uint32_t)
7687iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
7688{
7689#ifdef RT_ARCH_AMD64
7690 if (iByte >= 16)
7691 {
7692 /** @todo Currently not used. */
7693 AssertReleaseFailed();
7694 }
7695 else
7696 {
7697 /* pextrb gpr, vecsrc, #iByte */
7698 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7699 if (iGprDst >= 8 || iVecRegSrc >= 8)
7700 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7701 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7702 pCodeBuf[off++] = 0x0f;
7703 pCodeBuf[off++] = 0x3a;
7704 pCodeBuf[off++] = 0x14;
7705 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7706 pCodeBuf[off++] = iByte;
7707 }
7708#elif defined(RT_ARCH_ARM64)
7709 /* umov gprdst, vecsrc[iByte] */
7710 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
7711#else
7712# error "port me"
7713#endif
7714 return off;
7715}
7716
7717
7718/**
7719 * Emits a gprdst = vecsrc[x] load, 8-bit.
7720 */
7721DECL_INLINE_THROW(uint32_t)
7722iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
7723{
7724 Assert(iByte <= 31);
7725
7726#ifdef RT_ARCH_AMD64
7727 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
7728#elif defined(RT_ARCH_ARM64)
7729 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7730 Assert(!(iVecRegSrc & 0x1));
7731 /* Need to access the "high" 128-bit vector register. */
7732 if (iByte >= 16)
7733 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
7734 else
7735 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
7736#else
7737# error "port me"
7738#endif
7739 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7740 return off;
7741}
7742
7743
7744/**
7745 * Emits a vecdst[x] = gprsrc store, 64-bit.
7746 */
7747DECL_FORCE_INLINE(uint32_t)
7748iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
7749{
7750#ifdef RT_ARCH_AMD64
7751 /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
7752 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7753 pCodeBuf[off++] = X86_OP_REX_W
7754 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
7755 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7756 pCodeBuf[off++] = 0x0f;
7757 pCodeBuf[off++] = 0x3a;
7758 pCodeBuf[off++] = 0x22;
7759 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
7760 pCodeBuf[off++] = iQWord;
7761#elif defined(RT_ARCH_ARM64)
7762 /* ins vecdst[iQWord], gpr */
7763 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
7764#else
7765# error "port me"
7766#endif
7767 return off;
7768}
7769
7770
7771/**
7772 * Emits a vecdst[x] = gprsrc store, 64-bit.
7773 */
7774DECL_INLINE_THROW(uint32_t)
7775iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
7776{
7777 Assert(iQWord <= 1);
7778
7779#ifdef RT_ARCH_AMD64
7780 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
7781#elif defined(RT_ARCH_ARM64)
7782 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
7783#else
7784# error "port me"
7785#endif
7786 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7787 return off;
7788}
7789
7790
7791/**
7792 * Emits a vecdst[x] = gprsrc store, 32-bit.
7793 */
7794DECL_FORCE_INLINE(uint32_t)
7795iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
7796{
7797#ifdef RT_ARCH_AMD64
7798 /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
7799 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7800 if (iVecRegDst >= 8 || iGprSrc >= 8)
7801 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
7802 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7803 pCodeBuf[off++] = 0x0f;
7804 pCodeBuf[off++] = 0x3a;
7805 pCodeBuf[off++] = 0x22;
7806 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
7807 pCodeBuf[off++] = iDWord;
7808#elif defined(RT_ARCH_ARM64)
7809 /* ins vecdst[iDWord], gpr */
7810 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
7811#else
7812# error "port me"
7813#endif
7814 return off;
7815}
7816
7817
7818/**
7819 * Emits a vecdst[x] = gprsrc store, 32-bit.
7820 */
7821DECL_INLINE_THROW(uint32_t)
7822iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
7823{
7824 Assert(iDWord <= 3);
7825
7826#ifdef RT_ARCH_AMD64
7827 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord);
7828#elif defined(RT_ARCH_ARM64)
7829 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
7830#else
7831# error "port me"
7832#endif
7833 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7834 return off;
7835}
7836
7837
7838/**
7839 * Emits a vecdst.au32[iDWord] = 0 store.
7840 */
7841DECL_FORCE_INLINE(uint32_t)
7842iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
7843{
7844 Assert(iDWord <= 7);
7845
7846#ifdef RT_ARCH_AMD64
7847 /*
7848 * xor tmp0, tmp0
7849 * pinsrd xmm, tmp0, iDword
7850 */
7851 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
7852 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7853 pCodeBuf[off++] = 0x33;
7854 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
7855 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(&pCodeBuf[off], off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
7856#elif defined(RT_ARCH_ARM64)
7857 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7858 Assert(!(iVecReg & 0x1));
7859 /* ins vecdst[iDWord], wzr */
7860 if (iDWord >= 4)
7861 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
7862 else
7863 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
7864#else
7865# error "port me"
7866#endif
7867 return off;
7868}
7869
7870
7871/**
7872 * Emits a vecdst.au32[iDWord] = 0 store.
7873 */
7874DECL_INLINE_THROW(uint32_t)
7875iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
7876{
7877
7878#ifdef RT_ARCH_AMD64
7879 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
7880#elif defined(RT_ARCH_ARM64)
7881 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
7882#else
7883# error "port me"
7884#endif
7885 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7886 return off;
7887}
7888
7889
7890/**
7891 * Emits a vecdst[0:127] = 0 store.
7892 */
7893DECL_FORCE_INLINE(uint32_t)
7894iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
7895{
7896#ifdef RT_ARCH_AMD64
7897 /* pxor xmm, xmm */
7898 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7899 if (iVecReg >= 8)
7900 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
7901 pCodeBuf[off++] = 0x0f;
7902 pCodeBuf[off++] = 0xef;
7903 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
7904#elif defined(RT_ARCH_ARM64)
7905 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7906 Assert(!(iVecReg & 0x1));
7907 /* eor vecreg, vecreg, vecreg */
7908 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
7909#else
7910# error "port me"
7911#endif
7912 return off;
7913}
7914
7915
7916/**
7917 * Emits a vecdst[0:127] = 0 store.
7918 */
7919DECL_INLINE_THROW(uint32_t)
7920iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
7921{
7922#ifdef RT_ARCH_AMD64
7923 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
7924#elif defined(RT_ARCH_ARM64)
7925 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
7926#else
7927# error "port me"
7928#endif
7929 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7930 return off;
7931}
7932
7933
7934/**
7935 * Emits a vecdst[128:255] = 0 store.
7936 */
7937DECL_FORCE_INLINE(uint32_t)
7938iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
7939{
7940#ifdef RT_ARCH_AMD64
7941 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
7942 if (iVecReg < 8)
7943 {
7944 pCodeBuf[off++] = X86_OP_VEX2;
7945 pCodeBuf[off++] = 0xf9;
7946 }
7947 else
7948 {
7949 pCodeBuf[off++] = X86_OP_VEX3;
7950 pCodeBuf[off++] = 0x41;
7951 pCodeBuf[off++] = 0x79;
7952 }
7953 pCodeBuf[off++] = 0x6f;
7954 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
7955#elif defined(RT_ARCH_ARM64)
7956 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7957 Assert(!(iVecReg & 0x1));
7958 /* eor vecreg, vecreg, vecreg */
7959 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
7960#else
7961# error "port me"
7962#endif
7963 return off;
7964}
7965
7966
7967/**
7968 * Emits a vecdst[128:255] = 0 store.
7969 */
7970DECL_INLINE_THROW(uint32_t)
7971iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
7972{
7973#ifdef RT_ARCH_AMD64
7974 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
7975#elif defined(RT_ARCH_ARM64)
7976 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
7977#else
7978# error "port me"
7979#endif
7980 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7981 return off;
7982}
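#if 0 /* Encoding sketch, example only (hand-assembled, unverified): any VEX.128-encoded
         instruction zeroes bits 255:128 of the destination ymm register, which is why a
         128-bit move of a register onto itself is a cheap way of clearing the high half.
         Expected output for iVecReg=2: */
static uint8_t const s_abVmovdqaXmm2[] = { 0xc5, 0xf9, 0x6f, 0xd2 };    /* vmovdqa xmm2, xmm2 */
#endif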
7983
7984
7985/**
7986 * Emits a vecdst[0:255] = 0 store.
7987 */
7988DECL_FORCE_INLINE(uint32_t)
7989iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
7990{
7991#ifdef RT_ARCH_AMD64
7992 /* vpxor ymm, ymm, ymm */
7993 if (iVecReg < 8)
7994 {
7995 pCodeBuf[off++] = X86_OP_VEX2;
7996 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
7997 }
7998 else
7999 {
8000 pCodeBuf[off++] = X86_OP_VEX3;
8001 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
8002 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8003 }
8004 pCodeBuf[off++] = 0xef;
8005 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8006#elif defined(RT_ARCH_ARM64)
8007 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8008 Assert(!(iVecReg & 0x1));
8009 /* eor vecreg, vecreg, vecreg */
8010 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8011 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8012#else
8013# error "port me"
8014#endif
8015 return off;
8016}
8017
8018
8019/**
8020 * Emits a vecdst[0:255] = 0 store.
8021 */
8022DECL_INLINE_THROW(uint32_t)
8023iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8024{
8025#ifdef RT_ARCH_AMD64
8026 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8027#elif defined(RT_ARCH_ARM64)
8028 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
8029#else
8030# error "port me"
8031#endif
8032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8033 return off;
8034}
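#if 0 /* Encoding sketch, example only (hand-assembled, unverified): expected AMD64 output
         of iemNativeEmitSimdZeroVecRegU256Ex for iVecReg=0; one vpxor does the work of the
         Low/High emitter pair above (on ARM64 it is two eors instead). */
static uint8_t const s_abVpxorYmm0[] = { 0xc5, 0xfd, 0xef, 0xc0 };      /* vpxor ymm0, ymm0, ymm0 */
#endif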
8035
8036
8037/**
8038 * Emits a vecdst = gprsrc broadcast, 8-bit.
8039 */
8040DECL_FORCE_INLINE(uint32_t)
8041iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8042{
8043#ifdef RT_ARCH_AMD64
8044 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE4.1) */
8045 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8046 if (iVecRegDst >= 8 || iGprSrc >= 8)
8047 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8048 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8049 pCodeBuf[off++] = 0x0f;
8050 pCodeBuf[off++] = 0x3a;
8051 pCodeBuf[off++] = 0x20;
8052 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8053 pCodeBuf[off++] = 0x00;
8054
8055 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
8056 pCodeBuf[off++] = X86_OP_VEX3;
8057 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8058 | 0x02 /* opcode map. */
8059 | ( iVecRegDst >= 8
8060 ? 0
8061 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8062 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8063 pCodeBuf[off++] = 0x78;
8064 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8065#elif defined(RT_ARCH_ARM64)
8066 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8067 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8068
8069 /* dup vecsrc, gpr */
8070 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
8071 if (f256Bit)
8072 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
8073#else
8074# error "port me"
8075#endif
8076 return off;
8077}
8078
8079
8080/**
8081 * Emits a vecdst[x] = gprsrc broadcast, 8-bit.
8082 */
8083DECL_INLINE_THROW(uint32_t)
8084iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8085{
8086#ifdef RT_ARCH_AMD64
8087 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8088#elif defined(RT_ARCH_ARM64)
8089 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8090#else
8091# error "port me"
8092#endif
8093 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8094 return off;
8095}
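#if 0 /* Usage sketch, example only: AVX2's vpbroadcastb cannot take a general purpose
         register directly, hence the pinsrb-into-lane-0 step in the Ex worker above.
         The wrapping helper is hypothetical; note that on ARM64 iVecRegDst must be even
         when f256Bit is set. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdSplatByteExample(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc)
{
    /* Splat the low byte of iGprSrc across all 32 byte lanes of ymm4 (v4/v5 on ARM64): */
    return iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, 4 /*iVecRegDst*/, iGprSrc, true /*f256Bit*/);
}
#endif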
8096
8097
8098/**
8099 * Emits a vecdst = gprsrc broadcast, 16-bit.
8100 */
8101DECL_FORCE_INLINE(uint32_t)
8102iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8103{
8104#ifdef RT_ARCH_AMD64
8105 /* pinsrw vecdst, gpr, #0 */
8106 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8107 if (iVecRegDst >= 8 || iGprSrc >= 8)
8108 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8109 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8110 pCodeBuf[off++] = 0x0f;
8111 pCodeBuf[off++] = 0xc4;
8112 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8113 pCodeBuf[off++] = 0x00;
8114
8115 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
8116 pCodeBuf[off++] = X86_OP_VEX3;
8117 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8118 | 0x02 /* opcode map. */
8119 | ( iVecRegDst >= 8
8120 ? 0
8121 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8122 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8123 pCodeBuf[off++] = 0x79;
8124 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8125#elif defined(RT_ARCH_ARM64)
8126 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8127 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8128
8129 /* dup vecsrc, gpr */
8130 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
8131 if (f256Bit)
8132 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
8133#else
8134# error "port me"
8135#endif
8136 return off;
8137}
8138
8139
8140/**
8141 * Emits a vecdst = gprsrc broadcast, 16-bit.
8142 */
8143DECL_INLINE_THROW(uint32_t)
8144iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8145{
8146#ifdef RT_ARCH_AMD64
8147 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8148#elif defined(RT_ARCH_ARM64)
8149 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8150#else
8151# error "port me"
8152#endif
8153 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8154 return off;
8155}
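#if 0 /* Encoding sketch, example only (hand-assembled, unverified): unlike pinsrb, pinsrd
         and pinsrq, pinsrw is baseline SSE2, so no SSE4.1 assumption applies to the first
         instruction of this emitter.  Expected first instruction for iVecRegDst=1,
         iGprSrc=2: */
static uint8_t const s_abPinsrwXmm1Edx[] = { 0x66, 0x0f, 0xc4, 0xca, 0x00 };    /* pinsrw xmm1, edx, 0 */
#endif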
8156
8157
8158/**
8159 * Emits a vecdst = gprsrc broadcast, 32-bit.
8160 */
8161DECL_FORCE_INLINE(uint32_t)
8162iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8163{
8164#ifdef RT_ARCH_AMD64
8165 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
8166 * vbroadcast needs a memory operand or another xmm register to work... */
8167
8168 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
8169 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8170 if (iVecRegDst >= 8 || iGprSrc >= 8)
8171 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8172 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8173 pCodeBuf[off++] = 0x0f;
8174 pCodeBuf[off++] = 0x3a;
8175 pCodeBuf[off++] = 0x22;
8176 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8177 pCodeBuf[off++] = 0x00;
8178
8179 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
8180 pCodeBuf[off++] = X86_OP_VEX3;
8181 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8182 | 0x02 /* opcode map. */
8183 | ( iVecRegDst >= 8
8184 ? 0
8185 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8186 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8187 pCodeBuf[off++] = 0x58;
8188 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8189#elif defined(RT_ARCH_ARM64)
8190 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8191 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8192
8193 /* dup vecsrc, gpr */
8194 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
8195 if (f256Bit)
8196 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
8197#else
8198# error "port me"
8199#endif
8200 return off;
8201}
8202
8203
8204/**
8205 * Emits a vecdst = gprsrc broadcast, 32-bit.
8206 */
8207DECL_INLINE_THROW(uint32_t)
8208iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8209{
8210#ifdef RT_ARCH_AMD64
8211 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8212#elif defined(RT_ARCH_ARM64)
8213 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8214#else
8215# error "port me"
8216#endif
8217 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8218 return off;
8219}
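#if 0 /* Usage sketch, example only (hypothetical helper): replicate the low 32 bits of a
         GPR across a single 128-bit register.  On ARM64 this collapses to one "dup"
         instruction; on AMD64 it is the pinsrd + vpbroadcastd pair emitted above. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdSplatDWordExample(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc)
{
    return iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, iVecRegDst, iGprSrc, false /*f256Bit*/);
}
#endif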
8220
8221
8222/**
8223 * Emits a vecdst = gprsrc broadcast, 64-bit.
8224 */
8225DECL_FORCE_INLINE(uint32_t)
8226iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8227{
8228#ifdef RT_ARCH_AMD64
8229 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
8230 * vbroadcast needs a memory operand or another xmm register to work... */
8231
8232 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
8233 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8234 pCodeBuf[off++] = X86_OP_REX_W
8235 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8236 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8237 pCodeBuf[off++] = 0x0f;
8238 pCodeBuf[off++] = 0x3a;
8239 pCodeBuf[off++] = 0x22;
8240 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8241 pCodeBuf[off++] = 0x00;
8242
8243 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
8244 pCodeBuf[off++] = X86_OP_VEX3;
8245 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8246 | 0x02 /* opcode map. */
8247 | ( iVecRegDst >= 8
8248 ? 0
8249 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8250 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8251 pCodeBuf[off++] = 0x59;
8252 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8253#elif defined(RT_ARCH_ARM64)
8254 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8255 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8256
8257 /* dup vecsrc, gpr */
8258 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
8259 if (f256Bit)
8260 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
8261#else
8262# error "port me"
8263#endif
8264 return off;
8265}
8266
8267
8268/**
8269 * Emits a vecdst = gprsrc broadcast, 64-bit.
8270 */
8271DECL_INLINE_THROW(uint32_t)
8272iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8273{
8274#ifdef RT_ARCH_AMD64
8275 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
8276#elif defined(RT_ARCH_ARM64)
8277 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8278#else
8279# error "port me"
8280#endif
8281 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8282 return off;
8283}
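#if 0 /* Encoding sketch, example only (hand-assembled, unverified): expected AMD64 output
         of iemNativeEmitSimdBroadcastGprToVecRegU64Ex for iVecRegDst=0, iGprSrc=0 (rax)
         and f256Bit=true; the mandatory REX.W prefix is what turns pinsrd into pinsrq. */
static uint8_t const s_abBcstRaxYmm0[] =
{
    0x66, 0x48, 0x0f, 0x3a, 0x22, 0xc0, 0x00,  /* pinsrq       xmm0, rax, 0 */
    0xc4, 0xe2, 0x7d, 0x59, 0xc0,              /* vpbroadcastq ymm0, xmm0   */
};
#endif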
8284
8285#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8286
8287/** @} */
8288
8289#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
8290