VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h @ 103860

Last change on this file since 103860 was 103860, checked in by vboxsync, 13 months ago

VMM/IEM: Reworked iemNativeEmitMaybeRaiseSseRelatedXcpt to avoid relative 14-bit jumps on arm64, optimizing it while at it. Also redid much of the tail label emitting (all but a few call a helper taking only pVCpu as arg). Made the fixup code assert+barf upon short 14-bit jumps to tail TB code as those will blow up for large TBs. bugref:10614 bugref:10370

/* $Id: IEMN8veRecompilerEmit.h 103860 2024-03-14 23:47:09Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline    Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = 0xd503201f;

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
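
/* Example (illustrative only): drop a marker with a recognizable value at the
   start of an emitted sequence so it is easy to spot when disassembling a TB:
       off = iemNativeEmitMarker(pReNative, off, UINT32_C(0xcafe0001));
   A non-zero uInfo is encoded into the displacement of a multi-byte nop on
   AMD64, while the ARM64 variant emits a plain nop and ignores uInfo. */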


/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */
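
/* Illustrative only: for 'mov rax, [rbp+16]' this helper picks the disp8 form
   (ModRM bytes 45 10 after the 0x8b opcode), while [rsp+16] additionally needs
   a SIB byte (44 24 10); any displacement outside -128..127 falls back to the
   4-byte disp32 form. */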

/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *     - AMD64: 10 instruction bytes.
 *     - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
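    /* Worked example (illustrative only): uImm64 = 0x0000cafe0000beef has two
       zero half-words, so the movz/movk path below emits just two instructions:
           movz gpr, #0xbeef               ; bits 15:0
           movk gpr, #0xcafe, lsl #32      ; bits 47:32
       whereas a value dominated by all-ones half-words, e.g. 0xffffffffffff1234,
       takes the movn branch and needs only 'movn gpr, #0xedcb'. */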
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = ( uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

    /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
     *        clang 12.x does that, only to use the 'x' version for the
     *        addressing in the following ldr. */

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *     - AMD64: 6 instruction bytes.
 *     - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}
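
/* Illustrative only: uImm32 = 0x00000011 yields a single 'movz gpr, #0x11';
   0xffff1234 a single 'movn gpr, #0xedcb'; an arbitrary pattern such as
   0x12345678 needs the two-instruction 'movz gpr, #0x5678' +
   'movk gpr, #0x1234, lsl #16' fallback. */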


/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}
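
/* Illustrative only: an 8-byte field at offVCpu 0x48 fits the scaled 12-bit
   immediate form directly (a single ldr/str off IEMNATIVE_REG_FIXED_PVMCPU);
   a field beyond that 32 KiB window but within 32 KiB of cpum.GstCtx is
   addressed via IEMNATIVE_REG_FIXED_PCPUMCTX instead; anything else costs a
   mov/movk immediate load plus a register-indexed ldr/str. */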

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: idxTmp1 is always required!  idxTmp2 depends on whether the
 *       offset can be encoded as an immediate or not.  The @a offVCpu immediate
 *       range is 0..8190 bytes from VMCPU and the same from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}
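
/* Illustrative only: callers typically allocate the required temporary first,
   e.g. (for an offset that encodes as an immediate, so no idxTmp2 is needed):
       uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
       off = iemNativeEmitStoreImmToVCpuU16Ex(pCodeBuf, off, uValue, offVCpu, idxTmp1);
       iemNativeRegFreeTmp(pReNative, idxTmp1);
   On AMD64 both temporaries are simply ignored. */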


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load of the effective address of a VCpu field into a GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                         offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}
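
/* Illustrative only: these helpers let the compiler verify the member type,
   e.g. (statistics member name hypothetical):
       off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
                 iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomething));
   whereas a plain RT_UOFFSETOF would accept a member of any type. */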


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
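
/* Illustrative only: on AMD64 the increment above is a single read-modify-write
   'inc qword [rbx+disp]' (the lea helper earlier shows rbx doubling as
   IEMNATIVE_REG_FIXED_PVMCPU there), while ARM64 always needs an explicit
   ldr/add/str triple, plus an immediate load of the offset when it cannot be
   encoded directly. */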


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
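
/* Illustrative only: setting a single flag bit in a 32-bit member, e.g.
   (member name hypothetical):
       off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, RT_BIT_32(7),
                                           RT_UOFFSETOF(VMCPU, iem.s.fSomeFlags));
   Masks below 0x80 use the shorter imm8 form (opcode 0x83) on AMD64; on ARM64
   a mask that isn't a valid logical immediate costs an extra temp register. */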


/**
 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* and dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_W | X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_W | X86_OP_REX_R;
    else
        pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst, src;   alias for: orr dst, xzr, src */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst32, src32;   alias for: orr dst32, wzr, src32 */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movzx Gv,Ew */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb7;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* and gprdst, gprsrc, #0xffff */
# if 1
    Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
# else
    Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
# endif

#else
# error "port me"
#endif
    return off;
}
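
/* Illustrative only: ARM64 logical immediates are encoded as (immr,imms) bit
   patterns rather than literal values; the (0x0f, 0) pair used above expands
   to a 16-bit run of ones in a 32-bit element, i.e. 0x0000ffff, which is what
   the Assert double-checks. */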
1485
1486
1487/**
1488 * Emits a gprdst = gprsrc[15:0] load.
1489 * @note Bits 63 thru 15 are cleared.
1490 */
1491DECL_INLINE_THROW(uint32_t)
1492iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1493{
1494#ifdef RT_ARCH_AMD64
1495 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1496#elif defined(RT_ARCH_ARM64)
1497 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1498#else
1499# error "port me"
1500#endif
1501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1502 return off;
1503}
1504
1505
1506/**
1507 * Emits a gprdst = gprsrc[7:0] load.
1508 * @note Bits 63 thru 8 are cleared.
1509 */
1510DECL_FORCE_INLINE(uint32_t)
1511iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1512{
1513#ifdef RT_ARCH_AMD64
1514 /* movzx Gv,Eb */
1515 if (iGprDst >= 8 || iGprSrc >= 8)
1516 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1517 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1518 : X86_OP_REX_R;
1519 else if (iGprSrc >= 4)
1520 pCodeBuf[off++] = X86_OP_REX;
1521 pCodeBuf[off++] = 0x0f;
1522 pCodeBuf[off++] = 0xb6;
1523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1524
1525#elif defined(RT_ARCH_ARM64)
1526 /* and gprdst, gprsrc, #0xff */
1527 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1528 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1529
1530#else
1531# error "port me"
1532#endif
1533 return off;
1534}
1535
1536
1537/**
1538 * Emits a gprdst = gprsrc[7:0] load.
1539 * @note Bits 63 thru 8 are cleared.
1540 */
1541DECL_INLINE_THROW(uint32_t)
1542iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1543{
1544#ifdef RT_ARCH_AMD64
1545 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1546#elif defined(RT_ARCH_ARM64)
1547 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1548#else
1549# error "port me"
1550#endif
1551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1552 return off;
1553}
1554
1555
1556/**
1557 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1558 * @note Bits 63 thru 8 are cleared.
1559 */
1560DECL_INLINE_THROW(uint32_t)
1561iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1562{
1563#ifdef RT_ARCH_AMD64
1564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1565
1566 /* movzx Gv,Ew */
1567 if ((iGprDst | iGprSrc) >= 8)
1568 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1569 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1570 : X86_OP_REX_R;
1571 pbCodeBuf[off++] = 0x0f;
1572 pbCodeBuf[off++] = 0xb7;
1573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1574
1575 /* shr Ev,8 */
1576 if (iGprDst >= 8)
1577 pbCodeBuf[off++] = X86_OP_REX_B;
1578 pbCodeBuf[off++] = 0xc1;
1579 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1580 pbCodeBuf[off++] = 8;
1581
1582#elif defined(RT_ARCH_ARM64)
1583 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1585 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1586
1587#else
1588# error "port me"
1589#endif
1590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1591 return off;
1592}
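
/*
 * Usage sketch (illustrative; idxRegTmp/idxRegGst are hypothetical register
 * indexes): fetching an AH/CH/DH/BH style high byte from a host copy of a
 * guest register:
 *     off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxRegTmp, idxRegGst);
 * AMD64 needs the movzx + shr pair above, while ARM64 does it in a single UBFX.
 */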
1593
1594
1595/**
1596 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1597 */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1600{
1601#ifdef RT_ARCH_AMD64
1602 /* movsxd r64, r/m32 */
1603 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1604 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1605 pbCodeBuf[off++] = 0x63;
1606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1607
1608#elif defined(RT_ARCH_ARM64)
1609 /* sxtw dst, src */
1610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1611 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1612
1613#else
1614# error "port me"
1615#endif
1616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1617 return off;
1618}
1619
1620
1621/**
1622 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1623 */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1626{
1627#ifdef RT_ARCH_AMD64
1628 /* movsx r64, r/m16 */
1629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xbf;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635#elif defined(RT_ARCH_ARM64)
1636 /* sxth dst, src */
1637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1638 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1639
1640#else
1641# error "port me"
1642#endif
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644 return off;
1645}
1646
1647
1648/**
1649 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
 * @note Bits 63 thru 32 are cleared.
1650 */
1651DECL_INLINE_THROW(uint32_t)
1652iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1653{
1654#ifdef RT_ARCH_AMD64
1655 /* movsx r32, r/m16 */
1656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1657 if (iGprDst >= 8 || iGprSrc >= 8)
1658 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1659 pbCodeBuf[off++] = 0x0f;
1660 pbCodeBuf[off++] = 0xbf;
1661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 /* sxth dst32, src */
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1666 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1667
1668#else
1669# error "port me"
1670#endif
1671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1672 return off;
1673}
1674
1675
1676/**
1677 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1678 */
1679DECL_INLINE_THROW(uint32_t)
1680iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /* movsx r64, r/m8 */
1684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1686 pbCodeBuf[off++] = 0x0f;
1687 pbCodeBuf[off++] = 0xbe;
1688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1689
1690#elif defined(RT_ARCH_ARM64)
1691 /* sxtb dst, src */
1692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1693 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1694
1695#else
1696# error "port me"
1697#endif
1698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1699 return off;
1700}
1701
1702
1703/**
1704 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1705 * @note Bits 63 thru 32 are cleared.
1706 */
1707DECL_INLINE_THROW(uint32_t)
1708iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1709{
1710#ifdef RT_ARCH_AMD64
1711 /* movsx r32, r/m8 */
1712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1713 if (iGprDst >= 8 || iGprSrc >= 8)
1714 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1715 else if (iGprSrc >= 4)
1716 pbCodeBuf[off++] = X86_OP_REX;
1717 pbCodeBuf[off++] = 0x0f;
1718 pbCodeBuf[off++] = 0xbe;
1719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 /* sxtb dst32, src32 */
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1724 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1725
1726#else
1727# error "port me"
1728#endif
1729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1730 return off;
1731}
1732
1733
1734/**
1735 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1736 * @note Bits 63 thru 16 are cleared.
1737 */
1738DECL_INLINE_THROW(uint32_t)
1739iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1740{
1741#ifdef RT_ARCH_AMD64
1742 /* movsx r16, r/m8 */
1743 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1744 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1745 if (iGprDst >= 8 || iGprSrc >= 8)
1746 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1747 else if (iGprSrc >= 4)
1748 pbCodeBuf[off++] = X86_OP_REX;
1749 pbCodeBuf[off++] = 0x0f;
1750 pbCodeBuf[off++] = 0xbe;
1751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1752
1753 /* movzx r32, r/m16 */
1754 if (iGprDst >= 8)
1755 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1756 pbCodeBuf[off++] = 0x0f;
1757 pbCodeBuf[off++] = 0xb7;
1758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1759
1760#elif defined(RT_ARCH_ARM64)
1761 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1763 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1764 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1766
1767#else
1768# error "port me"
1769#endif
1770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1771 return off;
1772}
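
/*
 * Worked example (illustrative): with 0x80 in iGprSrc[7:0] the sequence above
 * leaves 0x000000000000ff80 in iGprDst on both architectures - the byte is
 * sign-extended to 16 bits and everything above bit 15 is masked off.
 */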
1773
1774
1775/**
1776 * Emits a gprdst = gprsrc + addend load.
1777 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1778 */
1779#ifdef RT_ARCH_AMD64
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1782 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1783{
1784 Assert(iAddend != 0);
1785
1786 /* lea gprdst, [gprsrc + iAddend] */
1787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1789 pbCodeBuf[off++] = 0x8d;
1790 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1792 return off;
1793}
1794
1795#elif defined(RT_ARCH_ARM64)
1796DECL_INLINE_THROW(uint32_t)
1797iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1798 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1799{
1800 if ((uint32_t)iAddend < 4096)
1801 {
1802 /* add dst, src, uimm12 */
1803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1805 }
1806 else if ((uint32_t)-iAddend < 4096)
1807 {
1808 /* sub dst, src, uimm12 */
1809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1811 }
1812 else
1813 {
1814 Assert(iGprSrc != iGprDst);
1815 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1818 }
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820 return off;
1821}
1822#else
1823# error "port me"
1824#endif
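
/*
 * Usage sketch (illustrative; idxRegEa/idxRegBase are hypothetical allocator
 * handouts), e.g. forming an effective address from a base register plus a
 * constant displacement:
 *     off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegEa, idxRegBase, 16);
 * On ARM64 addends in the -4095..+4095 range fold into a single ADD/SUB with
 * a uimm12; only larger values take the immediate-load + register-add fallback.
 */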
1825
1826/**
1827 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1828 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1829 */
1830#ifdef RT_ARCH_AMD64
1831DECL_INLINE_THROW(uint32_t)
1832iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1833 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1834#else
1835DECL_INLINE_THROW(uint32_t)
1836iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1837 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1838#endif
1839{
1840 if (iAddend != 0)
1841 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1842 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1843}
1844
1845
1846/**
1847 * Emits a gprdst = gprsrc32 + addend load.
1848 * @note Bits 63 thru 32 are cleared.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1852 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1853{
1854 Assert(iAddend != 0);
1855
1856#ifdef RT_ARCH_AMD64
1857 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1860 if ((iGprDst | iGprSrc) >= 8)
1861 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1862 pbCodeBuf[off++] = 0x8d;
1863 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1864
1865#elif defined(RT_ARCH_ARM64)
1866 if ((uint32_t)iAddend < 4096)
1867 {
1868 /* add dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1871 }
1872 else if ((uint32_t)-iAddend < 4096)
1873 {
1874 /* sub dst, src, uimm12 */
1875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1877 }
1878 else
1879 {
1880 Assert(iGprSrc != iGprDst);
1881 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1884 }
1885
1886#else
1887# error "port me"
1888#endif
1889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1890 return off;
1891}
1892
1893
1894/**
1895 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1896 */
1897DECL_INLINE_THROW(uint32_t)
1898iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1899 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1900{
1901 if (iAddend != 0)
1902 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1903 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1904}
1905
1906
1907/**
1908 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1909 * destination.
1910 */
1911DECL_FORCE_INLINE(uint32_t)
1912iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1913{
1914#ifdef RT_ARCH_AMD64
1915 /* mov reg16, r/m16 */
1916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1917 if (idxDst >= 8 || idxSrc >= 8)
1918 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1919 pCodeBuf[off++] = 0x8b;
1920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1921
1922#elif defined(RT_ARCH_ARM64)
1923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1924 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1925
1926#else
1927# error "Port me!"
1928#endif
1929 return off;
1930}
1931
1932
1933/**
1934 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1935 * destination.
1936 */
1937DECL_INLINE_THROW(uint32_t)
1938iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1939{
1940#ifdef RT_ARCH_AMD64
1941 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1942#elif defined(RT_ARCH_ARM64)
1943 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1944#else
1945# error "Port me!"
1946#endif
1947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1948 return off;
1949}
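
/*
 * Usage sketch (illustrative; idxRegGstCopy/idxRegResult are hypothetical):
 * writing a 16-bit result back into a host copy of a guest register without
 * disturbing bits 63:16, mirroring x86 16-bit write semantics:
 *     off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegGstCopy, idxRegResult);
 */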
1950
1951
1952#ifdef RT_ARCH_AMD64
1953/**
1954 * Common bit of iemNativeEmitLoadGprByBp and friends.
1955 */
1956DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1957 PIEMRECOMPILERSTATE pReNativeAssert)
1958{
1959 if (offDisp < 128 && offDisp >= -128)
1960 {
1961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1962 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1963 }
1964 else
1965 {
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1967 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1968 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1969 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1970 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1971 }
1972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1973 return off;
1974}
1975#elif defined(RT_ARCH_ARM64)
1976/**
1977 * Common bit of iemNativeEmitLoadGprByBp and friends.
1978 */
1979DECL_FORCE_INLINE_THROW(uint32_t)
1980iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1981 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1982{
1983 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1984 {
1985 /* ldr/str w/ unsigned imm12 (scaled) */
1986 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1988 }
1989 else if (offDisp >= -256 && offDisp <= 256)
1990 {
1991 /* ldur/stur w/ signed imm9 (unscaled) */
1992 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1993 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1994 }
1995 else
1996 {
1997 /* Use temporary indexing register. */
1998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2001 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2002 }
2003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2004 return off;
2005}
2006#endif
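
/*
 * Worked example (illustrative) for the ARM64 helper above: an 8-byte access
 * at BP+0x28 takes the scaled uimm12 form (imm12 = 0x28 / 8 = 5), BP-0x10 fits
 * the signed 9-bit unscaled form, and a distant or misaligned offset such as
 * +0x12345 goes through IEMNATIVE_REG_FIXED_TMP0 with register-index addressing.
 */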
2007
2008
2009/**
2010 * Emits a 64-bit GPR load instruction with a BP relative source address.
2011 */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2014{
2015#ifdef RT_ARCH_AMD64
2016 /* mov gprdst, qword [rbp + offDisp] */
2017 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2018 if (iGprDst < 8)
2019 pbCodeBuf[off++] = X86_OP_REX_W;
2020 else
2021 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2022 pbCodeBuf[off++] = 0x8b;
2023 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2024
2025#elif defined(RT_ARCH_ARM64)
2026 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2027
2028#else
2029# error "port me"
2030#endif
2031}
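
/*
 * Usage sketch (illustrative; idxRegTmp and offFrameSlot are hypothetical
 * stand-ins for a host register index and a frame offset tracked elsewhere by
 * the recompiler):
 *     off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegTmp, offFrameSlot);
 */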
2032
2033
2034/**
2035 * Emits a 32-bit GPR load instruction with a BP relative source address.
2036 * @note Bits 63 thru 32 of the GPR will be cleared.
2037 */
2038DECL_INLINE_THROW(uint32_t)
2039iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2040{
2041#ifdef RT_ARCH_AMD64
2042 /* mov gprdst, dword [rbp + offDisp] */
2043 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2044 if (iGprDst >= 8)
2045 pbCodeBuf[off++] = X86_OP_REX_R;
2046 pbCodeBuf[off++] = 0x8b;
2047 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2048
2049#elif defined(RT_ARCH_ARM64)
2050 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2051
2052#else
2053# error "port me"
2054#endif
2055}
2056
2057
2058/**
2059 * Emits a 16-bit GPR load instruction with a BP relative source address.
2060 * @note Bits 63 thru 16 of the GPR will be cleared.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2064{
2065#ifdef RT_ARCH_AMD64
2066 /* movzx gprdst, word [rbp + offDisp] */
2067 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2068 if (iGprDst >= 8)
2069 pbCodeBuf[off++] = X86_OP_REX_R;
2070 pbCodeBuf[off++] = 0x0f;
2071 pbCodeBuf[off++] = 0xb7;
2072 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2073
2074#elif defined(RT_ARCH_ARM64)
2075 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2076
2077#else
2078# error "port me"
2079#endif
2080}
2081
2082
2083/**
2084 * Emits an 8-bit GPR load instruction with a BP relative source address.
2085 * @note Bits 63 thru 8 of the GPR will be cleared.
2086 */
2087DECL_INLINE_THROW(uint32_t)
2088iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2089{
2090#ifdef RT_ARCH_AMD64
2091 /* movzx gprdst, byte [rbp + offDisp] */
2092 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2093 if (iGprDst >= 8)
2094 pbCodeBuf[off++] = X86_OP_REX_R;
2095 pbCodeBuf[off++] = 0x0f;
2096 pbCodeBuf[off++] = 0xb6;
2097 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2098
2099#elif defined(RT_ARCH_ARM64)
2100 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2101
2102#else
2103# error "port me"
2104#endif
2105}
2106
2107
2108/**
2109 * Emits a load effective address to a GPR with a BP relative source address.
2110 */
2111DECL_INLINE_THROW(uint32_t)
2112iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2113{
2114#ifdef RT_ARCH_AMD64
2115 /* lea gprdst, [rbp + offDisp] */
2116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2117 if (iGprDst < 8)
2118 pbCodeBuf[off++] = X86_OP_REX_W;
2119 else
2120 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2121 pbCodeBuf[off++] = 0x8d;
2122 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2123
2124#elif defined(RT_ARCH_ARM64)
2125 if ((uint32_t)offDisp < (unsigned)_4K)
2126 {
2127 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2128 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2129 }
2130 else if ((uint32_t)-offDisp < (unsigned)_4K)
2131 {
2132 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2133 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2134 }
2135 else
2136 {
2137 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2138 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2139 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2140 if (offDisp >= 0)
2141 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2142 else
2143 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2144 }
2145
2146#else
2147# error "port me"
2148#endif
2149
2150 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2151 return off;
2152}
2153
2154
2155/**
2156 * Emits a 64-bit GPR store with a BP relative destination address.
2157 *
2158 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2159 */
2160DECL_INLINE_THROW(uint32_t)
2161iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2162{
2163#ifdef RT_ARCH_AMD64
2164 /* mov qword [rbp + offDisp], gprsrc */
2165 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2166 if (iGprSrc < 8)
2167 pbCodeBuf[off++] = X86_OP_REX_W;
2168 else
2169 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2170 pbCodeBuf[off++] = 0x89;
2171 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2172
2173#elif defined(RT_ARCH_ARM64)
2174 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2175 {
2176 /* str w/ unsigned imm12 (scaled) */
2177 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2178 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2179 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2180 }
2181 else if (offDisp >= -256 && offDisp <= 256)
2182 {
2183 /* stur w/ signed imm9 (unscaled) */
2184 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2185 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2186 }
2187 else if ((uint32_t)-offDisp < (unsigned)_4K)
2188 {
2189 /* Use temporary indexing register w/ sub uimm12. */
2190 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2191 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2192 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2193 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2194 }
2195 else
2196 {
2197 /* Use temporary indexing register. */
2198 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2199 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2200 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2201 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2202 }
2203 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2204 return off;
2205
2206#else
2207# error "Port me!"
2208#endif
2209}
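
/*
 * Usage sketch (illustrative; idxReg/offSlot are hypothetical): spilling a
 * register around code that may clobber it - note that the store variant takes
 * the displacement before the register:
 *     off = iemNativeEmitStoreGprByBp(pReNative, off, offSlot, idxReg);
 *     ... emit the clobbering code ...
 *     off = iemNativeEmitLoadGprByBp(pReNative, off, idxReg, offSlot);
 */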
2210
2211
2212/**
2213 * Emits a 64-bit immediate store with a BP relative destination address.
2214 *
2215 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2216 */
2217DECL_INLINE_THROW(uint32_t)
2218iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2219{
2220#ifdef RT_ARCH_AMD64
2221 if ((int64_t)uImm64 == (int32_t)uImm64)
2222 {
2223 /* mov qword [rbp + offDisp], imm32 - sign extended */
2224 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2225 pbCodeBuf[off++] = X86_OP_REX_W;
2226 pbCodeBuf[off++] = 0xc7;
2227 if (offDisp < 128 && offDisp >= -128)
2228 {
2229 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2230 pbCodeBuf[off++] = (uint8_t)offDisp;
2231 }
2232 else
2233 {
2234 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2235 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2236 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2237 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2238 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2239 }
2240 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2241 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2242 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2243 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2244 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2245 return off;
2246 }
2247#endif
2248
2249 /* Load tmp0, imm64; Store tmp to bp+disp. */
2250 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2251 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2252}
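
/*
 * Worked example (illustrative): uImm64 = 0xffffffff80000000 passes the
 * (int64_t)uImm64 == (int32_t)uImm64 test, so AMD64 emits the short
 * sign-extended 'mov qword [rbp+disp], imm32' form; uImm64 = 0x0000000080000000
 * fails it (the imm32 would sign-extend differently) and goes via
 * IEMNATIVE_REG_FIXED_TMP0, as does everything on ARM64.
 */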
2253
2254#if defined(RT_ARCH_ARM64)
2255
2256/**
2257 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2258 *
2259 * @note Odd and large @a offDisp values require a temporary, unless it's a
2260 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2261 * caller does not heed this.
2262 *
2263 * @note DON'T try this with prefetch.
2264 */
2265DECL_FORCE_INLINE_THROW(uint32_t)
2266iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2267 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2268{
2269 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2270 {
2271 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2272 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2273 }
2274 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2275 && iGprReg != iGprBase)
2276 || iGprTmp != UINT8_MAX)
2277 {
2278 /* The offset is too large, so we must load it into a register and use
2279 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2280 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2281 if (iGprTmp == UINT8_MAX)
2282 iGprTmp = iGprReg;
2283 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2284 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2285 }
2286 else
2287# ifdef IEM_WITH_THROW_CATCH
2288 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2289# else
2290 AssertReleaseFailedStmt(off = UINT32_MAX);
2291# endif
2292 return off;
2293}
2294
2295/**
2296 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2297 */
2298DECL_FORCE_INLINE_THROW(uint32_t)
2299iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2300 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2301{
2302 /*
2303 * There are a couple of ldr variants that take an immediate offset, so
2304 * try to use those if we can; otherwise we have to use a temporary register
2305 * to help with the addressing.
2306 */
2307 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2308 {
2309 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2310 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2311 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2312 }
2313 else
2314 {
2315 /* The offset is too large, so we must load it into a register and use
2316 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2317 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2318 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2319
2320 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2321 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2322
2323 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2324 }
2325 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2326 return off;
2327}
2328
2329#endif /* RT_ARCH_ARM64 */
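
/*
 * Note on the pairing below (illustrative summary): the *Ex variants emit into
 * a caller-ensured instruction buffer and, on ARM64, take an optional iGprTmp
 * for out-of-range displacements, while the plain variants ensure buffer space
 * themselves and allocate a temporary immediate register on demand via
 * iemNativeRegAllocTmpImm.
 */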
2330
2331/**
2332 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2333 *
2334 * @note ARM64: Misaligned @a offDisp values and values not in the
2335 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp) if
2336 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2337 * does not heed this.
2338 */
2339DECL_FORCE_INLINE_THROW(uint32_t)
2340iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2341 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2342{
2343#ifdef RT_ARCH_AMD64
2344 /* mov reg64, mem64 */
2345 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2346 pCodeBuf[off++] = 0x8b;
2347 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2348 RT_NOREF(iGprTmp);
2349
2350#elif defined(RT_ARCH_ARM64)
2351 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2352 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2353
2354#else
2355# error "port me"
2356#endif
2357 return off;
2358}
2359
2360
2361/**
2362 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2363 */
2364DECL_INLINE_THROW(uint32_t)
2365iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2366{
2367#ifdef RT_ARCH_AMD64
2368 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2369 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2370
2371#elif defined(RT_ARCH_ARM64)
2372 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2373
2374#else
2375# error "port me"
2376#endif
2377 return off;
2378}
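
/*
 * Usage sketch (illustrative; idxRegVal/idxRegPtr are hypothetical): reading a
 * 64-bit field at offset 0x18 through a pointer held in a host register:
 *     off = iemNativeEmitLoadGprByGprU64(pReNative, off, idxRegVal, idxRegPtr, 0x18);
 */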
2379
2380
2381/**
2382 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2383 *
2384 * @note ARM64: Misaligned @a offDisp values and values not in the
2385 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp)
2386 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2387 * caller does not heed this.
2388 *
2389 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2390 */
2391DECL_FORCE_INLINE_THROW(uint32_t)
2392iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2393 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2394{
2395#ifdef RT_ARCH_AMD64
2396 /* mov reg32, mem32 */
2397 if (iGprDst >= 8 || iGprBase >= 8)
2398 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2399 pCodeBuf[off++] = 0x8b;
2400 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2401 RT_NOREF(iGprTmp);
2402
2403#elif defined(RT_ARCH_ARM64)
2404 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2405 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2406
2407#else
2408# error "port me"
2409#endif
2410 return off;
2411}
2412
2413
2414/**
2415 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2416 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2417 */
2418DECL_INLINE_THROW(uint32_t)
2419iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2420{
2421#ifdef RT_ARCH_AMD64
2422 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2423 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2424
2425#elif defined(RT_ARCH_ARM64)
2426 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2427
2428#else
2429# error "port me"
2430#endif
2431 return off;
2432}
2433
2434
2435/**
2436 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2437 * sign-extending the value to 64 bits.
2438 *
2439 * @note ARM64: Misaligned @a offDisp values and values not in the
2440 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp)
2441 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2442 * caller does not heed this.
2443 */
2444DECL_FORCE_INLINE_THROW(uint32_t)
2445iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2446 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2447{
2448#ifdef RT_ARCH_AMD64
2449 /* movsxd reg64, mem32 */
2450 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2451 pCodeBuf[off++] = 0x63;
2452 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2453 RT_NOREF(iGprTmp);
2454
2455#elif defined(RT_ARCH_ARM64)
2456 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2457 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2458
2459#else
2460# error "port me"
2461#endif
2462 return off;
2463}
2464
2465
2466/**
2467 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2468 *
2469 * @note ARM64: Misaligned @a offDisp values and values not in the
2470 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2471 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2472 * caller does not heed this.
2473 *
2474 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2475 */
2476DECL_FORCE_INLINE_THROW(uint32_t)
2477iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2478 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2479{
2480#ifdef RT_ARCH_AMD64
2481 /* movzx reg32, mem16 */
2482 if (iGprDst >= 8 || iGprBase >= 8)
2483 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2484 pCodeBuf[off++] = 0x0f;
2485 pCodeBuf[off++] = 0xb7;
2486 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2487 RT_NOREF(iGprTmp);
2488
2489#elif defined(RT_ARCH_ARM64)
2490 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2491 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2492
2493#else
2494# error "port me"
2495#endif
2496 return off;
2497}
2498
2499
2500/**
2501 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2502 * sign-extending the value to 64 bits.
2503 *
2504 * @note ARM64: Misaligned @a offDisp values and values not in the
2505 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2506 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2507 * caller does not heed this.
2508 */
2509DECL_FORCE_INLINE_THROW(uint32_t)
2510iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2511 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2512{
2513#ifdef RT_ARCH_AMD64
2514 /* movsx reg64, mem16 */
2515 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2516 pCodeBuf[off++] = 0x0f;
2517 pCodeBuf[off++] = 0xbf;
2518 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2519 RT_NOREF(iGprTmp);
2520
2521#elif defined(RT_ARCH_ARM64)
2522 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2523 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2524
2525#else
2526# error "port me"
2527#endif
2528 return off;
2529}
2530
2531
2532/**
2533 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2534 * sign-extending the value to 32 bits.
2535 *
2536 * @note ARM64: Misaligned @a offDisp values and values not in the
2537 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2538 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2539 * caller does not heed this.
2540 *
2541 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2542 */
2543DECL_FORCE_INLINE_THROW(uint32_t)
2544iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2545 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2546{
2547#ifdef RT_ARCH_AMD64
2548 /* movsx reg32, mem16 */
2549 if (iGprDst >= 8 || iGprBase >= 8)
2550 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2551 pCodeBuf[off++] = 0x0f;
2552 pCodeBuf[off++] = 0xbf;
2553 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2554 RT_NOREF(iGprTmp);
2555
2556#elif defined(RT_ARCH_ARM64)
2557 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2558 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2559
2560#else
2561# error "port me"
2562#endif
2563 return off;
2564}
2565
2566
2567/**
2568 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2569 *
2570 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2571 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2572 * same. Will assert / throw if caller does not heed this.
2573 *
2574 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2575 */
2576DECL_FORCE_INLINE_THROW(uint32_t)
2577iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2578 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2579{
2580#ifdef RT_ARCH_AMD64
2581 /* movzx reg32, mem8 */
2582 if (iGprDst >= 8 || iGprBase >= 8)
2583 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2584 pCodeBuf[off++] = 0x0f;
2585 pCodeBuf[off++] = 0xb6;
2586 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2587 RT_NOREF(iGprTmp);
2588
2589#elif defined(RT_ARCH_ARM64)
2590 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2591 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2592
2593#else
2594# error "port me"
2595#endif
2596 return off;
2597}
2598
2599
2600/**
2601 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2602 * sign-extending the value to 64 bits.
2603 *
2604 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2605 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2606 * same. Will assert / throw if caller does not heed this.
2607 */
2608DECL_FORCE_INLINE_THROW(uint32_t)
2609iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2610 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2611{
2612#ifdef RT_ARCH_AMD64
2613 /* movsx reg64, mem8 */
2614 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2615 pCodeBuf[off++] = 0x0f;
2616 pCodeBuf[off++] = 0xbe;
2617 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2618 RT_NOREF(iGprTmp);
2619
2620#elif defined(RT_ARCH_ARM64)
2621 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2622 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2623
2624#else
2625# error "port me"
2626#endif
2627 return off;
2628}
2629
2630
2631/**
2632 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2633 * sign-extending the value to 32 bits.
2634 *
2635 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2636 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2637 * same. Will assert / throw if caller does not heed this.
2638 *
2639 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2640 */
2641DECL_FORCE_INLINE_THROW(uint32_t)
2642iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2643 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2644{
2645#ifdef RT_ARCH_AMD64
2646 /* movsx reg32, mem8 */
2647 if (iGprDst >= 8 || iGprBase >= 8)
2648 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2649 pCodeBuf[off++] = 0x0f;
2650 pCodeBuf[off++] = 0xbe;
2651 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2652 RT_NOREF(iGprTmp);
2653
2654#elif defined(RT_ARCH_ARM64)
2655 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2656 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2657
2658#else
2659# error "port me"
2660#endif
2661 return off;
2662}
2663
2664
2665/**
2666 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2667 * sign-extending the value to 16 bits.
2668 *
2669 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2670 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2671 * same. Will assert / throw if caller does not heed this.
2672 *
2673 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2674 */
2675DECL_FORCE_INLINE_THROW(uint32_t)
2676iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2677 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2678{
2679#ifdef RT_ARCH_AMD64
2680 /* movsx reg32, mem8 */
2681 if (iGprDst >= 8 || iGprBase >= 8)
2682 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2683 pCodeBuf[off++] = 0x0f;
2684 pCodeBuf[off++] = 0xbe;
2685 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2686# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0xffff'? */
2687 /* and reg32, 0xffff */
2688 if (iGprDst >= 8)
2689 pCodeBuf[off++] = X86_OP_REX_B;
2690 pCodeBuf[off++] = 0x81;
2691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2692 pCodeBuf[off++] = 0xff;
2693 pCodeBuf[off++] = 0xff;
2694 pCodeBuf[off++] = 0;
2695 pCodeBuf[off++] = 0;
2696# else
2697 /* movzx reg32, reg16 */
2698 if (iGprDst >= 8)
2699 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2700 pCodeBuf[off++] = 0x0f;
2701 pCodeBuf[off++] = 0xb7;
2702 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2703# endif
2704 RT_NOREF(iGprTmp);
2705
2706#elif defined(RT_ARCH_ARM64)
2707 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2708 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2709 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2710 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2711
2712#else
2713# error "port me"
2714#endif
2715 return off;
2716}
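
/*
 * Worked example (illustrative): a loaded byte of 0xfe comes out of the above
 * as 0x000000000000fffe in iGprDst - sign-extended to 16 bits (-2 -> 0xfffe)
 * with bits 63:16 forced to zero.
 */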
2717
2718
2719/**
2720 * Emits a 64-bit GPR store via a GPR base address with a displacement.
2721 *
2722 * @note ARM64: Misaligned @a offDisp values and values not in the
2723 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp), as
2724 * the source register cannot double as the index. Will assert / throw if caller
2725 * does not heed this.
2726 */
2727DECL_FORCE_INLINE_THROW(uint32_t)
2728iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2729 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2730{
2731#ifdef RT_ARCH_AMD64
2732 /* mov mem64, reg64 */
2733 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2734 pCodeBuf[off++] = 0x89;
2735 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2736 RT_NOREF(iGprTmp);
2737
2738#elif defined(RT_ARCH_ARM64)
2739 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2740 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
2741
2742#else
2743# error "port me"
2744#endif
2745 return off;
2746}
2747
2748
2749/**
2750 * Emits a 32-bit GPR store via a GPR base address with a displacement.
2751 *
2752 * @note ARM64: Misaligned @a offDisp values and values not in the
2753 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp), as
2754 * the source register cannot double as the index. Will assert / throw if caller
2755 * does not heed this.
2756 */
2757DECL_FORCE_INLINE_THROW(uint32_t)
2758iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2759 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2760{
2761#ifdef RT_ARCH_AMD64
2762 /* mov mem32, reg32 */
2763 if (iGprSrc >= 8 || iGprBase >= 8)
2764 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2765 pCodeBuf[off++] = 0x89;
2766 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2767 RT_NOREF(iGprTmp);
2768
2769#elif defined(RT_ARCH_ARM64)
2770 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2771 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
2772
2773#else
2774# error "port me"
2775#endif
2776 return off;
2777}
2778
2779
2780/**
2781 * Emits a 16-bit GPR store via a GPR base address with a displacement.
2782 *
2783 * @note ARM64: Misaligned @a offDisp values and values not in the
2784 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp), as
2785 * the source register cannot double as the index. Will assert / throw if caller
2786 * does not heed this.
2787 */
2788DECL_FORCE_INLINE_THROW(uint32_t)
2789iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2790 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2791{
2792#ifdef RT_ARCH_AMD64
2793 /* mov mem16, reg16 */
2794 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2795 if (iGprSrc >= 8 || iGprBase >= 8)
2796 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2797 pCodeBuf[off++] = 0x89;
2798 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2799 RT_NOREF(iGprTmp);
2800
2801#elif defined(RT_ARCH_ARM64)
2802 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2803 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
2804
2805#else
2806# error "port me"
2807#endif
2808 return off;
2809}
2810
2811
2812/**
2813 * Emits an 8-bit GPR store via a GPR base address with a displacement.
2814 *
2815 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2816 * temporary register (@a iGprTmp), as the source register cannot double as
2817 * the index. Will assert / throw if caller does not heed this.
2818 */
2819DECL_FORCE_INLINE_THROW(uint32_t)
2820iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2821 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2822{
2823#ifdef RT_ARCH_AMD64
2824 /* mov mem8, reg8 */
2825 if (iGprSrc >= 8 || iGprBase >= 8)
2826 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2827 else if (iGprSrc >= 4)
2828 pCodeBuf[off++] = X86_OP_REX;
2829 pCodeBuf[off++] = 0x88;
2830 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2831 RT_NOREF(iGprTmp);
2832
2833#elif defined(RT_ARCH_ARM64)
2834 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2835 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
2836
2837#else
2838# error "port me"
2839#endif
2840 return off;
2841}
2842
2843
2844/**
2845 * Emits a 64-bit immediate store via a GPR base address with a displacement.
2846 *
2847 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0);
2848 * on AMD64 it depends on the immediate value.
2849 *
2850 * @note ARM64: Misaligned @a offDisp values and values not in the
2851 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp).
2852 * Will assert / throw if caller
2853 * does not heed this.
2854 */
2855DECL_FORCE_INLINE_THROW(uint32_t)
2856iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
2857 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2858{
2859#ifdef RT_ARCH_AMD64
2860 if ((int32_t)uImm == (int64_t)uImm)
2861 {
2862 /* mov mem64, imm32 (sign-extended) */
2863 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2864 pCodeBuf[off++] = 0xc7;
2865 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2866 pCodeBuf[off++] = RT_BYTE1(uImm);
2867 pCodeBuf[off++] = RT_BYTE2(uImm);
2868 pCodeBuf[off++] = RT_BYTE3(uImm);
2869 pCodeBuf[off++] = RT_BYTE4(uImm);
2870 }
2871 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
2872 {
2873 /* require temporary register. */
2874 if (iGprImmTmp == UINT8_MAX)
2875 iGprImmTmp = iGprTmp;
2876 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2877 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
2878 }
2879 else
2880# ifdef IEM_WITH_THROW_CATCH
2881 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2882# else
2883 AssertReleaseFailedStmt(off = UINT32_MAX);
2884# endif
2885
2886#elif defined(RT_ARCH_ARM64)
2887 if (uImm == 0)
2888 iGprImmTmp = ARMV8_A64_REG_XZR;
2889 else
2890 {
2891 Assert(iGprImmTmp < 31);
2892 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2893 }
2894 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
2895
2896#else
2897# error "port me"
2898#endif
2899 return off;
2900}
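
/*
 * Usage sketch (illustrative; idxRegBase is hypothetical): zeroing a 64-bit
 * field through a base register. With uImm == 0 neither architecture needs an
 * immediate temporary (AMD64 uses the imm32 form, ARM64 stores XZR), so the
 * defaulted iGprImmTmp = UINT8_MAX is fine:
 *     off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, 0, idxRegBase);
 */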
2901
2902
2903/**
2904 * Emits a 32-bit immediate store via a GPR base address with a displacement.
2905 *
2906 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2907 *
2908 * @note ARM64: Misaligned @a offDisp values and values not in the
2909 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp).
2910 * Will assert / throw if caller
2911 * does not heed this.
2912 */
2913DECL_FORCE_INLINE_THROW(uint32_t)
2914iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
2915 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2916{
2917#ifdef RT_ARCH_AMD64
2918 /* mov mem32, imm32 */
2919 if (iGprBase >= 8)
2920 pCodeBuf[off++] = X86_OP_REX_B;
2921 pCodeBuf[off++] = 0xc7;
2922 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2923 pCodeBuf[off++] = RT_BYTE1(uImm);
2924 pCodeBuf[off++] = RT_BYTE2(uImm);
2925 pCodeBuf[off++] = RT_BYTE3(uImm);
2926 pCodeBuf[off++] = RT_BYTE4(uImm);
2927 RT_NOREF(iGprImmTmp, iGprTmp);
2928
2929#elif defined(RT_ARCH_ARM64)
2931 if (uImm == 0)
2932 iGprImmTmp = ARMV8_A64_REG_XZR;
2933 else
2934 {
2935 Assert(iGprImmTmp < 31);
2936 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2937 }
2938 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
2939 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
2940
2941#else
2942# error "port me"
2943#endif
2944 return off;
2945}
2946
2947
2948/**
2949 * Emits a 16-bit immediate store via a GPR base address with a displacement.
2950 *
2951 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2952 *
2953 * @note ARM64: Misaligned @a offDisp values and values not in the
2954 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp).
2955 * Will assert / throw if caller
2956 * does not heed this.
2957 */
2958DECL_FORCE_INLINE_THROW(uint32_t)
2959iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
2960 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2961{
2962#ifdef RT_ARCH_AMD64
2963 /* mov mem16, imm16 */
2964 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2965 if (iGprBase >= 8)
2966 pCodeBuf[off++] = X86_OP_REX_B;
2967 pCodeBuf[off++] = 0xc7;
2968 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2969 pCodeBuf[off++] = RT_BYTE1(uImm);
2970 pCodeBuf[off++] = RT_BYTE2(uImm);
2971 RT_NOREF(iGprImmTmp, iGprTmp);
2972
2973#elif defined(RT_ARCH_ARM64)
2974 if (uImm == 0)
2975 iGprImmTmp = ARMV8_A64_REG_XZR;
2976 else
2977 {
2978 Assert(iGprImmTmp < 31);
2979 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
2980 }
2981 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
2982 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
2983
2984#else
2985# error "port me"
2986#endif
2987 return off;
2988}
2989
2990
2991/**
2992 * Emits an 8-bit immediate store via a GPR base address with a displacement.
2993 *
2994 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2995 *
2996 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2997 * temporary register (@a iGprTmp). Will assert / throw if caller does not
2998 * heed this.
2999 */
3000DECL_FORCE_INLINE_THROW(uint32_t)
3001iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3002 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3003{
3004#ifdef RT_ARCH_AMD64
3005 /* mov mem8, imm8 */
3007 if (iGprBase >= 8)
3008 pCodeBuf[off++] = X86_OP_REX_B;
3009 pCodeBuf[off++] = 0xc6;
3010 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3011 pCodeBuf[off++] = uImm;
3012 RT_NOREF(iGprImmTmp, iGprTmp);
3013
3014#elif defined(RT_ARCH_ARM64)
3015 if (uImm == 0)
3016 iGprImmTmp = ARMV8_A64_REG_XZR;
3017 else
3018 {
3019 Assert(iGprImmTmp < 31);
3020 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3021 }
3022 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3023 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3024
3025#else
3026# error "port me"
3027#endif
3028 return off;
3029}
3030
3031
3032
3033/*********************************************************************************************************************************
3034* Subtraction and Additions *
3035*********************************************************************************************************************************/
3036
3037/**
3038 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3039 * @note The AMD64 version sets flags.
3040 */
3041DECL_INLINE_THROW(uint32_t)
3042iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3043{
3044#if defined(RT_ARCH_AMD64)
3045 /* sub Gv,Ev */
3046 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3047 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3048 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3049 pbCodeBuf[off++] = 0x2b;
3050 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3051
3052#elif defined(RT_ARCH_ARM64)
3053 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3054 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3055
3056#else
3057# error "Port me"
3058#endif
3059 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3060 return off;
3061}
3062
3063
3064/**
3065 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3066 * @note The AMD64 version sets flags.
3067 */
3068DECL_FORCE_INLINE(uint32_t)
3069iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3070{
3071#if defined(RT_ARCH_AMD64)
3072 /* sub Gv,Ev */
3073 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3074 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3075 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3076 pCodeBuf[off++] = 0x2b;
3077 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3078
3079#elif defined(RT_ARCH_ARM64)
3080 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3081
3082#else
3083# error "Port me"
3084#endif
3085 return off;
3086}
3087
3088
3089/**
3090 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3091 * @note The AMD64 version sets flags.
3092 */
3093DECL_INLINE_THROW(uint32_t)
3094iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3095{
3096#if defined(RT_ARCH_AMD64)
3097 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3098#elif defined(RT_ARCH_ARM64)
3099 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3100#else
3101# error "Port me"
3102#endif
3103 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3104 return off;
3105}
3106
3107
3108/**
3109 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3110 *
3111 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3112 *
3113 * @note Larger constants will require a temporary register. Failing to specify
3114 * one when needed will trigger fatal assertion / throw.
3115 */
3116DECL_FORCE_INLINE_THROW(uint32_t)
3117iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3118 uint8_t iGprTmp = UINT8_MAX)
3119{
3120#ifdef RT_ARCH_AMD64
3121 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3122 if (iSubtrahend == 1)
3123 {
3124 /* dec r/m64 */
3125 pCodeBuf[off++] = 0xff;
3126 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3127 }
3128 else if (iSubtrahend == -1)
3129 {
3130 /* inc r/m64 */
3131 pCodeBuf[off++] = 0xff;
3132 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3133 }
3134 else if ((int8_t)iSubtrahend == iSubtrahend)
3135 {
3136 /* sub r/m64, imm8 */
3137 pCodeBuf[off++] = 0x83;
3138 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3139 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3140 }
3141 else if ((int32_t)iSubtrahend == iSubtrahend)
3142 {
3143 /* sub r/m64, imm32 */
3144 pCodeBuf[off++] = 0x81;
3145 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3146 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3147 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3148 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3149 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3150 }
3151 else if (iGprTmp != UINT8_MAX)
3152 {
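        /* Note: the 'off - 1' steps back over the REX.W prefix emitted above, so
           the immediate load comes first and the SUB below re-emits its own prefix. */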
3153 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3154 /* sub r/m64, r64 */
3155 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3156 pCodeBuf[off++] = 0x29;
3157 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3158 }
3159 else
3160# ifdef IEM_WITH_THROW_CATCH
3161 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3162# else
3163 AssertReleaseFailedStmt(off = UINT32_MAX);
3164# endif
3165
3166#elif defined(RT_ARCH_ARM64)
3167    uint64_t const uAbsSubtrahend = (uint64_t)RT_ABS(iSubtrahend);
3168    if (uAbsSubtrahend < 4096)
3169    {
3170        if (iSubtrahend >= 0)
3171            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsSubtrahend);
3172        else
3173            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsSubtrahend);
3174    }
3175    else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3176    {
3177        if (iSubtrahend >= 0)
3178            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)(uAbsSubtrahend >> 12),
3179                                                       true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3180        else
3181            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)(uAbsSubtrahend >> 12),
3182                                                       true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3183    }
3184 else if (iGprTmp != UINT8_MAX)
3185 {
3186 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3187 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3188 }
3189 else
3190# ifdef IEM_WITH_THROW_CATCH
3191 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3192# else
3193 AssertReleaseFailedStmt(off = UINT32_MAX);
3194# endif
3195
3196#else
3197# error "Port me"
3198#endif
3199 return off;
3200}
3201
3202
3203/**
3204 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3205 *
3206 * @note Larger constants will require a temporary register. Failing to specify
3207 * one when needed will trigger fatal assertion / throw.
3208 */
3209DECL_INLINE_THROW(uint32_t)
3210iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3211 uint8_t iGprTmp = UINT8_MAX)
3213{
3214#ifdef RT_ARCH_AMD64
3215 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3216#elif defined(RT_ARCH_ARM64)
3217 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3218#else
3219# error "Port me"
3220#endif
3221 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3222 return off;
3223}
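
/*
 * Encoding selection sketch (editorial illustration; idxReg and idxTmp are
 * hypothetical host register indices):
 *
 *      off = iemNativeEmitSubGprImm(pReNative, off, idxReg, 1);               // amd64: dec; arm64: sub #1
 *      off = iemNativeEmitSubGprImm(pReNative, off, idxReg, -100);            // amd64: sub imm8; arm64: add #100
 *      off = iemNativeEmitSubGprImm(pReNative, off, idxReg, 0x12345, idxTmp); // needs the temporary on arm64
 */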
3224
3225
3226/**
3227 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3228 *
3229 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3230 *
3231 * @note ARM64: Larger constants will require a temporary register. Failing to
3232 * specify one when needed will trigger fatal assertion / throw.
3233 */
3234DECL_FORCE_INLINE_THROW(uint32_t)
3235iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3236 uint8_t iGprTmp = UINT8_MAX)
3237{
3238#ifdef RT_ARCH_AMD64
3239 if (iGprDst >= 8)
3240 pCodeBuf[off++] = X86_OP_REX_B;
3241 if (iSubtrahend == 1)
3242 {
3243 /* dec r/m32 */
3244 pCodeBuf[off++] = 0xff;
3245 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3246 }
3247 else if (iSubtrahend == -1)
3248 {
3249 /* inc r/m32 */
3250 pCodeBuf[off++] = 0xff;
3251 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3252 }
3253 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3254 {
3255 /* sub r/m32, imm8 */
3256 pCodeBuf[off++] = 0x83;
3257 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3258 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3259 }
3260 else
3261 {
3262 /* sub r/m32, imm32 */
3263 pCodeBuf[off++] = 0x81;
3264 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3265 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3266 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3267 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3268 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3269 }
3270 RT_NOREF(iGprTmp);
3271
3272#elif defined(RT_ARCH_ARM64)
3273 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3274 if (uAbsSubtrahend < 4096)
3275 {
3276 if (iSubtrahend >= 0)
3277 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3278 else
3279 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3280 }
3281 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3282 {
3283 if (iSubtrahend >= 0)
3284 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3285 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3286 else
3287 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3288 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3289 }
3290 else if (iGprTmp != UINT8_MAX)
3291 {
3292 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3293 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3294 }
3295 else
3296# ifdef IEM_WITH_THROW_CATCH
3297 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3298# else
3299 AssertReleaseFailedStmt(off = UINT32_MAX);
3300# endif
3301
3302#else
3303# error "Port me"
3304#endif
3305 return off;
3306}
3307
3308
3309/**
3310 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3311 *
3312 * @note ARM64: Larger constants will require a temporary register. Failing to
3313 * specify one when needed will trigger fatal assertion / throw.
3314 */
3315DECL_INLINE_THROW(uint32_t)
3316iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3317 uint8_t iGprTmp = UINT8_MAX)
3319{
3320#ifdef RT_ARCH_AMD64
3321    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3322#elif defined(RT_ARCH_ARM64)
3323    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3324#else
3325# error "Port me"
3326#endif
3327 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3328 return off;
3329}
3330
3331
3332/**
3333 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3334 *
3335 * This will optimize using DEC/INC/whatever; the ARM64 version does not set
3336 * flags, so it is not suitable as a basis for conditional jumps.
3337 *
3338 * @note AMD64: Will only update the lower 16 bits of the register.
3339 * @note ARM64: Will update the entire register.
3340 * @note ARM64: Larger constants will require a temporary register. Failing to
3341 * specify one when needed will trigger fatal assertion / throw.
3342 */
3343DECL_FORCE_INLINE_THROW(uint32_t)
3344iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3345 uint8_t iGprTmp = UINT8_MAX)
3346{
3347#ifdef RT_ARCH_AMD64
3348 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3349 if (iGprDst >= 8)
3350 pCodeBuf[off++] = X86_OP_REX_B;
3351 if (iSubtrahend == 1)
3352 {
3353 /* dec r/m16 */
3354 pCodeBuf[off++] = 0xff;
3355 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3356 }
3357 else if (iSubtrahend == -1)
3358 {
3359 /* inc r/m16 */
3360 pCodeBuf[off++] = 0xff;
3361 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3362 }
3363 else if ((int8_t)iSubtrahend == iSubtrahend)
3364 {
3365 /* sub r/m16, imm8 */
3366 pCodeBuf[off++] = 0x83;
3367 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3368 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3369 }
3370 else
3371 {
3372 /* sub r/m16, imm16 */
3373 pCodeBuf[off++] = 0x81;
3374 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3375 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3376 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3377 }
3378 RT_NOREF(iGprTmp);
3379
3380#elif defined(RT_ARCH_ARM64)
3381 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3382 if (uAbsSubtrahend < 4096)
3383 {
3384 if (iSubtrahend >= 0)
3385 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3386 else
3387 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3388 }
3389 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3390 {
3391 if (iSubtrahend >= 0)
3392 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3393 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3394 else
3395 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3396 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3397 }
3398 else if (iGprTmp != UINT8_MAX)
3399 {
3400 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3401 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3402 }
3403 else
3404# ifdef IEM_WITH_THROW_CATCH
3405 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3406# else
3407 AssertReleaseFailedStmt(off = UINT32_MAX);
3408# endif
3409 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3410
3411#else
3412# error "Port me"
3413#endif
3414 return off;
3415}
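
/*
 * Editorial note with a small example: the ARM64 path above always ends with
 * an AND against 0xffff, so the result is zero-extended from 16 bits, while
 * AMD64 leaves bits 16 thru 63 untouched (hypothetical register index):
 *
 *      off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxReg, 1); // arm64: sub w,w,#1 + and w,w,#0xffff
 */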
3416
3417
3418/**
3419 * Emits adding a 64-bit GPR to another, storing the result in the first.
3420 * @note The AMD64 version sets flags.
3421 */
3422DECL_FORCE_INLINE(uint32_t)
3423iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3424{
3425#if defined(RT_ARCH_AMD64)
3426 /* add Gv,Ev */
3427 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3428 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3429 pCodeBuf[off++] = 0x03;
3430 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3431
3432#elif defined(RT_ARCH_ARM64)
3433 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3434
3435#else
3436# error "Port me"
3437#endif
3438 return off;
3439}
3440
3441
3442/**
3443 * Emits adding a 64-bit GPR to another, storing the result in the first.
3444 * @note The AMD64 version sets flags.
3445 */
3446DECL_INLINE_THROW(uint32_t)
3447iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3448{
3449#if defined(RT_ARCH_AMD64)
3450 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3451#elif defined(RT_ARCH_ARM64)
3452 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3453#else
3454# error "Port me"
3455#endif
3456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3457 return off;
3458}
3459
3460
3461/**
3462 * Emits adding a 32-bit GPR to another, storing the result in the first.
3463 * @note The AMD64 version sets flags.  Bits 32 thru 63 are cleared.
3464 */
3465DECL_FORCE_INLINE(uint32_t)
3466iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3467{
3468#if defined(RT_ARCH_AMD64)
3469 /* add Gv,Ev */
3470 if (iGprDst >= 8 || iGprAddend >= 8)
3471 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3472 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3473 pCodeBuf[off++] = 0x03;
3474 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3475
3476#elif defined(RT_ARCH_ARM64)
3477 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3478
3479#else
3480# error "Port me"
3481#endif
3482 return off;
3483}
3484
3485
3486/**
3487 * Emits adding a 32-bit GPR to another, storing the result in the first.
3488 * @note The AMD64 version sets flags.  Bits 32 thru 63 are cleared.
3489 */
3490DECL_INLINE_THROW(uint32_t)
3491iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3492{
3493#if defined(RT_ARCH_AMD64)
3494 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3495#elif defined(RT_ARCH_ARM64)
3496 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3497#else
3498# error "Port me"
3499#endif
3500 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3501 return off;
3502}
3503
3504
3505/**
3506 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3507 */
3508DECL_FORCE_INLINE(uint32_t)
3509iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3510{
3511#if defined(RT_ARCH_AMD64)
3512 /* add or inc */
3513 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3514 if (iImm8 != 1)
3515 {
3516 pCodeBuf[off++] = 0x83;
3517 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3518 pCodeBuf[off++] = (uint8_t)iImm8;
3519 }
3520 else
3521 {
3522 pCodeBuf[off++] = 0xff;
3523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3524 }
3525
3526#elif defined(RT_ARCH_ARM64)
3527 if (iImm8 >= 0)
3528 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3529 else
3530 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3531
3532#else
3533# error "Port me"
3534#endif
3535 return off;
3536}
3537
3538
3539/**
3540 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3541 */
3542DECL_INLINE_THROW(uint32_t)
3543iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3544{
3545#if defined(RT_ARCH_AMD64)
3546 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3547#elif defined(RT_ARCH_ARM64)
3548 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3549#else
3550# error "Port me"
3551#endif
3552 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3553 return off;
3554}
3555
3556
3557/**
3558 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3559 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3560 */
3561DECL_FORCE_INLINE(uint32_t)
3562iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3563{
3564#if defined(RT_ARCH_AMD64)
3565 /* add or inc */
3566 if (iGprDst >= 8)
3567 pCodeBuf[off++] = X86_OP_REX_B;
3568 if (iImm8 != 1)
3569 {
3570 pCodeBuf[off++] = 0x83;
3571 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3572 pCodeBuf[off++] = (uint8_t)iImm8;
3573 }
3574 else
3575 {
3576 pCodeBuf[off++] = 0xff;
3577 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3578 }
3579
3580#elif defined(RT_ARCH_ARM64)
3581 if (iImm8 >= 0)
3582 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
3583 else
3584 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
3585
3586#else
3587# error "Port me"
3588#endif
3589 return off;
3590}
3591
3592
3593/**
3594 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3595 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3596 */
3597DECL_INLINE_THROW(uint32_t)
3598iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3599{
3600#if defined(RT_ARCH_AMD64)
3601 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3602#elif defined(RT_ARCH_ARM64)
3603 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3604#else
3605# error "Port me"
3606#endif
3607 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3608 return off;
3609}
3610
3611
3612/**
3613 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3614 *
3615 * @note Will assert / throw if @a iGprTmp is not specified when needed.
3616 */
3617DECL_FORCE_INLINE_THROW(uint32_t)
3618iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
3619{
3620#if defined(RT_ARCH_AMD64)
3621 if ((int8_t)iAddend == iAddend)
3622 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3623
3624 if ((int32_t)iAddend == iAddend)
3625 {
3626        /* add gpr, imm32 */
3627 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3628 pCodeBuf[off++] = 0x81;
3629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3630 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3631 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3632 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3633 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3634 }
3635 else if (iGprTmp != UINT8_MAX)
3636 {
3637 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3638
3639 /* add dst, tmpreg */
3640 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3641 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
3642 pCodeBuf[off++] = 0x03;
3643 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
3644 }
3645 else
3646# ifdef IEM_WITH_THROW_CATCH
3647 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3648# else
3649 AssertReleaseFailedStmt(off = UINT32_MAX);
3650# endif
3651
3652#elif defined(RT_ARCH_ARM64)
3653 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
3654 if (uAbsAddend < 4096)
3655 {
3656 if (iAddend >= 0)
3657 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3658 else
3659 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3660 }
3661 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3662 {
3663 if (iAddend >= 0)
3664 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3665 true /*f64Bit*/, true /*fShift12*/);
3666 else
3667 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3668 true /*f64Bit*/, true /*fShift12*/);
3669 }
3670 else if (iGprTmp != UINT8_MAX)
3671 {
3672 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3673 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
3674 }
3675 else
3676# ifdef IEM_WITH_THROW_CATCH
3677 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3678# else
3679 AssertReleaseFailedStmt(off = UINT32_MAX);
3680# endif
3681
3682#else
3683# error "Port me"
3684#endif
3685 return off;
3686}
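
/*
 * Editorial recap of the ARM64 immediate ranges used above: ADD/SUB encode a
 * 12-bit immediate, optionally shifted left by 12, so the branches cover:
 *
 *      1..0xfff                        -> add/sub #imm
 *      0x1000..0xfff000, low 12 clear  -> add/sub #(imm >> 12), lsl #12
 *      anything else                   -> load into iGprTmp + register add/sub
 */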
3687
3688
3689/**
3690 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3691 */
3692DECL_INLINE_THROW(uint32_t)
3693iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
3694{
3695#if defined(RT_ARCH_AMD64)
3696 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3697 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
3698
3699 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
3700 {
3701        /* add gpr, imm32 */
3702 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3703 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3704 pbCodeBuf[off++] = 0x81;
3705 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3706 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3707 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3708 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3709 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3710 }
3711 else
3712 {
3713 /* Best to use a temporary register to deal with this in the simplest way: */
3714 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3715
3716 /* add dst, tmpreg */
3717 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3718 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3719 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
3720 pbCodeBuf[off++] = 0x03;
3721 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
3722
3723 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3724 }
3725
3726#elif defined(RT_ARCH_ARM64)
3727 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
3728 {
3729 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3730 if (iAddend >= 0)
3731 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
3732 else
3733 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
3734 }
3735 else
3736 {
3737 /* Use temporary register for the immediate. */
3738 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3739
3740 /* add gprdst, gprdst, tmpreg */
3741 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3742 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
3743
3744 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3745 }
3746
3747#else
3748# error "Port me"
3749#endif
3750 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3751 return off;
3752}
3753
3754
3755/**
3756 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
3757 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3758 * @note For ARM64 the iAddend value must be in the range 0x000..0xfff,
3759 * or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
3760 * the lower 12 bits always zero). The negative ranges are also allowed,
3761 * making it behave like a subtraction. If the constant does not conform,
3762 * bad stuff will happen.
3763 */
3764DECL_FORCE_INLINE_THROW(uint32_t)
3765iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
3766{
3767#if defined(RT_ARCH_AMD64)
3768 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3769 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3770
3771    /* add gpr, imm32 */
3772 if (iGprDst >= 8)
3773 pCodeBuf[off++] = X86_OP_REX_B;
3774 pCodeBuf[off++] = 0x81;
3775 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3776 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3777 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3778 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3779 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3780
3781#elif defined(RT_ARCH_ARM64)
3782 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
3783 if (uAbsAddend <= 0xfff)
3784 {
3785 if (iAddend >= 0)
3786 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3787 else
3788 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3789 }
3790 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3791 {
3792 if (iAddend >= 0)
3793 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3794 false /*f64Bit*/, true /*fShift12*/);
3795 else
3796 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3797 false /*f64Bit*/, true /*fShift12*/);
3798 }
3799 else
3800# ifdef IEM_WITH_THROW_CATCH
3801 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3802# else
3803 AssertReleaseFailedStmt(off = UINT32_MAX);
3804# endif
3805
3806#else
3807# error "Port me"
3808#endif
3809 return off;
3810}
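
/*
 * Conformance sketch for the ARM64 note above (editorial; idxReg is a
 * hypothetical host register index):
 *
 *      off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxReg, 0xfff);    // ok: fits in 12 bits
 *      off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxReg, 0x123000); // ok: 12 bits shifted left by 12
 *      off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxReg, 0x1001);   // asserts / throws on arm64
 */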
3811
3812
3813/**
3814 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
3815 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3816 */
3817DECL_INLINE_THROW(uint32_t)
3818iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
3819{
3820#if defined(RT_ARCH_AMD64)
3821 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
3822
3823#elif defined(RT_ARCH_ARM64)
3824 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
3825 {
3826 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3827 if (iAddend >= 0)
3828 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
3829 else
3830 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
3831 }
3832 else
3833 {
3834 /* Use temporary register for the immediate. */
3835 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
3836
3837 /* add gprdst, gprdst, tmpreg */
3838 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3839 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
3840
3841 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3842 }
3843
3844#else
3845# error "Port me"
3846#endif
3847 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3848 return off;
3849}
3850
3851
3852/**
3853 * Emits a 16-bit GPR add with a signed immediate addend.
3854 *
3855 * This will optimize using INC/DEC/whatever and ARM64 will not set flags,
3856 * so not suitable as a base for conditional jumps.
3857 *
3858 * @note AMD64: Will only update the lower 16 bits of the register.
3859 * @note ARM64: Will update the entire register.
3860 * @note ARM64: Larger constants will require a temporary register. Failing to
3861 * specify one when needed will trigger fatal assertion / throw.
3862 * @sa iemNativeEmitSubGpr16ImmEx
3863 */
3864DECL_FORCE_INLINE_THROW(uint32_t)
3865iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
3866 uint8_t iGprTmp = UINT8_MAX)
3867{
3868#ifdef RT_ARCH_AMD64
3869 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3870 if (iGprDst >= 8)
3871 pCodeBuf[off++] = X86_OP_REX_B;
3872 if (iAddend == 1)
3873 {
3874 /* inc r/m16 */
3875 pCodeBuf[off++] = 0xff;
3876 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3877 }
3878 else if (iAddend == -1)
3879 {
3880 /* dec r/m16 */
3881 pCodeBuf[off++] = 0xff;
3882 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3883 }
3884 else if ((int8_t)iAddend == iAddend)
3885 {
3886 /* add r/m16, imm8 */
3887 pCodeBuf[off++] = 0x83;
3888 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3889 pCodeBuf[off++] = (uint8_t)iAddend;
3890 }
3891 else
3892 {
3893 /* add r/m16, imm16 */
3894 pCodeBuf[off++] = 0x81;
3895 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3896 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
3897 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
3898 }
3899 RT_NOREF(iGprTmp);
3900
3901#elif defined(RT_ARCH_ARM64)
3902 uint32_t uAbsAddend = RT_ABS(iAddend);
3903 if (uAbsAddend < 4096)
3904 {
3905 if (iAddend >= 0)
3906 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3907 else
3908 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3909 }
3910 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3911 {
3912 if (iAddend >= 0)
3913 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
3914 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3915 else
3916 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
3917 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3918 }
3919 else if (iGprTmp != UINT8_MAX)
3920 {
3921 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
3922 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3923 }
3924 else
3925# ifdef IEM_WITH_THROW_CATCH
3926 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3927# else
3928 AssertReleaseFailedStmt(off = UINT32_MAX);
3929# endif
3930 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3931
3932#else
3933# error "Port me"
3934#endif
3935 return off;
3936}
3937
3938
3939
3940/**
3941 * Adds two 64-bit GPRs together, storing the result in a third register.
3942 */
3943DECL_FORCE_INLINE(uint32_t)
3944iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
3945{
3946#ifdef RT_ARCH_AMD64
3947 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
3948 {
3949 /** @todo consider LEA */
3950 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
3951 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
3952 }
3953 else
3954 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
3955
3956#elif defined(RT_ARCH_ARM64)
3957 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
3958
3959#else
3960# error "Port me!"
3961#endif
3962 return off;
3963}
3964
3965
3966
3967/**
3968 * Adds two 32-bit GPRs together, storing the result in a third register.
3969 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
3970 */
3971DECL_FORCE_INLINE(uint32_t)
3972iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
3973{
3974#ifdef RT_ARCH_AMD64
3975 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
3976 {
3977 /** @todo consider LEA */
3978 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
3979 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
3980 }
3981 else
3982 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
3983
3984#elif defined(RT_ARCH_ARM64)
3985 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
3986
3987#else
3988# error "Port me!"
3989#endif
3990 return off;
3991}
3992
3993
3994/**
3995 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
3996 * third register.
3997 *
3998 * @note The ARM64 version does not work for non-trivial constants if the
3999 * two registers are the same. Will assert / throw exception.
4000 */
4001DECL_FORCE_INLINE_THROW(uint32_t)
4002iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4003{
4004#ifdef RT_ARCH_AMD64
4005 /** @todo consider LEA */
4006 if ((int8_t)iImmAddend == iImmAddend)
4007 {
4008 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4009 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4010 }
4011 else
4012 {
4013 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4014 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4015 }
4016
4017#elif defined(RT_ARCH_ARM64)
4018 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4019 if (uAbsImmAddend < 4096)
4020 {
4021 if (iImmAddend >= 0)
4022 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4023 else
4024 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4025 }
4026 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4027 {
4028 if (iImmAddend >= 0)
4029            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4030        else
4031            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4032 }
4033 else if (iGprDst != iGprAddend)
4034 {
4035 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4036 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4037 }
4038 else
4039# ifdef IEM_WITH_THROW_CATCH
4040 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4041# else
4042 AssertReleaseFailedStmt(off = UINT32_MAX);
4043# endif
4044
4045#else
4046# error "Port me!"
4047#endif
4048 return off;
4049}
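
/*
 * Usage sketch (editorial; register indices hypothetical): compute
 * idxRegDst = idxRegSrc + 16 without clobbering idxRegSrc.  This works on
 * both hosts since 16 fits the ARM64 12-bit immediate:
 *
 *      off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegDst, idxRegSrc, 16);
 */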
4050
4051
4052/**
4053 * Adds a 32-bit GPR and a 32-bit unsigned constant, storing the result in a
4054 * third register.
4055 *
4056 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4057 *
4058 * @note The ARM64 version does not work for non-trivial constants if the
4059 * two registers are the same. Will assert / throw exception.
4060 */
4061DECL_FORCE_INLINE_THROW(uint32_t)
4062iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4063{
4064#ifdef RT_ARCH_AMD64
4065 /** @todo consider LEA */
4066 if ((int8_t)iImmAddend == iImmAddend)
4067 {
4068 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4069 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4070 }
4071 else
4072 {
4073 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4074        off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4075 }
4076
4077#elif defined(RT_ARCH_ARM64)
4078 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4079 if (uAbsImmAddend < 4096)
4080 {
4081 if (iImmAddend >= 0)
4082 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4083 else
4084 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4085 }
4086 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4087 {
4088 if (iImmAddend >= 0)
4089            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4090        else
4091            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4092 }
4093 else if (iGprDst != iGprAddend)
4094 {
4095 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4096 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4097 }
4098 else
4099# ifdef IEM_WITH_THROW_CATCH
4100 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4101# else
4102 AssertReleaseFailedStmt(off = UINT32_MAX);
4103# endif
4104
4105#else
4106# error "Port me!"
4107#endif
4108 return off;
4109}
4110
4111
4112/*********************************************************************************************************************************
4113* Unary Operations *
4114*********************************************************************************************************************************/
4115
4116/**
4117 * Emits code for two's complement negation of a 64-bit GPR.
4118 */
4119DECL_FORCE_INLINE_THROW(uint32_t)
4120iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4121{
4122#if defined(RT_ARCH_AMD64)
4123 /* neg Ev */
4124 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4125 pCodeBuf[off++] = 0xf7;
4126 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4127
4128#elif defined(RT_ARCH_ARM64)
4129 /* sub dst, xzr, dst */
4130 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4131
4132#else
4133# error "Port me"
4134#endif
4135 return off;
4136}
4137
4138
4139/**
4140 * Emits code for two's complement negation of a 64-bit GPR.
4141 */
4142DECL_INLINE_THROW(uint32_t)
4143iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4144{
4145#if defined(RT_ARCH_AMD64)
4146 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4147#elif defined(RT_ARCH_ARM64)
4148 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4149#else
4150# error "Port me"
4151#endif
4152 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4153 return off;
4154}
4155
4156
4157/**
4158 * Emits code for two's complement negation of a 32-bit GPR.
4159 * @note Bits 32 thru 63 are set to zero.
4160 */
4161DECL_FORCE_INLINE_THROW(uint32_t)
4162iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4163{
4164#if defined(RT_ARCH_AMD64)
4165 /* neg Ev */
4166 if (iGprDst >= 8)
4167 pCodeBuf[off++] = X86_OP_REX_B;
4168 pCodeBuf[off++] = 0xf7;
4169 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4170
4171#elif defined(RT_ARCH_ARM64)
4172 /* sub dst, xzr, dst */
4173 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4174
4175#else
4176# error "Port me"
4177#endif
4178 return off;
4179}
4180
4181
4182/**
4183 * Emits code for two's complement negation of a 32-bit GPR.
4184 * @note Bits 32 thru 63 are set to zero.
4185 */
4186DECL_INLINE_THROW(uint32_t)
4187iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4188{
4189#if defined(RT_ARCH_AMD64)
4190 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4191#elif defined(RT_ARCH_ARM64)
4192 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4193#else
4194# error "Port me"
4195#endif
4196 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4197 return off;
4198}
4199
4200
4201
4202/*********************************************************************************************************************************
4203* Bit Operations *
4204*********************************************************************************************************************************/
4205
4206/**
4207 * Emits code for clearing bits 16 thru 63 in the GPR.
4208 */
4209DECL_INLINE_THROW(uint32_t)
4210iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4211{
4212#if defined(RT_ARCH_AMD64)
4213 /* movzx Gv,Ew */
4214 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4215 if (iGprDst >= 8)
4216 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4217 pbCodeBuf[off++] = 0x0f;
4218 pbCodeBuf[off++] = 0xb7;
4219 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4220
4221#elif defined(RT_ARCH_ARM64)
4222 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4223# if 1
4224 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4225# else
4226 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4227 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4228# endif
4229#else
4230# error "Port me"
4231#endif
4232 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4233 return off;
4234}
4235
4236
4237/**
4238 * Emits code for AND'ing two 64-bit GPRs.
4239 *
4240 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4241 * and ARM64 hosts.
4242 */
4243DECL_FORCE_INLINE(uint32_t)
4244iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4245{
4246#if defined(RT_ARCH_AMD64)
4247 /* and Gv, Ev */
4248 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4249 pCodeBuf[off++] = 0x23;
4250 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4251 RT_NOREF(fSetFlags);
4252
4253#elif defined(RT_ARCH_ARM64)
4254 if (!fSetFlags)
4255 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4256 else
4257 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4258
4259#else
4260# error "Port me"
4261#endif
4262 return off;
4263}
4264
4265
4266/**
4267 * Emits code for AND'ing two 64-bit GPRs.
4268 *
4269 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4270 * and ARM64 hosts.
4271 */
4272DECL_INLINE_THROW(uint32_t)
4273iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4274{
4275#if defined(RT_ARCH_AMD64)
4276 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4277#elif defined(RT_ARCH_ARM64)
4278 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4279#else
4280# error "Port me"
4281#endif
4282 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4283 return off;
4284}
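
/*
 * Usage sketch (editorial; register indices hypothetical): AND with flag
 * setting so a conditional jump can follow; on ARM64 this selects ANDS
 * rather than AND:
 *
 *      off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegDst, idxRegMask, true); // fSetFlags=true
 */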
4285
4286
4287/**
4288 * Emits code for AND'ing two 32-bit GPRs.
4289 */
4290DECL_FORCE_INLINE(uint32_t)
4291iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4292{
4293#if defined(RT_ARCH_AMD64)
4294 /* and Gv, Ev */
4295 if (iGprDst >= 8 || iGprSrc >= 8)
4296 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4297 pCodeBuf[off++] = 0x23;
4298 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4299 RT_NOREF(fSetFlags);
4300
4301#elif defined(RT_ARCH_ARM64)
4302 if (!fSetFlags)
4303 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4304 else
4305 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4306
4307#else
4308# error "Port me"
4309#endif
4310 return off;
4311}
4312
4313
4314/**
4315 * Emits code for AND'ing two 32-bit GPRs.
4316 */
4317DECL_INLINE_THROW(uint32_t)
4318iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4319{
4320#if defined(RT_ARCH_AMD64)
4321 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4322#elif defined(RT_ARCH_ARM64)
4323 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4324#else
4325# error "Port me"
4326#endif
4327 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4328 return off;
4329}
4330
4331
4332/**
4333 * Emits code for AND'ing a 64-bit GPR with a constant.
4334 *
4335 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4336 * and ARM64 hosts.
4337 */
4338DECL_INLINE_THROW(uint32_t)
4339iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4340{
4341#if defined(RT_ARCH_AMD64)
4342 if ((int64_t)uImm == (int8_t)uImm)
4343 {
4344 /* and Ev, imm8 */
4345 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4346 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4347 pbCodeBuf[off++] = 0x83;
4348 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4349 pbCodeBuf[off++] = (uint8_t)uImm;
4350 }
4351 else if ((int64_t)uImm == (int32_t)uImm)
4352 {
4353 /* and Ev, imm32 */
4354 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4355 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4356 pbCodeBuf[off++] = 0x81;
4357 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4358 pbCodeBuf[off++] = RT_BYTE1(uImm);
4359 pbCodeBuf[off++] = RT_BYTE2(uImm);
4360 pbCodeBuf[off++] = RT_BYTE3(uImm);
4361 pbCodeBuf[off++] = RT_BYTE4(uImm);
4362 }
4363 else
4364 {
4365 /* Use temporary register for the 64-bit immediate. */
4366 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4367 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4368 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4369 }
4370 RT_NOREF(fSetFlags);
4371
4372#elif defined(RT_ARCH_ARM64)
4373 uint32_t uImmR = 0;
4374 uint32_t uImmNandS = 0;
4375 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4376 {
4377 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4378 if (!fSetFlags)
4379 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4380 else
4381 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4382 }
4383 else
4384 {
4385 /* Use temporary register for the 64-bit immediate. */
4386 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4387 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4388 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4389 }
4390
4391#else
4392# error "Port me"
4393#endif
4394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4395 return off;
4396}
4397
4398
4399/**
4400 * Emits code for AND'ing a 32-bit GPR with a constant.
4401 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4402 * @note For ARM64 this only supports @a uImm values that can be expressed using
4403 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4404 * make sure this is possible!
4405 */
4406DECL_FORCE_INLINE_THROW(uint32_t)
4407iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4408{
4409#if defined(RT_ARCH_AMD64)
4410 /* and Ev, imm */
4411 if (iGprDst >= 8)
4412 pCodeBuf[off++] = X86_OP_REX_B;
4413 if ((int32_t)uImm == (int8_t)uImm)
4414 {
4415 pCodeBuf[off++] = 0x83;
4416 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4417 pCodeBuf[off++] = (uint8_t)uImm;
4418 }
4419 else
4420 {
4421 pCodeBuf[off++] = 0x81;
4422 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4423 pCodeBuf[off++] = RT_BYTE1(uImm);
4424 pCodeBuf[off++] = RT_BYTE2(uImm);
4425 pCodeBuf[off++] = RT_BYTE3(uImm);
4426 pCodeBuf[off++] = RT_BYTE4(uImm);
4427 }
4428 RT_NOREF(fSetFlags);
4429
4430#elif defined(RT_ARCH_ARM64)
4431 uint32_t uImmR = 0;
4432 uint32_t uImmNandS = 0;
4433 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4434 {
4435 if (!fSetFlags)
4436 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4437 else
4438 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4439 }
4440 else
4441# ifdef IEM_WITH_THROW_CATCH
4442 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4443# else
4444 AssertReleaseFailedStmt(off = UINT32_MAX);
4445# endif
4446
4447#else
4448# error "Port me"
4449#endif
4450 return off;
4451}
4452
4453
4454/**
4455 * Emits code for AND'ing a 32-bit GPR with a constant.
4456 *
4457 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4458 */
4459DECL_INLINE_THROW(uint32_t)
4460iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4461{
4462#if defined(RT_ARCH_AMD64)
4463 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4464
4465#elif defined(RT_ARCH_ARM64)
4466 uint32_t uImmR = 0;
4467 uint32_t uImmNandS = 0;
4468 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4469 {
4470 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4471 if (!fSetFlags)
4472 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4473 else
4474 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4475 }
4476 else
4477 {
4478 /* Use temporary register for the 64-bit immediate. */
4479 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4480 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4481 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4482 }
4483
4484#else
4485# error "Port me"
4486#endif
4487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4488 return off;
4489}
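
/*
 * Editorial recap of the ARM64 bitmask immediate constraint used above: the
 * immediate must be a single contiguous run of ones, possibly rotated and
 * replicated over the element size.  E.g. 0x000000ff and 0xffff0000 encode,
 * while 0x00012345 does not and takes the temporary register path.
 * Hypothetical probe:
 *
 *      uint32_t uImmR = 0, uImmNandS = 0;
 *      bool const fEncodable = Armv8A64ConvertMask32ToImmRImmS(UINT32_C(0xffff0000), &uImmNandS, &uImmR);
 */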
4490
4491
4492/**
4493 * Emits code for AND'ing a 64-bit GPR with a constant.
4494 *
4495 * @note For ARM64, any complicated immediate w/o an AND/ANDS-compatible
4496 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc
4497 * are the same.
4498 */
4499DECL_FORCE_INLINE_THROW(uint32_t)
4500iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4501 bool fSetFlags = false)
4502{
4503#if defined(RT_ARCH_AMD64)
4504 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4505 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4506 RT_NOREF(fSetFlags);
4507
4508#elif defined(RT_ARCH_ARM64)
4509 uint32_t uImmR = 0;
4510 uint32_t uImmNandS = 0;
4511 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4512 {
4513 if (!fSetFlags)
4514 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4515 else
4516 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4517 }
4518 else if (iGprDst != iGprSrc)
4519 {
4520 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4521 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4522 }
4523 else
4524# ifdef IEM_WITH_THROW_CATCH
4525 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4526# else
4527 AssertReleaseFailedStmt(off = UINT32_MAX);
4528# endif
4529
4530#else
4531# error "Port me"
4532#endif
4533 return off;
4534}
4535

4536/**
4537 * Emits code for AND'ing a 32-bit GPR with a constant.
4538 *
4539 * @note For ARM64, any complicated immediate w/o an AND/ANDS-compatible
4540 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc
4541 * are the same.
4542 *
4543 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4544 */
4545DECL_FORCE_INLINE_THROW(uint32_t)
4546iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4547 bool fSetFlags = false)
4548{
4549#if defined(RT_ARCH_AMD64)
4550 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4551 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4552 RT_NOREF(fSetFlags);
4553
4554#elif defined(RT_ARCH_ARM64)
4555 uint32_t uImmR = 0;
4556 uint32_t uImmNandS = 0;
4557 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4558 {
4559 if (!fSetFlags)
4560 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4561 else
4562 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4563 }
4564 else if (iGprDst != iGprSrc)
4565 {
4566 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4567 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4568 }
4569 else
4570# ifdef IEM_WITH_THROW_CATCH
4571 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4572# else
4573 AssertReleaseFailedStmt(off = UINT32_MAX);
4574# endif
4575
4576#else
4577# error "Port me"
4578#endif
4579 return off;
4580}
4581
4582
4583/**
4584 * Emits code for OR'ing two 64-bit GPRs.
4585 */
4586DECL_FORCE_INLINE(uint32_t)
4587iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4588{
4589#if defined(RT_ARCH_AMD64)
4590 /* or Gv, Ev */
4591 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4592 pCodeBuf[off++] = 0x0b;
4593 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4594
4595#elif defined(RT_ARCH_ARM64)
4596 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
4597
4598#else
4599# error "Port me"
4600#endif
4601 return off;
4602}
4603
4604
4605/**
4606 * Emits code for OR'ing two 64-bit GPRs.
4607 */
4608DECL_INLINE_THROW(uint32_t)
4609iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4610{
4611#if defined(RT_ARCH_AMD64)
4612 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4613#elif defined(RT_ARCH_ARM64)
4614 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4615#else
4616# error "Port me"
4617#endif
4618 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4619 return off;
4620}
4621
4622
4623/**
4624 * Emits code for OR'ing two 32-bit GPRs.
4625 * @note Bits 63:32 of the destination GPR will be cleared.
4626 */
4627DECL_FORCE_INLINE(uint32_t)
4628iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4629{
4630#if defined(RT_ARCH_AMD64)
4631 /* or Gv, Ev */
4632 if (iGprDst >= 8 || iGprSrc >= 8)
4633 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4634 pCodeBuf[off++] = 0x0b;
4635 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4636
4637#elif defined(RT_ARCH_ARM64)
4638 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4639
4640#else
4641# error "Port me"
4642#endif
4643 return off;
4644}
4645
4646
4647/**
4648 * Emits code for OR'ing two 32-bit GPRs.
4649 * @note Bits 63:32 of the destination GPR will be cleared.
4650 */
4651DECL_INLINE_THROW(uint32_t)
4652iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4653{
4654#if defined(RT_ARCH_AMD64)
4655 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4656#elif defined(RT_ARCH_ARM64)
4657 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4658#else
4659# error "Port me"
4660#endif
4661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4662 return off;
4663}
4664
4665
4666/**
4667 * Emits code for OR'ing a 64-bit GPRs with a constant.
4668 */
4669DECL_INLINE_THROW(uint32_t)
4670iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
4671{
4672#if defined(RT_ARCH_AMD64)
4673 if ((int64_t)uImm == (int8_t)uImm)
4674 {
4675 /* or Ev, imm8 */
4676 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4677 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4678 pbCodeBuf[off++] = 0x83;
4679 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4680 pbCodeBuf[off++] = (uint8_t)uImm;
4681 }
4682 else if ((int64_t)uImm == (int32_t)uImm)
4683 {
4684 /* or Ev, imm32 */
4685 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4686 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4687 pbCodeBuf[off++] = 0x81;
4688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4689 pbCodeBuf[off++] = RT_BYTE1(uImm);
4690 pbCodeBuf[off++] = RT_BYTE2(uImm);
4691 pbCodeBuf[off++] = RT_BYTE3(uImm);
4692 pbCodeBuf[off++] = RT_BYTE4(uImm);
4693 }
4694 else
4695 {
4696 /* Use temporary register for the 64-bit immediate. */
4697 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4698 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
4699 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4700 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4701 }
4702
4703#elif defined(RT_ARCH_ARM64)
4704 uint32_t uImmR = 0;
4705 uint32_t uImmNandS = 0;
4706 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4707 {
4708 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4709 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
4710 }
4711 else
4712 {
4713 /* Use temporary register for the 64-bit immediate. */
4714 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4715 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
4716 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4717 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4718 }
4719
4720#else
4721# error "Port me"
4722#endif
4723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4724 return off;
4725}
4726
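/*
 * A minimal usage sketch for the immediate OR above, assuming the usual
 * emitter context (pReNative/off) and a hypothetical host register index
 * idxRegTmp; constants fitting a signed 8/32-bit immediate take the short
 * AMD64 encodings, anything wider goes via a temporary register:
 *
 *      off = iemNativeEmitOrGprByImm(pReNative, off, idxRegTmp, 0x20);                         // or Ev, imm8
 *      off = iemNativeEmitOrGprByImm(pReNative, off, idxRegTmp, UINT64_C(0x8000000000000001)); // temporary reg
 */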
4727
4728/**
4729 * Emits code for OR'ing a 32-bit GPR with a constant.
4730 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4731 * @note For ARM64 this only supports @a uImm values that can be expressed using
4732 * the two 6-bit immediates of the ORR instruction. The caller must make
4733 * sure this is possible!
4734 */
4735DECL_FORCE_INLINE_THROW(uint32_t)
4736iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4737{
4738#if defined(RT_ARCH_AMD64)
4739 /* or Ev, imm */
4740 if (iGprDst >= 8)
4741 pCodeBuf[off++] = X86_OP_REX_B;
4742 if ((int32_t)uImm == (int8_t)uImm)
4743 {
4744 pCodeBuf[off++] = 0x83;
4745 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4746 pCodeBuf[off++] = (uint8_t)uImm;
4747 }
4748 else
4749 {
4750 pCodeBuf[off++] = 0x81;
4751 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4752 pCodeBuf[off++] = RT_BYTE1(uImm);
4753 pCodeBuf[off++] = RT_BYTE2(uImm);
4754 pCodeBuf[off++] = RT_BYTE3(uImm);
4755 pCodeBuf[off++] = RT_BYTE4(uImm);
4756 }
4757
4758#elif defined(RT_ARCH_ARM64)
4759 uint32_t uImmR = 0;
4760 uint32_t uImmNandS = 0;
4761 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4762 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4763 else
4764# ifdef IEM_WITH_THROW_CATCH
4765 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4766# else
4767 AssertReleaseFailedStmt(off = UINT32_MAX);
4768# endif
4769
4770#else
4771# error "Port me"
4772#endif
4773 return off;
4774}
4775
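/*
 * Note on the ARM64 restriction above: ORR (immediate) can only encode masks
 * that are a repeating pattern of a rotated run of set bits.  A caller can
 * probe encodability with the same helper the emitter itself uses, e.g.:
 *
 *      uint32_t uImmNandS = 0, uImmR = 0;
 *      Armv8A64ConvertMask32ToImmRImmS(UINT32_C(0x00ff00ff), &uImmNandS, &uImmR); // true  - encodable
 *      Armv8A64ConvertMask32ToImmRImmS(UINT32_C(0x00012345), &uImmNandS, &uImmR); // false - needs a temp register
 */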
4776
4777/**
4778 * Emits code for OR'ing a 32-bit GPR with a constant.
4779 *
4780 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4781 */
4782DECL_INLINE_THROW(uint32_t)
4783iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4784{
4785#if defined(RT_ARCH_AMD64)
4786 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
4787
4788#elif defined(RT_ARCH_ARM64)
4789 uint32_t uImmR = 0;
4790 uint32_t uImmNandS = 0;
4791 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4792 {
4793 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4794 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4795 }
4796 else
4797 {
4798 /* Use temporary register for the 64-bit immediate. */
4799 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4800 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
4801 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4802 }
4803
4804#else
4805# error "Port me"
4806#endif
4807 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4808 return off;
4809}
4810
4811
4812
4813/**
4814 * ORs two 64-bit GPRs together, storing the result in a third register.
4815 */
4816DECL_FORCE_INLINE(uint32_t)
4817iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
4818{
4819#ifdef RT_ARCH_AMD64
4820 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
4821 {
4822 /** @todo consider LEA */
4823 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
4824 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
4825 }
4826 else
4827 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
4828
4829#elif defined(RT_ARCH_ARM64)
4830 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
4831
4832#else
4833# error "Port me!"
4834#endif
4835 return off;
4836}
4837
4838
4839
4840/**
4841 * ORs two 32-bit GPRs together, storing the result in a third register.
4842 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4843 */
4844DECL_FORCE_INLINE(uint32_t)
4845iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
4846{
4847#ifdef RT_ARCH_AMD64
4848 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
4849 {
4850 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
4851 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
4852 }
4853 else
4854 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
4855
4856#elif defined(RT_ARCH_ARM64)
4857 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
4858
4859#else
4860# error "Port me!"
4861#endif
4862 return off;
4863}
4864
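/*
 * Sketch of the three-operand form in use (hypothetical register indices); on
 * AMD64 it costs a mov+or pair unless the destination aliases a source, while
 * ARM64 encodes it as a single ORR:
 *
 *      off = iemNativeEmitGpr32EqGprOrGprEx(pCodeBuf, off, idxRegDst, idxRegSrc1, idxRegSrc2);
 */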
4865
4866/**
4867 * Emits code for XOR'ing two 64-bit GPRs.
4868 */
4869DECL_INLINE_THROW(uint32_t)
4870iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4871{
4872#if defined(RT_ARCH_AMD64)
4873 /* xor Gv, Ev */
4874 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4875 pCodeBuf[off++] = 0x33;
4876 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4877
4878#elif defined(RT_ARCH_ARM64)
4879 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
4880
4881#else
4882# error "Port me"
4883#endif
4884 return off;
4885}
4886
4887
4888/**
4889 * Emits code for XOR'ing two 64-bit GPRs.
4890 */
4891DECL_INLINE_THROW(uint32_t)
4892iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4893{
4894#if defined(RT_ARCH_AMD64)
4895 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4896#elif defined(RT_ARCH_ARM64)
4897 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4898#else
4899# error "Port me"
4900#endif
4901 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4902 return off;
4903}
4904
4905
4906/**
4907 * Emits code for XOR'ing two 32-bit GPRs.
4908 */
4909DECL_INLINE_THROW(uint32_t)
4910iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4911{
4912#if defined(RT_ARCH_AMD64)
4913 /* xor Gv, Ev */
4914 if (iGprDst >= 8 || iGprSrc >= 8)
4915 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4916 pCodeBuf[off++] = 0x33;
4917 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4918
4919#elif defined(RT_ARCH_ARM64)
4920 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4921
4922#else
4923# error "Port me"
4924#endif
4925 return off;
4926}
4927
4928
4929/**
4930 * Emits code for XOR'ing two 32-bit GPRs.
4931 */
4932DECL_INLINE_THROW(uint32_t)
4933iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4934{
4935#if defined(RT_ARCH_AMD64)
4936 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4937#elif defined(RT_ARCH_ARM64)
4938 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4939#else
4940# error "Port me"
4941#endif
4942 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4943 return off;
4944}
4945
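/*
 * XOR'ing a register with itself is the compact way of zeroing it on both
 * hosts; a sketch with a hypothetical register index:
 *
 *      off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxReg, idxReg); // idxReg = 0, bits 63:32 cleared too
 */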
4946
4947/**
4948 * Emits code for XOR'ing a 32-bit GPR with a constant.
4949 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4950 * @note For ARM64 this only supports @a uImm values that can be expressed using
4951 * the two 6-bit immediates of the EOR instruction. The caller must make
4952 * sure this is possible!
4953 */
4954DECL_FORCE_INLINE_THROW(uint32_t)
4955iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4956{
4957#if defined(RT_ARCH_AMD64)
4958 /* xor Ev, imm */
4959 if (iGprDst >= 8)
4960 pCodeBuf[off++] = X86_OP_REX_B;
4961 if ((int32_t)uImm == (int8_t)uImm)
4962 {
4963 pCodeBuf[off++] = 0x83;
4964 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
4965 pCodeBuf[off++] = (uint8_t)uImm;
4966 }
4967 else
4968 {
4969 pCodeBuf[off++] = 0x81;
4970 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
4971 pCodeBuf[off++] = RT_BYTE1(uImm);
4972 pCodeBuf[off++] = RT_BYTE2(uImm);
4973 pCodeBuf[off++] = RT_BYTE3(uImm);
4974 pCodeBuf[off++] = RT_BYTE4(uImm);
4975 }
4976
4977#elif defined(RT_ARCH_ARM64)
4978 uint32_t uImmR = 0;
4979 uint32_t uImmNandS = 0;
4980 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4981 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4982 else
4983# ifdef IEM_WITH_THROW_CATCH
4984 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4985# else
4986 AssertReleaseFailedStmt(off = UINT32_MAX);
4987# endif
4988
4989#else
4990# error "Port me"
4991#endif
4992 return off;
4993}
4994
4995
4996/*********************************************************************************************************************************
4997* Shifting *
4998*********************************************************************************************************************************/
4999
5000/**
5001 * Emits code for shifting a GPR a fixed number of bits to the left.
5002 */
5003DECL_FORCE_INLINE(uint32_t)
5004iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5005{
5006 Assert(cShift > 0 && cShift < 64);
5007
5008#if defined(RT_ARCH_AMD64)
5009 /* shl dst, cShift */
5010 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5011 if (cShift != 1)
5012 {
5013 pCodeBuf[off++] = 0xc1;
5014 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5015 pCodeBuf[off++] = cShift;
5016 }
5017 else
5018 {
5019 pCodeBuf[off++] = 0xd1;
5020 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5021 }
5022
5023#elif defined(RT_ARCH_ARM64)
5024 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5025
5026#else
5027# error "Port me"
5028#endif
5029 return off;
5030}
5031
5032
5033/**
5034 * Emits code for shifting a GPR a fixed number of bits to the left.
5035 */
5036DECL_INLINE_THROW(uint32_t)
5037iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5038{
5039#if defined(RT_ARCH_AMD64)
5040 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5041#elif defined(RT_ARCH_ARM64)
5042 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5043#else
5044# error "Port me"
5045#endif
5046 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5047 return off;
5048}
5049
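/*
 * A typical use of the fixed left shift is scaling an index; a sketch with a
 * hypothetical register index, multiplying by the size of a 64-bit element:
 *
 *      off = iemNativeEmitShiftGprLeft(pReNative, off, idxRegIndex, 3); // idxRegIndex <<= 3
 */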
5050
5051/**
5052 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5053 */
5054DECL_FORCE_INLINE(uint32_t)
5055iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5056{
5057 Assert(cShift > 0 && cShift < 32);
5058
5059#if defined(RT_ARCH_AMD64)
5060 /* shl dst, cShift */
5061 if (iGprDst >= 8)
5062 pCodeBuf[off++] = X86_OP_REX_B;
5063 if (cShift != 1)
5064 {
5065 pCodeBuf[off++] = 0xc1;
5066 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5067 pCodeBuf[off++] = cShift;
5068 }
5069 else
5070 {
5071 pCodeBuf[off++] = 0xd1;
5072 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5073 }
5074
5075#elif defined(RT_ARCH_ARM64)
5076 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5077
5078#else
5079# error "Port me"
5080#endif
5081 return off;
5082}
5083
5084
5085/**
5086 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5087 */
5088DECL_INLINE_THROW(uint32_t)
5089iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5090{
5091#if defined(RT_ARCH_AMD64)
5092 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5093#elif defined(RT_ARCH_ARM64)
5094 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5095#else
5096# error "Port me"
5097#endif
5098 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5099 return off;
5100}
5101
5102
5103/**
5104 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5105 */
5106DECL_FORCE_INLINE(uint32_t)
5107iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5108{
5109 Assert(cShift > 0 && cShift < 64);
5110
5111#if defined(RT_ARCH_AMD64)
5112 /* shr dst, cShift */
5113 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5114 if (cShift != 1)
5115 {
5116 pCodeBuf[off++] = 0xc1;
5117 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5118 pCodeBuf[off++] = cShift;
5119 }
5120 else
5121 {
5122 pCodeBuf[off++] = 0xd1;
5123 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5124 }
5125
5126#elif defined(RT_ARCH_ARM64)
5127 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5128
5129#else
5130# error "Port me"
5131#endif
5132 return off;
5133}
5134
5135
5136/**
5137 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5138 */
5139DECL_INLINE_THROW(uint32_t)
5140iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5141{
5142#if defined(RT_ARCH_AMD64)
5143 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5144#elif defined(RT_ARCH_ARM64)
5145 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5146#else
5147# error "Port me"
5148#endif
5149 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5150 return off;
5151}
5152
5153
5154/**
5155 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5156 * right.
5157 */
5158DECL_FORCE_INLINE(uint32_t)
5159iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5160{
5161 Assert(cShift > 0 && cShift < 32);
5162
5163#if defined(RT_ARCH_AMD64)
5164 /* shr dst, cShift */
5165 if (iGprDst >= 8)
5166 pCodeBuf[off++] = X86_OP_REX_B;
5167 if (cShift != 1)
5168 {
5169 pCodeBuf[off++] = 0xc1;
5170 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5171 pCodeBuf[off++] = cShift;
5172 }
5173 else
5174 {
5175 pCodeBuf[off++] = 0xd1;
5176 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5177 }
5178
5179#elif defined(RT_ARCH_ARM64)
5180 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5181
5182#else
5183# error "Port me"
5184#endif
5185 return off;
5186}
5187
5188
5189/**
5190 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5191 * right.
5192 */
5193DECL_INLINE_THROW(uint32_t)
5194iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5195{
5196#if defined(RT_ARCH_AMD64)
5197 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5198#elif defined(RT_ARCH_ARM64)
5199 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5200#else
5201# error "Port me"
5202#endif
5203 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5204 return off;
5205}
5206
5207
5208/**
5209 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5210 * right and assigning it to a different GPR.
5211 */
5212DECL_INLINE_THROW(uint32_t)
5213iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5214{
5215 Assert(cShift > 0); Assert(cShift < 32);
5216#if defined(RT_ARCH_AMD64)
5217 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5218 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5219
5220#elif defined(RT_ARCH_ARM64)
5221 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5222
5223#else
5224# error "Port me"
5225#endif
5226 return off;
5227}
5228
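/*
 * The move+shift combination above is handy for field extraction; a sketch
 * (hypothetical register indices) fetching the high word of a 32-bit value:
 *
 *      off = iemNativeEmitGpr32EqGprShiftRightImmEx(pCodeBuf, off, idxRegDst, idxRegSrc, 16);
 */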
5229
5230/**
5231 * Emits code for rotating a GPR a fixed number of bits to the left.
5232 */
5233DECL_FORCE_INLINE(uint32_t)
5234iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5235{
5236 Assert(cShift > 0 && cShift < 64);
5237
5238#if defined(RT_ARCH_AMD64)
5239 /* rol dst, cShift */
5240 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5241 if (cShift != 1)
5242 {
5243 pCodeBuf[off++] = 0xc1;
5244 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5245 pCodeBuf[off++] = cShift;
5246 }
5247 else
5248 {
5249 pCodeBuf[off++] = 0xd1;
5250 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5251 }
5252
5253#elif defined(RT_ARCH_ARM64)
5254 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5255
5256#else
5257# error "Port me"
5258#endif
5259 return off;
5260}
5261
5262
5263/**
5264 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5265 * @note Only bits 15:0 of the result are defined (AMD64 leaves bits 63:16 untouched, ARM64 clears bits 63:32).
5266 */
5267DECL_FORCE_INLINE(uint32_t)
5268iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5269{
5270#if defined(RT_ARCH_AMD64)
5271 /*
5272 * There is no bswap r16 on x86 (the encoding exists but does not work).
5273 * So just use a rol instead (gcc -O2 does the same).
5274 *
5275 * rol r16, 0x8
5276 */
5277 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5278 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5279 if (iGpr >= 8)
5280 pbCodeBuf[off++] = X86_OP_REX_B;
5281 pbCodeBuf[off++] = 0xc1;
5282 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5283 pbCodeBuf[off++] = 0x08;
5284#elif defined(RT_ARCH_ARM64)
5285 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5286
5287 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5288#else
5289# error "Port me"
5290#endif
5291
5292 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5293 return off;
5294}
5295
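/*
 * Worked example for the rol trick above: with bits 15:0 holding 0x1234,
 * 'rol r16, 8' leaves 0x3412 there, which is exactly what a 16-bit byte
 * swap would produce.
 */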
5296
5297/**
5298 * Emits code for reversing the byte order in a 32-bit GPR.
5299 * @note Bits 63:32 of the destination GPR will be cleared.
5300 */
5301DECL_FORCE_INLINE(uint32_t)
5302iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5303{
5304#if defined(RT_ARCH_AMD64)
5305 /* bswap r32 */
5306 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5307
5308 if (iGpr >= 8)
5309 pbCodeBuf[off++] = X86_OP_REX_B;
5310 pbCodeBuf[off++] = 0x0f;
5311 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5312#elif defined(RT_ARCH_ARM64)
5313 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5314
5315 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5316#else
5317# error "Port me"
5318#endif
5319
5320 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5321 return off;
5322}
5323
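/*
 * E.g. a register holding 0x11223344 in bits 31:0 reads 0x44332211 after the
 * swap, with bits 63:32 cleared on both hosts (32-bit writes zero-extend on
 * AMD64, and the W-form REV clears them on ARM64).
 */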
5324
5325/**
5326 * Emits code for reversing the byte order in a 64-bit GPR.
5327 */
5328DECL_FORCE_INLINE(uint32_t)
5329iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5330{
5331#if defined(RT_ARCH_AMD64)
5332 /* bswap r64 */
5333 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5334
5335 if (iGpr >= 8)
5336 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5337 else
5338 pbCodeBuf[off++] = X86_OP_REX_W;
5339 pbCodeBuf[off++] = 0x0f;
5340 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5341#elif defined(RT_ARCH_ARM64)
5342 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5343
5344 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5345#else
5346# error "Port me"
5347#endif
5348
5349 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5350 return off;
5351}
5352
5353
5354/*********************************************************************************************************************************
5355* Compare and Testing *
5356*********************************************************************************************************************************/
5357
5358
5359#ifdef RT_ARCH_ARM64
5360/**
5361 * Emits an ARM64 compare instruction.
5362 */
5363DECL_INLINE_THROW(uint32_t)
5364iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5365 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5366{
5367 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5368 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5369 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5370 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5371 return off;
5372}
5373#endif
5374
5375
5376/**
5377 * Emits a compare of two 64-bit GPRs, setting status flags for use with
5378 * conditional instructions.
5379 */
5380DECL_FORCE_INLINE(uint32_t)
5381iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5382{
5383#ifdef RT_ARCH_AMD64
5384 /* cmp Gv, Ev */
5385 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5386 pCodeBuf[off++] = 0x3b;
5387 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5388
5389#elif defined(RT_ARCH_ARM64)
5390 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
5391
5392#else
5393# error "Port me!"
5394#endif
5395 return off;
5396}
5397
5398
5399/**
5400 * Emits a compare of two 64-bit GPRs, setting status flags for use with
5401 * conditional instructions.
5402 */
5403DECL_INLINE_THROW(uint32_t)
5404iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5405{
5406#ifdef RT_ARCH_AMD64
5407 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5408#elif defined(RT_ARCH_ARM64)
5409 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5410#else
5411# error "Port me!"
5412#endif
5413 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5414 return off;
5415}
5416
5417
5418/**
5419 * Emits a compare of two 32-bit GPRs, setting status flags for use with
5420 * conditional instructions.
5421 */
5422DECL_FORCE_INLINE(uint32_t)
5423iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5424{
5425#ifdef RT_ARCH_AMD64
5426 /* cmp Gv, Ev */
5427 if (iGprLeft >= 8 || iGprRight >= 8)
5428 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5429 pCodeBuf[off++] = 0x3b;
5430 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5431
5432#elif defined(RT_ARCH_ARM64)
5433 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
5434
5435#else
5436# error "Port me!"
5437#endif
5438 return off;
5439}
5440
5441
5442/**
5443 * Emits a compare of two 32-bit GPRs, settings status flags/whatever for use
5444 * with conditional instruction.
5445 */
5446DECL_INLINE_THROW(uint32_t)
5447iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5448{
5449#ifdef RT_ARCH_AMD64
5450 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5451#elif defined(RT_ARCH_ARM64)
5452 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5453#else
5454# error "Port me!"
5455#endif
5456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5457 return off;
5458}
5459
5460
5461/**
5462 * Emits a compare of a 64-bit GPR with a constant value, setting status
5463 * flags for use with conditional instructions.
5464 */
5465DECL_INLINE_THROW(uint32_t)
5466iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
5467{
5468#ifdef RT_ARCH_AMD64
5469 if (uImm <= UINT32_C(0x7f)) /* the imm8 is sign-extended */
5470 {
5471 /* cmp Ev, Ib */
5472 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5473 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5474 pbCodeBuf[off++] = 0x83;
5475 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5476 pbCodeBuf[off++] = (uint8_t)uImm;
5477 }
5478 else if ((int64_t)uImm == (int32_t)uImm)
5479 {
5480 /* cmp Ev, imm */
5481 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5482 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5483 pbCodeBuf[off++] = 0x81;
5484 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5485 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5486 pbCodeBuf[off++] = RT_BYTE1(uImm);
5487 pbCodeBuf[off++] = RT_BYTE2(uImm);
5488 pbCodeBuf[off++] = RT_BYTE3(uImm);
5489 pbCodeBuf[off++] = RT_BYTE4(uImm);
5490 }
5491 else
5492 {
5493 /* Use temporary register for the immediate. */
5494 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5495 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5496 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5497 }
5498
5499#elif defined(RT_ARCH_ARM64)
5500 /** @todo guess there are cleverer things we can do here... */
5501 if (uImm < _4K)
5502 {
5503 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5504 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5505 true /*64Bit*/, true /*fSetFlags*/);
5506 }
5507 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5508 {
5509 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5510 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5511 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5512 }
5513 else
5514 {
5515 /* Use temporary register for the immediate. */
5516 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5517 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5518 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5519 }
5520
5521#else
5522# error "Port me!"
5523#endif
5524
5525 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5526 return off;
5527}
5528
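/*
 * Immediates the ARM64 path above can encode without a temporary register:
 * anything below 4096, or a 4KiB-aligned value below 16MiB.  A sketch with a
 * hypothetical register index:
 *
 *      off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegLeft, 0xfff);    // subs xzr, xN, #0xfff
 *      off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegLeft, 0x123000); // subs xzr, xN, #0x123, lsl #12
 */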
5529
5530/**
5531 * Emits a compare of a 32-bit GPR with a constant value, setting status
5532 * flags for use with conditional instructions.
5533 *
5534 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5535 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
5536 * bits all zero). Will release assert or throw exception if the caller
5537 * violates this restriction.
5538 */
5539DECL_FORCE_INLINE_THROW(uint32_t)
5540iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5541{
5542#ifdef RT_ARCH_AMD64
5543 if (iGprLeft >= 8)
5544 pCodeBuf[off++] = X86_OP_REX_B;
5545 if (uImm <= UINT32_C(0x7f))
5546 {
5547 /* cmp Ev, Ib */
5548 pCodeBuf[off++] = 0x83;
5549 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5550 pCodeBuf[off++] = (uint8_t)uImm;
5551 }
5552 else
5553 {
5554 /* cmp Ev, imm */
5555 pCodeBuf[off++] = 0x81;
5556 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5557 pCodeBuf[off++] = RT_BYTE1(uImm);
5558 pCodeBuf[off++] = RT_BYTE2(uImm);
5559 pCodeBuf[off++] = RT_BYTE3(uImm);
5560 pCodeBuf[off++] = RT_BYTE4(uImm);
5561 }
5562
5563#elif defined(RT_ARCH_ARM64)
5564 /** @todo guess there are cleverer things we can do here... */
5565 if (uImm < _4K)
5566 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5567 false /*64Bit*/, true /*fSetFlags*/);
5568 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5569 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5570 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5571 else
5572# ifdef IEM_WITH_THROW_CATCH
5573 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5574# else
5575 AssertReleaseFailedStmt(off = UINT32_MAX);
5576# endif
5577
5578#else
5579# error "Port me!"
5580#endif
5581 return off;
5582}
5583
5584
5585/**
5586 * Emits a compare of a 32-bit GPR with a constant value, setting status
5587 * flags for use with conditional instructions.
5588 */
5589DECL_INLINE_THROW(uint32_t)
5590iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5591{
5592#ifdef RT_ARCH_AMD64
5593 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
5594
5595#elif defined(RT_ARCH_ARM64)
5596 /** @todo guess there are cleverer things we can do here... */
5597 if (uImm < _4K)
5598 {
5599 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5600 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5601 false /*64Bit*/, true /*fSetFlags*/);
5602 }
5603 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5604 {
5605 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5606 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5607 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5608 }
5609 else
5610 {
5611 /* Use temporary register for the immediate. */
5612 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5613 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
5614 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5615 }
5616
5617#else
5618# error "Port me!"
5619#endif
5620
5621 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5622 return off;
5623}
5624
5625
5626/**
5627 * Emits a compare of a 16-bit GPR with a constant value, setting status
5628 * flags for use with conditional instructions.
5629 *
5630 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
5631 * 16-bit value from @a iGprLeft.
5632 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5633 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
5634 * bits all zero). Will release assert or throw exception if the caller
5635 * violates this restriction.
5636 */
5637DECL_FORCE_INLINE_THROW(uint32_t)
5638iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
5639 uint8_t idxTmpReg = UINT8_MAX)
5640{
5641#ifdef RT_ARCH_AMD64
5642 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5643 if (iGprLeft >= 8)
5644 pCodeBuf[off++] = X86_OP_REX_B;
5645 if (uImm <= UINT32_C(0x7f))
5646 {
5647 /* cmp Ev, Ib */
5648 pCodeBuf[off++] = 0x83;
5649 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5650 pCodeBuf[off++] = (uint8_t)uImm;
5651 }
5652 else
5653 {
5654 /* cmp Ev, imm */
5655 pCodeBuf[off++] = 0x81;
5656 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5657 pCodeBuf[off++] = RT_BYTE1(uImm);
5658 pCodeBuf[off++] = RT_BYTE2(uImm);
5659 }
5660 RT_NOREF(idxTmpReg);
5661
5662#elif defined(RT_ARCH_ARM64)
5663# ifdef IEM_WITH_THROW_CATCH
5664 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5665# else
5666 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
5667# endif
5668 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
5669 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
5670 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
5671
5672#else
5673# error "Port me!"
5674#endif
5675 return off;
5676}
5677
5678
5679/**
5680 * Emits a compare of a 16-bit GPR with a constant value, setting status
5681 * flags for use with conditional instructions.
5682 *
5683 * @note ARM64: Helper register is required (idxTmpReg).
5684 */
5685DECL_INLINE_THROW(uint32_t)
5686iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
5687 uint8_t idxTmpReg = UINT8_MAX)
5688{
5689#ifdef RT_ARCH_AMD64
5690 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
5691#elif defined(RT_ARCH_ARM64)
5692 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
5693#else
5694# error "Port me!"
5695#endif
5696 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5697 return off;
5698}
5699
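/*
 * Spelled out, the ARM64 16-bit compare above masks first and then does a
 * regular 32-bit compare, i.e. with a caller-supplied idxTmpReg it emits the
 * equivalent of:
 *
 *      and  wTMP, wLEFT, #0xffff
 *      subs wzr, wTMP, #uImm
 */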
5700
5701
5702/*********************************************************************************************************************************
5703* Branching *
5704*********************************************************************************************************************************/
5705
5706/**
5707 * Emits a JMP rel32 / B imm26 to the given label.
5708 */
5709DECL_FORCE_INLINE_THROW(uint32_t)
5710iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
5711{
5712 Assert(idxLabel < pReNative->cLabels);
5713
5714#ifdef RT_ARCH_AMD64
5715 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
5716 {
5717 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
5718 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
5719 {
5720 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
5721 pCodeBuf[off++] = (uint8_t)offRel;
5722 }
5723 else
5724 {
5725 offRel -= 3;
5726 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
5727 pCodeBuf[off++] = RT_BYTE1(offRel);
5728 pCodeBuf[off++] = RT_BYTE2(offRel);
5729 pCodeBuf[off++] = RT_BYTE3(offRel);
5730 pCodeBuf[off++] = RT_BYTE4(offRel);
5731 }
5732 }
5733 else
5734 {
5735 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
5736 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
5737 pCodeBuf[off++] = 0xfe;
5738 pCodeBuf[off++] = 0xff;
5739 pCodeBuf[off++] = 0xff;
5740 pCodeBuf[off++] = 0xff;
5741 }
5742 pCodeBuf[off++] = 0xcc; /* int3 poison */
5743
5744#elif defined(RT_ARCH_ARM64)
5745 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
5746 pCodeBuf[off++] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
5747 else
5748 {
5749 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
5750 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
5751 }
5752
5753#else
5754# error "Port me!"
5755#endif
5756 return off;
5757}
5758
5759
5760/**
5761 * Emits a JMP rel32 / B imm26 to the given label.
5762 */
5763DECL_INLINE_THROW(uint32_t)
5764iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5765{
5766#ifdef RT_ARCH_AMD64
5767 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
5768#elif defined(RT_ARCH_ARM64)
5769 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
5770#else
5771# error "Port me!"
5772#endif
5773 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5774 return off;
5775}
5776
5777
5778/**
5779 * Emits a JMP rel32 / B imm26 to a new undefined label.
5780 */
5781DECL_INLINE_THROW(uint32_t)
5782iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
5783{
5784 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
5785 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
5786}
5787
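/*
 * Typical forward-branch pattern built from the label emitters, as a sketch
 * (assuming the label create/define helpers from IEMN8veRecompiler.h and some
 * condition already set up by a preceding compare):
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, 0);
 *      off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
 *      // ... code executed when the condition is zero ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */
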
5788/** Condition type. */
5789#ifdef RT_ARCH_AMD64
5790typedef enum IEMNATIVEINSTRCOND : uint8_t
5791{
5792 kIemNativeInstrCond_o = 0,
5793 kIemNativeInstrCond_no,
5794 kIemNativeInstrCond_c,
5795 kIemNativeInstrCond_nc,
5796 kIemNativeInstrCond_e,
5797 kIemNativeInstrCond_ne,
5798 kIemNativeInstrCond_be,
5799 kIemNativeInstrCond_nbe,
5800 kIemNativeInstrCond_s,
5801 kIemNativeInstrCond_ns,
5802 kIemNativeInstrCond_p,
5803 kIemNativeInstrCond_np,
5804 kIemNativeInstrCond_l,
5805 kIemNativeInstrCond_nl,
5806 kIemNativeInstrCond_le,
5807 kIemNativeInstrCond_nle
5808} IEMNATIVEINSTRCOND;
5809#elif defined(RT_ARCH_ARM64)
5810typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
5811# define kIemNativeInstrCond_o todo_conditional_codes
5812# define kIemNativeInstrCond_no todo_conditional_codes
5813# define kIemNativeInstrCond_c todo_conditional_codes
5814# define kIemNativeInstrCond_nc todo_conditional_codes
5815# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
5816# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
5817# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
5818# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
5819# define kIemNativeInstrCond_s todo_conditional_codes
5820# define kIemNativeInstrCond_ns todo_conditional_codes
5821# define kIemNativeInstrCond_p todo_conditional_codes
5822# define kIemNativeInstrCond_np todo_conditional_codes
5823# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
5824# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
5825# define kIemNativeInstrCond_le kArmv8InstrCond_Le
5826# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
5827#else
5828# error "Port me!"
5829#endif
5830
5831
5832/**
5833 * Emits a Jcc rel32 / B.cc imm19 to the given label, adding a fixup if the label is not yet defined.
5834 */
5835DECL_FORCE_INLINE_THROW(uint32_t)
5836iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
5837 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
5838{
5839 Assert(idxLabel < pReNative->cLabels);
5840
5841 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
5842#ifdef RT_ARCH_AMD64
5843 if (offLabel >= off)
5844 {
5845 /* jcc rel32 */
5846 pCodeBuf[off++] = 0x0f;
5847 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
5848 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
5849 pCodeBuf[off++] = 0x00;
5850 pCodeBuf[off++] = 0x00;
5851 pCodeBuf[off++] = 0x00;
5852 pCodeBuf[off++] = 0x00;
5853 }
5854 else
5855 {
5856 int32_t offDisp = offLabel - (off + 2);
5857 if ((int8_t)offDisp == offDisp)
5858 {
5859 /* jcc rel8 */
5860 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
5861 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
5862 }
5863 else
5864 {
5865 /* jcc rel32 */
5866 offDisp -= 4;
5867 pCodeBuf[off++] = 0x0f;
5868 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
5869 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
5870 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
5871 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
5872 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
5873 }
5874 }
5875
5876#elif defined(RT_ARCH_ARM64)
5877 if (offLabel >= off)
5878 {
5879 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5880 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
5881 }
5882 else
5883 {
5884 Assert(off - offLabel <= 0x3ffffU);
5885 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
5886 }
5887
5888#else
5889# error "Port me!"
5890#endif
5891 return off;
5892}
5893
5894
5895/**
5896 * Emits a Jcc rel32 / B.cc imm19 to the given label, adding a fixup if the label is not yet defined.
5897 */
5898DECL_INLINE_THROW(uint32_t)
5899iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
5900{
5901#ifdef RT_ARCH_AMD64
5902 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
5903#elif defined(RT_ARCH_ARM64)
5904 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
5905#else
5906# error "Port me!"
5907#endif
5908 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5909 return off;
5910}
5911
5912
5913/**
5914 * Emits a Jcc rel32 / B.cc imm19 to a new label.
5915 */
5916DECL_INLINE_THROW(uint32_t)
5917iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5918 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
5919{
5920 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
5921 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
5922}
5923
5924
5925/**
5926 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
5927 */
5928DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5929{
5930#ifdef RT_ARCH_AMD64
5931 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
5932#elif defined(RT_ARCH_ARM64)
5933 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
5934#else
5935# error "Port me!"
5936#endif
5937}
5938
5939/**
5940 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
5941 */
5942DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5943 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
5944{
5945#ifdef RT_ARCH_AMD64
5946 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
5947#elif defined(RT_ARCH_ARM64)
5948 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
5949#else
5950# error "Port me!"
5951#endif
5952}
5953
5954
5955/**
5956 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
5957 */
5958DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5959{
5960#ifdef RT_ARCH_AMD64
5961 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
5962#elif defined(RT_ARCH_ARM64)
5963 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
5964#else
5965# error "Port me!"
5966#endif
5967}
5968
5969/**
5970 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
5971 */
5972DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5973 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
5974{
5975#ifdef RT_ARCH_AMD64
5976 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
5977#elif defined(RT_ARCH_ARM64)
5978 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
5979#else
5980# error "Port me!"
5981#endif
5982}
5983
5984
5985/**
5986 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
5987 */
5988DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5989{
5990#ifdef RT_ARCH_AMD64
5991 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
5992#elif defined(RT_ARCH_ARM64)
5993 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
5994#else
5995# error "Port me!"
5996#endif
5997}
5998
5999/**
6000 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6001 */
6002DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6003 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6004{
6005#ifdef RT_ARCH_AMD64
6006 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6007#elif defined(RT_ARCH_ARM64)
6008 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6009#else
6010# error "Port me!"
6011#endif
6012}
6013
6014
6015/**
6016 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6017 */
6018DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6019{
6020#ifdef RT_ARCH_AMD64
6021 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6022#elif defined(RT_ARCH_ARM64)
6023 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6024#else
6025# error "Port me!"
6026#endif
6027}
6028
6029/**
6030 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6031 */
6032DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6033 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6034{
6035#ifdef RT_ARCH_AMD64
6036 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6037#elif defined(RT_ARCH_ARM64)
6038 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6039#else
6040# error "Port me!"
6041#endif
6042}
6043
6044
6045/**
6046 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6047 */
6048DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6049{
6050#ifdef RT_ARCH_AMD64
6051 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6052#elif defined(RT_ARCH_ARM64)
6053 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6054#else
6055# error "Port me!"
6056#endif
6057}
6058
6059/**
6060 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6061 */
6062DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6063 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6064{
6065#ifdef RT_ARCH_AMD64
6066 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6067#elif defined(RT_ARCH_ARM64)
6068 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6069#else
6070# error "Port me!"
6071#endif
6072}
6073
6074
6075/**
6076 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6077 *
6078 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6079 *
6080 * Only use hardcoded jumps forward when emitting for exactly one
6081 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6082 * the right target address on all platforms!
6083 *
6084 * Please also note that on x86 it is necessary to pass off + 256 or higher
6085 * for @a offTarget if one believes the intervening code is more than 127
6086 * bytes long.
6087 */
6088DECL_FORCE_INLINE(uint32_t)
6089iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6090{
6091#ifdef RT_ARCH_AMD64
6092 /* jcc rel8 / rel32 */
6093 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6094 if (offDisp < 128 && offDisp >= -128)
6095 {
6096 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6097 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6098 }
6099 else
6100 {
6101 offDisp -= 4;
6102 pCodeBuf[off++] = 0x0f;
6103 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6104 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6105 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6106 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6107 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6108 }
6109
6110#elif defined(RT_ARCH_ARM64)
6111 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6112
6113#else
6114# error "Port me!"
6115#endif
6116 return off;
6117}
6118
6119
6120/**
6121 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6122 *
6123 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6124 *
6125 * Only use hardcoded jumps forward when emitting for exactly one
6126 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6127 * the right target address on all platforms!
6128 *
6129 * Please also note that on x86 it is necessary to pass off + 256 or higher
6130 * for @a offTarget if one believes the intervening code is more than 127
6131 * bytes long.
6132 */
6133DECL_INLINE_THROW(uint32_t)
6134iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6135{
6136#ifdef RT_ARCH_AMD64
6137 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6138#elif defined(RT_ARCH_ARM64)
6139 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6140#else
6141# error "Port me!"
6142#endif
6143 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6144 return off;
6145}
6146
6147
6148/**
6149 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6150 *
6151 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6152 */
6153DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6154{
6155#ifdef RT_ARCH_AMD64
6156 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6157#elif defined(RT_ARCH_ARM64)
6158 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6159#else
6160# error "Port me!"
6161#endif
6162}
6163
6164
6165/**
6166 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6167 *
6168 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6169 */
6170DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6171{
6172#ifdef RT_ARCH_AMD64
6173 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6174#elif defined(RT_ARCH_ARM64)
6175 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6176#else
6177# error "Port me!"
6178#endif
6179}
6180
6181
6182/**
6183 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6184 *
6185 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6186 */
6187DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6188{
6189#ifdef RT_ARCH_AMD64
6190 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6191#elif defined(RT_ARCH_ARM64)
6192 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6193#else
6194# error "Port me!"
6195#endif
6196}
6197
6198
6199/**
6200 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6201 *
6202 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6203 */
6204DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6205{
6206#ifdef RT_ARCH_AMD64
6207 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6208#elif defined(RT_ARCH_ARM64)
6209 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6210#else
6211# error "Port me!"
6212#endif
6213}
6214
6215
6216/**
6217 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6218 *
6219 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6220 */
6221DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6222{
6223#ifdef RT_ARCH_AMD64
6224 /* jmp rel8 or rel32 */
6225 int32_t offDisp = offTarget - (off + 2);
6226 if (offDisp < 128 && offDisp >= -128)
6227 {
6228 pCodeBuf[off++] = 0xeb;
6229 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6230 }
6231 else
6232 {
6233 offDisp -= 3;
6234 pCodeBuf[off++] = 0xe9;
6235 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6236 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6237 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6238 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6239 }
6240
6241#elif defined(RT_ARCH_ARM64)
6242 pCodeBuf[off++] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6243
6244#else
6245# error "Port me!"
6246#endif
6247 return off;
6248}
6249
6250
6251/**
6252 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6253 *
6254 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6255 */
6256DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6257{
6258#ifdef RT_ARCH_AMD64
6259 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6260#elif defined(RT_ARCH_ARM64)
6261 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6262#else
6263# error "Port me!"
6264#endif
6265 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6266 return off;
6267}
6268
6269
6270/**
6271 * Fixes up a conditional jump to a fixed label.
6272 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6273 * iemNativeEmitJzToFixed, ...
6274 */
6275DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6276{
6277#ifdef RT_ARCH_AMD64
6278 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6279 uint8_t const bOpcode = pbCodeBuf[offFixup];
6280 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6281 {
6282 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6283 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6284 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6285 }
6286 else
6287 {
6288 if (bOpcode != 0x0f)
6289 Assert(bOpcode == 0xe9);
6290 else
6291 {
6292 offFixup += 1;
6293 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) < (uint8_t)0x10);
6294 }
6295 uint32_t const offRel32 = offTarget - (offFixup + 5);
6296 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6297 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6298 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6299 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6300 }
6301
6302#elif defined(RT_ARCH_ARM64)
6303 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6304 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6305 {
6306 /* B.COND + BC.COND */
6307 int32_t const offDisp = offTarget - offFixup;
6308 Assert(offDisp >= -262144 && offDisp < 262144);
6309 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6310 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6311 }
6312 else
6313 {
6314 /* B imm26 */
6315 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6316 int32_t const offDisp = offTarget - offFixup;
6317 Assert(offDisp >= -33554432 && offDisp < 33554432);
6318 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6319 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6320 }
6321
6322#else
6323# error "Port me!"
6324#endif
6325}
6326
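/*
 * The fixed-displacement emitters pair with the fixup above: emit the branch
 * with a dummy target, remember where it was, then patch once the real target
 * is known.  A sketch (the dummy must be off + 256 or higher on x86 so the
 * rel32 form is picked, see the notes above):
 *
 *      uint32_t const offFixup = off;
 *      off = iemNativeEmitJccToFixed(pReNative, off, off + 256, kIemNativeInstrCond_e);
 *      // ... code that may be skipped ...
 *      iemNativeFixupFixedJump(pReNative, offFixup, off);
 */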
6327
6328/**
6329 * Internal helper, don't call directly.
6330 */
6331DECL_INLINE_THROW(uint32_t)
6332iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6333 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6334{
6335 Assert(iBitNo < 64);
6336#ifdef RT_ARCH_AMD64
6337 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6338 if (iBitNo < 8)
6339 {
6340 /* test Eb, imm8 */
6341 if (iGprSrc >= 4)
6342 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6343 pbCodeBuf[off++] = 0xf6;
6344 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6345 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6346 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6347 }
6348 else
6349 {
6350 /* bt Ev, imm8 */
6351 if (iBitNo >= 32)
6352 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6353 else if (iGprSrc >= 8)
6354 pbCodeBuf[off++] = X86_OP_REX_B;
6355 pbCodeBuf[off++] = 0x0f;
6356 pbCodeBuf[off++] = 0xba;
6357 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6358 pbCodeBuf[off++] = iBitNo;
6359 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6360 }
6361
6362#elif defined(RT_ARCH_ARM64)
6363 /* Use the TBZ/TBNZ instructions here. */
6364 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6365 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6366 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6367
6368#else
6369# error "Port me!"
6370#endif
6371 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6372 return off;
6373}
6374
6375
6376/**
6377 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
6378 * @a iGprSrc.
6379 *
6380 * @note   On ARM64 the range is only -8192/+8191 instructions (14-bit imm).
6381 */
6382DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6383 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6384{
6385 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
6386}
6387
6388
6389/**
6390 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
6391 * _set_ in @a iGprSrc.
6392 *
6393 * @note   On ARM64 the range is only -8192/+8191 instructions (14-bit imm).
6394 */
6395DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6396 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6397{
6398 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
6399}
6400
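/* A minimal usage sketch (not compiled in; the register and label indices are
   hypothetical): jump when the sign bit of a 64-bit value is set.  AMD64
   emits bt+jc here, ARM64 a single tbnz -- mind the limited range noted
   above. */
#if 0
static uint32_t iemNativeExampleBitTest(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                        uint8_t idxGprVal, uint32_t idxLabel)
{
    /* Branch to idxLabel if bit 63 of idxGprVal is set. */
    return iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxGprVal, 63 /*iBitNo*/, idxLabel);
}
#endif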
6401
6402/**
6403 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
6404 * flags accordingly.
6405 */
6406DECL_INLINE_THROW(uint32_t)
6407iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
6408{
6409 Assert(fBits != 0);
6410#ifdef RT_ARCH_AMD64
6411
6412 if (fBits >= UINT32_MAX)
6413 {
6414 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6415
6416 /* test Ev,Gv */
6417 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6418 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
6419 pbCodeBuf[off++] = 0x85;
6420        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
6421
6422 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6423 }
6424 else if (fBits <= UINT32_MAX)
6425 {
6426 /* test Eb, imm8 or test Ev, imm32 */
6427 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6428 if (fBits <= UINT8_MAX)
6429 {
6430 if (iGprSrc >= 4)
6431 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6432 pbCodeBuf[off++] = 0xf6;
6433 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6434 pbCodeBuf[off++] = (uint8_t)fBits;
6435 }
6436 else
6437 {
6438 if (iGprSrc >= 8)
6439 pbCodeBuf[off++] = X86_OP_REX_B;
6440 pbCodeBuf[off++] = 0xf7;
6441 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6442 pbCodeBuf[off++] = RT_BYTE1(fBits);
6443 pbCodeBuf[off++] = RT_BYTE2(fBits);
6444 pbCodeBuf[off++] = RT_BYTE3(fBits);
6445 pbCodeBuf[off++] = RT_BYTE4(fBits);
6446 }
6447 }
6448 /** @todo implement me. */
6449 else
6450 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
6451
6452#elif defined(RT_ARCH_ARM64)
6453 uint32_t uImmR = 0;
6454 uint32_t uImmNandS = 0;
6455 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
6456 {
6457 /* ands xzr, iGprSrc, #fBits */
6458 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6459 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
6460 }
6461 else
6462 {
6463 /* ands xzr, iGprSrc, iTmpReg */
6464 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6465 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6466 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
6467 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6468 }
6469
6470#else
6471# error "Port me!"
6472#endif
6473 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6474 return off;
6475}
6476
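/* A sketch (not compiled in) of which masks take the ARM64 immediate form:
   only masks forming a repeating pattern of contiguous set bits encode as
   ANDS immediates; anything else falls back to a temporary register.  The
   concrete values below are illustrative assumptions. */
#if 0
static void iemNativeExampleAndsMasks(void)
{
    uint32_t uImmR = 0, uImmNandS = 0;
    /* 8 contiguous ones repeating in 16-bit elements: encodable. */
    bool const fOk1 = Armv8A64ConvertMask64ToImmRImmS(UINT64_C(0x00ff00ff00ff00ff), &uImmNandS, &uImmR); /* true  */
    /* Scattered bits in a single element: not encodable, needs a temp register. */
    bool const fOk2 = Armv8A64ConvertMask64ToImmRImmS(UINT64_C(0x000000000000005a), &uImmNandS, &uImmR); /* false */
    RT_NOREF(fOk1, fOk2);
}
#endif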
6477
6478/**
6479 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
6480 * @a iGprSrc, setting CPU flags accordingly.
6481 *
6482 * @note For ARM64 this only supports @a fBits values that can be expressed
6483 * using the two 6-bit immediates of the ANDS instruction. The caller
6484 * must make sure this is possible!
6485 */
6486DECL_FORCE_INLINE_THROW(uint32_t)
6487iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
6488{
6489 Assert(fBits != 0);
6490
6491#ifdef RT_ARCH_AMD64
6492 if (fBits <= UINT8_MAX)
6493 {
6494 /* test Eb, imm8 */
6495 if (iGprSrc >= 4)
6496 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6497 pCodeBuf[off++] = 0xf6;
6498 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6499 pCodeBuf[off++] = (uint8_t)fBits;
6500 }
6501 else
6502 {
6503 /* test Ev, imm32 */
6504 if (iGprSrc >= 8)
6505 pCodeBuf[off++] = X86_OP_REX_B;
6506 pCodeBuf[off++] = 0xf7;
6507 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6508 pCodeBuf[off++] = RT_BYTE1(fBits);
6509 pCodeBuf[off++] = RT_BYTE2(fBits);
6510 pCodeBuf[off++] = RT_BYTE3(fBits);
6511 pCodeBuf[off++] = RT_BYTE4(fBits);
6512 }
6513
6514#elif defined(RT_ARCH_ARM64)
6515 /* ands xzr, src, #fBits */
6516 uint32_t uImmR = 0;
6517 uint32_t uImmNandS = 0;
6518 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6519 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6520 else
6521# ifdef IEM_WITH_THROW_CATCH
6522 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6523# else
6524 AssertReleaseFailedStmt(off = UINT32_MAX);
6525# endif
6526
6527#else
6528# error "Port me!"
6529#endif
6530 return off;
6531}
6532
6533
6534
6535/**
6536 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6537 * @a iGprSrc, setting CPU flags accordingly.
6538 *
6539 * @note For ARM64 this only supports @a fBits values that can be expressed
6540 * using the two 6-bit immediates of the ANDS instruction. The caller
6541 * must make sure this is possible!
6542 */
6543DECL_FORCE_INLINE_THROW(uint32_t)
6544iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6545{
6546 Assert(fBits != 0);
6547
6548#ifdef RT_ARCH_AMD64
6549 /* test Eb, imm8 */
6550 if (iGprSrc >= 4)
6551 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6552 pCodeBuf[off++] = 0xf6;
6553 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6554 pCodeBuf[off++] = fBits;
6555
6556#elif defined(RT_ARCH_ARM64)
6557 /* ands xzr, src, #fBits */
6558 uint32_t uImmR = 0;
6559 uint32_t uImmNandS = 0;
6560 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6561 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6562 else
6563# ifdef IEM_WITH_THROW_CATCH
6564 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6565# else
6566 AssertReleaseFailedStmt(off = UINT32_MAX);
6567# endif
6568
6569#else
6570# error "Port me!"
6571#endif
6572 return off;
6573}
6574
6575
6576/**
6577 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6578 * @a iGprSrc, setting CPU flags accordingly.
6579 */
6580DECL_INLINE_THROW(uint32_t)
6581iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6582{
6583 Assert(fBits != 0);
6584
6585#ifdef RT_ARCH_AMD64
6586 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
6587
6588#elif defined(RT_ARCH_ARM64)
6589 /* ands xzr, src, [tmp|#imm] */
6590 uint32_t uImmR = 0;
6591 uint32_t uImmNandS = 0;
6592 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6593 {
6594 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6595 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6596 }
6597 else
6598 {
6599 /* Use temporary register for the 64-bit immediate. */
6600 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6601 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6602 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
6603 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6604 }
6605
6606#else
6607# error "Port me!"
6608#endif
6609 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6610 return off;
6611}
6612
6613
6614/**
6615 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
6616 * are set in @a iGprSrc.
6617 */
6618DECL_INLINE_THROW(uint32_t)
6619iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6620 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
6621{
6622 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
6623
6624 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
6625 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6626
6627 return off;
6628}
6629
6630
6631/**
6632 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
6633 * are set in @a iGprSrc.
6634 */
6635DECL_INLINE_THROW(uint32_t)
6636iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6637 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
6638{
6639 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
6640
6641 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
6642 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6643
6644 return off;
6645}
6646
6647
6648/**
6649 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6650 *
6651 * The operand size is given by @a f64Bit.
6652 */
6653DECL_FORCE_INLINE_THROW(uint32_t)
6654iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6655 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
6656{
6657 Assert(idxLabel < pReNative->cLabels);
6658
6659#ifdef RT_ARCH_AMD64
6660 /* test reg32,reg32 / test reg64,reg64 */
6661 if (f64Bit)
6662 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
6663 else if (iGprSrc >= 8)
6664 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
6665 pCodeBuf[off++] = 0x85;
6666 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
6667
6668 /* jnz idxLabel */
6669 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
6670 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6671
6672#elif defined(RT_ARCH_ARM64)
6673 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6674 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
6675 iGprSrc, f64Bit);
6676 else
6677 {
6678 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6679 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
6680 }
6681
6682#else
6683# error "Port me!"
6684#endif
6685 return off;
6686}
6687
6688
6689/**
6690 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6691 *
6692 * The operand size is given by @a f64Bit.
6693 */
6694DECL_FORCE_INLINE_THROW(uint32_t)
6695iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6696 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
6697{
6698#ifdef RT_ARCH_AMD64
6699 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
6700 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
6701#elif defined(RT_ARCH_ARM64)
6702 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
6703 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
6704#else
6705# error "Port me!"
6706#endif
6707 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6708 return off;
6709}
6710
6711
6712/* if (Gpr1 == 0) Jmp idxLabel; */
6713
6714/**
6715 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6716 *
6717 * The operand size is given by @a f64Bit.
6718 */
6719DECL_FORCE_INLINE_THROW(uint32_t)
6720iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6721 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6722{
6723 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
6724 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
6725}
6726
6727
6728/**
6729 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6730 *
6731 * The operand size is given by @a f64Bit.
6732 */
6733DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6734 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6735{
6736 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
6737}
6738
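/* A minimal usage sketch (not compiled in; register and label indices are
   hypothetical): branch when a 64-bit GPR is zero.  ARM64 gets a single cbz
   (imm19, +/-1MB range), AMD64 a test+jz pair. */
#if 0
static uint32_t iemNativeExampleCbz(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                    uint8_t idxGprVal, uint32_t idxLabel)
{
    /* Branch to idxLabel if the full 64-bit idxGprVal is zero. */
    return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGprVal, true /*f64Bit*/, idxLabel);
}
#endif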
6739
6740/**
6741 * Emits code that jumps to a new label if @a iGprSrc is zero.
6742 *
6743 * The operand size is given by @a f64Bit.
6744 */
6745DECL_INLINE_THROW(uint32_t)
6746iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
6747 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6748{
6749 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6750 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
6751}
6752
6753
6754/* if (Gpr1 != 0) Jmp idxLabel; */
6755
6756/**
6757 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6758 *
6759 * The operand size is given by @a f64Bit.
6760 */
6761DECL_FORCE_INLINE_THROW(uint32_t)
6762iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6763 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6764{
6765 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
6766 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
6767}
6768
6769
6770/**
6771 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6772 *
6773 * The operand size is given by @a f64Bit.
6774 */
6775DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6776 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6777{
6778 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
6779}
6780
6781
6782/**
6783 * Emits code that jumps to a new label if @a iGprSrc is not zero.
6784 *
6785 * The operand size is given by @a f64Bit.
6786 */
6787DECL_INLINE_THROW(uint32_t)
6788iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
6789 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6790{
6791 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6792 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
6793}
6794
6795
6796/* if (Gpr1 != Gpr2) Jmp idxLabel; */
6797
6798/**
6799 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
6800 * differ.
6801 */
6802DECL_INLINE_THROW(uint32_t)
6803iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6804 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
6805{
6806 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
6807 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6808 return off;
6809}
6810
6811
6812/**
6813 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
6814 */
6815DECL_INLINE_THROW(uint32_t)
6816iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6817 uint8_t iGprLeft, uint8_t iGprRight,
6818 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6819{
6820 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6821 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
6822}
6823
6824
6825/* if (Gpr != Imm) Jmp idxLabel; */
6826
6827/**
6828 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
6829 */
6830DECL_INLINE_THROW(uint32_t)
6831iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6832 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
6833{
6834 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
6835 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6836 return off;
6837}
6838
6839
6840/**
6841 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
6842 */
6843DECL_INLINE_THROW(uint32_t)
6844iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6845 uint8_t iGprSrc, uint64_t uImm,
6846 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6847{
6848 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6849 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6850}
6851
6852
6853/**
6854 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
6855 * @a uImm.
6856 */
6857DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6858 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
6859{
6860 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
6861 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6862 return off;
6863}
6864
6865
6866/**
6867 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
6868 * @a uImm.
6869 */
6870DECL_INLINE_THROW(uint32_t)
6871iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6872 uint8_t iGprSrc, uint32_t uImm,
6873 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6874{
6875 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6876 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6877}
6878
6879
6880/**
6881 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
6882 * @a uImm.
6883 */
6884DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6885 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
6886{
6887 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
6888 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6889 return off;
6890}
6891
6892
6893/**
6894 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
6895 * @a uImm.
6896 */
6897DECL_INLINE_THROW(uint32_t)
6898iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6899 uint8_t iGprSrc, uint16_t uImm,
6900 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6901{
6902 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6903 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6904}
6905
6906
6907/* if (Gpr == Imm) Jmp idxLabel; */
6908
6909/**
6910 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
6911 */
6912DECL_INLINE_THROW(uint32_t)
6913iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6914 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
6915{
6916 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
6917 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6918 return off;
6919}
6920
6921
6922/**
6923 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
6924 */
6925DECL_INLINE_THROW(uint32_t)
6926iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
6927 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6928{
6929 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6930 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6931}
6932
6933
6934/**
6935 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
6936 */
6937DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6938 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
6939{
6940 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
6941 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6942 return off;
6943}
6944
6945
6946/**
6947 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
6948 */
6949DECL_INLINE_THROW(uint32_t)
6950iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
6951 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6952{
6953 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6954 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6955}
6956
6957
6958/**
6959 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
6960 *
6961 * @note ARM64: Helper register is required (idxTmpReg).
6962 */
6963DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6964 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
6965 uint8_t idxTmpReg = UINT8_MAX)
6966{
6967 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
6968 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6969 return off;
6970}
6971
6972
6973/**
6974 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
6975 *
6976 * @note ARM64: Helper register is required (idxTmpReg).
6977 */
6978DECL_INLINE_THROW(uint32_t)
6979iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
6980 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
6981 uint8_t idxTmpReg = UINT8_MAX)
6982{
6983 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6984 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
6985}
6986
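/* A minimal usage sketch (not compiled in; the selector register, the
   constant and the tail label are hypothetical): compare a 16-bit selector
   against a constant and branch on mismatch. */
#if 0
static uint32_t iemNativeExampleSelCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                         uint8_t idxGprSel, uint32_t idxLabelRaiseGp0)
{
    /* Branch to idxLabelRaiseGp0 if the low 16 bits of idxGprSel != 0x0008. */
    return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, idxGprSel,
                                                            0x0008 /*uImm*/, idxLabelRaiseGp0);
}
#endif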
6987
6988/*********************************************************************************************************************************
6989* Calls. *
6990*********************************************************************************************************************************/
6991
6992/**
6993 * Emits a call to a 64-bit address.
6994 */
6995DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
6996{
6997#ifdef RT_ARCH_AMD64
6998 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
6999
7000 /* call rax */
7001 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7002 pbCodeBuf[off++] = 0xff;
7003 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7004
7005#elif defined(RT_ARCH_ARM64)
7006 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7007
7008 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7009 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7010
7011#else
7012# error "port me"
7013#endif
7014 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7015 return off;
7016}
7017
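/* A minimal usage sketch (not compiled in): calling a helper that only takes
   pVCpu, as most tail-label helpers do.  The helper name is hypothetical;
   IEMNATIVE_CALL_ARG0_GREG and IEMNATIVE_REG_FIXED_PVMCPU are assumed from
   IEMN8veRecompiler.h. */
#if 0
uint64_t iemNativeExampleHlp(PVMCPUCC pVCpu); /* hypothetical helper */

static uint32_t iemNativeExampleEmitCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Load the first ABI argument register with pVCpu, then call. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    return iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeExampleHlp);
}
#endif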
7018
7019/**
7020 * Emits code to load a stack variable into an argument GPR.
7021 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7022 */
7023DECL_FORCE_INLINE_THROW(uint32_t)
7024iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7025 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7026 bool fSpilledVarsInVolatileRegs = false)
7027{
7028 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7029 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7030 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7031
7032 uint8_t const idxRegVar = pVar->idxReg;
7033 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7034 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7035 || !fSpilledVarsInVolatileRegs ))
7036 {
7037 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7038 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7039 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7040 if (!offAddend)
7041 {
7042 if (idxRegArg != idxRegVar)
7043 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7044 }
7045 else
7046 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7047 }
7048 else
7049 {
7050 uint8_t const idxStackSlot = pVar->idxStackSlot;
7051 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7052 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7053 if (offAddend)
7054 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7055 }
7056 return off;
7057}
7058
7059
7060/**
7061 * Emits code to load a stack or immediate variable value into an argument GPR,
7062 * optionally with an addend.
7063 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7064 */
7065DECL_FORCE_INLINE_THROW(uint32_t)
7066iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7067 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7068 bool fSpilledVarsInVolatileRegs = false)
7069{
7070 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7071 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7072 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7073 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7074 else
7075 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7076 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7077 return off;
7078}
7079
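/* A minimal usage sketch (not compiled in; idxVarEffAddr is a hypothetical
   variable index, IEMNATIVE_CALL_ARG1_GREG is assumed from
   IEMN8veRecompiler.h): marshal a variable into the second call argument
   register, adding 1 while loading it. */
#if 0
static uint32_t iemNativeExampleLoadArg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr)
{
    /* Immediate variables get a direct load; stack variables come from their register or BP slot. */
    return iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
                                                     idxVarEffAddr, 1 /*offAddend*/);
}
#endif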
7080
7081/**
7082 * Emits code to load the variable address into an argument GPR.
7083 *
7084 * This only works for uninitialized and stack variables.
7085 */
7086DECL_FORCE_INLINE_THROW(uint32_t)
7087iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7088 bool fFlushShadows)
7089{
7090 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7091 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7092 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7093 || pVar->enmKind == kIemNativeVarKind_Stack,
7094 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7095
7096 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7097 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7098
7099 uint8_t const idxRegVar = pVar->idxReg;
7100 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7101 {
7102 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7103 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7104 Assert(pVar->idxReg == UINT8_MAX);
7105 }
7106 Assert( pVar->idxStackSlot != UINT8_MAX
7107 && pVar->idxReg == UINT8_MAX);
7108
7109 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7110}
7111
7112
7113#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7114
7115/**
7116 * Emits a gprdst = ~gprsrc store.
7117 */
7118DECL_FORCE_INLINE_THROW(uint32_t)
7119iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7120{
7121#ifdef RT_ARCH_AMD64
7122 if (iGprDst != iGprSrc)
7123 {
7124 /* mov gprdst, gprsrc. */
7125 if (f64Bit)
7126 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7127 else
7128 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7129 }
7130
7131 /* not gprdst */
7132 if (f64Bit || iGprDst >= 8)
7133 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7134 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7135 pCodeBuf[off++] = 0xf7;
7136 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7137#elif defined(RT_ARCH_ARM64)
7138 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7139#else
7140# error "port me"
7141#endif
7142 return off;
7143}
7144
7145
7146/**
7147 * Emits a gprdst = ~gprsrc store.
7148 */
7149DECL_INLINE_THROW(uint32_t)
7150iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7151{
7152#ifdef RT_ARCH_AMD64
7153 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7154#elif defined(RT_ARCH_ARM64)
7155 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7156#else
7157# error "port me"
7158#endif
7159 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7160 return off;
7161}
7162
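/* A minimal usage sketch (not compiled in; register indices are
   hypothetical): invert the low 32 bits of a value.  On both hosts the
   destination's bits 32:63 end up cleared for the 32-bit variant. */
#if 0
static uint32_t iemNativeExampleInvBits(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                        uint8_t idxGprDst, uint8_t idxGprSrc)
{
    /* idxGprDst = ~idxGprSrc (32-bit); mov+not on AMD64, a single orn on ARM64. */
    return iemNativeEmitInvBitsGpr(pReNative, off, idxGprDst, idxGprSrc, false /*f64Bit*/);
}
#endif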
7163
7164/**
7165 * Emits a 128-bit vector register store to a VCpu value.
7166 */
7167DECL_FORCE_INLINE_THROW(uint32_t)
7168iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7169{
7170#ifdef RT_ARCH_AMD64
7171 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7172    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7173 if (iVecReg >= 8)
7174 pCodeBuf[off++] = X86_OP_REX_R;
7175 pCodeBuf[off++] = 0x0f;
7176 pCodeBuf[off++] = 0x7f;
7177 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7178#elif defined(RT_ARCH_ARM64)
7179 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7180
7181#else
7182# error "port me"
7183#endif
7184 return off;
7185}
7186
7187
7188/**
7189 * Emits a 128-bit vector register load of a VCpu value.
7190 * Emits a 128-bit vector register store to a VCpu value.
7191DECL_INLINE_THROW(uint32_t)
7192iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7193{
7194#ifdef RT_ARCH_AMD64
7195 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7196#elif defined(RT_ARCH_ARM64)
7197 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7198#else
7199# error "port me"
7200#endif
7201 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7202 return off;
7203}
7204
7205
7206/**
7207 * Emits a high 128-bit vector register store to a VCpu value.
7208 */
7209DECL_FORCE_INLINE_THROW(uint32_t)
7210iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7211{
7212#ifdef RT_ARCH_AMD64
7213 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7214 pCodeBuf[off++] = X86_OP_VEX3;
7215 if (iVecReg >= 8)
7216 pCodeBuf[off++] = 0x63;
7217 else
7218 pCodeBuf[off++] = 0xe3;
7219 pCodeBuf[off++] = 0x7d;
7220 pCodeBuf[off++] = 0x39;
7221 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7222 pCodeBuf[off++] = 0x01; /* Immediate */
7223#elif defined(RT_ARCH_ARM64)
7224 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7225#else
7226# error "port me"
7227#endif
7228 return off;
7229}
7230
7231
7232/**
7233 * Emits a high 128-bit vector register store to a VCpu value.
7234 */
7235DECL_INLINE_THROW(uint32_t)
7236iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7237{
7238#ifdef RT_ARCH_AMD64
7239 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7240#elif defined(RT_ARCH_ARM64)
7241 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7242 Assert(!(iVecReg & 0x1));
7243 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7244#else
7245# error "port me"
7246#endif
7247 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7248 return off;
7249}
7250
7251
7252/**
7253 * Emits a 128-bit vector register load of a VCpu value.
7254 */
7255DECL_FORCE_INLINE_THROW(uint32_t)
7256iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7257{
7258#ifdef RT_ARCH_AMD64
7259 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
7260 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7261 if (iVecReg >= 8)
7262 pCodeBuf[off++] = X86_OP_REX_R;
7263 pCodeBuf[off++] = 0x0f;
7264 pCodeBuf[off++] = 0x6f;
7265 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7266#elif defined(RT_ARCH_ARM64)
7267 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7268
7269#else
7270# error "port me"
7271#endif
7272 return off;
7273}
7274
7275
7276/**
7277 * Emits a 128-bit vector register load of a VCpu value.
7278 */
7279DECL_INLINE_THROW(uint32_t)
7280iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7281{
7282#ifdef RT_ARCH_AMD64
7283 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7284#elif defined(RT_ARCH_ARM64)
7285 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7286#else
7287# error "port me"
7288#endif
7289 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7290 return off;
7291}
7292
7293
7294/**
7295 * Emits a high 128-bit vector register load of a VCpu value.
7296 */
7297DECL_FORCE_INLINE_THROW(uint32_t)
7298iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7299{
7300#ifdef RT_ARCH_AMD64
7301 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
7302 pCodeBuf[off++] = X86_OP_VEX3;
7303 if (iVecReg >= 8)
7304 pCodeBuf[off++] = 0x63;
7305 else
7306 pCodeBuf[off++] = 0xe3;
7307 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
7308 pCodeBuf[off++] = 0x38;
7309 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7310 pCodeBuf[off++] = 0x01; /* Immediate */
7311#elif defined(RT_ARCH_ARM64)
7312 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7313#else
7314# error "port me"
7315#endif
7316 return off;
7317}
7318
7319
7320/**
7321 * Emits a high 128-bit vector register load of a VCpu value.
7322 */
7323DECL_INLINE_THROW(uint32_t)
7324iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7325{
7326#ifdef RT_ARCH_AMD64
7327    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7328#elif defined(RT_ARCH_ARM64)
7329 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7330 Assert(!(iVecReg & 0x1));
7331    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7332#else
7333# error "port me"
7334#endif
7335 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7336 return off;
7337}
7338
7339
7340/**
7341 * Emits a vecdst = vecsrc load.
7342 */
7343DECL_FORCE_INLINE(uint32_t)
7344iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7345{
7346#ifdef RT_ARCH_AMD64
7347 /* movdqu vecdst, vecsrc */
7348 pCodeBuf[off++] = 0xf3;
7349
7350 if ((iVecRegDst | iVecRegSrc) >= 8)
7351 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
7352 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
7353 : X86_OP_REX_R;
7354 pCodeBuf[off++] = 0x0f;
7355 pCodeBuf[off++] = 0x6f;
7356 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7357
7358#elif defined(RT_ARCH_ARM64)
7359 /* mov dst, src; alias for: orr dst, src, src */
7360 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
7361
7362#else
7363# error "port me"
7364#endif
7365 return off;
7366}
7367
7368
7369/**
7370 * Emits a vecdst = vecsrc load, 128-bit.
7371 */
7372DECL_INLINE_THROW(uint32_t)
7373iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7374{
7375#ifdef RT_ARCH_AMD64
7376 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
7377#elif defined(RT_ARCH_ARM64)
7378 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
7379#else
7380# error "port me"
7381#endif
7382 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7383 return off;
7384}
7385
7386
7387/**
7388 * Emits a vecdst = vecsrc load, 256-bit.
7389 */
7390DECL_INLINE_THROW(uint32_t)
7391iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7392{
7393#ifdef RT_ARCH_AMD64
7394 /* vmovdqa ymm, ymm */
7395 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7396 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
7397 {
7398 pbCodeBuf[off++] = X86_OP_VEX3;
7399 pbCodeBuf[off++] = 0x41;
7400 pbCodeBuf[off++] = 0x7d;
7401 pbCodeBuf[off++] = 0x6f;
7402 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7403 }
7404 else
7405 {
7406 pbCodeBuf[off++] = X86_OP_VEX2;
7407 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
7408 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
7409 pbCodeBuf[off++] = iVecRegSrc >= 8
7410 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
7411 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7412 }
7413#elif defined(RT_ARCH_ARM64)
7414 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7415 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
7416 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
7417 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
7418#else
7419# error "port me"
7420#endif
7421 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7422 return off;
7423}
7424
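/* A minimal usage sketch (not compiled in; the host SIMD register indices
   are hypothetical): copy a 256-bit value.  On ARM64 both operands must be
   the even halves of adjacent 128-bit register pairs. */
#if 0
static uint32_t iemNativeExampleCopyU256(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Copy pair 2/3 into pair 0/1 (one vmovdqa ymm on AMD64, two orr moves on ARM64). */
    return iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, 0 /*iVecRegDst*/, 2 /*iVecRegSrc*/);
}
#endif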
7425
7426/**
7427 * Emits a gprdst = vecsrc[x] load, 64-bit.
7428 */
7429DECL_FORCE_INLINE(uint32_t)
7430iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7431{
7432#ifdef RT_ARCH_AMD64
7433 if (iQWord >= 2)
7434 {
7435 /** @todo Currently not used. */
7436 AssertReleaseFailed();
7437 }
7438 else
7439 {
7440 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
7441 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7442 pCodeBuf[off++] = X86_OP_REX_W
7443 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7444 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7445 pCodeBuf[off++] = 0x0f;
7446 pCodeBuf[off++] = 0x3a;
7447 pCodeBuf[off++] = 0x16;
7448 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7449 pCodeBuf[off++] = iQWord;
7450 }
7451#elif defined(RT_ARCH_ARM64)
7452 /* umov gprdst, vecsrc[iQWord] */
7453 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
7454#else
7455# error "port me"
7456#endif
7457 return off;
7458}
7459
7460
7461/**
7462 * Emits a gprdst = vecsrc[x] load, 64-bit.
7463 */
7464DECL_INLINE_THROW(uint32_t)
7465iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7466{
7467 Assert(iQWord <= 3);
7468
7469#ifdef RT_ARCH_AMD64
7470 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
7471#elif defined(RT_ARCH_ARM64)
7472 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7473 Assert(!(iVecRegSrc & 0x1));
7474 /* Need to access the "high" 128-bit vector register. */
7475 if (iQWord >= 2)
7476 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
7477 else
7478 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
7479#else
7480# error "port me"
7481#endif
7482 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7483 return off;
7484}
7485
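/* A minimal usage sketch (not compiled in): fetch the high qword of an XMM
   register into rax.  Assuming host register xmm2 as the source, the AMD64
   encoding is "pextrq rax, xmm2, 1", i.e. 66 48 0F 3A 16 D0 07... -- byte
   sequence 66 48 0F 3A 16 D0 01. */
#if 0
static uint32_t iemNativeExamplePextrq(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* rax = xmm2[qword 1]; umov on ARM64. */
    return iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, X86_GREG_xAX, 2 /*iVecRegSrc*/, 1 /*iQWord*/);
}
#endif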
7486
7487/**
7488 * Emits a gprdst = vecsrc[x] load, 32-bit.
7489 */
7490DECL_FORCE_INLINE(uint32_t)
7491iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7492{
7493#ifdef RT_ARCH_AMD64
7494 if (iDWord >= 4)
7495 {
7496 /** @todo Currently not used. */
7497 AssertReleaseFailed();
7498 }
7499 else
7500 {
7501 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
7502 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7503 if (iGprDst >= 8 || iVecRegSrc >= 8)
7504 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7505 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7506 pCodeBuf[off++] = 0x0f;
7507 pCodeBuf[off++] = 0x3a;
7508 pCodeBuf[off++] = 0x16;
7509 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7510 pCodeBuf[off++] = iDWord;
7511 }
7512#elif defined(RT_ARCH_ARM64)
7513 /* umov gprdst, vecsrc[iDWord] */
7514 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
7515#else
7516# error "port me"
7517#endif
7518 return off;
7519}
7520
7521
7522/**
7523 * Emits a gprdst = vecsrc[x] load, 32-bit.
7524 */
7525DECL_INLINE_THROW(uint32_t)
7526iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7527{
7528 Assert(iDWord <= 7);
7529
7530#ifdef RT_ARCH_AMD64
7531 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
7532#elif defined(RT_ARCH_ARM64)
7533 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7534 Assert(!(iVecRegSrc & 0x1));
7535 /* Need to access the "high" 128-bit vector register. */
7536 if (iDWord >= 4)
7537 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
7538 else
7539 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
7540#else
7541# error "port me"
7542#endif
7543 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7544 return off;
7545}
7546
7547
7548/**
7549 * Emits a gprdst = vecsrc[x] load, 16-bit.
7550 */
7551DECL_FORCE_INLINE(uint32_t)
7552iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
7553{
7554#ifdef RT_ARCH_AMD64
7555 if (iWord >= 8)
7556 {
7557 /** @todo Currently not used. */
7558 AssertReleaseFailed();
7559 }
7560 else
7561 {
7562 /* pextrw gpr, vecsrc, #iWord */
7563 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7564 if (iGprDst >= 8 || iVecRegSrc >= 8)
7565 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
7566 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
7567 pCodeBuf[off++] = 0x0f;
7568 pCodeBuf[off++] = 0xc5;
7569 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
7570 pCodeBuf[off++] = iWord;
7571 }
7572#elif defined(RT_ARCH_ARM64)
7573 /* umov gprdst, vecsrc[iWord] */
7574 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
7575#else
7576# error "port me"
7577#endif
7578 return off;
7579}
7580
7581
7582/**
7583 * Emits a gprdst = vecsrc[x] load, 16-bit.
7584 */
7585DECL_INLINE_THROW(uint32_t)
7586iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
7587{
7588    Assert(iWord <= 15);
7589
7590#ifdef RT_ARCH_AMD64
7591 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
7592#elif defined(RT_ARCH_ARM64)
7593 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7594 Assert(!(iVecRegSrc & 0x1));
7595 /* Need to access the "high" 128-bit vector register. */
7596 if (iWord >= 8)
7597 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
7598 else
7599 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
7600#else
7601# error "port me"
7602#endif
7603 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7604 return off;
7605}
7606
7607
7608/**
7609 * Emits a gprdst = vecsrc[x] load, 8-bit.
7610 */
7611DECL_FORCE_INLINE(uint32_t)
7612iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
7613{
7614#ifdef RT_ARCH_AMD64
7615 if (iByte >= 16)
7616 {
7617 /** @todo Currently not used. */
7618 AssertReleaseFailed();
7619 }
7620 else
7621 {
7622 /* pextrb gpr, vecsrc, #iByte */
7623 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7624 if (iGprDst >= 8 || iVecRegSrc >= 8)
7625 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7626 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7627 pCodeBuf[off++] = 0x0f;
7628 pCodeBuf[off++] = 0x3a;
7629 pCodeBuf[off++] = 0x14;
7630 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7631 pCodeBuf[off++] = iByte;
7632 }
7633#elif defined(RT_ARCH_ARM64)
7634 /* umov gprdst, vecsrc[iByte] */
7635 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
7636#else
7637# error "port me"
7638#endif
7639 return off;
7640}
7641
7642
7643/**
7644 * Emits a gprdst = vecsrc[x] load, 8-bit.
7645 */
7646DECL_INLINE_THROW(uint32_t)
7647iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
7648{
7649    Assert(iByte <= 31);
7650
7651#ifdef RT_ARCH_AMD64
7652    off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
7653#elif defined(RT_ARCH_ARM64)
7654 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7655 Assert(!(iVecRegSrc & 0x1));
7656 /* Need to access the "high" 128-bit vector register. */
7657 if (iByte >= 16)
7658 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
7659 else
7660 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
7661#else
7662# error "port me"
7663#endif
7664 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7665 return off;
7666}
7667
7668
7669/**
7670 * Emits a vecdst[x] = gprsrc store, 64-bit.
7671 */
7672DECL_FORCE_INLINE(uint32_t)
7673iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
7674{
7675#ifdef RT_ARCH_AMD64
7676 /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */
7677 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7678 pCodeBuf[off++] = X86_OP_REX_W
7679 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
7680 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7681 pCodeBuf[off++] = 0x0f;
7682 pCodeBuf[off++] = 0x3a;
7683 pCodeBuf[off++] = 0x22;
7684 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
7685 pCodeBuf[off++] = iQWord;
7686#elif defined(RT_ARCH_ARM64)
7687 /* ins vecsrc[iQWord], gpr */
7688 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
7689#else
7690# error "port me"
7691#endif
7692 return off;
7693}
7694
7695
7696/**
7697 * Emits a vecdst[x] = gprsrc store, 64-bit.
7698 */
7699DECL_INLINE_THROW(uint32_t)
7700iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
7701{
7702 Assert(iQWord <= 1);
7703
7704#ifdef RT_ARCH_AMD64
7705 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
7706#elif defined(RT_ARCH_ARM64)
7707 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
7708#else
7709# error "port me"
7710#endif
7711 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7712 return off;
7713}
7714
7715
7716/**
7717 * Emits a vecdst[x] = gprsrc store, 32-bit.
7718 */
7719DECL_FORCE_INLINE(uint32_t)
7720iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
7721{
7722#ifdef RT_ARCH_AMD64
7723 /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */
7724 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7725 if (iVecRegDst >= 8 || iGprSrc >= 8)
7726 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
7727 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7728 pCodeBuf[off++] = 0x0f;
7729 pCodeBuf[off++] = 0x3a;
7730 pCodeBuf[off++] = 0x22;
7731 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
7732 pCodeBuf[off++] = iDWord;
7733#elif defined(RT_ARCH_ARM64)
7734 /* ins vecsrc[iDWord], gpr */
7735 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
7736#else
7737# error "port me"
7738#endif
7739 return off;
7740}
7741
7742
7743/**
7744 * Emits a vecdst[x] = gprsrc store, 32-bit.
7745 */
7746DECL_INLINE_THROW(uint32_t)
7747iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
7748{
7749 Assert(iDWord <= 3);
7750
7751#ifdef RT_ARCH_AMD64
7752 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord);
7753#elif defined(RT_ARCH_ARM64)
7754 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
7755#else
7756# error "port me"
7757#endif
7758 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7759 return off;
7760}
7761
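/* A minimal usage sketch (not compiled in; the register choice is a
   hypothetical example): insert eax into dword element 2 of xmm1.  The AMD64
   encoding is "pinsrd xmm1, eax, 2", i.e. bytes 66 0F 3A 22 C8 02. */
#if 0
static uint32_t iemNativeExamplePinsrd(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* xmm1[dword 2] = eax; ins on ARM64. */
    return iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, 1 /*iVecRegDst*/, X86_GREG_xAX, 2 /*iDWord*/);
}
#endif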
7762
7763/**
7764 * Emits a vecdst.au32[iDWord] = 0 store.
7765 */
7766DECL_FORCE_INLINE(uint32_t)
7767iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
7768{
7769 Assert(iDWord <= 7);
7770
7771#ifdef RT_ARCH_AMD64
7772 /*
7773 * xor tmp0, tmp0
7774 * pinsrd xmm, tmp0, iDword
7775 */
7776 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
7777 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7778 pCodeBuf[off++] = 0x33;
7779 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
7780    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
7781#elif defined(RT_ARCH_ARM64)
7782 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7783 Assert(!(iVecReg & 0x1));
7784 /* ins vecsrc[iDWord], wzr */
7785 if (iDWord >= 4)
7786 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
7787 else
7788 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
7789#else
7790# error "port me"
7791#endif
7792 return off;
7793}
7794
7795
7796/**
7797 * Emits a vecdst.au32[iDWord] = 0 store.
7798 */
7799DECL_INLINE_THROW(uint32_t)
7800iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
7801{
7802
7803#ifdef RT_ARCH_AMD64
7804 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
7805#elif defined(RT_ARCH_ARM64)
7806 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
7807#else
7808# error "port me"
7809#endif
7810 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7811 return off;
7812}
7813
7814
7815/**
7816 * Emits a vecdst[0:127] = 0 store.
7817 */
7818DECL_FORCE_INLINE(uint32_t)
7819iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
7820{
7821#ifdef RT_ARCH_AMD64
7822 /* pxor xmm, xmm */
7823 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7824 if (iVecReg >= 8)
7825 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
7826 pCodeBuf[off++] = 0x0f;
7827 pCodeBuf[off++] = 0xef;
7828 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
7829#elif defined(RT_ARCH_ARM64)
7830 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7831 Assert(!(iVecReg & 0x1));
7832 /* eor vecreg, vecreg, vecreg */
7833 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
7834#else
7835# error "port me"
7836#endif
7837 return off;
7838}
7839
7840
7841/**
7842 * Emits a vecdst[0:127] = 0 store.
7843 */
7844DECL_INLINE_THROW(uint32_t)
7845iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
7846{
7847#ifdef RT_ARCH_AMD64
7848 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
7849#elif defined(RT_ARCH_ARM64)
7850 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
7851#else
7852# error "port me"
7853#endif
7854 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7855 return off;
7856}
7857
7858
7859/**
7860 * Emits a vecdst[128:255] = 0 store.
7861 */
7862DECL_FORCE_INLINE(uint32_t)
7863iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
7864{
7865#ifdef RT_ARCH_AMD64
7866 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
7867 if (iVecReg < 8)
7868 {
7869 pCodeBuf[off++] = X86_OP_VEX2;
7870 pCodeBuf[off++] = 0xf9;
7871 }
7872 else
7873 {
7874 pCodeBuf[off++] = X86_OP_VEX3;
7875 pCodeBuf[off++] = 0x41;
7876 pCodeBuf[off++] = 0x79;
7877 }
7878 pCodeBuf[off++] = 0x6f;
7879 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
7880#elif defined(RT_ARCH_ARM64)
7881 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7882 Assert(!(iVecReg & 0x1));
7883 /* eor vecreg, vecreg, vecreg */
7884 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
7885#else
7886# error "port me"
7887#endif
7888 return off;
7889}
7890
7891
7892/**
7893 * Emits a vecdst[128:255] = 0 store.
7894 */
7895DECL_INLINE_THROW(uint32_t)
7896iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
7897{
7898#ifdef RT_ARCH_AMD64
7899 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
7900#elif defined(RT_ARCH_ARM64)
7901 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
7902#else
7903# error "port me"
7904#endif
7905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7906 return off;
7907}


/**
 * Emits a vecdst[0:255] = 0 store.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    /* vpxor ymm, ymm, ymm */
    if (iVecReg < 8)
    {
        pCodeBuf[off++] = X86_OP_VEX2;
        pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
    }
    else
    {
        pCodeBuf[off++] = X86_OP_VEX3;
        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01; /* ~X=1, opcode map 1; ~R & ~B zero => reg 8..15 */
        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
    }
    pCodeBuf[off++] = 0xef;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecReg & 0x1));
    /* eor vecreg,   vecreg,   vecreg
       eor vecreg+1, vecreg+1, vecreg+1 */
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg,     iVecReg,     iVecReg);
    pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst[0:255] = 0 store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 8-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /* pinsrb vecdst, gpr, #0 (ASSUMES SSE4.1) */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x20;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (   iVecRegDst >= 8
                        ? 0
                        : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x78;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 8-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 16-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /* pinsrw vecdst, gpr, #0 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xc4;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (   iVecRegDst >= 8
                        ? 0
                        : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x79;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 16-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 32-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
     *        vbroadcast needs a memory operand or another xmm register to work... */

    /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iVecRegDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                        | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (   iVecRegDst >= 8
                        ? 0
                        : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x58;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 32-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 64-bit.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
     *        vbroadcast needs a memory operand or another xmm register to work... */

    /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = X86_OP_REX_W
                    | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
                    | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0x3a;
    pCodeBuf[off++] = 0x22;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    pCodeBuf[off++] = 0x00;

    /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
    pCodeBuf[off++] = X86_OP_VEX3;
    pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
                    | 0x02 /* opcode map. */
                    | (   iVecRegDst >= 8
                        ? 0
                        : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
    pCodeBuf[off++] = 0x59;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
#elif defined(RT_ARCH_ARM64)
    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    Assert(!(iVecRegDst & 0x1) || !f256Bit);

    /* dup vecdst, gpr */
    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
    if (f256Bit)
        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a vecdst = gprsrc broadcast, 64-bit.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */

/** @} */

#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */