VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h @ 103916

Last change on this file since 103916 was 103916, checked in by vboxsync, 11 months ago:

VMM/IEM: Convert iemMemStoreDataU256NoAc()/iemMemStoreDataU256NoAcJmp() to use the memory RW template and implement native emitters for IEM_MC_FETCH_MEM_U256_NO_AC()/IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(), bugref:10614

/* $Id: IEMN8veRecompilerEmit.h 103916 2024-03-19 13:11:09Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline    Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = 0xd503201f;

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

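/* Example (illustrative): iemNativeEmitMarker(pReNative, off, 0x1234) on AMD64
   emits the 7 bytes 0f 1f 05 34 12 00 00, which disassemble as
   'nop dword [rip+0x1234]', so the uInfo value is easy to spot when scanning
   recompiled code. */
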
/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "error"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "error"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */

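/* Example (illustrative): with iGprBase = RSP this helper always adds a SIB
   byte, so with iGprReg == 0 and offDisp == 0 it appends modrm 04 + sib 24
   ([rsp], no displacement), offDisp == 0x40 takes the disp8 form (mod=1), and
   offDisp == 0x1000 the disp32 form (mod=2); the opcode and any REX prefix
   come from the caller. */
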
/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 10 instruction bytes.
 *      - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = (uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

    /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
     *        clang 12.x does that, only to use the 'x' version for the
     *        addressing in the following ldr. */

#else
# error "port me"
#endif
    return off;
}

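/* Example (illustrative): iemNativeEmitLoadGprImmEx(pCodeBuf, off, 0 /*x0*/,
   UINT64_C(0x00c0ffee00001234)) takes the movz path (one zero half-word, no
   0xffff ones) and emits three instructions, skipping the zero half-word:
        movz x0, #0x1234
        movk x0, #0xffee, lsl #32
        movk x0, #0x00c0, lsl #48
   whereas UINT64_C(0xffffffffffff1234) takes the movn path and needs just one:
        movn x0, #0xedcb
   since ~UINT64_C(0xedcb) == 0xffffffffffff1234. */
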
/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 6 instruction bytes.
 *      - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32,                    0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16,             1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32,                   0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}

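/* Example (illustrative): uImm32 = 0xffff1234 hits the '(uImm32 >> 16) ==
   0xffff' case and emits a single 'movn w0, #0xedcb' (since ~0xedcb ==
   0xffff1234 in 32 bits), while a value like 0x12345678 matches none of the
   single-instruction cases and needs the movz + movk pair. */
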
/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

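/* Example (illustrative): on AMD64 the plain REX prefix emitted for iGpr 4..7
   selects SPL/BPL/SIL/DIL rather than the legacy AH/CH/DH/BH encodings, so
   iGpr == 6 yields 40 b6 imm8, i.e. 'mov sil, imm8'. */
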
#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a temporary
 *       register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */

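/* Illustrative summary of the ARM64 addressing tiers above: an 8-byte field at
   offVCpu 0x2a0 encodes directly as 'ldr x0, [x28, #0x2a0]' (scaled uimm12);
   a field out of that 32 KiB window but within 32 KiB of cpum.GstCtx goes via
   the fixed PCPUMCTX register; anything else loads offVCpu into a temporary
   and uses the register-indexed form. (Register names here assume the usual
   fixed-register assignments, e.g. x28 = pVCpu.) */
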
/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: idxTmp1 is always required! Whether idxTmp2 is needed depends
 *       on whether the offset can be encoded as an immediate or not. The
 *       @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same
 *       from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load effective address to a GPR of a VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                         offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        /* offVCpu is relative to VMCPU, so add the pVCpu base register here. */
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


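/* Usage sketch (illustrative; the statistics member name is hypothetical):
       off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
                 iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatExample));
   Passing the member pointer keeps the offset computation type-checked,
   unlike a raw RT_UOFFSETOF expression. */
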
/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16 bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


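/* Example (illustrative): for a 32-bit counter whose offset lies outside both
   scaled-uimm12 windows, say offVCpu = 0x20000 (assuming it is also out of
   reach of cpum.GstCtx), 0x20000 / 4 == 0x8000 still fits a movz, so the
   register-indexed path with 'lsl #2' is used:
        movz wTmp2, #0x8000
        ldr  wTmp1, [xPVCpu, xTmp2, lsl #2]
        add  xTmp1, xTmp1, #1
        str  wTmp1, [xPVCpu, xTmp2, lsl #2]
   (xPVCpu/xTmp* stand for the fixed pVCpu register and the two temporaries). */
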
/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16 bits. */
        uint8_t const         idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const            fShifted    = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* and dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16 bits. */
        uint8_t const         idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const            fShifted    = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


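/* Example (illustrative): fMask = 0x000ffff0 is a contiguous run of set bits,
   so Armv8A64ConvertMask32ToImmRImmS succeeds and the OR/AND above is done
   with a single logical-immediate instruction; 0x80000001 (a run wrapping
   around bit 0) also encodes, whereas a scattered mask like 0x00012345 does
   not and costs a temporary register holding the constant. */
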
/**
 * Emits a gprdst = gprsrc load.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_W | X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_W | X86_OP_REX_R;
    else
        pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst, src;   alias for: orr dst, xzr, src */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst32, src32;   alias for: orr dst32, wzr, src32 */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movzx Gv,Ew */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb7;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* and gprdst, gprsrc, #0xffff */
# if 1
    Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
# else
    Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[7:0] load.
 * @note Bits 63 thru 8 are cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movzx Gv,Eb */
    if (iGprDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    else if (iGprSrc >= 4)
        pCodeBuf[off++] = X86_OP_REX;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb6;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* and gprdst, gprsrc, #0xff */
    Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[7:0] load.
 * @note Bits 63 thru 8 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


1556/**
1557 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1558 * @note Bits 63 thru 8 are cleared.
1559 */
1560DECL_INLINE_THROW(uint32_t)
1561iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1562{
1563#ifdef RT_ARCH_AMD64
1564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1565
1566 /* movzx Gv,Ew */
1567 if ((iGprDst | iGprSrc) >= 8)
1568 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1569 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1570 : X86_OP_REX_R;
1571 pbCodeBuf[off++] = 0x0f;
1572 pbCodeBuf[off++] = 0xb7;
1573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1574
1575 /* shr Ev,8 */
1576 if (iGprDst >= 8)
1577 pbCodeBuf[off++] = X86_OP_REX_B;
1578 pbCodeBuf[off++] = 0xc1;
1579 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1580 pbCodeBuf[off++] = 8;
1581
1582#elif defined(RT_ARCH_ARM64)
1583 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1585 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1586
1587#else
1588# error "port me"
1589#endif
1590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1591 return off;
1592}
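/*
 * A minimal usage sketch for the narrowing copies above (illustrative only;
 * the helper name and register numbers are made up): split a 16-bit value in
 * host register 2 into its low byte (register 0) and high byte (register 1),
 * the way an x86 AX -> AL/AH split would be recompiled.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleSplitU16(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeEmitLoadGprFromGpr8(pReNative, off, 0 /*iGprDst*/, 2 /*iGprSrc*/);    /* dst = src & 0xff */
    return iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, 1 /*iGprDst*/, 2 /*iGprSrc*/); /* dst = (src >> 8) & 0xff */
}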
1593
1594
1595/**
1596 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1597 */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1600{
1601#ifdef RT_ARCH_AMD64
1602 /* movsxd r64, r/m32 */
1603 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1604 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1605 pbCodeBuf[off++] = 0x63;
1606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1607
1608#elif defined(RT_ARCH_ARM64)
1609 /* sxtw dst, src */
1610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1611 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1612
1613#else
1614# error "port me"
1615#endif
1616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1617 return off;
1618}
1619
1620
1621/**
1622 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1623 */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1626{
1627#ifdef RT_ARCH_AMD64
1628 /* movsx r64, r/m16 */
1629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xbf;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635#elif defined(RT_ARCH_ARM64)
1636 /* sxth dst, src */
1637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1638 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1639
1640#else
1641# error "port me"
1642#endif
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644 return off;
1645}
1646
1647
1648/**
1649 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1650 */
1651DECL_INLINE_THROW(uint32_t)
1652iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1653{
1654#ifdef RT_ARCH_AMD64
1655 /* movsx r32, r/m16 */
1656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1657 if (iGprDst >= 8 || iGprSrc >= 8)
1658 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1659 pbCodeBuf[off++] = 0x0f;
1660 pbCodeBuf[off++] = 0xbf;
1661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 /* sxth dst32, src */
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1666 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1667
1668#else
1669# error "port me"
1670#endif
1671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1672 return off;
1673}
1674
1675
1676/**
1677 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1678 */
1679DECL_INLINE_THROW(uint32_t)
1680iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /* movsx r64, r/m8 */
1684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1686 pbCodeBuf[off++] = 0x0f;
1687 pbCodeBuf[off++] = 0xbe;
1688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1689
1690#elif defined(RT_ARCH_ARM64)
1691 /* sxtb dst, src */
1692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1693 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1694
1695#else
1696# error "port me"
1697#endif
1698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1699 return off;
1700}
1701
1702
1703/**
1704 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1705 * @note Bits 63 thru 32 are cleared.
1706 */
1707DECL_INLINE_THROW(uint32_t)
1708iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1709{
1710#ifdef RT_ARCH_AMD64
1711 /* movsx r32, r/m8 */
1712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1713 if (iGprDst >= 8 || iGprSrc >= 8)
1714 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1715 else if (iGprSrc >= 4)
1716 pbCodeBuf[off++] = X86_OP_REX;
1717 pbCodeBuf[off++] = 0x0f;
1718 pbCodeBuf[off++] = 0xbe;
1719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 /* sxtb dst32, src32 */
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1724 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1725
1726#else
1727# error "port me"
1728#endif
1729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1730 return off;
1731}
1732
1733
1734/**
1735 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1736 * @note Bits 63 thru 16 are cleared.
1737 */
1738DECL_INLINE_THROW(uint32_t)
1739iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1740{
1741#ifdef RT_ARCH_AMD64
1742 /* movsx r16, r/m8 */
1743 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1744 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1745 if (iGprDst >= 8 || iGprSrc >= 8)
1746 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1747 else if (iGprSrc >= 4)
1748 pbCodeBuf[off++] = X86_OP_REX;
1749 pbCodeBuf[off++] = 0x0f;
1750 pbCodeBuf[off++] = 0xbe;
1751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1752
1753 /* movzx r32, r/m16 */
1754 if (iGprDst >= 8)
1755 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1756 pbCodeBuf[off++] = 0x0f;
1757 pbCodeBuf[off++] = 0xb7;
1758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1759
1760#elif defined(RT_ARCH_ARM64)
1761 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1763 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1764 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1766
1767#else
1768# error "port me"
1769#endif
1770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1771 return off;
1772}
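/*
 * Illustrative sketch of picking the right extension width (the helper name
 * is made up, not part of the original API): recompiling a 'movsx eax, bl'
 * style operation wants the 8-to-32-bit variant, which also clears bits
 * 63:32, whereas the 8-to-64-bit variant fills them with the sign.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleSignExtend8To32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
    return iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, iGprDst, iGprSrc);
}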
1773
1774
1775/**
1776 * Emits a gprdst = gprsrc + addend load.
1777 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1778 */
1779#ifdef RT_ARCH_AMD64
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1782 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1783{
1784 Assert(iAddend != 0);
1785
1786 /* lea gprdst, [gprsrc + iAddend] */
1787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1789 pbCodeBuf[off++] = 0x8d;
1790 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1792 return off;
1793}
1794
1795#elif defined(RT_ARCH_ARM64)
1796DECL_INLINE_THROW(uint32_t)
1797iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1798 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1799{
1800 if ((uint64_t)iAddend < 4096)
1801 {
1802 /* add dst, src, uimm12 */
1803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1805 }
1806 else if ((uint64_t)-iAddend < 4096)
1807 {
1808 /* sub dst, src, uimm12 */
1809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1811 }
1812 else
1813 {
1814 Assert(iGprSrc != iGprDst);
1815 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1818 }
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820 return off;
1821}
1822#else
1823# error "port me"
1824#endif
1825
1826/**
1827 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1828 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1829 */
1830#ifdef RT_ARCH_AMD64
1831DECL_INLINE_THROW(uint32_t)
1832iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1833 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1834#else
1835DECL_INLINE_THROW(uint32_t)
1836iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1837 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1838#endif
1839{
1840 if (iAddend != 0)
1841 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1842 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1843}
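/*
 * Usage sketch (illustrative; the helper name and offset are made up):
 * advance a copied pointer by a structure offset, tolerating a zero addend.
 * Small addends become lea (AMD64) or add/sub with uimm12 (ARM64); larger
 * ARM64 addends fall back to a register add and thus need iGprDst != iGprSrc.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExamplePointerPlusOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
    return iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, iGprDst, iGprSrc, 0x30 /*iAddend*/);
}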
1844
1845
1846/**
1847 * Emits a gprdst = gprsrc32 + addend load.
1848 * @note Bits 63 thru 32 are cleared.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1852 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1853{
1854 Assert(iAddend != 0);
1855
1856#ifdef RT_ARCH_AMD64
1857 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1860 if ((iGprDst | iGprSrc) >= 8)
1861 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1862 pbCodeBuf[off++] = 0x8d;
1863 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1864
1865#elif defined(RT_ARCH_ARM64)
1866 if ((uint32_t)iAddend < 4096)
1867 {
1868 /* add dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1871 }
1872 else if ((uint32_t)-iAddend < 4096)
1873 {
1874 /* sub dst, src, uimm12 */
1875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1877 }
1878 else
1879 {
1880 Assert(iGprSrc != iGprDst);
1881 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1884 }
1885
1886#else
1887# error "port me"
1888#endif
1889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1890 return off;
1891}
1892
1893
1894/**
1895 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1896 */
1897DECL_INLINE_THROW(uint32_t)
1898iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1899 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1900{
1901 if (iAddend != 0)
1902 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1903 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1904}
1905
1906
1907/**
1908 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1909 * destination.
1910 */
1911DECL_FORCE_INLINE(uint32_t)
1912iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1913{
1914#ifdef RT_ARCH_AMD64
1915 /* mov reg16, r/m16 */
1916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1917 if (idxDst >= 8 || idxSrc >= 8)
1918 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1919 pCodeBuf[off++] = 0x8b;
1920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1921
1922#elif defined(RT_ARCH_ARM64)
1923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1924 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1925
1926#else
1927# error "Port me!"
1928#endif
1929 return off;
1930}
1931
1932
1933/**
1934 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1935 * destination.
1936 */
1937DECL_INLINE_THROW(uint32_t)
1938iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1939{
1940#ifdef RT_ARCH_AMD64
1941 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1942#elif defined(RT_ARCH_ARM64)
1943 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1944#else
1945# error "Port me!"
1946#endif
1947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1948 return off;
1949}
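/*
 * Usage sketch (illustrative; the helper name is made up): recompile a
 * 16-bit register write such as 'mov ax, dx', where bits 63:16 of the
 * destination must survive.  This is the counterpart to
 * iemNativeEmitLoadGprFromGpr16, which clears them instead.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleWriteLow16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
{
    return iemNativeEmitGprMergeInGpr16(pReNative, off, idxDst, idxSrc);
}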
1950
1951
1952#ifdef RT_ARCH_AMD64
1953/**
1954 * Common bit of iemNativeEmitLoadGprByBp and friends.
1955 */
1956DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1957 PIEMRECOMPILERSTATE pReNativeAssert)
1958{
1959 if (offDisp < 128 && offDisp >= -128)
1960 {
1961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1962 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1963 }
1964 else
1965 {
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1967 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1968 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1969 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1970 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1971 }
1972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1973 return off;
1974}
1975#elif defined(RT_ARCH_ARM64)
1976/**
1977 * Common bit of iemNativeEmitLoadGprByBp and friends.
1978 */
1979DECL_FORCE_INLINE_THROW(uint32_t)
1980iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1981 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1982{
1983 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1984 {
1985 /* str w/ unsigned imm12 (scaled) */
1986 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1988 }
1989 else if (offDisp >= -256 && offDisp <= 255)
1990 {
1991 /* stur w/ signed imm9 (unscaled) */
1992 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1993 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1994 }
1995 else
1996 {
1997 /* Use temporary indexing register. */
1998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2001 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2002 }
2003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2004 return off;
2005}
2006#endif
2007
2008
2009/**
2010 * Emits a 64-bit GPR load instruction with a BP-relative source address.
2011 */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2014{
2015#ifdef RT_ARCH_AMD64
2016 /* mov gprdst, qword [rbp + offDisp] */
2017 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2018 if (iGprDst < 8)
2019 pbCodeBuf[off++] = X86_OP_REX_W;
2020 else
2021 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2022 pbCodeBuf[off++] = 0x8b;
2023 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2024
2025#elif defined(RT_ARCH_ARM64)
2026 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2027
2028#else
2029# error "port me"
2030#endif
2031}
2032
2033
2034/**
2035 * Emits a 32-bit GPR load instruction with a BP-relative source address.
2036 * @note Bits 63 thru 32 of the GPR will be cleared.
2037 */
2038DECL_INLINE_THROW(uint32_t)
2039iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2040{
2041#ifdef RT_ARCH_AMD64
2042 /* mov gprdst, dword [rbp + offDisp] */
2043 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2044 if (iGprDst >= 8)
2045 pbCodeBuf[off++] = X86_OP_REX_R;
2046 pbCodeBuf[off++] = 0x8b;
2047 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2048
2049#elif defined(RT_ARCH_ARM64)
2050 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2051
2052#else
2053# error "port me"
2054#endif
2055}
2056
2057
2058/**
2059 * Emits a 16-bit GPR load instruction with a BP-relative source address.
2060 * @note Bits 63 thru 16 of the GPR will be cleared.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2064{
2065#ifdef RT_ARCH_AMD64
2066 /* movzx gprdst, word [rbp + offDisp] */
2067 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2068 if (iGprDst >= 8)
2069 pbCodeBuf[off++] = X86_OP_REX_R;
2070 pbCodeBuf[off++] = 0x0f;
2071 pbCodeBuf[off++] = 0xb7;
2072 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2073
2074#elif defined(RT_ARCH_ARM64)
2075 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2076
2077#else
2078# error "port me"
2079#endif
2080}
2081
2082
2083/**
2084 * Emits an 8-bit GPR load instruction with a BP-relative source address.
2085 * @note Bits 63 thru 8 of the GPR will be cleared.
2086 */
2087DECL_INLINE_THROW(uint32_t)
2088iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2089{
2090#ifdef RT_ARCH_AMD64
2091 /* movzx gprdst, byte [rbp + offDisp] */
2092 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2093 if (iGprDst >= 8)
2094 pbCodeBuf[off++] = X86_OP_REX_R;
2095 pbCodeBuf[off++] = 0x0f;
2096 pbCodeBuf[off++] = 0xb6;
2097 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2098
2099#elif defined(RT_ARCH_ARM64)
2100 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2101
2102#else
2103# error "port me"
2104#endif
2105}
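/*
 * Usage sketch for the BP-relative loads (illustrative; the helper name and
 * the -16 slot offset are made up): reload a 64-bit value that was spilled
 * to the stack frame, e.g. across a helper call.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleReloadSpilledU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
{
    return iemNativeEmitLoadGprByBp(pReNative, off, iGprDst, -16 /*offDisp*/);
}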
2106
2107
2108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2109/**
2110 * Emits a 128-bit vector register load instruction with a BP-relative source address.
2111 */
2112DECL_FORCE_INLINE_THROW(uint32_t)
2113iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2114{
2115#ifdef RT_ARCH_AMD64
2116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2117
2118 /* movdqu reg128, mem128 */
2119 pbCodeBuf[off++] = 0xf3;
2120 if (iVecRegDst >= 8)
2121 pbCodeBuf[off++] = X86_OP_REX_R;
2122 pbCodeBuf[off++] = 0x0f;
2123 pbCodeBuf[off++] = 0x6f;
2124 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2125#elif defined(RT_ARCH_ARM64)
2126 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2127#else
2128# error "port me"
2129#endif
2130}
2131
2132
2133/**
2134 * Emits a 256-bit vector register load instruction with a BP-relative source address.
2135 */
2136DECL_FORCE_INLINE_THROW(uint32_t)
2137iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2138{
2139#ifdef RT_ARCH_AMD64
2140 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2141
2142 /* vmovdqu reg256, mem256 */
2143 pbCodeBuf[off++] = X86_OP_VEX2;
2144 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2145 pbCodeBuf[off++] = 0x6f;
2146 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2147#elif defined(RT_ARCH_ARM64)
2148 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2149 Assert(!(iVecRegDst & 0x1));
2150 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2151 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2152#else
2153# error "port me"
2154#endif
2155}
2156
2157#endif
2158
2159
2160/**
2161 * Emits a load effective address into a GPR with a BP-relative source address.
2162 */
2163DECL_INLINE_THROW(uint32_t)
2164iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2165{
2166#ifdef RT_ARCH_AMD64
2167 /* lea gprdst, [rbp + offDisp] */
2168 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2169 if (iGprDst < 8)
2170 pbCodeBuf[off++] = X86_OP_REX_W;
2171 else
2172 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2173 pbCodeBuf[off++] = 0x8d;
2174 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2175
2176#elif defined(RT_ARCH_ARM64)
2177 if ((uint32_t)offDisp < (unsigned)_4K)
2178 {
2179 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2181 }
2182 else if ((uint32_t)-offDisp < (unsigned)_4K)
2183 {
2184 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2185 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2186 }
2187 else
2188 {
2189 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2190 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2191 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2192 if (offDisp >= 0)
2193 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2194 else
2195 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2196 }
2197
2198#else
2199# error "port me"
2200#endif
2201
2202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2203 return off;
2204}
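/*
 * Usage sketch (illustrative; the helper name and slot offset are made up):
 * pass the address of a stack-frame variable to a helper by materializing
 * rbp/x29 plus a displacement in a GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleAddressOfStackSlot(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
{
    return iemNativeEmitLeaGprByBp(pReNative, off, iGprDst, -32 /*offDisp*/);
}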
2205
2206
2207/**
2208 * Emits a 64-bit GPR store with a BP-relative destination address.
2209 *
2210 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2211 */
2212DECL_INLINE_THROW(uint32_t)
2213iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2214{
2215#ifdef RT_ARCH_AMD64
2216 /* mov qword [rbp + offDisp], gprdst */
2217 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2218 if (iGprSrc < 8)
2219 pbCodeBuf[off++] = X86_OP_REX_W;
2220 else
2221 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2222 pbCodeBuf[off++] = 0x89;
2223 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2224
2225#elif defined(RT_ARCH_ARM64)
2226 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2227 {
2228 /* str w/ unsigned imm12 (scaled) */
2229 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2230 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2231 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2232 }
2233 else if (offDisp >= -256 && offDisp <= 255)
2234 {
2235 /* stur w/ signed imm9 (unscaled) */
2236 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2237 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2238 }
2239 else if ((uint32_t)-offDisp < (unsigned)_4K)
2240 {
2241 /* Use temporary indexing register w/ sub uimm12. */
2242 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2243 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2244 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2245 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2246 }
2247 else
2248 {
2249 /* Use temporary indexing register. */
2250 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2251 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2252 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2253 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2254 }
2255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2256 return off;
2257
2258#else
2259# error "Port me!"
2260#endif
2261}
2262
2263
2264/**
2265 * Emits a 64-bit immediate store with a BP-relative destination address.
2266 *
2267 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2268 */
2269DECL_INLINE_THROW(uint32_t)
2270iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2271{
2272#ifdef RT_ARCH_AMD64
2273 if ((int64_t)uImm64 == (int32_t)uImm64)
2274 {
2275 /* mov qword [rbp + offDisp], imm32 - sign extended */
2276 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2277 pbCodeBuf[off++] = X86_OP_REX_W;
2278 pbCodeBuf[off++] = 0xc7;
2279 if (offDisp < 128 && offDisp >= -128)
2280 {
2281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2282 pbCodeBuf[off++] = (uint8_t)offDisp;
2283 }
2284 else
2285 {
2286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2287 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2288 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2289 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2290 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2291 }
2292 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2293 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2294 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2295 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2297 return off;
2298 }
2299#endif
2300
2301 /* Load tmp0, imm64; Store tmp to bp+disp. */
2302 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2303 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2304}
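/*
 * Usage sketch (illustrative; the slot offset is made up): zero a 64-bit
 * stack-frame slot.  Sign-extendable 32-bit immediates take the short
 * mov-imm32 path on AMD64; anything else goes through
 * IEMNATIVE_REG_FIXED_TMP0 and a regular store.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleZeroStackSlot(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    return iemNativeEmitStoreImm64ByBp(pReNative, off, -24 /*offDisp*/, 0 /*uImm64*/);
}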
2305
2306
2307#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2308/**
2309 * Emits a 128-bit vector register store with a BP-relative destination address.
2310 *
2311 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2312 */
2313DECL_INLINE_THROW(uint32_t)
2314iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2315{
2316#ifdef RT_ARCH_AMD64
2317 /* movdqu [rbp + offDisp], vecsrc */
2318 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2319 pbCodeBuf[off++] = 0xf3;
2320 if (iVecRegSrc >= 8)
2321 pbCodeBuf[off++] = X86_OP_REX_R;
2322 pbCodeBuf[off++] = 0x0f;
2323 pbCodeBuf[off++] = 0x7f;
2324 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2325
2326#elif defined(RT_ARCH_ARM64)
2327 if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2328 {
2329 /* str w/ unsigned imm12 (scaled) */
2330 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2331 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2332 ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2333 }
2334 else if (offDisp >= -256 && offDisp <= 255)
2335 {
2336 /* stur w/ signed imm9 (unscaled) */
2337 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2338 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2339 }
2340 else if ((uint32_t)-offDisp < (unsigned)_4K)
2341 {
2342 /* Use temporary indexing register w/ sub uimm12. */
2343 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2344 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2345 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2346 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2347 }
2348 else
2349 {
2350 /* Use temporary indexing register. */
2351 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2352 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2353 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2354 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2355 }
2356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2357 return off;
2358
2359#else
2360# error "Port me!"
2361#endif
2362}
2363
2364
2365/**
2366 * Emits a 256-bit vector register store with a BP-relative destination address.
2367 *
2368 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2369 */
2370DECL_INLINE_THROW(uint32_t)
2371iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2372{
2373#ifdef RT_ARCH_AMD64
2374 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2375
2376 /* vmovdqu mem256, reg256 */
2377 pbCodeBuf[off++] = X86_OP_VEX2;
2378 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2379 pbCodeBuf[off++] = 0x7f;
2380 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2381#elif defined(RT_ARCH_ARM64)
2382 Assert(!(iVecRegSrc & 0x1));
2383 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2384 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2385#else
2386# error "Port me!"
2387#endif
2388}
2389#endif
2390
2391#if defined(RT_ARCH_ARM64)
2392
2393/**
2394 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2395 *
2396 * @note Odd and large @a offDisp values require a temporary, unless it's a
2397 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2398 * caller does not heed this.
2399 *
2400 * @note DON'T try this with prefetch.
2401 */
2402DECL_FORCE_INLINE_THROW(uint32_t)
2403iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2404 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2405{
2406 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2407 {
2408 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2409 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2410 }
2411 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2412 && iGprReg != iGprBase)
2413 || iGprTmp != UINT8_MAX)
2414 {
2415 /* The offset is too large, so we must load it into a register and use
2416 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2417 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2418 if (iGprTmp == UINT8_MAX)
2419 iGprTmp = iGprReg;
2420 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2421 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2422 }
2423 else
2424# ifdef IEM_WITH_THROW_CATCH
2425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2426# else
2427 AssertReleaseFailedStmt(off = UINT32_MAX);
2428# endif
2429 return off;
2430}
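/*
 * Worked example for the scaled uimm12 reach above (added note, not original
 * source): with cbData == 8 the check accepts 0 <= offDisp < 4096 * 8 with
 * offDisp a multiple of 8, so the largest encodable displacement is
 * 4095 * 8 = 0x7ff8 -- the figure quoted in the range notes on the callers
 * below.  Negative or misaligned displacements take the temporary register
 * path instead.
 */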
2431
2432/**
2433 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2434 */
2435DECL_FORCE_INLINE_THROW(uint32_t)
2436iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2437 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2438{
2439 /*
2440 * There are a couple of ldr variants that take an immediate offset, so
2441 * try to use those if we can, otherwise we have to use the temporary
2442 * register to help with the addressing.
2443 */
2444 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2445 {
2446 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2447 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2448 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2449 }
2450 else
2451 {
2452 /* The offset is too large, so we must load it into a register and use
2453 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2454 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2455 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2456
2457 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2458 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2459
2460 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2461 }
2462 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2463 return off;
2464}
2465
2466# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2467/**
2468 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2469 *
2470 * @note Odd and large @a offDisp values require a temporary register (@a iGprTmp),
2471 * since a vector register cannot double as the address temporary. Will assert / throw if
2472 * caller does not heed this.
2473 *
2474 * @note DON'T try this with prefetch.
2475 */
2476DECL_FORCE_INLINE_THROW(uint32_t)
2477iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2478 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2479{
2480 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2481 {
2482 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2483 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2484 }
2485 else if (iGprTmp != UINT8_MAX) /* Unlike the GPR variant, a load cannot
2486 reuse iVecReg as the address temporary here. */
2487 {
2488 /* The offset is too large, so we must load it into a register and use
2489 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2490 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2491 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2492 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2493 }
2494 else
2495# ifdef IEM_WITH_THROW_CATCH
2496 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2497# else
2498 AssertReleaseFailedStmt(off = UINT32_MAX);
2499# endif
2500 return off;
2501}
2502# endif
2503
2504#endif /* RT_ARCH_ARM64 */
2505
2506/**
2507 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2508 *
2509 * @note ARM64: Misaligned @a offDisp values and values not in the
2510 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2511 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2512 * does not heed this.
2513 */
2514DECL_FORCE_INLINE_THROW(uint32_t)
2515iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2516 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2517{
2518#ifdef RT_ARCH_AMD64
2519 /* mov reg64, mem64 */
2520 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2521 pCodeBuf[off++] = 0x8b;
2522 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2523 RT_NOREF(iGprTmp);
2524
2525#elif defined(RT_ARCH_ARM64)
2526 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2527 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2528
2529#else
2530# error "port me"
2531#endif
2532 return off;
2533}
2534
2535
2536/**
2537 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2538 */
2539DECL_INLINE_THROW(uint32_t)
2540iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2541{
2542#ifdef RT_ARCH_AMD64
2543 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2544 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2545
2546#elif defined(RT_ARCH_ARM64)
2547 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2548
2549#else
2550# error "port me"
2551#endif
2552 return off;
2553}
2554
2555
2556/**
2557 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2558 *
2559 * @note ARM64: Misaligned @a offDisp values and values not in the
2560 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2561 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2562 * caller does not heed this.
2563 *
2564 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2565 */
2566DECL_FORCE_INLINE_THROW(uint32_t)
2567iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2568 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2569{
2570#ifdef RT_ARCH_AMD64
2571 /* mov reg32, mem32 */
2572 if (iGprDst >= 8 || iGprBase >= 8)
2573 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2574 pCodeBuf[off++] = 0x8b;
2575 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2576 RT_NOREF(iGprTmp);
2577
2578#elif defined(RT_ARCH_ARM64)
2579 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2580 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2581
2582#else
2583# error "port me"
2584#endif
2585 return off;
2586}
2587
2588
2589/**
2590 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2591 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2592 */
2593DECL_INLINE_THROW(uint32_t)
2594iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2595{
2596#ifdef RT_ARCH_AMD64
2597 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2598 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2599
2600#elif defined(RT_ARCH_ARM64)
2601 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2602
2603#else
2604# error "port me"
2605#endif
2606 return off;
2607}
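/*
 * Usage sketch (illustrative; the helper name and field offset are made up):
 * load a 32-bit structure field via a pointer held in a GPR, zero-extending
 * it to 64 bits as the note above describes.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleLoadU32Field(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase)
{
    return iemNativeEmitLoadGprByGprU32(pReNative, off, iGprDst, iGprBase, 0x10 /*offDisp*/);
}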
2608
2609
2610/**
2611 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2612 * sign-extending the value to 64 bits.
2613 *
2614 * @note ARM64: Misaligned @a offDisp values and values not in the
2615 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2616 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2617 * caller does not heed this.
2618 */
2619DECL_FORCE_INLINE_THROW(uint32_t)
2620iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2621 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2622{
2623#ifdef RT_ARCH_AMD64
2624 /* movsxd reg64, mem32 */
2625 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2626 pCodeBuf[off++] = 0x63;
2627 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2628 RT_NOREF(iGprTmp);
2629
2630#elif defined(RT_ARCH_ARM64)
2631 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2632 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2633
2634#else
2635# error "port me"
2636#endif
2637 return off;
2638}
2639
2640
2641/**
2642 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2643 *
2644 * @note ARM64: Misaligned @a offDisp values and values not in the
2645 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2646 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2647 * caller does not heed this.
2648 *
2649 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2650 */
2651DECL_FORCE_INLINE_THROW(uint32_t)
2652iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2653 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2654{
2655#ifdef RT_ARCH_AMD64
2656 /* movzx reg32, mem16 */
2657 if (iGprDst >= 8 || iGprBase >= 8)
2658 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2659 pCodeBuf[off++] = 0x0f;
2660 pCodeBuf[off++] = 0xb7;
2661 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2662 RT_NOREF(iGprTmp);
2663
2664#elif defined(RT_ARCH_ARM64)
2665 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2666 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2667
2668#else
2669# error "port me"
2670#endif
2671 return off;
2672}
2673
2674
2675/**
2676 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2677 * sign-extending the value to 64 bits.
2678 *
2679 * @note ARM64: Misaligned @a offDisp values and values not in the
2680 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2681 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2682 * caller does not heed this.
2683 */
2684DECL_FORCE_INLINE_THROW(uint32_t)
2685iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2686 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2687{
2688#ifdef RT_ARCH_AMD64
2689 /* movsx reg64, mem16 */
2690 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2691 pCodeBuf[off++] = 0x0f;
2692 pCodeBuf[off++] = 0xbf;
2693 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2694 RT_NOREF(iGprTmp);
2695
2696#elif defined(RT_ARCH_ARM64)
2697 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2698 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2699
2700#else
2701# error "port me"
2702#endif
2703 return off;
2704}
2705
2706
2707/**
2708 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2709 * sign-extending the value to 32 bits.
2710 *
2711 * @note ARM64: Misaligned @a offDisp values and values not in the
2712 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2713 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2714 * caller does not heed this.
2715 *
2716 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2717 */
2718DECL_FORCE_INLINE_THROW(uint32_t)
2719iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2720 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2721{
2722#ifdef RT_ARCH_AMD64
2723 /* movsx reg32, mem16 */
2724 if (iGprDst >= 8 || iGprBase >= 8)
2725 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2726 pCodeBuf[off++] = 0x0f;
2727 pCodeBuf[off++] = 0xbf;
2728 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2729 RT_NOREF(iGprTmp);
2730
2731#elif defined(RT_ARCH_ARM64)
2732 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2733 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2734
2735#else
2736# error "port me"
2737#endif
2738 return off;
2739}
2740
2741
2742/**
2743 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2744 *
2745 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2746 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2747 * same. Will assert / throw if caller does not heed this.
2748 *
2749 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2750 */
2751DECL_FORCE_INLINE_THROW(uint32_t)
2752iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2753 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2754{
2755#ifdef RT_ARCH_AMD64
2756 /* movzx reg32, mem8 */
2757 if (iGprDst >= 8 || iGprBase >= 8)
2758 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2759 pCodeBuf[off++] = 0x0f;
2760 pCodeBuf[off++] = 0xb6;
2761 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2762 RT_NOREF(iGprTmp);
2763
2764#elif defined(RT_ARCH_ARM64)
2765 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2766 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2767
2768#else
2769# error "port me"
2770#endif
2771 return off;
2772}
2773
2774
2775/**
2776 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2777 * sign-extending the value to 64 bits.
2778 *
2779 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2780 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2781 * same. Will assert / throw if caller does not heed this.
2782 */
2783DECL_FORCE_INLINE_THROW(uint32_t)
2784iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2785 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2786{
2787#ifdef RT_ARCH_AMD64
2788 /* movsx reg64, mem8 */
2789 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2790 pCodeBuf[off++] = 0x0f;
2791 pCodeBuf[off++] = 0xbe;
2792 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2793 RT_NOREF(iGprTmp);
2794
2795#elif defined(RT_ARCH_ARM64)
2796 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2797 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2798
2799#else
2800# error "port me"
2801#endif
2802 return off;
2803}
2804
2805
2806/**
2807 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2808 * sign-extending the value to 32 bits.
2809 *
2810 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2811 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2812 * same. Will assert / throw if caller does not heed this.
2813 *
2814 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2815 */
2816DECL_FORCE_INLINE_THROW(uint32_t)
2817iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2818 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2819{
2820#ifdef RT_ARCH_AMD64
2821 /* movsx reg32, mem8 */
2822 if (iGprDst >= 8 || iGprBase >= 8)
2823 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2824 pCodeBuf[off++] = 0x0f;
2825 pCodeBuf[off++] = 0xbe;
2826 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2827 RT_NOREF(iGprTmp);
2828
2829#elif defined(RT_ARCH_ARM64)
2830 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2831 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2832
2833#else
2834# error "port me"
2835#endif
2836 return off;
2837}
2838
2839
2840/**
2841 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2842 * sign-extending the value to 16 bits.
2843 *
2844 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2845 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2846 * same. Will assert / throw if caller does not heed this.
2847 *
2848 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2849 */
2850DECL_FORCE_INLINE_THROW(uint32_t)
2851iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2852 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2853{
2854#ifdef RT_ARCH_AMD64
2855 /* movsx reg32, mem8 */
2856 if (iGprDst >= 8 || iGprBase >= 8)
2857 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2858 pCodeBuf[off++] = 0x0f;
2859 pCodeBuf[off++] = 0xbe;
2860 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2861# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2862 /* and reg32, 0xffff */
2863 if (iGprDst >= 8)
2864 pCodeBuf[off++] = X86_OP_REX_B;
2865 pCodeBuf[off++] = 0x81;
2866 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2867 pCodeBuf[off++] = 0xff;
2868 pCodeBuf[off++] = 0xff;
2869 pCodeBuf[off++] = 0;
2870 pCodeBuf[off++] = 0;
2871# else
2872 /* movzx reg32, reg16 */
2873 if (iGprDst >= 8)
2874 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2875 pCodeBuf[off++] = 0x0f;
2876 pCodeBuf[off++] = 0xb7;
2877 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2878# endif
2879 RT_NOREF(iGprTmp);
2880
2881#elif defined(RT_ARCH_ARM64)
2882 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2883 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2884 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2885 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
2886
2887#else
2888# error "port me"
2889#endif
2890 return off;
2891}
2892
2893
2894#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2895/**
2896 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2897 *
2898 * @note ARM64: Misaligned @a offDisp values and values not in the
2899 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2900 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2901 * does not heed this.
2902 */
2903DECL_FORCE_INLINE_THROW(uint32_t)
2904iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2905 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2906{
2907#ifdef RT_ARCH_AMD64
2908 /* movdqu reg128, mem128 */
2909 pCodeBuf[off++] = 0xf3;
2910 if (iVecRegDst >= 8 || iGprBase >= 8)
2911 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2912 pCodeBuf[off++] = 0x0f;
2913 pCodeBuf[off++] = 0x6f;
2914 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
2915 RT_NOREF(iGprTmp);
2916
2917#elif defined(RT_ARCH_ARM64)
2918 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
2919 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
2920
2921#else
2922# error "port me"
2923#endif
2924 return off;
2925}
2926
2927
2928/**
2929 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2930 */
2931DECL_INLINE_THROW(uint32_t)
2932iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
2933{
2934#ifdef RT_ARCH_AMD64
2935 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
2936 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2937
2938#elif defined(RT_ARCH_ARM64)
2939 off = iemNativeEmitGprByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2940
2941#else
2942# error "port me"
2943#endif
2944 return off;
2945}
2946
2947
2948/**
2949 * Emits a 256-bit vector register load via a GPR base address with a displacement.
2950 *
2951 * @note ARM64: Misaligned @a offDisp values and values not in the
2952 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2953 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2954 * does not heed this.
2955 */
2956DECL_FORCE_INLINE_THROW(uint32_t)
2957iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2958 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2959{
2960#ifdef RT_ARCH_AMD64
2961 /* vmovdqu reg256, mem256 */
2962 pCodeBuf[off++] = X86_OP_VEX3;
2963 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
2964 | X86_OP_VEX3_BYTE1_X
2965 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
2966 | UINT8_C(0x01);
2967 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2968 pCodeBuf[off++] = 0x6f;
2969 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
2970 RT_NOREF(iGprTmp);
2971
2972#elif defined(RT_ARCH_ARM64)
2973 Assert(!(iVecRegDst & 0x1));
2974 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
2975 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
2976 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
2977 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
2978#else
2979# error "port me"
2980#endif
2981 return off;
2982}
2983
2984
2985/**
2986 * Emits a 256-bit vector register load via a GPR base address with a displacement.
2987 */
2988DECL_INLINE_THROW(uint32_t)
2989iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
2990{
2991#ifdef RT_ARCH_AMD64
2992 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecRegDst, iGprBase, offDisp);
2993 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2994
2995#elif defined(RT_ARCH_ARM64)
2996 Assert(!(iVecRegDst & 0x1));
2997 off = iemNativeEmitGprByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
2998 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2999 off = iemNativeEmitGprByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3000 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3001
3002#else
3003# error "port me"
3004#endif
3005 return off;
3006}
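/*
 * Usage sketch (illustrative; the helper name and displacement are made up):
 * fetch a 256-bit value through a guest pointer already loaded into a host
 * GPR.  On ARM64 the value occupies two consecutive 128-bit registers, so
 * the destination register index must be even.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleLoadU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprBase)
{
    return iemNativeEmitLoadVecRegByGprU256(pReNative, off, 2 /*iVecRegDst - even*/, iGprBase, 0x20 /*offDisp*/);
}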
3007#endif
3008
3009
3010/**
3011 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3012 *
3013 * @note ARM64: Misaligned @a offDisp values and values not in the
3014 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3015 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3016 * does not heed this.
3017 */
3018DECL_FORCE_INLINE_THROW(uint32_t)
3019iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3020 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3021{
3022#ifdef RT_ARCH_AMD64
3023 /* mov mem64, reg64 */
3024 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3025 pCodeBuf[off++] = 0x89;
3026 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3027 RT_NOREF(iGprTmp);
3028
3029#elif defined(RT_ARCH_ARM64)
3030 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3031 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3032
3033#else
3034# error "port me"
3035#endif
3036 return off;
3037}
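/* A minimal usage sketch for the Ex-style store emitters, assuming a caller with
   a valid pReNative/off pair; the register numbers and the 8-byte (AMD64 worst
   case) reservation are example values mirroring the non-Ex wrapper pattern used
   elsewhere in this file. */
#if 0
    off = iemNativeEmitStoreGpr64ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 8), off,
                                         0 /*iGprSrc*/, 1 /*iGprBase*/, 0x18 /*offDisp*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
#endif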
3038
3039
3040/**
3041 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3042 *
3043 * @note ARM64: Misaligned @a offDisp values and values not in the
3044 *       -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3045 *       @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3046 * does not heed this.
3047 */
3048DECL_FORCE_INLINE_THROW(uint32_t)
3049iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3050 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3051{
3052#ifdef RT_ARCH_AMD64
3053 /* mov mem32, reg32 */
3054 if (iGprSrc >= 8 || iGprBase >= 8)
3055 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3056 pCodeBuf[off++] = 0x89;
3057 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3058 RT_NOREF(iGprTmp);
3059
3060#elif defined(RT_ARCH_ARM64)
3061 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3062 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3063
3064#else
3065# error "port me"
3066#endif
3067 return off;
3068}
3069
3070
3071/**
3072 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3073 *
3074 * @note ARM64: Misaligned @a offDisp values and values not in the
3075 *       -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3076 *       @a iGprSrc and @a iGprBase are the same. Will assert / throw if caller
3077 * does not heed this.
3078 */
3079DECL_FORCE_INLINE_THROW(uint32_t)
3080iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3081 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3082{
3083#ifdef RT_ARCH_AMD64
3084 /* mov mem16, reg16 */
3085 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3086 if (iGprSrc >= 8 || iGprBase >= 8)
3087 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3088 pCodeBuf[off++] = 0x89;
3089 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3090 RT_NOREF(iGprTmp);
3091
3092#elif defined(RT_ARCH_ARM64)
3093 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3094 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3095
3096#else
3097# error "port me"
3098#endif
3099 return off;
3100}
3101
3102
3103/**
3104 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3105 *
3106 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3107 *       temporary register (@a iGprTmp) if @a iGprSrc and @a iGprBase are the
3108 * same. Will assert / throw if caller does not heed this.
3109 */
3110DECL_FORCE_INLINE_THROW(uint32_t)
3111iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3112 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3113{
3114#ifdef RT_ARCH_AMD64
3115 /* mov mem8, reg8 */
3116 if (iGprSrc >= 8 || iGprBase >= 8)
3117 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3118 else if (iGprSrc >= 4)
3119 pCodeBuf[off++] = X86_OP_REX;
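        /* The bare REX prefix emitted for source registers 4-7 selects
           SPL/BPL/SIL/DIL rather than AH/CH/DH/BH in the mov r/m8 encoding. */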
3120 pCodeBuf[off++] = 0x88;
3121 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3122 RT_NOREF(iGprTmp);
3123
3124#elif defined(RT_ARCH_ARM64)
3125 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3126 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3127
3128#else
3129# error "port me"
3130#endif
3131 return off;
3132}
3133
3134
3135/**
3136 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3137 *
3138 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0);
3139 *       on AMD64 it depends on the immediate value.
3140 *
3141 * @note ARM64: Misaligned @a offDisp values and values not in the
3142 *       -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3143 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3144 * does not heed this.
3145 */
3146DECL_FORCE_INLINE_THROW(uint32_t)
3147iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3148 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3149{
3150#ifdef RT_ARCH_AMD64
3151 if ((int32_t)uImm == (int64_t)uImm)
3152 {
3153 /* mov mem64, imm32 (sign-extended) */
3154 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3155 pCodeBuf[off++] = 0xc7;
3156 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3157 pCodeBuf[off++] = RT_BYTE1(uImm);
3158 pCodeBuf[off++] = RT_BYTE2(uImm);
3159 pCodeBuf[off++] = RT_BYTE3(uImm);
3160 pCodeBuf[off++] = RT_BYTE4(uImm);
3161 }
3162 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3163 {
3164 /* require temporary register. */
3165 if (iGprImmTmp == UINT8_MAX)
3166 iGprImmTmp = iGprTmp;
3167 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3168 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3169 }
3170 else
3171# ifdef IEM_WITH_THROW_CATCH
3172 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3173# else
3174 AssertReleaseFailedStmt(off = UINT32_MAX);
3175# endif
3176
3177#elif defined(RT_ARCH_ARM64)
3178 if (uImm == 0)
3179 iGprImmTmp = ARMV8_A64_REG_XZR;
3180 else
3181 {
3182 Assert(iGprImmTmp < 31);
3183 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3184 }
3185 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3186
3187#else
3188# error "port me"
3189#endif
3190 return off;
3191}
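/* Worked examples for the immediate dispatch above (illustrative values):
   on AMD64, uImm=0x12 and uImm=UINT64_MAX (i.e. -1) fit the sign-extended imm32
   form and need no helper register, while uImm=0x8877665544332211 does not and
   requires iGprImmTmp (or iGprTmp). On ARM64 every non-zero uImm is first
   materialized in iGprImmTmp; uImm=0 is stored directly from XZR. */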
3192
3193
3194/**
3195 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3196 *
3197 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3198 *
3199 * @note ARM64: Misaligned @a offDisp values and values not in the
3200 *       -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3201 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3202 * does not heed this.
3203 */
3204DECL_FORCE_INLINE_THROW(uint32_t)
3205iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3206 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3207{
3208#ifdef RT_ARCH_AMD64
3209 /* mov mem32, imm32 */
3210 if (iGprBase >= 8)
3211 pCodeBuf[off++] = X86_OP_REX_B;
3212 pCodeBuf[off++] = 0xc7;
3213 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3214 pCodeBuf[off++] = RT_BYTE1(uImm);
3215 pCodeBuf[off++] = RT_BYTE2(uImm);
3216 pCodeBuf[off++] = RT_BYTE3(uImm);
3217 pCodeBuf[off++] = RT_BYTE4(uImm);
3218 RT_NOREF(iGprImmTmp, iGprTmp);
3219
3220#elif defined(RT_ARCH_ARM64)
3222 if (uImm == 0)
3223 iGprImmTmp = ARMV8_A64_REG_XZR;
3224 else
3225 {
3226 Assert(iGprImmTmp < 31);
3227 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3228 }
3229 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3230 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3231
3232#else
3233# error "port me"
3234#endif
3235 return off;
3236}
3237
3238
3239/**
3240 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3241 *
3242 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3243 *
3244 * @note ARM64: Misaligned @a offDisp values and values not in the
3245 *       -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3246 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3247 * does not heed this.
3248 */
3249DECL_FORCE_INLINE_THROW(uint32_t)
3250iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3251 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3252{
3253#ifdef RT_ARCH_AMD64
3254 /* mov mem16, imm16 */
3255 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3256 if (iGprBase >= 8)
3257 pCodeBuf[off++] = X86_OP_REX_B;
3258 pCodeBuf[off++] = 0xc7;
3259 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3260 pCodeBuf[off++] = RT_BYTE1(uImm);
3261 pCodeBuf[off++] = RT_BYTE2(uImm);
3262 RT_NOREF(iGprImmTmp, iGprTmp);
3263
3264#elif defined(RT_ARCH_ARM64)
3265 if (uImm == 0)
3266 iGprImmTmp = ARMV8_A64_REG_XZR;
3267 else
3268 {
3269 Assert(iGprImmTmp < 31);
3270 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3271 }
3272 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3273 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3274
3275#else
3276# error "port me"
3277#endif
3278 return off;
3279}
3280
3281
3282/**
3283 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3284 *
3285 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3286 *
3287 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3288 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3289 * same. Will assert / throw if caller does not heed this.
3290 */
3291DECL_FORCE_INLINE_THROW(uint32_t)
3292iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3293 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3294{
3295#ifdef RT_ARCH_AMD64
3296 /* mov mem8, imm8 */
3298 if (iGprBase >= 8)
3299 pCodeBuf[off++] = X86_OP_REX_B;
3300 pCodeBuf[off++] = 0xc6;
3301 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3302 pCodeBuf[off++] = uImm;
3303 RT_NOREF(iGprImmTmp, iGprTmp);
3304
3305#elif defined(RT_ARCH_ARM64)
3306 if (uImm == 0)
3307 iGprImmTmp = ARMV8_A64_REG_XZR;
3308 else
3309 {
3310 Assert(iGprImmTmp < 31);
3311 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3312 }
3313 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3314 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3315
3316#else
3317# error "port me"
3318#endif
3319 return off;
3320}
3321
3322
3323
3324/*********************************************************************************************************************************
3325* Subtraction and Additions *
3326*********************************************************************************************************************************/
3327
3328/**
3329 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3330 * @note The AMD64 version sets flags.
3331 */
3332DECL_INLINE_THROW(uint32_t)
3333iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3334{
3335#if defined(RT_ARCH_AMD64)
3336 /* sub Gv,Ev */
3337 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3338 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3339 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3340 pbCodeBuf[off++] = 0x2b;
3341 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3342
3343#elif defined(RT_ARCH_ARM64)
3344 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3345 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3346
3347#else
3348# error "Port me"
3349#endif
3350 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3351 return off;
3352}
3353
3354
3355/**
3356 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3357 * @note The AMD64 version sets flags.
3358 */
3359DECL_FORCE_INLINE(uint32_t)
3360iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3361{
3362#if defined(RT_ARCH_AMD64)
3363 /* sub Gv,Ev */
3364 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3365 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3366 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3367 pCodeBuf[off++] = 0x2b;
3368 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3369
3370#elif defined(RT_ARCH_ARM64)
3371 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3372
3373#else
3374# error "Port me"
3375#endif
3376 return off;
3377}
3378
3379
3380/**
3381 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3382 * @note The AMD64 version sets flags.
3383 */
3384DECL_INLINE_THROW(uint32_t)
3385iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3386{
3387#if defined(RT_ARCH_AMD64)
3388 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3389#elif defined(RT_ARCH_ARM64)
3390 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3391#else
3392# error "Port me"
3393#endif
3394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3395 return off;
3396}
3397
3398
3399/**
3400 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3401 *
3402 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3403 *
3404 * @note Larger constants will require a temporary register. Failing to specify
3405 * one when needed will trigger fatal assertion / throw.
3406 */
3407DECL_FORCE_INLINE_THROW(uint32_t)
3408iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3409 uint8_t iGprTmp = UINT8_MAX)
3410{
3411#ifdef RT_ARCH_AMD64
3412 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3413 if (iSubtrahend == 1)
3414 {
3415 /* dec r/m64 */
3416 pCodeBuf[off++] = 0xff;
3417 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3418 }
3419 else if (iSubtrahend == -1)
3420 {
3421 /* inc r/m64 */
3422 pCodeBuf[off++] = 0xff;
3423 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3424 }
3425 else if ((int8_t)iSubtrahend == iSubtrahend)
3426 {
3427 /* sub r/m64, imm8 */
3428 pCodeBuf[off++] = 0x83;
3429 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3430 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3431 }
3432 else if ((int32_t)iSubtrahend == iSubtrahend)
3433 {
3434 /* sub r/m64, imm32 */
3435 pCodeBuf[off++] = 0x81;
3436 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3437 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3438 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3439 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3440 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3441 }
3442 else if (iGprTmp != UINT8_MAX)
3443 {
3444 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3445 /* sub r/m64, r64 */
3446 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3447 pCodeBuf[off++] = 0x29;
3448 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3449 }
3450 else
3451# ifdef IEM_WITH_THROW_CATCH
3452 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3453# else
3454 AssertReleaseFailedStmt(off = UINT32_MAX);
3455# endif
3456
3457#elif defined(RT_ARCH_ARM64)
3458 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3459 if (uAbsSubtrahend < 4096)
3460 {
3461 if (iSubtrahend >= 0)
3462 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3463 else
3464 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3465 }
3466 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3467 {
3468 if (iSubtrahend >= 0)
3469 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3470 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3471 else
3472 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3473 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3474 }
3475 else if (iGprTmp != UINT8_MAX)
3476 {
3477 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3478 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3479 }
3480 else
3481# ifdef IEM_WITH_THROW_CATCH
3482 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3483# else
3484 AssertReleaseFailedStmt(off = UINT32_MAX);
3485# endif
3486
3487#else
3488# error "Port me"
3489#endif
3490 return off;
3491}
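/* Encoding selection examples for the emitter above (illustrative values):
   iSubtrahend=1 uses dec and -1 uses inc on AMD64, 100 fits the imm8 form and
   100000 the imm32 form; on ARM64, 100 fits the plain imm12 form, 0x5000 (low
   12 bits clear) the LSL #12 form, and 100000 (0x186a0) fits neither, so it
   needs iGprTmp, as does anything beyond 32 bits on AMD64. */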
3492
3493
3494/**
3495 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3496 *
3497 * @note Larger constants will require a temporary register. Failing to specify
3498 * one when needed will trigger fatal assertion / throw.
3499 */
3500DECL_INLINE_THROW(uint32_t)
3501iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3502 uint8_t iGprTmp = UINT8_MAX)
3503
3504{
3505#ifdef RT_ARCH_AMD64
3506 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3507#elif defined(RT_ARCH_ARM64)
3508 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3509#else
3510# error "Port me"
3511#endif
3512 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3513 return off;
3514}
3515
3516
3517/**
3518 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3519 *
3520 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3521 *
3522 * @note ARM64: Larger constants will require a temporary register. Failing to
3523 * specify one when needed will trigger fatal assertion / throw.
3524 */
3525DECL_FORCE_INLINE_THROW(uint32_t)
3526iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3527 uint8_t iGprTmp = UINT8_MAX)
3528{
3529#ifdef RT_ARCH_AMD64
3530 if (iGprDst >= 8)
3531 pCodeBuf[off++] = X86_OP_REX_B;
3532 if (iSubtrahend == 1)
3533 {
3534 /* dec r/m32 */
3535 pCodeBuf[off++] = 0xff;
3536 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3537 }
3538 else if (iSubtrahend == -1)
3539 {
3540 /* inc r/m32 */
3541 pCodeBuf[off++] = 0xff;
3542 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3543 }
3544 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3545 {
3546 /* sub r/m32, imm8 */
3547 pCodeBuf[off++] = 0x83;
3548 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3549 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3550 }
3551 else
3552 {
3553 /* sub r/m32, imm32 */
3554 pCodeBuf[off++] = 0x81;
3555 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3556 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3557 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3558 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3559 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3560 }
3561 RT_NOREF(iGprTmp);
3562
3563#elif defined(RT_ARCH_ARM64)
3564 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3565 if (uAbsSubtrahend < 4096)
3566 {
3567 if (iSubtrahend >= 0)
3568 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3569 else
3570 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3571 }
3572 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3573 {
3574 if (iSubtrahend >= 0)
3575 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3576 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3577 else
3578 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3579 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3580 }
3581 else if (iGprTmp != UINT8_MAX)
3582 {
3583 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3584 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3585 }
3586 else
3587# ifdef IEM_WITH_THROW_CATCH
3588 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3589# else
3590 AssertReleaseFailedStmt(off = UINT32_MAX);
3591# endif
3592
3593#else
3594# error "Port me"
3595#endif
3596 return off;
3597}
3598
3599
3600/**
3601 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3602 *
3603 * @note ARM64: Larger constants will require a temporary register. Failing to
3604 * specify one when needed will trigger fatal assertion / throw.
3605 */
3606DECL_INLINE_THROW(uint32_t)
3607iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3608 uint8_t iGprTmp = UINT8_MAX)
3609
3610{
3611#ifdef RT_ARCH_AMD64
3612    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3613#elif defined(RT_ARCH_ARM64)
3614    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3615#else
3616# error "Port me"
3617#endif
3618 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3619 return off;
3620}
3621
3622
3623/**
3624 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3625 *
3626 * This will optimize using DEC/INC/whatever and ARM64 will not set flags,
3627 * so it is not suitable as a base for conditional jumps.
3628 *
3629 * @note AMD64: Will only update the lower 16 bits of the register.
3630 * @note ARM64: Will update the entire register.
3631 * @note ARM64: Larger constants will require a temporary register. Failing to
3632 * specify one when needed will trigger fatal assertion / throw.
3633 */
3634DECL_FORCE_INLINE_THROW(uint32_t)
3635iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3636 uint8_t iGprTmp = UINT8_MAX)
3637{
3638#ifdef RT_ARCH_AMD64
3639 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3640 if (iGprDst >= 8)
3641 pCodeBuf[off++] = X86_OP_REX_B;
3642 if (iSubtrahend == 1)
3643 {
3644 /* dec r/m16 */
3645 pCodeBuf[off++] = 0xff;
3646 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3647 }
3648 else if (iSubtrahend == -1)
3649 {
3650 /* inc r/m16 */
3651 pCodeBuf[off++] = 0xff;
3652 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3653 }
3654 else if ((int8_t)iSubtrahend == iSubtrahend)
3655 {
3656 /* sub r/m16, imm8 */
3657 pCodeBuf[off++] = 0x83;
3658 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3659 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3660 }
3661 else
3662 {
3663 /* sub r/m16, imm16 */
3664 pCodeBuf[off++] = 0x81;
3665 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3666 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3667 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3668 }
3669 RT_NOREF(iGprTmp);
3670
3671#elif defined(RT_ARCH_ARM64)
3672 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3673 if (uAbsSubtrahend < 4096)
3674 {
3675 if (iSubtrahend >= 0)
3676 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3677 else
3678 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3679 }
3680 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3681 {
3682 if (iSubtrahend >= 0)
3683 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3684 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3685 else
3686 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3687 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3688 }
3689 else if (iGprTmp != UINT8_MAX)
3690 {
3691 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3692 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3693 }
3694 else
3695# ifdef IEM_WITH_THROW_CATCH
3696 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3697# else
3698 AssertReleaseFailedStmt(off = UINT32_MAX);
3699# endif
3700 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3701
3702#else
3703# error "Port me"
3704#endif
3705 return off;
3706}
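/* A worked example of the differing 16-bit semantics (illustrative values):
   with iGprDst=0x0000000012340000 and iSubtrahend=1, the AMD64 code leaves the
   upper bits untouched and yields 0x000000001234ffff, while the ARM64 code
   subtracts in 32 bits and then applies the 16-bit AND mask, yielding
   0x000000000000ffff. */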
3707
3708
3709/**
3710 * Emits adding a 64-bit GPR to another, storing the result in the first.
3711 * @note The AMD64 version sets flags.
3712 */
3713DECL_FORCE_INLINE(uint32_t)
3714iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3715{
3716#if defined(RT_ARCH_AMD64)
3717 /* add Gv,Ev */
3718 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3719 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3720 pCodeBuf[off++] = 0x03;
3721 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3722
3723#elif defined(RT_ARCH_ARM64)
3724 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3725
3726#else
3727# error "Port me"
3728#endif
3729 return off;
3730}
3731
3732
3733/**
3734 * Emits adding a 64-bit GPR to another, storing the result in the first.
3735 * @note The AMD64 version sets flags.
3736 */
3737DECL_INLINE_THROW(uint32_t)
3738iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3739{
3740#if defined(RT_ARCH_AMD64)
3741 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3742#elif defined(RT_ARCH_ARM64)
3743 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3744#else
3745# error "Port me"
3746#endif
3747 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3748 return off;
3749}
3750
3751
3752/**
3753 * Emits adding a 32-bit GPR to another, storing the result in the first.
3754 * @note The AMD64 version sets flags.
3755 */
3756DECL_FORCE_INLINE(uint32_t)
3757iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3758{
3759#if defined(RT_ARCH_AMD64)
3760 /* add Gv,Ev */
3761 if (iGprDst >= 8 || iGprAddend >= 8)
3762 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3763 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3764 pCodeBuf[off++] = 0x03;
3765 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3766
3767#elif defined(RT_ARCH_ARM64)
3768 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3769
3770#else
3771# error "Port me"
3772#endif
3773 return off;
3774}
3775
3776
3777/**
3778 * Emits adding a 32-bit GPR to another, storing the result in the first.
3779 * @note The AMD64 version sets flags.
3780 */
3781DECL_INLINE_THROW(uint32_t)
3782iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3783{
3784#if defined(RT_ARCH_AMD64)
3785 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3786#elif defined(RT_ARCH_ARM64)
3787 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3788#else
3789# error "Port me"
3790#endif
3791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3792 return off;
3793}
3794
3795
3796/**
3797 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3798 */
3799DECL_INLINE_THROW(uint32_t)
3800iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3801{
3802#if defined(RT_ARCH_AMD64)
3803 /* add or inc */
3804 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3805 if (iImm8 != 1)
3806 {
3807 pCodeBuf[off++] = 0x83;
3808 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3809 pCodeBuf[off++] = (uint8_t)iImm8;
3810 }
3811 else
3812 {
3813 pCodeBuf[off++] = 0xff;
3814 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3815 }
3816
3817#elif defined(RT_ARCH_ARM64)
3818 if (iImm8 >= 0)
3819 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3820 else
3821 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3822
3823#else
3824# error "Port me"
3825#endif
3826 return off;
3827}
3828
3829
3830/**
3831 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3832 */
3833DECL_INLINE_THROW(uint32_t)
3834iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3835{
3836#if defined(RT_ARCH_AMD64)
3837 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3838#elif defined(RT_ARCH_ARM64)
3839 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3840#else
3841# error "Port me"
3842#endif
3843 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3844 return off;
3845}
3846
3847
3848/**
3849 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3850 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3851 */
3852DECL_FORCE_INLINE(uint32_t)
3853iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3854{
3855#if defined(RT_ARCH_AMD64)
3856 /* add or inc */
3857 if (iGprDst >= 8)
3858 pCodeBuf[off++] = X86_OP_REX_B;
3859 if (iImm8 != 1)
3860 {
3861 pCodeBuf[off++] = 0x83;
3862 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3863 pCodeBuf[off++] = (uint8_t)iImm8;
3864 }
3865 else
3866 {
3867 pCodeBuf[off++] = 0xff;
3868 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3869 }
3870
3871#elif defined(RT_ARCH_ARM64)
3872 if (iImm8 >= 0)
3873 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
3874 else
3875 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
3876
3877#else
3878# error "Port me"
3879#endif
3880 return off;
3881}
3882
3883
3884/**
3885 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3886 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3887 */
3888DECL_INLINE_THROW(uint32_t)
3889iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3890{
3891#if defined(RT_ARCH_AMD64)
3892 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3893#elif defined(RT_ARCH_ARM64)
3894 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3895#else
3896# error "Port me"
3897#endif
3898 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3899 return off;
3900}
3901
3902
3903/**
3904 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3905 *
3906 * @note Will assert / throw if @a iGprTmp is not specified when needed.
3907 */
3908DECL_FORCE_INLINE_THROW(uint32_t)
3909iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
3910{
3911#if defined(RT_ARCH_AMD64)
3912 if ((int8_t)iAddend == iAddend)
3913 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3914
3915 if ((int32_t)iAddend == iAddend)
3916 {
3917 /* add grp, imm32 */
3918 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3919 pCodeBuf[off++] = 0x81;
3920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3921 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3922 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3923 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3924 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3925 }
3926 else if (iGprTmp != UINT8_MAX)
3927 {
3928 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3929
3930 /* add dst, tmpreg */
3931 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3932 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
3933 pCodeBuf[off++] = 0x03;
3934 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
3935 }
3936 else
3937# ifdef IEM_WITH_THROW_CATCH
3938 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3939# else
3940 AssertReleaseFailedStmt(off = UINT32_MAX);
3941# endif
3942
3943#elif defined(RT_ARCH_ARM64)
3944 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
3945 if (uAbsAddend < 4096)
3946 {
3947 if (iAddend >= 0)
3948 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3949 else
3950 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3951 }
3952 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3953 {
3954 if (iAddend >= 0)
3955             pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3956                                                        true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
3957         else
3958             pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3959                                                        true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
3960 }
3961 else if (iGprTmp != UINT8_MAX)
3962 {
3963 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3964 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
3965 }
3966 else
3967# ifdef IEM_WITH_THROW_CATCH
3968 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3969# else
3970 AssertReleaseFailedStmt(off = UINT32_MAX);
3971# endif
3972
3973#else
3974# error "Port me"
3975#endif
3976 return off;
3977}
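/* ARM64 immediate-form examples for the emitter above (illustrative values):
   iAddend=0x123 encodes as a single ADD #0x123; iAddend=0x5000 encodes as
   ADD #5, LSL #12 via the shifted imm12 form; iAddend=0x5001 fits neither
   form and requires iGprTmp. */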
3978
3979
3980/**
3981 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3982 */
3983DECL_INLINE_THROW(uint32_t)
3984iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
3985{
3986#if defined(RT_ARCH_AMD64)
3987 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3988 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
3989
3990 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
3991 {
3992 /* add grp, imm32 */
3993 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3994 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3995 pbCodeBuf[off++] = 0x81;
3996 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3997 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3998 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3999 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4000 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4001 }
4002 else
4003 {
4004 /* Best to use a temporary register to deal with this in the simplest way: */
4005 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4006
4007 /* add dst, tmpreg */
4008 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4009 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4010 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4011 pbCodeBuf[off++] = 0x03;
4012 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4013
4014 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4015 }
4016
4017#elif defined(RT_ARCH_ARM64)
4018 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4019 {
4020 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4021 if (iAddend >= 0)
4022 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
4023 else
4024 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
4025 }
4026 else
4027 {
4028 /* Use temporary register for the immediate. */
4029 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4030
4031 /* add gprdst, gprdst, tmpreg */
4032 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4033 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
4034
4035 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4036 }
4037
4038#else
4039# error "Port me"
4040#endif
4041 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4042 return off;
4043}
4044
4045
4046/**
4047 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4048 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4049 * @note For ARM64 the @a iAddend value must be in the range 0x000..0xfff,
4050 *       or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
4051 *       the lower 12 bits always zero). The negative ranges are also allowed,
4052 *       making it behave like a subtraction. If the constant does not conform,
4053 *       this will assert / throw.
4054 */
4055DECL_FORCE_INLINE_THROW(uint32_t)
4056iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4057{
4058#if defined(RT_ARCH_AMD64)
4059 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4060 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4061
4062 /* add grp, imm32 */
4063 if (iGprDst >= 8)
4064 pCodeBuf[off++] = X86_OP_REX_B;
4065 pCodeBuf[off++] = 0x81;
4066 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4067 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4068 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4069 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4070 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4071
4072#elif defined(RT_ARCH_ARM64)
4073 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4074 if (uAbsAddend <= 0xfff)
4075 {
4076 if (iAddend >= 0)
4077 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4078 else
4079 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4080 }
4081 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4082 {
4083 if (iAddend >= 0)
4084             pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4085                                                           false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4086         else
4087             pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4088                                                           false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4089 }
4090 else
4091# ifdef IEM_WITH_THROW_CATCH
4092 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4093# else
4094 AssertReleaseFailedStmt(off = UINT32_MAX);
4095# endif
4096
4097#else
4098# error "Port me"
4099#endif
4100 return off;
4101}
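/* Conforming iAddend examples for the ARM64 restriction above (illustrative):
   0xfff and -0xfff use the plain imm12 form and 0x7f000 the LSL #12 form,
   while 0x1001 fits neither and makes this emitter assert / throw; use
   iemNativeEmitAddGpr32Imm below in that case, as it falls back to a
   temporary register. */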
4102
4103
4104/**
4105 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4106 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4107 */
4108DECL_INLINE_THROW(uint32_t)
4109iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4110{
4111#if defined(RT_ARCH_AMD64)
4112 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4113
4114#elif defined(RT_ARCH_ARM64)
4115 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4116 {
4117 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4118 if (iAddend >= 0)
4119 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
4120 else
4121 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
4122 }
4123 else
4124 {
4125 /* Use temporary register for the immediate. */
4126 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
4127
4128 /* add gprdst, gprdst, tmpreg */
4129 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4130 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4131
4132 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4133 }
4134
4135#else
4136# error "Port me"
4137#endif
4138 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4139 return off;
4140}
4141
4142
4143/**
4144 * Emits a 16-bit GPR add with a signed immediate addend.
4145 *
4146 * This will optimize using INC/DEC/whatever and ARM64 will not set flags,
4147 * so it is not suitable as a base for conditional jumps.
4148 *
4149 * @note AMD64: Will only update the lower 16 bits of the register.
4150 * @note ARM64: Will update the entire register.
4151 * @note ARM64: Larger constants will require a temporary register. Failing to
4152 * specify one when needed will trigger fatal assertion / throw.
4153 * @sa iemNativeEmitSubGpr16ImmEx
4154 */
4155DECL_FORCE_INLINE_THROW(uint32_t)
4156iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
4157 uint8_t iGprTmp = UINT8_MAX)
4158{
4159#ifdef RT_ARCH_AMD64
4160 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4161 if (iGprDst >= 8)
4162 pCodeBuf[off++] = X86_OP_REX_B;
4163 if (iAddend == 1)
4164 {
4165 /* inc r/m16 */
4166 pCodeBuf[off++] = 0xff;
4167 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4168 }
4169 else if (iAddend == -1)
4170 {
4171 /* dec r/m16 */
4172 pCodeBuf[off++] = 0xff;
4173 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4174 }
4175 else if ((int8_t)iAddend == iAddend)
4176 {
4177 /* add r/m16, imm8 */
4178 pCodeBuf[off++] = 0x83;
4179 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4180 pCodeBuf[off++] = (uint8_t)iAddend;
4181 }
4182 else
4183 {
4184 /* add r/m16, imm16 */
4185 pCodeBuf[off++] = 0x81;
4186 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4187 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4188 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4189 }
4190 RT_NOREF(iGprTmp);
4191
4192#elif defined(RT_ARCH_ARM64)
4193 uint32_t uAbsAddend = RT_ABS(iAddend);
4194 if (uAbsAddend < 4096)
4195 {
4196 if (iAddend >= 0)
4197 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4198 else
4199 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4200 }
4201 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4202 {
4203 if (iAddend >= 0)
4204 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4205 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4206 else
4207 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4208 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4209 }
4210 else if (iGprTmp != UINT8_MAX)
4211 {
4212 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
4213 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4214 }
4215 else
4216# ifdef IEM_WITH_THROW_CATCH
4217 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4218# else
4219 AssertReleaseFailedStmt(off = UINT32_MAX);
4220# endif
4221 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4222
4223#else
4224# error "Port me"
4225#endif
4226 return off;
4227}
4228
4229
4230
4231/**
4232 * Adds two 64-bit GPRs together, storing the result in a third register.
4233 */
4234DECL_FORCE_INLINE(uint32_t)
4235iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4236{
4237#ifdef RT_ARCH_AMD64
4238 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4239 {
4240 /** @todo consider LEA */
4241 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4242 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4243 }
4244 else
4245 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4246
4247#elif defined(RT_ARCH_ARM64)
4248 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4249
4250#else
4251# error "Port me!"
4252#endif
4253 return off;
4254}
4255
4256
4257
4258/**
4259 * Adds two 32-bit GPRs together, storing the result in a third register.
4260 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4261 */
4262DECL_FORCE_INLINE(uint32_t)
4263iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4264{
4265#ifdef RT_ARCH_AMD64
4266 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4267 {
4268 /** @todo consider LEA */
4269 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4270 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4271 }
4272 else
4273 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4274
4275#elif defined(RT_ARCH_ARM64)
4276 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4277
4278#else
4279# error "Port me!"
4280#endif
4281 return off;
4282}
4283
4284
4285/**
4286 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4287 * third register.
4288 *
4289 * @note The ARM64 version does not work for non-trivial constants if the
4290 * two registers are the same. Will assert / throw exception.
4291 */
4292DECL_FORCE_INLINE_THROW(uint32_t)
4293iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4294{
4295#ifdef RT_ARCH_AMD64
4296 /** @todo consider LEA */
4297 if ((int8_t)iImmAddend == iImmAddend)
4298 {
4299 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4300 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4301 }
4302 else
4303 {
4304 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4305 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4306 }
4307
4308#elif defined(RT_ARCH_ARM64)
4309 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4310 if (uAbsImmAddend < 4096)
4311 {
4312 if (iImmAddend >= 0)
4313 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4314 else
4315 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4316 }
4317 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4318 {
4319 if (iImmAddend >= 0)
4320             pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4321         else
4322             pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4323 }
4324 else if (iGprDst != iGprAddend)
4325 {
4326 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4327 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4328 }
4329 else
4330# ifdef IEM_WITH_THROW_CATCH
4331 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4332# else
4333 AssertReleaseFailedStmt(off = UINT32_MAX);
4334# endif
4335
4336#else
4337# error "Port me!"
4338#endif
4339 return off;
4340}
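/* A minimal usage sketch, assuming a caller with a valid pReNative/off pair:
   computing an effective address dst = addend + 0x28. The register numbers
   and the 13-byte reservation (AMD64 worst case: 10-byte imm64 load plus
   3-byte add) are example values. */
#if 0
    off = iemNativeEmitGprEqGprPlusImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off,
                                         2 /*iGprDst*/, 3 /*iGprAddend*/, 0x28 /*iImmAddend*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
#endif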
4341
4342
4343/**
4344 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4345 * third register.
4346 *
4347 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4348 *
4349 * @note The ARM64 version does not work for non-trivial constants if the
4350 * two registers are the same. Will assert / throw exception.
4351 */
4352DECL_FORCE_INLINE_THROW(uint32_t)
4353iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4354{
4355#ifdef RT_ARCH_AMD64
4356 /** @todo consider LEA */
4357 if ((int8_t)iImmAddend == iImmAddend)
4358 {
4359 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4360 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4361 }
4362 else
4363 {
4364 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4365        off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4366 }
4367
4368#elif defined(RT_ARCH_ARM64)
4369 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4370 if (uAbsImmAddend < 4096)
4371 {
4372 if (iImmAddend >= 0)
4373 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4374 else
4375 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4376 }
4377 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4378 {
4379 if (iImmAddend >= 0)
4380             pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4381         else
4382             pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4383 }
4384 else if (iGprDst != iGprAddend)
4385 {
4386 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4387 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4388 }
4389 else
4390# ifdef IEM_WITH_THROW_CATCH
4391 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4392# else
4393 AssertReleaseFailedStmt(off = UINT32_MAX);
4394# endif
4395
4396#else
4397# error "Port me!"
4398#endif
4399 return off;
4400}
4401
4402
4403/*********************************************************************************************************************************
4404* Unary Operations *
4405*********************************************************************************************************************************/
4406
4407/**
4408 * Emits code for two's complement negation of a 64-bit GPR.
4409 */
4410DECL_FORCE_INLINE_THROW(uint32_t)
4411iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4412{
4413#if defined(RT_ARCH_AMD64)
4414 /* neg Ev */
4415 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4416 pCodeBuf[off++] = 0xf7;
4417 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4418
4419#elif defined(RT_ARCH_ARM64)
4420 /* sub dst, xzr, dst */
4421 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4422
4423#else
4424# error "Port me"
4425#endif
4426 return off;
4427}
4428
4429
4430/**
4431 * Emits code for two's complement negation of a 64-bit GPR.
4432 */
4433DECL_INLINE_THROW(uint32_t)
4434iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4435{
4436#if defined(RT_ARCH_AMD64)
4437 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4438#elif defined(RT_ARCH_ARM64)
4439 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4440#else
4441# error "Port me"
4442#endif
4443 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4444 return off;
4445}
4446
4447
4448/**
4449 * Emits code for two's complement negation of a 32-bit GPR.
4450 * @note Bits 32 thru 63 are set to zero.
4451 */
4452DECL_FORCE_INLINE_THROW(uint32_t)
4453iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4454{
4455#if defined(RT_ARCH_AMD64)
4456 /* neg Ev */
4457 if (iGprDst >= 8)
4458 pCodeBuf[off++] = X86_OP_REX_B;
4459 pCodeBuf[off++] = 0xf7;
4460 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4461
4462#elif defined(RT_ARCH_ARM64)
4463 /* sub dst, xzr, dst */
4464 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4465
4466#else
4467# error "Port me"
4468#endif
4469 return off;
4470}
4471
4472
4473/**
4474 * Emits code for two's complement negation of a 32-bit GPR.
4475 * @note Bits 32 thru 63 are set to zero.
4476 */
4477DECL_INLINE_THROW(uint32_t)
4478iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4479{
4480#if defined(RT_ARCH_AMD64)
4481 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4482#elif defined(RT_ARCH_ARM64)
4483 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4484#else
4485# error "Port me"
4486#endif
4487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4488 return off;
4489}
4490
4491
4492
4493/*********************************************************************************************************************************
4494* Bit Operations *
4495*********************************************************************************************************************************/
4496
4497/**
4498 * Emits code for clearing bits 16 thru 63 in the GPR.
4499 */
4500DECL_INLINE_THROW(uint32_t)
4501iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4502{
4503#if defined(RT_ARCH_AMD64)
4504 /* movzx Gv,Ew */
4505 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4506 if (iGprDst >= 8)
4507 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4508 pbCodeBuf[off++] = 0x0f;
4509 pbCodeBuf[off++] = 0xb7;
4510 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4511
4512#elif defined(RT_ARCH_ARM64)
4513 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4514# if 1
4515 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4516# else
4517 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4518 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4519# endif
4520#else
4521# error "Port me"
4522#endif
4523 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4524 return off;
4525}
4526
4527
4528/**
4529 * Emits code for AND'ing two 64-bit GPRs.
4530 *
4531 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4532 * and ARM64 hosts.
4533 */
4534DECL_FORCE_INLINE(uint32_t)
4535iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4536{
4537#if defined(RT_ARCH_AMD64)
4538 /* and Gv, Ev */
4539 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4540 pCodeBuf[off++] = 0x23;
4541 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4542 RT_NOREF(fSetFlags);
4543
4544#elif defined(RT_ARCH_ARM64)
4545 if (!fSetFlags)
4546 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4547 else
4548 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4549
4550#else
4551# error "Port me"
4552#endif
4553 return off;
4554}
4555
4556
4557/**
4558 * Emits code for AND'ing two 64-bit GPRs.
4559 *
4560 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4561 * and ARM64 hosts.
4562 */
4563DECL_INLINE_THROW(uint32_t)
4564iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4565{
4566#if defined(RT_ARCH_AMD64)
4567 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4568#elif defined(RT_ARCH_ARM64)
4569 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4570#else
4571# error "Port me"
4572#endif
4573 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4574 return off;
4575}
4576
4577
4578/**
4579 * Emits code for AND'ing two 32-bit GPRs.
4580 */
4581DECL_FORCE_INLINE(uint32_t)
4582iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4583{
4584#if defined(RT_ARCH_AMD64)
4585 /* and Gv, Ev */
4586 if (iGprDst >= 8 || iGprSrc >= 8)
4587 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4588 pCodeBuf[off++] = 0x23;
4589 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4590 RT_NOREF(fSetFlags);
4591
4592#elif defined(RT_ARCH_ARM64)
4593 if (!fSetFlags)
4594 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4595 else
4596 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4597
4598#else
4599# error "Port me"
4600#endif
4601 return off;
4602}
4603
4604
4605/**
4606 * Emits code for AND'ing two 32-bit GPRs.
4607 */
4608DECL_INLINE_THROW(uint32_t)
4609iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4610{
4611#if defined(RT_ARCH_AMD64)
4612 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4613#elif defined(RT_ARCH_ARM64)
4614 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4615#else
4616# error "Port me"
4617#endif
4618 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4619 return off;
4620}
4621
4622
4623/**
4624 * Emits code for AND'ing a 64-bit GPR with a constant.
4625 *
4626 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4627 * and ARM64 hosts.
4628 */
4629DECL_INLINE_THROW(uint32_t)
4630iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4631{
4632#if defined(RT_ARCH_AMD64)
4633 if ((int64_t)uImm == (int8_t)uImm)
4634 {
4635 /* and Ev, imm8 */
4636 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4637 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4638 pbCodeBuf[off++] = 0x83;
4639 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4640 pbCodeBuf[off++] = (uint8_t)uImm;
4641 }
4642 else if ((int64_t)uImm == (int32_t)uImm)
4643 {
4644 /* and Ev, imm32 */
4645 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4646 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4647 pbCodeBuf[off++] = 0x81;
4648 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4649 pbCodeBuf[off++] = RT_BYTE1(uImm);
4650 pbCodeBuf[off++] = RT_BYTE2(uImm);
4651 pbCodeBuf[off++] = RT_BYTE3(uImm);
4652 pbCodeBuf[off++] = RT_BYTE4(uImm);
4653 }
4654 else
4655 {
4656 /* Use temporary register for the 64-bit immediate. */
4657 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4658 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4659 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4660 }
4661 RT_NOREF(fSetFlags);
4662
4663#elif defined(RT_ARCH_ARM64)
4664 uint32_t uImmR = 0;
4665 uint32_t uImmNandS = 0;
4666 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4667 {
4668 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4669 if (!fSetFlags)
4670 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4671 else
4672 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4673 }
4674 else
4675 {
4676 /* Use temporary register for the 64-bit immediate. */
4677 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4678 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4679 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4680 }
4681
4682#else
4683# error "Port me"
4684#endif
4685 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4686 return off;
4687}
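

/**
 * Sketch of how the emitter above picks an encoding (illustrative; idxReg is
 * a hypothetical placeholder).  On AMD64 a mask like 0x3f fits the
 * sign-extended imm8 form, 0x7fffffff needs the imm32 form, and an arbitrary
 * 64-bit constant falls back to loading a temporary register and AND'ing by
 * register; on ARM64 the first two happen to be valid bitmask immediates
 * while the last likewise needs a temporary:
 *
 * @code
 *      off = iemNativeEmitAndGprByImm(pReNative, off, idxReg, 0x3f);
 *      off = iemNativeEmitAndGprByImm(pReNative, off, idxReg, UINT64_C(0x7fffffff));
 *      off = iemNativeEmitAndGprByImm(pReNative, off, idxReg, UINT64_C(0x123456789abcdef0));
 * @endcode
 */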
4688
4689
4690/**
4691 * Emits code for AND'ing a 32-bit GPR with a constant.
4692 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4693 * @note For ARM64 this only supports @a uImm values that can be expressed using
4694 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4695 * make sure this is possible!
4696 */
4697DECL_FORCE_INLINE_THROW(uint32_t)
4698iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4699{
4700#if defined(RT_ARCH_AMD64)
4701 /* and Ev, imm */
4702 if (iGprDst >= 8)
4703 pCodeBuf[off++] = X86_OP_REX_B;
4704 if ((int32_t)uImm == (int8_t)uImm)
4705 {
4706 pCodeBuf[off++] = 0x83;
4707 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4708 pCodeBuf[off++] = (uint8_t)uImm;
4709 }
4710 else
4711 {
4712 pCodeBuf[off++] = 0x81;
4713 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4714 pCodeBuf[off++] = RT_BYTE1(uImm);
4715 pCodeBuf[off++] = RT_BYTE2(uImm);
4716 pCodeBuf[off++] = RT_BYTE3(uImm);
4717 pCodeBuf[off++] = RT_BYTE4(uImm);
4718 }
4719 RT_NOREF(fSetFlags);
4720
4721#elif defined(RT_ARCH_ARM64)
4722 uint32_t uImmR = 0;
4723 uint32_t uImmNandS = 0;
4724 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4725 {
4726 if (!fSetFlags)
4727 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4728 else
4729 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4730 }
4731 else
4732# ifdef IEM_WITH_THROW_CATCH
4733 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4734# else
4735 AssertReleaseFailedStmt(off = UINT32_MAX);
4736# endif
4737
4738#else
4739# error "Port me"
4740#endif
4741 return off;
4742}
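

/**
 * Caller-side sketch (illustrative, assuming the caller has both pReNative
 * and a code buffer at hand): since the Ex variant above asserts / throws on
 * ARM64 when @a uImm has no AND/ANDS bitmask encoding, a caller that cannot
 * guarantee the mask shape can probe the encoding first and otherwise take
 * the non-Ex emitter defined below, which falls back to a temporary register:
 *
 * @code
 *      uint32_t uImmNandS = 0;
 *      uint32_t uImmR     = 0;
 *      if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
 *          off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxReg, uImm);
 *      else
 *          off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxReg, uImm);
 * @endcode
 */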
4743
4744
4745/**
4746 * Emits code for AND'ing a 32-bit GPR with a constant.
4747 *
4748 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4749 */
4750DECL_INLINE_THROW(uint32_t)
4751iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4752{
4753#if defined(RT_ARCH_AMD64)
4754 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4755
4756#elif defined(RT_ARCH_ARM64)
4757 uint32_t uImmR = 0;
4758 uint32_t uImmNandS = 0;
4759 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4760 {
4761 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4762 if (!fSetFlags)
4763 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4764 else
4765 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4766 }
4767 else
4768 {
4769 /* Use temporary register for the 64-bit immediate. */
4770 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4771 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4772 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4773 }
4774
4775#else
4776# error "Port me"
4777#endif
4778 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4779 return off;
4780}
4781
4782
4783/**
4784 * Emits code for AND'ing a 64-bit GPR with a constant, storing the result in @a iGprDst.
4785 *
4786 * @note For ARM64, any complicated immediate without an AND/ANDS compatible
4787 *       encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
4788 *       are the same (the AMD64 code always requires them to differ).
4789 */
4790DECL_FORCE_INLINE_THROW(uint32_t)
4791iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4792 bool fSetFlags = false)
4793{
4794#if defined(RT_ARCH_AMD64)
4795 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4796 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4797 RT_NOREF(fSetFlags);
4798
4799#elif defined(RT_ARCH_ARM64)
4800 uint32_t uImmR = 0;
4801 uint32_t uImmNandS = 0;
4802 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4803 {
4804 if (!fSetFlags)
4805 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4806 else
4807 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4808 }
4809 else if (iGprDst != iGprSrc)
4810 {
4811 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4812 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4813 }
4814 else
4815# ifdef IEM_WITH_THROW_CATCH
4816 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4817# else
4818 AssertReleaseFailedStmt(off = UINT32_MAX);
4819# endif
4820
4821#else
4822# error "Port me"
4823#endif
4824 return off;
4825}
4826
4827/**
4828 * Emits code for AND'ing a 32-bit GPR with a constant, storing the result in @a iGprDst.
4829 *
4830 * @note For ARM64, any complicated immediate without an AND/ANDS compatible
4831 *       encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
4832 *       are the same (the AMD64 code always requires them to differ).
4833 *
4834 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4835 */
4836DECL_FORCE_INLINE_THROW(uint32_t)
4837iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4838 bool fSetFlags = false)
4839{
4840#if defined(RT_ARCH_AMD64)
4841 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4842 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4843 RT_NOREF(fSetFlags);
4844
4845#elif defined(RT_ARCH_ARM64)
4846 uint32_t uImmR = 0;
4847 uint32_t uImmNandS = 0;
4848 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4849 {
4850 if (!fSetFlags)
4851 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4852 else
4853 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4854 }
4855 else if (iGprDst != iGprSrc)
4856 {
4857 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4858 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4859 }
4860 else
4861# ifdef IEM_WITH_THROW_CATCH
4862 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4863# else
4864 AssertReleaseFailedStmt(off = UINT32_MAX);
4865# endif
4866
4867#else
4868# error "Port me"
4869#endif
4870 return off;
4871}
4872
4873
4874/**
4875 * Emits code for OR'ing two 64-bit GPRs.
4876 */
4877DECL_FORCE_INLINE(uint32_t)
4878iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4879{
4880#if defined(RT_ARCH_AMD64)
4881 /* or Gv, Ev */
4882 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4883 pCodeBuf[off++] = 0x0b;
4884 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4885
4886#elif defined(RT_ARCH_ARM64)
4887 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
4888
4889#else
4890# error "Port me"
4891#endif
4892 return off;
4893}
4894
4895
4896/**
4897 * Emits code for OR'ing two 64-bit GPRs.
4898 */
4899DECL_INLINE_THROW(uint32_t)
4900iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4901{
4902#if defined(RT_ARCH_AMD64)
4903 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4904#elif defined(RT_ARCH_ARM64)
4905 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4906#else
4907# error "Port me"
4908#endif
4909 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4910 return off;
4911}
4912
4913
4914/**
4915 * Emits code for OR'ing two 32-bit GPRs.
4916 * @note Bits 63:32 of the destination GPR will be cleared.
4917 */
4918DECL_FORCE_INLINE(uint32_t)
4919iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4920{
4921#if defined(RT_ARCH_AMD64)
4922 /* or Gv, Ev */
4923 if (iGprDst >= 8 || iGprSrc >= 8)
4924 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4925 pCodeBuf[off++] = 0x0b;
4926 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4927
4928#elif defined(RT_ARCH_ARM64)
4929 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4930
4931#else
4932# error "Port me"
4933#endif
4934 return off;
4935}
4936
4937
4938/**
4939 * Emits code for OR'ing two 32-bit GPRs.
4940 * @note Bits 63:32 of the destination GPR will be cleared.
4941 */
4942DECL_INLINE_THROW(uint32_t)
4943iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4944{
4945#if defined(RT_ARCH_AMD64)
4946 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4947#elif defined(RT_ARCH_ARM64)
4948 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4949#else
4950# error "Port me"
4951#endif
4952 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4953 return off;
4954}
4955
4956
4957/**
4958 * Emits code for OR'ing a 64-bit GPR with a constant.
4959 */
4960DECL_INLINE_THROW(uint32_t)
4961iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
4962{
4963#if defined(RT_ARCH_AMD64)
4964 if ((int64_t)uImm == (int8_t)uImm)
4965 {
4966 /* or Ev, imm8 */
4967 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4968 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4969 pbCodeBuf[off++] = 0x83;
4970 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4971 pbCodeBuf[off++] = (uint8_t)uImm;
4972 }
4973 else if ((int64_t)uImm == (int32_t)uImm)
4974 {
4975 /* or Ev, imm32 */
4976 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4977 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4978 pbCodeBuf[off++] = 0x81;
4979 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4980 pbCodeBuf[off++] = RT_BYTE1(uImm);
4981 pbCodeBuf[off++] = RT_BYTE2(uImm);
4982 pbCodeBuf[off++] = RT_BYTE3(uImm);
4983 pbCodeBuf[off++] = RT_BYTE4(uImm);
4984 }
4985 else
4986 {
4987 /* Use temporary register for the 64-bit immediate. */
4988 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4989 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
4990 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4991 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4992 }
4993
4994#elif defined(RT_ARCH_ARM64)
4995 uint32_t uImmR = 0;
4996 uint32_t uImmNandS = 0;
4997 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4998 {
4999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5000 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5001 }
5002 else
5003 {
5004 /* Use temporary register for the 64-bit immediate. */
5005 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5006 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5008 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5009 }
5010
5011#else
5012# error "Port me"
5013#endif
5014 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5015 return off;
5016}
5017
5018
5019/**
5020 * Emits code for OR'ing a 32-bit GPR with a constant.
5021 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5022 * @note For ARM64 this only supports @a uImm values that can be expressed using
5023 * the two 6-bit immediates of the OR instructions. The caller must make
5024 * sure this is possible!
5025 */
5026DECL_FORCE_INLINE_THROW(uint32_t)
5027iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5028{
5029#if defined(RT_ARCH_AMD64)
5030 /* or Ev, imm */
5031 if (iGprDst >= 8)
5032 pCodeBuf[off++] = X86_OP_REX_B;
5033 if ((int32_t)uImm == (int8_t)uImm)
5034 {
5035 pCodeBuf[off++] = 0x83;
5036 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5037 pCodeBuf[off++] = (uint8_t)uImm;
5038 }
5039 else
5040 {
5041 pCodeBuf[off++] = 0x81;
5042 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5043 pCodeBuf[off++] = RT_BYTE1(uImm);
5044 pCodeBuf[off++] = RT_BYTE2(uImm);
5045 pCodeBuf[off++] = RT_BYTE3(uImm);
5046 pCodeBuf[off++] = RT_BYTE4(uImm);
5047 }
5048
5049#elif defined(RT_ARCH_ARM64)
5050 uint32_t uImmR = 0;
5051 uint32_t uImmNandS = 0;
5052 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5053 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5054 else
5055# ifdef IEM_WITH_THROW_CATCH
5056 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5057# else
5058 AssertReleaseFailedStmt(off = UINT32_MAX);
5059# endif
5060
5061#else
5062# error "Port me"
5063#endif
5064 return off;
5065}
5066
5067
5068/**
5069 * Emits code for OR'ing a 32-bit GPR with a constant.
5070 *
5071 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5072 */
5073DECL_INLINE_THROW(uint32_t)
5074iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5075{
5076#if defined(RT_ARCH_AMD64)
5077 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5078
5079#elif defined(RT_ARCH_ARM64)
5080 uint32_t uImmR = 0;
5081 uint32_t uImmNandS = 0;
5082 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5083 {
5084 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5085 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5086 }
5087 else
5088 {
5089 /* Use temporary register for the 64-bit immediate. */
5090 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5091 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5092 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5093 }
5094
5095#else
5096# error "Port me"
5097#endif
5098 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5099 return off;
5100}
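

/**
 * Typical usage sketch (illustrative; idxRegEfl is a hypothetical
 * placeholder): OR'ing a constant into a register is the natural way of
 * setting individual bits, e.g. forcing ZF in a shadowed EFLAGS value:
 *
 * @code
 *      off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxRegEfl, X86_EFL_ZF);
 * @endcode
 */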
5101
5102
5103
5104/**
5105 * ORs two 64-bit GPRs together, storing the result in a third register.
5106 */
5107DECL_FORCE_INLINE(uint32_t)
5108iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5109{
5110#ifdef RT_ARCH_AMD64
5111 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5112 {
5113 /** @todo consider LEA */
5114 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5115 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5116 }
5117 else
5118 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5119
5120#elif defined(RT_ARCH_ARM64)
5121 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5122
5123#else
5124# error "Port me!"
5125#endif
5126 return off;
5127}
5128
5129
5130
5131/**
5132 * ORs two 32-bit GPRs together, storing the result in a third register.
5133 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5134 */
5135DECL_FORCE_INLINE(uint32_t)
5136iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5137{
5138#ifdef RT_ARCH_AMD64
5139 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5140 {
5141 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5142 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5143 }
5144 else
5145 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5146
5147#elif defined(RT_ARCH_ARM64)
5148 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5149
5150#else
5151# error "Port me!"
5152#endif
5153 return off;
5154}
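

/**
 * Illustrative note on the three-operand forms above: ARM64 has a native
 * three-operand ORR, while on AMD64 the emitters synthesize it with a
 * register move followed by a two-operand OR (unless the destination aliases
 * one of the sources).  The register indexes below are hypothetical
 * placeholders:
 *
 * @code
 *      off = iemNativeEmitGprEqGprOrGprEx(pCodeBuf, off, idxRegDst, idxRegA, idxRegB);
 * @endcode
 */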
5155
5156
5157/**
5158 * Emits code for XOR'ing two 64-bit GPRs.
5159 */
5160DECL_INLINE_THROW(uint32_t)
5161iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5162{
5163#if defined(RT_ARCH_AMD64)
5164    /* xor Gv, Ev */
5165 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5166 pCodeBuf[off++] = 0x33;
5167 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5168
5169#elif defined(RT_ARCH_ARM64)
5170 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5171
5172#else
5173# error "Port me"
5174#endif
5175 return off;
5176}
5177
5178
5179/**
5180 * Emits code for XOR'ing two 64-bit GPRs.
5181 */
5182DECL_INLINE_THROW(uint32_t)
5183iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5184{
5185#if defined(RT_ARCH_AMD64)
5186 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5187#elif defined(RT_ARCH_ARM64)
5188 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5189#else
5190# error "Port me"
5191#endif
5192 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5193 return off;
5194}
5195
5196
5197/**
5198 * Emits code for XOR'ing two 32-bit GPRs.
5199 */
5200DECL_INLINE_THROW(uint32_t)
5201iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5202{
5203#if defined(RT_ARCH_AMD64)
5204    /* xor Gv, Ev */
5205 if (iGprDst >= 8 || iGprSrc >= 8)
5206 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5207 pCodeBuf[off++] = 0x33;
5208 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5209
5210#elif defined(RT_ARCH_ARM64)
5211 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5212
5213#else
5214# error "Port me"
5215#endif
5216 return off;
5217}
5218
5219
5220/**
5221 * Emits code for XOR'ing two 32-bit GPRs.
5222 */
5223DECL_INLINE_THROW(uint32_t)
5224iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5225{
5226#if defined(RT_ARCH_AMD64)
5227 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5228#elif defined(RT_ARCH_ARM64)
5229 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5230#else
5231# error "Port me"
5232#endif
5233 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5234 return off;
5235}
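

/**
 * Illustrative sketch: XOR'ing a register with itself is the classic way of
 * zeroing it and works with the emitters above on both hosts (xor r32,r32 on
 * AMD64, eor w,w,w on ARM64); whether that beats a load-immediate of zero is
 * the caller's choice, this is merely an example:
 *
 * @code
 *      off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxReg, idxReg);
 * @endcode
 */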
5236
5237
5238/**
5239 * Emits code for XOR'ing a 32-bit GPR with a constant.
5240 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5241 * @note For ARM64 this only supports @a uImm values that can be expressed using
5242 * the two 6-bit immediates of the EOR instructions. The caller must make
5243 * sure this is possible!
5244 */
5245DECL_FORCE_INLINE_THROW(uint32_t)
5246iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5247{
5248#if defined(RT_ARCH_AMD64)
5249    /* xor Ev, imm */
5250 if (iGprDst >= 8)
5251 pCodeBuf[off++] = X86_OP_REX_B;
5252 if ((int32_t)uImm == (int8_t)uImm)
5253 {
5254 pCodeBuf[off++] = 0x83;
5255 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5256 pCodeBuf[off++] = (uint8_t)uImm;
5257 }
5258 else
5259 {
5260 pCodeBuf[off++] = 0x81;
5261 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5262 pCodeBuf[off++] = RT_BYTE1(uImm);
5263 pCodeBuf[off++] = RT_BYTE2(uImm);
5264 pCodeBuf[off++] = RT_BYTE3(uImm);
5265 pCodeBuf[off++] = RT_BYTE4(uImm);
5266 }
5267
5268#elif defined(RT_ARCH_ARM64)
5269 uint32_t uImmR = 0;
5270 uint32_t uImmNandS = 0;
5271 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5272 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5273 else
5274# ifdef IEM_WITH_THROW_CATCH
5275 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5276# else
5277 AssertReleaseFailedStmt(off = UINT32_MAX);
5278# endif
5279
5280#else
5281# error "Port me"
5282#endif
5283 return off;
5284}
5285
5286
5287/*********************************************************************************************************************************
5288* Shifting *
5289*********************************************************************************************************************************/
5290
5291/**
5292 * Emits code for shifting a GPR a fixed number of bits to the left.
5293 */
5294DECL_FORCE_INLINE(uint32_t)
5295iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5296{
5297 Assert(cShift > 0 && cShift < 64);
5298
5299#if defined(RT_ARCH_AMD64)
5300 /* shl dst, cShift */
5301 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5302 if (cShift != 1)
5303 {
5304 pCodeBuf[off++] = 0xc1;
5305 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5306 pCodeBuf[off++] = cShift;
5307 }
5308 else
5309 {
5310 pCodeBuf[off++] = 0xd1;
5311 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5312 }
5313
5314#elif defined(RT_ARCH_ARM64)
5315 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5316
5317#else
5318# error "Port me"
5319#endif
5320 return off;
5321}
5322
5323
5324/**
5325 * Emits code for shifting a GPR a fixed number of bits to the left.
5326 */
5327DECL_INLINE_THROW(uint32_t)
5328iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5329{
5330#if defined(RT_ARCH_AMD64)
5331 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5332#elif defined(RT_ARCH_ARM64)
5333 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5334#else
5335# error "Port me"
5336#endif
5337 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5338 return off;
5339}
5340
5341
5342/**
5343 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5344 */
5345DECL_FORCE_INLINE(uint32_t)
5346iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5347{
5348 Assert(cShift > 0 && cShift < 32);
5349
5350#if defined(RT_ARCH_AMD64)
5351 /* shl dst, cShift */
5352 if (iGprDst >= 8)
5353 pCodeBuf[off++] = X86_OP_REX_B;
5354 if (cShift != 1)
5355 {
5356 pCodeBuf[off++] = 0xc1;
5357 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5358 pCodeBuf[off++] = cShift;
5359 }
5360 else
5361 {
5362 pCodeBuf[off++] = 0xd1;
5363 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5364 }
5365
5366#elif defined(RT_ARCH_ARM64)
5367 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5368
5369#else
5370# error "Port me"
5371#endif
5372 return off;
5373}
5374
5375
5376/**
5377 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5378 */
5379DECL_INLINE_THROW(uint32_t)
5380iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5381{
5382#if defined(RT_ARCH_AMD64)
5383 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5384#elif defined(RT_ARCH_ARM64)
5385 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5386#else
5387# error "Port me"
5388#endif
5389 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5390 return off;
5391}
5392
5393
5394/**
5395 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5396 */
5397DECL_FORCE_INLINE(uint32_t)
5398iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5399{
5400 Assert(cShift > 0 && cShift < 64);
5401
5402#if defined(RT_ARCH_AMD64)
5403 /* shr dst, cShift */
5404 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5405 if (cShift != 1)
5406 {
5407 pCodeBuf[off++] = 0xc1;
5408 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5409 pCodeBuf[off++] = cShift;
5410 }
5411 else
5412 {
5413 pCodeBuf[off++] = 0xd1;
5414 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5415 }
5416
5417#elif defined(RT_ARCH_ARM64)
5418 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5419
5420#else
5421# error "Port me"
5422#endif
5423 return off;
5424}
5425
5426
5427/**
5428 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5429 */
5430DECL_INLINE_THROW(uint32_t)
5431iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5432{
5433#if defined(RT_ARCH_AMD64)
5434 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5435#elif defined(RT_ARCH_ARM64)
5436 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5437#else
5438# error "Port me"
5439#endif
5440 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5441 return off;
5442}
5443
5444
5445/**
5446 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5447 * right.
5448 */
5449DECL_FORCE_INLINE(uint32_t)
5450iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5451{
5452 Assert(cShift > 0 && cShift < 32);
5453
5454#if defined(RT_ARCH_AMD64)
5455 /* shr dst, cShift */
5456 if (iGprDst >= 8)
5457 pCodeBuf[off++] = X86_OP_REX_B;
5458 if (cShift != 1)
5459 {
5460 pCodeBuf[off++] = 0xc1;
5461 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5462 pCodeBuf[off++] = cShift;
5463 }
5464 else
5465 {
5466 pCodeBuf[off++] = 0xd1;
5467 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5468 }
5469
5470#elif defined(RT_ARCH_ARM64)
5471 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5472
5473#else
5474# error "Port me"
5475#endif
5476 return off;
5477}
5478
5479
5480/**
5481 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5482 * right.
5483 */
5484DECL_INLINE_THROW(uint32_t)
5485iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5486{
5487#if defined(RT_ARCH_AMD64)
5488 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5489#elif defined(RT_ARCH_ARM64)
5490 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5491#else
5492# error "Port me"
5493#endif
5494 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5495 return off;
5496}
5497
5498
5499/**
5500 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5501 * right and assigning it to a different GPR.
5502 */
5503DECL_INLINE_THROW(uint32_t)
5504iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5505{
5506 Assert(cShift > 0); Assert(cShift < 32);
5507#if defined(RT_ARCH_AMD64)
5508 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5509 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5510
5511#elif defined(RT_ARCH_ARM64)
5512 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5513
5514#else
5515# error "Port me"
5516#endif
5517 return off;
5518}
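

/**
 * Illustrative sketch combining the shift and mask emitters to extract a bit
 * field, here bits 15:8 of a hypothetical source register (the 0xff mask is
 * a valid ARM64 bitmask immediate, so the Ex AND variant is safe to use):
 *
 * @code
 *      off = iemNativeEmitGpr32EqGprShiftRightImmEx(pCodeBuf, off, idxRegDst, idxRegSrc, 8);
 *      off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegDst, 0xff);
 * @endcode
 */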
5519
5520
5521/**
5522 * Emits code for rotating a GPR a fixed number of bits to the left.
5523 */
5524DECL_FORCE_INLINE(uint32_t)
5525iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5526{
5527 Assert(cShift > 0 && cShift < 64);
5528
5529#if defined(RT_ARCH_AMD64)
5530 /* rol dst, cShift */
5531 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5532 if (cShift != 1)
5533 {
5534 pCodeBuf[off++] = 0xc1;
5535 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5536 pCodeBuf[off++] = cShift;
5537 }
5538 else
5539 {
5540 pCodeBuf[off++] = 0xd1;
5541 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5542 }
5543
5544#elif defined(RT_ARCH_ARM64)
5545 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5546
5547#else
5548# error "Port me"
5549#endif
5550 return off;
5551}
5552
5553
5554#if defined(RT_ARCH_AMD64)
5555/**
5556 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5557 */
5558DECL_FORCE_INLINE(uint32_t)
5559iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5560{
5561 Assert(cShift > 0 && cShift < 32);
5562
5563 /* rcl dst, cShift */
5564 if (iGprDst >= 8)
5565 pCodeBuf[off++] = X86_OP_REX_B;
5566 if (cShift != 1)
5567 {
5568 pCodeBuf[off++] = 0xc1;
5569 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5570 pCodeBuf[off++] = cShift;
5571 }
5572 else
5573 {
5574 pCodeBuf[off++] = 0xd1;
5575 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5576 }
5577
5578 return off;
5579}
5580#endif /* RT_ARCH_AMD64 */
5581
5582
5583
5584/**
5585 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5586 * @note ARM64: Bits 63:32 of the destination GPR will be cleared.  AMD64: the 16-bit rotate leaves bits 63:16 unchanged.
5587 */
5588DECL_FORCE_INLINE(uint32_t)
5589iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5590{
5591#if defined(RT_ARCH_AMD64)
5592 /*
5593 * There is no bswap r16 on x86 (the encoding exists but does not work).
5594     * So just use a rol instead (gcc -O2 does the same).
5595 *
5596 * rol r16, 0x8
5597 */
5598 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5599 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5600 if (iGpr >= 8)
5601 pbCodeBuf[off++] = X86_OP_REX_B;
5602 pbCodeBuf[off++] = 0xc1;
5603 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5604 pbCodeBuf[off++] = 0x08;
5605#elif defined(RT_ARCH_ARM64)
5606 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5607
5608 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5609#else
5610# error "Port me"
5611#endif
5612
5613 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5614 return off;
5615}
5616
5617
5618/**
5619 * Emits code for reversing the byte order in a 32-bit GPR.
5620 * @note Bits 63:32 of the destination GPR will be cleared.
5621 */
5622DECL_FORCE_INLINE(uint32_t)
5623iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5624{
5625#if defined(RT_ARCH_AMD64)
5626 /* bswap r32 */
5627 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5628
5629 if (iGpr >= 8)
5630 pbCodeBuf[off++] = X86_OP_REX_B;
5631 pbCodeBuf[off++] = 0x0f;
5632 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5633#elif defined(RT_ARCH_ARM64)
5634 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5635
5636 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5637#else
5638# error "Port me"
5639#endif
5640
5641 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5642 return off;
5643}
5644
5645
5646/**
5647 * Emits code for reversing the byte order in a 64-bit GPR.
5648 */
5649DECL_FORCE_INLINE(uint32_t)
5650iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5651{
5652#if defined(RT_ARCH_AMD64)
5653 /* bswap r64 */
5654 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5655
5656 if (iGpr >= 8)
5657 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5658 else
5659 pbCodeBuf[off++] = X86_OP_REX_W;
5660 pbCodeBuf[off++] = 0x0f;
5661 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5662#elif defined(RT_ARCH_ARM64)
5663 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5664
5665 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5666#else
5667# error "Port me"
5668#endif
5669
5670 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5671 return off;
5672}
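

/**
 * Illustrative usage sketch: converting the byte order of a 64-bit value,
 * e.g. when emulating a byte-swapping load.  A single call covers both hosts
 * (bswap r64 on AMD64, rev x,x on ARM64); idxRegValue is a hypothetical
 * placeholder:
 *
 * @code
 *      off = iemNativeEmitBswapGpr(pReNative, off, idxRegValue);
 * @endcode
 */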
5673
5674
5675/*********************************************************************************************************************************
5676* Compare and Testing *
5677*********************************************************************************************************************************/
5678
5679
5680#ifdef RT_ARCH_ARM64
5681/**
5682 * Emits an ARM64 compare instruction.
5683 */
5684DECL_INLINE_THROW(uint32_t)
5685iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5686 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5687{
5688 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5689 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5690 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5691 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5692 return off;
5693}
5694#endif
5695
5696
5697/**
5698 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5699 * with conditional instruction.
5700 */
5701DECL_FORCE_INLINE(uint32_t)
5702iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5703{
5704#ifdef RT_ARCH_AMD64
5705 /* cmp Gv, Ev */
5706 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5707 pCodeBuf[off++] = 0x3b;
5708 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5709
5710#elif defined(RT_ARCH_ARM64)
5711 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
5712
5713#else
5714# error "Port me!"
5715#endif
5716 return off;
5717}
5718
5719
5720/**
5721 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5722 * with conditional instruction.
5723 */
5724DECL_INLINE_THROW(uint32_t)
5725iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5726{
5727#ifdef RT_ARCH_AMD64
5728 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5729#elif defined(RT_ARCH_ARM64)
5730 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5731#else
5732# error "Port me!"
5733#endif
5734 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5735 return off;
5736}
5737
5738
5739/**
5740 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5741 * with conditional instruction.
5742 */
5743DECL_FORCE_INLINE(uint32_t)
5744iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5745{
5746#ifdef RT_ARCH_AMD64
5747 /* cmp Gv, Ev */
5748 if (iGprLeft >= 8 || iGprRight >= 8)
5749 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5750 pCodeBuf[off++] = 0x3b;
5751 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5752
5753#elif defined(RT_ARCH_ARM64)
5754 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
5755
5756#else
5757# error "Port me!"
5758#endif
5759 return off;
5760}
5761
5762
5763/**
5764 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5765 * with conditional instruction.
5766 */
5767DECL_INLINE_THROW(uint32_t)
5768iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5769{
5770#ifdef RT_ARCH_AMD64
5771 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5772#elif defined(RT_ARCH_ARM64)
5773 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5774#else
5775# error "Port me!"
5776#endif
5777 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5778 return off;
5779}
5780
5781
5782/**
5783 * Emits a compare of a 64-bit GPR with a constant value, setting status
5784 * flags/whatever for use with conditional instruction.
5785 */
5786DECL_INLINE_THROW(uint32_t)
5787iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
5788{
5789#ifdef RT_ARCH_AMD64
5790    if (uImm <= UINT32_C(0x7f))
5791 {
5792 /* cmp Ev, Ib */
5793 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5794 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5795 pbCodeBuf[off++] = 0x83;
5796 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5797 pbCodeBuf[off++] = (uint8_t)uImm;
5798 }
5799 else if ((int64_t)uImm == (int32_t)uImm)
5800 {
5801 /* cmp Ev, imm */
5802 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5803 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5804 pbCodeBuf[off++] = 0x81;
5805 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5806 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5807 pbCodeBuf[off++] = RT_BYTE1(uImm);
5808 pbCodeBuf[off++] = RT_BYTE2(uImm);
5809 pbCodeBuf[off++] = RT_BYTE3(uImm);
5810 pbCodeBuf[off++] = RT_BYTE4(uImm);
5811 }
5812 else
5813 {
5814 /* Use temporary register for the immediate. */
5815 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5816 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5817 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5818 }
5819
5820#elif defined(RT_ARCH_ARM64)
5821    /** @todo guess there are clever things we can do here... */
5822 if (uImm < _4K)
5823 {
5824 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5825 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5826 true /*64Bit*/, true /*fSetFlags*/);
5827 }
5828 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5829 {
5830 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5831 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5832 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5833 }
5834 else
5835 {
5836 /* Use temporary register for the immediate. */
5837 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5838 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5839 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5840 }
5841
5842#else
5843# error "Port me!"
5844#endif
5845
5846 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5847 return off;
5848}
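

/**
 * Illustrative usage sketch: the compare emitters only set the host status
 * flags; a conditional branch helper from the branching section below
 * typically consumes them.  The register and label names are hypothetical
 * placeholders:
 *
 * @code
 *      off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegOffset, 0xfff);
 *      off = iemNativeEmitJbeToLabel(pReNative, off, idxLabelWithinPage);
 * @endcode
 */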
5849
5850
5851/**
5852 * Emits a compare of a 32-bit GPR with a constant value, setting status
5853 * flags/whatever for use with conditional instruction.
5854 *
5855 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5856 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
5857 * bits all zero). Will release assert or throw exception if the caller
5858 * violates this restriction.
5859 */
5860DECL_FORCE_INLINE_THROW(uint32_t)
5861iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5862{
5863#ifdef RT_ARCH_AMD64
5864 if (iGprLeft >= 8)
5865 pCodeBuf[off++] = X86_OP_REX_B;
5866 if (uImm <= UINT32_C(0x7f))
5867 {
5868 /* cmp Ev, Ib */
5869 pCodeBuf[off++] = 0x83;
5870 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5871 pCodeBuf[off++] = (uint8_t)uImm;
5872 }
5873 else
5874 {
5875 /* cmp Ev, imm */
5876 pCodeBuf[off++] = 0x81;
5877 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5878 pCodeBuf[off++] = RT_BYTE1(uImm);
5879 pCodeBuf[off++] = RT_BYTE2(uImm);
5880 pCodeBuf[off++] = RT_BYTE3(uImm);
5881 pCodeBuf[off++] = RT_BYTE4(uImm);
5882 }
5883
5884#elif defined(RT_ARCH_ARM64)
5885    /** @todo guess there are clever things we can do here... */
5886 if (uImm < _4K)
5887 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5888 false /*64Bit*/, true /*fSetFlags*/);
5889 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5890        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5891 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5892 else
5893# ifdef IEM_WITH_THROW_CATCH
5894 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5895# else
5896 AssertReleaseFailedStmt(off = UINT32_MAX);
5897# endif
5898
5899#else
5900# error "Port me!"
5901#endif
5902 return off;
5903}
5904
5905
5906/**
5907 * Emits a compare of a 32-bit GPR with a constant value, setting status
5908 * flags/whatever for use with conditional instruction.
5909 */
5910DECL_INLINE_THROW(uint32_t)
5911iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5912{
5913#ifdef RT_ARCH_AMD64
5914 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
5915
5916#elif defined(RT_ARCH_ARM64)
5917    /** @todo guess there are clever things we can do here... */
5918 if (uImm < _4K)
5919 {
5920 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5921 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5922 false /*64Bit*/, true /*fSetFlags*/);
5923 }
5924 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5925 {
5926 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5927        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5928 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5929 }
5930 else
5931 {
5932 /* Use temporary register for the immediate. */
5933 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5934 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
5935 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5936 }
5937
5938#else
5939# error "Port me!"
5940#endif
5941
5942 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5943 return off;
5944}
5945
5946
5947/**
5948 * Emits a compare of a 16-bit GPR with a constant value, setting status
5949 * flags/whatever for use with conditional instruction.
5950 *
5951 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
5952 *       16-bit value from @a iGprLeft.
5953 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5954 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
5955 * bits all zero). Will release assert or throw exception if the caller
5956 * violates this restriction.
5957 */
5958DECL_FORCE_INLINE_THROW(uint32_t)
5959iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
5960 uint8_t idxTmpReg = UINT8_MAX)
5961{
5962#ifdef RT_ARCH_AMD64
5963 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5964 if (iGprLeft >= 8)
5965 pCodeBuf[off++] = X86_OP_REX_B;
5966 if (uImm <= UINT32_C(0x7f))
5967 {
5968 /* cmp Ev, Ib */
5969 pCodeBuf[off++] = 0x83;
5970 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5971 pCodeBuf[off++] = (uint8_t)uImm;
5972 }
5973 else
5974 {
5975 /* cmp Ev, imm */
5976 pCodeBuf[off++] = 0x81;
5977 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5978 pCodeBuf[off++] = RT_BYTE1(uImm);
5979 pCodeBuf[off++] = RT_BYTE2(uImm);
5980 }
5981 RT_NOREF(idxTmpReg);
5982
5983#elif defined(RT_ARCH_ARM64)
5984# ifdef IEM_WITH_THROW_CATCH
5985 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5986# else
5987 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
5988# endif
5989 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
5990 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
5991 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
5992
5993#else
5994# error "Port me!"
5995#endif
5996 return off;
5997}
5998
5999
6000/**
6001 * Emits a compare of a 16-bit GPR with a constant value, setting status
6002 * flags/whatever for use with conditional instruction.
6003 *
6004 * @note ARM64: Helper register is required (idxTmpReg).
6005 */
6006DECL_INLINE_THROW(uint32_t)
6007iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6008 uint8_t idxTmpReg = UINT8_MAX)
6009{
6010#ifdef RT_ARCH_AMD64
6011 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6012#elif defined(RT_ARCH_ARM64)
6013 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6014#else
6015# error "Port me!"
6016#endif
6017 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6018 return off;
6019}
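

/**
 * Illustrative usage sketch (hedged: assumes the iemNativeRegAllocTmp /
 * iemNativeRegFreeTmp helpers declared in IEMN8veRecompiler.h): on ARM64 the
 * 16-bit compare needs a scratch register for isolating the low 16 bits, so
 * a caller would typically bracket the call with a temporary allocation:
 *
 * @code
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitCmpGpr16WithImm(pReNative, off, idxRegIp, uNewIp, idxTmpReg);
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 * @endcode
 */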
6020
6021
6022
6023/*********************************************************************************************************************************
6024* Branching *
6025*********************************************************************************************************************************/
6026
6027/**
6028 * Emits a JMP rel32 / B imm26 to the given label.
6029 */
6030DECL_FORCE_INLINE_THROW(uint32_t)
6031iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6032{
6033 Assert(idxLabel < pReNative->cLabels);
6034
6035#ifdef RT_ARCH_AMD64
6036 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6037 {
6038 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6039 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6040 {
6041 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6042 pCodeBuf[off++] = (uint8_t)offRel;
6043 }
6044 else
6045 {
6046 offRel -= 3;
6047 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6048 pCodeBuf[off++] = RT_BYTE1(offRel);
6049 pCodeBuf[off++] = RT_BYTE2(offRel);
6050 pCodeBuf[off++] = RT_BYTE3(offRel);
6051 pCodeBuf[off++] = RT_BYTE4(offRel);
6052 }
6053 }
6054 else
6055 {
6056 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6057 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6058 pCodeBuf[off++] = 0xfe;
6059 pCodeBuf[off++] = 0xff;
6060 pCodeBuf[off++] = 0xff;
6061 pCodeBuf[off++] = 0xff;
6062 }
6063 pCodeBuf[off++] = 0xcc; /* int3 poison */
6064
6065#elif defined(RT_ARCH_ARM64)
6066 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6067 pCodeBuf[off++] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6068 else
6069 {
6070 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6071 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6072 }
6073
6074#else
6075# error "Port me!"
6076#endif
6077 return off;
6078}
6079
6080
6081/**
6082 * Emits a JMP rel32 / B imm26 to the given label.
6083 */
6084DECL_INLINE_THROW(uint32_t)
6085iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6086{
6087#ifdef RT_ARCH_AMD64
6088 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6089#elif defined(RT_ARCH_ARM64)
6090 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6091#else
6092# error "Port me!"
6093#endif
6094 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6095 return off;
6096}
6097
6098
6099/**
6100 * Emits a JMP rel32 / B imm26 to a new undefined label.
6101 */
6102DECL_INLINE_THROW(uint32_t)
6103iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6104{
6105 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6106 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6107}
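

/**
 * Illustrative sketch of the label round trip (hedged: assumes the
 * iemNativeLabelDefine helper from IEMN8veRecompiler.h and an existing label
 * type in enmLabelType): create an undefined label, jump to it, and define it
 * once the target offset is known so the fixup recorded by the jump gets
 * resolved:
 *
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX);
 *      off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
 *      // ... emit the code being jumped over ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode
 */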
6108
6109/** Condition type. */
6110#ifdef RT_ARCH_AMD64
6111typedef enum IEMNATIVEINSTRCOND : uint8_t
6112{
6113 kIemNativeInstrCond_o = 0,
6114 kIemNativeInstrCond_no,
6115 kIemNativeInstrCond_c,
6116 kIemNativeInstrCond_nc,
6117 kIemNativeInstrCond_e,
6118 kIemNativeInstrCond_ne,
6119 kIemNativeInstrCond_be,
6120 kIemNativeInstrCond_nbe,
6121 kIemNativeInstrCond_s,
6122 kIemNativeInstrCond_ns,
6123 kIemNativeInstrCond_p,
6124 kIemNativeInstrCond_np,
6125 kIemNativeInstrCond_l,
6126 kIemNativeInstrCond_nl,
6127 kIemNativeInstrCond_le,
6128 kIemNativeInstrCond_nle
6129} IEMNATIVEINSTRCOND;
6130#elif defined(RT_ARCH_ARM64)
6131typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6132# define kIemNativeInstrCond_o todo_conditional_codes
6133# define kIemNativeInstrCond_no todo_conditional_codes
6134# define kIemNativeInstrCond_c todo_conditional_codes
6135# define kIemNativeInstrCond_nc todo_conditional_codes
6136# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6137# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6138# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6139# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6140# define kIemNativeInstrCond_s todo_conditional_codes
6141# define kIemNativeInstrCond_ns todo_conditional_codes
6142# define kIemNativeInstrCond_p todo_conditional_codes
6143# define kIemNativeInstrCond_np todo_conditional_codes
6144# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6145# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6146# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6147# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6148#else
6149# error "Port me!"
6150#endif
6151
6152
6153/**
6154 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6155 */
6156DECL_FORCE_INLINE_THROW(uint32_t)
6157iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6158 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6159{
6160 Assert(idxLabel < pReNative->cLabels);
6161
6162 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6163#ifdef RT_ARCH_AMD64
6164 if (offLabel >= off)
6165 {
6166 /* jcc rel32 */
6167 pCodeBuf[off++] = 0x0f;
6168 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6169 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6170 pCodeBuf[off++] = 0x00;
6171 pCodeBuf[off++] = 0x00;
6172 pCodeBuf[off++] = 0x00;
6173 pCodeBuf[off++] = 0x00;
6174 }
6175 else
6176 {
6177 int32_t offDisp = offLabel - (off + 2);
6178 if ((int8_t)offDisp == offDisp)
6179 {
6180 /* jcc rel8 */
6181 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6182 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6183 }
6184 else
6185 {
6186 /* jcc rel32 */
6187 offDisp -= 4;
6188 pCodeBuf[off++] = 0x0f;
6189 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6190 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6191 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6192 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6193 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6194 }
6195 }
6196
6197#elif defined(RT_ARCH_ARM64)
6198 if (offLabel >= off)
6199 {
6200 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6201 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6202 }
6203 else
6204 {
6205 Assert(off - offLabel <= 0x3ffffU);
6206 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6207 }
6208
6209#else
6210# error "Port me!"
6211#endif
6212 return off;
6213}
6214
6215
6216/**
6217 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6218 */
6219DECL_INLINE_THROW(uint32_t)
6220iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6221{
6222#ifdef RT_ARCH_AMD64
6223 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6224#elif defined(RT_ARCH_ARM64)
6225 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6226#else
6227# error "Port me!"
6228#endif
6229 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6230 return off;
6231}
6232
6233
6234/**
6235 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6236 */
6237DECL_INLINE_THROW(uint32_t)
6238iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6239 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6240{
6241 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6242 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6243}
6244
6245
6246/**
6247 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6248 */
6249DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6250{
6251#ifdef RT_ARCH_AMD64
6252 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6253#elif defined(RT_ARCH_ARM64)
6254 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6255#else
6256# error "Port me!"
6257#endif
6258}
6259
6260/**
6261 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6262 */
6263DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6264 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6265{
6266#ifdef RT_ARCH_AMD64
6267 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6268#elif defined(RT_ARCH_ARM64)
6269 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6270#else
6271# error "Port me!"
6272#endif
6273}
6274
6275
6276/**
6277 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6278 */
6279DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6280{
6281#ifdef RT_ARCH_AMD64
6282 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6283#elif defined(RT_ARCH_ARM64)
6284 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6285#else
6286# error "Port me!"
6287#endif
6288}
6289
6290/**
6291 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6292 */
6293DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6294 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6295{
6296#ifdef RT_ARCH_AMD64
6297 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6298#elif defined(RT_ARCH_ARM64)
6299 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6300#else
6301# error "Port me!"
6302#endif
6303}
6304
6305
6306/**
6307 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6308 */
6309DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6310{
6311#ifdef RT_ARCH_AMD64
6312 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6313#elif defined(RT_ARCH_ARM64)
6314 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6315#else
6316# error "Port me!"
6317#endif
6318}
6319
6320/**
6321 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6322 */
6323DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6324 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6325{
6326#ifdef RT_ARCH_AMD64
6327 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6328#elif defined(RT_ARCH_ARM64)
6329 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6330#else
6331# error "Port me!"
6332#endif
6333}
6334
6335
6336/**
6337 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6338 */
6339DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6340{
6341#ifdef RT_ARCH_AMD64
6342 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6343#elif defined(RT_ARCH_ARM64)
6344 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6345#else
6346# error "Port me!"
6347#endif
6348}
6349
6350/**
6351 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6352 */
6353DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6354 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6355{
6356#ifdef RT_ARCH_AMD64
6357 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6358#elif defined(RT_ARCH_ARM64)
6359 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6360#else
6361# error "Port me!"
6362#endif
6363}
6364
6365
6366/**
6367 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6368 */
6369DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6370{
6371#ifdef RT_ARCH_AMD64
6372 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6373#elif defined(RT_ARCH_ARM64)
6374 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6375#else
6376# error "Port me!"
6377#endif
6378}
6379
6380/**
6381 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6382 */
6383DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6384 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6385{
6386#ifdef RT_ARCH_AMD64
6387 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6388#elif defined(RT_ARCH_ARM64)
6389 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6390#else
6391# error "Port me!"
6392#endif
6393}
6394
6395
6396/**
6397 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6398 *
6399 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6400 *
6401 * Only use hardcoded jumps forward when emitting for exactly one
6402 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6403 * the right target address on all platforms!
6404 *
6405 * Please also note that on x86 it is necessary to pass off + 256 or higher
6406 * for @a offTarget if one believes the intervening code is more than 127
6407 * bytes long.
6408 */
6409DECL_FORCE_INLINE(uint32_t)
6410iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6411{
6412#ifdef RT_ARCH_AMD64
6413 /* jcc rel8 / rel32 */
6414 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6415 if (offDisp < 128 && offDisp >= -128)
6416 {
6417 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6418 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6419 }
6420 else
6421 {
6422 offDisp -= 4;
6423 pCodeBuf[off++] = 0x0f;
6424 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6425 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6426 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6427 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6428 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6429 }
6430
6431#elif defined(RT_ARCH_ARM64)
6432 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6433
6434#else
6435# error "Port me!"
6436#endif
6437 return off;
6438}
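
/* Worked displacement example for the AMD64 path above (assumed offsets): a
   forward jump from off = 10 to offTarget = 300 gives
   offDisp = 300 - (10 + 2) = 288, which does not fit in rel8, so the 6-byte
   0F 8x rel32 form is used with rel32 = 288 - 4 = 284, i.e. the distance from
   the end of the 6-byte instruction (off + 6 = 16) to offTarget. */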
6439
6440
6441/**
6442 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6443 *
6444 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6445 *
6446 * Only use hardcoded jumps forward when emitting for exactly one
6447 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6448 * the right target address on all platforms!
6449 *
6450 * Please also note that on x86 it is necessary to pass off + 256 or higher
6451 * for @a offTarget if one believes the intervening code is more than 127
6452 * bytes long.
6453 */
6454DECL_INLINE_THROW(uint32_t)
6455iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6456{
6457#ifdef RT_ARCH_AMD64
6458 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6459#elif defined(RT_ARCH_ARM64)
6460 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6461#else
6462# error "Port me!"
6463#endif
6464 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6465 return off;
6466}
6467
6468
6469/**
6470 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6471 *
6472 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6473 */
6474DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6475{
6476#ifdef RT_ARCH_AMD64
6477 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6478#elif defined(RT_ARCH_ARM64)
6479 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6480#else
6481# error "Port me!"
6482#endif
6483}
6484
6485
6486/**
6487 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6488 *
6489 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6490 */
6491DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6492{
6493#ifdef RT_ARCH_AMD64
6494 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6495#elif defined(RT_ARCH_ARM64)
6496 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6497#else
6498# error "Port me!"
6499#endif
6500}
6501
6502
6503/**
6504 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6505 *
6506 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6507 */
6508DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6509{
6510#ifdef RT_ARCH_AMD64
6511 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6512#elif defined(RT_ARCH_ARM64)
6513 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6514#else
6515# error "Port me!"
6516#endif
6517}
6518
6519
6520/**
6521 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6522 *
6523 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6524 */
6525DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6526{
6527#ifdef RT_ARCH_AMD64
6528 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6529#elif defined(RT_ARCH_ARM64)
6530 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6531#else
6532# error "Port me!"
6533#endif
6534}
6535
6536
6537/**
6538 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6539 *
6540 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6541 */
6542DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6543{
6544#ifdef RT_ARCH_AMD64
6545 /* jmp rel8 or rel32 */
6546 int32_t offDisp = offTarget - (off + 2);
6547 if (offDisp < 128 && offDisp >= -128)
6548 {
6549 pCodeBuf[off++] = 0xeb;
6550 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6551 }
6552 else
6553 {
6554 offDisp -= 3;
6555 pCodeBuf[off++] = 0xe9;
6556 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6557 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6558 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6559 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6560 }
6561
6562#elif defined(RT_ARCH_ARM64)
6563 pCodeBuf[off++] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6564
6565#else
6566# error "Port me!"
6567#endif
6568 return off;
6569}
6570
6571
6572/**
6573 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6574 *
6575 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6576 */
6577DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6578{
6579#ifdef RT_ARCH_AMD64
6580 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6581#elif defined(RT_ARCH_ARM64)
6582 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6583#else
6584# error "Port me!"
6585#endif
6586 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6587 return off;
6588}
6589
6590
6591/**
6592 * Fixes up a conditional jump to a fixed label.
6593 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6594 * iemNativeEmitJzToFixed, ...
6595 */
6596DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6597{
6598#ifdef RT_ARCH_AMD64
6599 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6600 uint8_t const bOpcode = pbCodeBuf[offFixup];
6601 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6602 {
6603 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6604 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6605 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6606 }
6607 else
6608 {
6609 if (bOpcode != 0x0f)
6610 Assert(bOpcode == 0xe9);
6611 else
6612 {
6613 offFixup += 1;
6614 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
6615 }
6616 uint32_t const offRel32 = offTarget - (offFixup + 5);
6617 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6618 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6619 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6620 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6621 }
6622
6623#elif defined(RT_ARCH_ARM64)
6624 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6625 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6626 {
6627 /* B.COND + BC.COND */
6628 int32_t const offDisp = offTarget - offFixup;
6629 Assert(offDisp >= -262144 && offDisp < 262144);
6630 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6631 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6632 }
6633 else
6634 {
6635 /* B imm26 */
6636 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6637 int32_t const offDisp = offTarget - offFixup;
6638 Assert(offDisp >= -33554432 && offDisp < 33554432);
6639 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6640 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6641 }
6642
6643#else
6644# error "Port me!"
6645#endif
6646}
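
/* Usage sketch (illustrative only): emit a forward jump with a provisional
   target, then patch it once the real target is known.  Per the notes on
   iemNativeEmitJccToFixed(), off + 256 is passed as the provisional target to
   force the rel32 form on x86 in case more than 127 bytes get skipped. */
#if 0
    uint32_t const offFixup = off;
    off = iemNativeEmitJzToFixed(pReNative, off, off + 256 /*provisional target*/);
    /* ... emit the code being jumped over ... */
    iemNativeFixupFixedJump(pReNative, offFixup, off /*the real target*/);
#endif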
6647
6648
6649#ifdef RT_ARCH_AMD64
6650/**
6651 * For doing bt on a register.
6652 */
6653DECL_INLINE_THROW(uint32_t)
6654iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6655{
6656 Assert(iBitNo < 64);
6657 /* bt Ev, imm8 */
6658 if (iBitNo >= 32)
6659 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6660 else if (iGprSrc >= 8)
6661 pCodeBuf[off++] = X86_OP_REX_B;
6662 pCodeBuf[off++] = 0x0f;
6663 pCodeBuf[off++] = 0xba;
6664 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6665 pCodeBuf[off++] = iBitNo;
6666 return off;
6667}
6668#endif /* RT_ARCH_AMD64 */
6669
6670
6671/**
6672 * Internal helper, don't call directly.
6673 */
6674DECL_INLINE_THROW(uint32_t)
6675iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6676 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6677{
6678 Assert(iBitNo < 64);
6679#ifdef RT_ARCH_AMD64
6680 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6681 if (iBitNo < 8)
6682 {
6683 /* test Eb, imm8 */
6684 if (iGprSrc >= 4)
6685 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6686 pbCodeBuf[off++] = 0xf6;
6687 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6688 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6689 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6690 }
6691 else
6692 {
6693 /* bt Ev, imm8 */
6694 if (iBitNo >= 32)
6695 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6696 else if (iGprSrc >= 8)
6697 pbCodeBuf[off++] = X86_OP_REX_B;
6698 pbCodeBuf[off++] = 0x0f;
6699 pbCodeBuf[off++] = 0xba;
6700 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6701 pbCodeBuf[off++] = iBitNo;
6702 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6703 }
6704
6705#elif defined(RT_ARCH_ARM64)
6706 /* Use the TBNZ instruction here. */
6707 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6708 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
6709 {
6710 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
6711 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
6712 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
6713 //if (offLabel == UINT32_MAX)
6714 {
6715 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6716 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6717 }
6718 //else
6719 //{
6720 // RT_BREAKPOINT();
6721 // Assert(off - offLabel <= 0x1fffU);
6722 // pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
6723 //
6724 //}
6725 }
6726 else
6727 {
6728 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
6729 pu32CodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
6730 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6731 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
6732 }
6733
6734#else
6735# error "Port me!"
6736#endif
6737 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6738 return off;
6739}
6740
6741
6742/**
6743 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
6744 * @a iGprSrc.
6745 *
6746 * @note On ARM64 the range is only +/-8191 instructions.
6747 */
6748DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6749 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6750{
6751 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
6752}
6753
6754
6755/**
6756 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
6757 * _set_ in @a iGprSrc.
6758 *
6759 * @note On ARM64 the range is only +/-8191 instructions.
6760 */
6761DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6762 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6763{
6764 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
6765}
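
/* Usage sketch (illustrative only): branch to an assumed label when the
   EFLAGS.IF bit is set in a register holding the guest EFLAGS. */
#if 0
    off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxRegEfl /*assumed*/,
                                                      X86_EFL_IF_BIT, idxLabelIfSet /*assumed*/);
#endif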
6766
6767
6768/**
6769 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
6770 * flags accordingly.
6771 */
6772DECL_INLINE_THROW(uint32_t)
6773iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
6774{
6775 Assert(fBits != 0);
6776#ifdef RT_ARCH_AMD64
6777
6778 if (fBits >= UINT32_MAX)
6779 {
6780 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6781
6782 /* test Ev,Gv */
6783 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6784 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
6785 pbCodeBuf[off++] = 0x85;
6786        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
6787
6788 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6789 }
6790 else if (fBits <= UINT32_MAX)
6791 {
6792 /* test Eb, imm8 or test Ev, imm32 */
6793 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6794 if (fBits <= UINT8_MAX)
6795 {
6796 if (iGprSrc >= 4)
6797 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6798 pbCodeBuf[off++] = 0xf6;
6799 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6800 pbCodeBuf[off++] = (uint8_t)fBits;
6801 }
6802 else
6803 {
6804 if (iGprSrc >= 8)
6805 pbCodeBuf[off++] = X86_OP_REX_B;
6806 pbCodeBuf[off++] = 0xf7;
6807 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6808 pbCodeBuf[off++] = RT_BYTE1(fBits);
6809 pbCodeBuf[off++] = RT_BYTE2(fBits);
6810 pbCodeBuf[off++] = RT_BYTE3(fBits);
6811 pbCodeBuf[off++] = RT_BYTE4(fBits);
6812 }
6813 }
6814 /** @todo implement me. */
6815 else
6816 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
6817
6818#elif defined(RT_ARCH_ARM64)
6819 uint32_t uImmR = 0;
6820 uint32_t uImmNandS = 0;
6821 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
6822 {
6823 /* ands xzr, iGprSrc, #fBits */
6824 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6825 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
6826 }
6827 else
6828 {
6829 /* ands xzr, iGprSrc, iTmpReg */
6830 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6831 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6832 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
6833 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6834 }
6835
6836#else
6837# error "Port me!"
6838#endif
6839 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6840 return off;
6841}
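
/* Usage sketch (illustrative only): set the host flags according to whether any
   of the given bits are set and branch on the outcome; register and label
   indexes are assumed. */
#if 0
    off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegEfl /*assumed*/, X86_EFL_CF | X86_EFL_ZF);
    off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelAnySet /*assumed*/);
#endif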
6842
6843
6844/**
6845 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
6846 * @a iGprSrc, setting CPU flags accordingly.
6847 *
6848 * @note For ARM64 this only supports @a fBits values that can be expressed
6849 * using the two 6-bit immediates of the ANDS instruction. The caller
6850 * must make sure this is possible!
6851 */
6852DECL_FORCE_INLINE_THROW(uint32_t)
6853iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
6854{
6855 Assert(fBits != 0);
6856
6857#ifdef RT_ARCH_AMD64
6858 if (fBits <= UINT8_MAX)
6859 {
6860 /* test Eb, imm8 */
6861 if (iGprSrc >= 4)
6862 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6863 pCodeBuf[off++] = 0xf6;
6864 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6865 pCodeBuf[off++] = (uint8_t)fBits;
6866 }
6867 else
6868 {
6869 /* test Ev, imm32 */
6870 if (iGprSrc >= 8)
6871 pCodeBuf[off++] = X86_OP_REX_B;
6872 pCodeBuf[off++] = 0xf7;
6873 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6874 pCodeBuf[off++] = RT_BYTE1(fBits);
6875 pCodeBuf[off++] = RT_BYTE2(fBits);
6876 pCodeBuf[off++] = RT_BYTE3(fBits);
6877 pCodeBuf[off++] = RT_BYTE4(fBits);
6878 }
6879
6880#elif defined(RT_ARCH_ARM64)
6881 /* ands xzr, src, #fBits */
6882 uint32_t uImmR = 0;
6883 uint32_t uImmNandS = 0;
6884 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6885 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6886 else
6887# ifdef IEM_WITH_THROW_CATCH
6888 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6889# else
6890 AssertReleaseFailedStmt(off = UINT32_MAX);
6891# endif
6892
6893#else
6894# error "Port me!"
6895#endif
6896 return off;
6897}
6898
6899
6900
6901/**
6902 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6903 * @a iGprSrc, setting CPU flags accordingly.
6904 *
6905 * @note For ARM64 this only supports @a fBits values that can be expressed
6906 * using the two 6-bit immediates of the ANDS instruction. The caller
6907 * must make sure this is possible!
6908 */
6909DECL_FORCE_INLINE_THROW(uint32_t)
6910iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6911{
6912 Assert(fBits != 0);
6913
6914#ifdef RT_ARCH_AMD64
6915 /* test Eb, imm8 */
6916 if (iGprSrc >= 4)
6917 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6918 pCodeBuf[off++] = 0xf6;
6919 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6920 pCodeBuf[off++] = fBits;
6921
6922#elif defined(RT_ARCH_ARM64)
6923 /* ands xzr, src, #fBits */
6924 uint32_t uImmR = 0;
6925 uint32_t uImmNandS = 0;
6926 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6927 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6928 else
6929# ifdef IEM_WITH_THROW_CATCH
6930 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6931# else
6932 AssertReleaseFailedStmt(off = UINT32_MAX);
6933# endif
6934
6935#else
6936# error "Port me!"
6937#endif
6938 return off;
6939}
6940
6941
6942/**
6943 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6944 * @a iGprSrc, setting CPU flags accordingly.
6945 */
6946DECL_INLINE_THROW(uint32_t)
6947iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6948{
6949 Assert(fBits != 0);
6950
6951#ifdef RT_ARCH_AMD64
6952 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
6953
6954#elif defined(RT_ARCH_ARM64)
6955 /* ands xzr, src, [tmp|#imm] */
6956 uint32_t uImmR = 0;
6957 uint32_t uImmNandS = 0;
6958 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6959 {
6960 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6961 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6962 }
6963 else
6964 {
6965 /* Use temporary register for the 64-bit immediate. */
6966 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6967 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6968 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
6969 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6970 }
6971
6972#else
6973# error "Port me!"
6974#endif
6975 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6976 return off;
6977}
6978
6979
6980/**
6981 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
6982 * are set in @a iGprSrc.
6983 */
6984DECL_INLINE_THROW(uint32_t)
6985iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6986 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
6987{
6988 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
6989
6990 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
6991 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6992
6993 return off;
6994}
6995
6996
6997/**
6998 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
6999 * are set in @a iGprSrc.
7000 */
7001DECL_INLINE_THROW(uint32_t)
7002iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7003 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7004{
7005 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7006
7007 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7008 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7009
7010 return off;
7011}
7012
7013
7014/**
7015 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7016 *
7017 * The operand size is given by @a f64Bit.
7018 */
7019DECL_FORCE_INLINE_THROW(uint32_t)
7020iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7021 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7022{
7023 Assert(idxLabel < pReNative->cLabels);
7024
7025#ifdef RT_ARCH_AMD64
7026 /* test reg32,reg32 / test reg64,reg64 */
7027 if (f64Bit)
7028 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7029 else if (iGprSrc >= 8)
7030 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7031 pCodeBuf[off++] = 0x85;
7032 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7033
7034 /* jnz idxLabel */
7035 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7036 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7037
7038#elif defined(RT_ARCH_ARM64)
7039 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7040 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7041 iGprSrc, f64Bit);
7042 else
7043 {
7044 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7045 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7046 }
7047
7048#else
7049# error "Port me!"
7050#endif
7051 return off;
7052}
7053
7054
7055/**
7056 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7057 *
7058 * The operand size is given by @a f64Bit.
7059 */
7060DECL_FORCE_INLINE_THROW(uint32_t)
7061iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7062 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7063{
7064#ifdef RT_ARCH_AMD64
7065 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7066 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7067#elif defined(RT_ARCH_ARM64)
7068 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7069 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7070#else
7071# error "Port me!"
7072#endif
7073 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7074 return off;
7075}
7076
7077
7078/* if (Gpr1 == 0) Jmp idxLabel; */
7079
7080/**
7081 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7082 *
7083 * The operand size is given by @a f64Bit.
7084 */
7085DECL_FORCE_INLINE_THROW(uint32_t)
7086iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7087 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7088{
7089 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7090 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7091}
7092
7093
7094/**
7095 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7096 *
7097 * The operand size is given by @a f64Bit.
7098 */
7099DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7100 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7101{
7102 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7103}
7104
7105
7106/**
7107 * Emits code that jumps to a new label if @a iGprSrc is zero.
7108 *
7109 * The operand size is given by @a f64Bit.
7110 */
7111DECL_INLINE_THROW(uint32_t)
7112iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7113 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7114{
7115 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7116 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7117}
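
/* Usage sketch (illustrative only): bail out to a freshly created label when an
   assumed 64-bit register is zero. */
#if 0
    off = iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(pReNative, off, idxRegPtr /*assumed*/, true /*f64Bit*/,
                                                       kIemNativeLabelType_ReturnBreak /*assumed*/);
#endif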
7118
7119
7120/* if (Gpr1 != 0) Jmp idxLabel; */
7121
7122/**
7123 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7124 *
7125 * The operand size is given by @a f64Bit.
7126 */
7127DECL_FORCE_INLINE_THROW(uint32_t)
7128iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7129 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7130{
7131 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7132 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7133}
7134
7135
7136/**
7137 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7138 *
7139 * The operand size is given by @a f64Bit.
7140 */
7141DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7142 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7143{
7144 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7145}
7146
7147
7148/**
7149 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7150 *
7151 * The operand size is given by @a f64Bit.
7152 */
7153DECL_INLINE_THROW(uint32_t)
7154iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7155 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7156{
7157 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7158 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7159}
7160
7161
7162/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7163
7164/**
7165 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7166 * differs.
7167 */
7168DECL_INLINE_THROW(uint32_t)
7169iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7170 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7171{
7172 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7173 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7174 return off;
7175}
7176
7177
7178/**
7179 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differs.
7180 */
7181DECL_INLINE_THROW(uint32_t)
7182iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7183 uint8_t iGprLeft, uint8_t iGprRight,
7184 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7185{
7186 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7187 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7188}
7189
7190
7191/* if (Gpr != Imm) Jmp idxLabel; */
7192
7193/**
7194 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7195 */
7196DECL_INLINE_THROW(uint32_t)
7197iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7198 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7199{
7200 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7201 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7202 return off;
7203}
7204
7205
7206/**
7207 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7208 */
7209DECL_INLINE_THROW(uint32_t)
7210iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7211 uint8_t iGprSrc, uint64_t uImm,
7212 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7213{
7214 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7215 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7216}
7217
7218
7219/**
7220 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7221 * @a uImm.
7222 */
7223DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7224 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7225{
7226 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7227 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7228 return off;
7229}
7230
7231
7232/**
7233 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7234 * @a uImm.
7235 */
7236DECL_INLINE_THROW(uint32_t)
7237iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7238 uint8_t iGprSrc, uint32_t uImm,
7239 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7240{
7241 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7242 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7243}
7244
7245
7246/**
7247 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7248 * @a uImm.
7249 */
7250DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7251 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7252{
7253 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7254 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7255 return off;
7256}
7257
7258
7259/**
7260 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7261 * @a uImm.
7262 */
7263DECL_INLINE_THROW(uint32_t)
7264iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7265 uint8_t iGprSrc, uint16_t uImm,
7266 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7267{
7268 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7269 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7270}
7271
7272
7273/* if (Gpr == Imm) Jmp idxLabel; */
7274
7275/**
7276 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7277 */
7278DECL_INLINE_THROW(uint32_t)
7279iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7280 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7281{
7282 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7283 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7284 return off;
7285}
7286
7287
7288/**
7289 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7290 */
7291DECL_INLINE_THROW(uint32_t)
7292iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7293 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7294{
7295 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7296 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7297}
7298
7299
7300/**
7301 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7302 */
7303DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7304 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7305{
7306 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7307 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7308 return off;
7309}
7310
7311
7312/**
7313 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7314 */
7315DECL_INLINE_THROW(uint32_t)
7316iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7317 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7318{
7319 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7320 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7321}
7322
7323
7324/**
7325 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7326 *
7327 * @note ARM64: Helper register is required (idxTmpReg).
7328 */
7329DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7330 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7331 uint8_t idxTmpReg = UINT8_MAX)
7332{
7333 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7334 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7335 return off;
7336}
7337
7338
7339/**
7340 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7341 *
7342 * @note ARM64: Helper register is required (idxTmpReg).
7343 */
7344DECL_INLINE_THROW(uint32_t)
7345iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7346 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7347 uint8_t idxTmpReg = UINT8_MAX)
7348{
7349 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7350 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7351}
7352
7353
7354/*********************************************************************************************************************************
7355* Calls. *
7356*********************************************************************************************************************************/
7357
7358/**
7359 * Emits a call to a 64-bit address.
7360 */
7361DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7362{
7363#ifdef RT_ARCH_AMD64
7364 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7365
7366 /* call rax */
7367 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7368 pbCodeBuf[off++] = 0xff;
7369 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7370
7371#elif defined(RT_ARCH_ARM64)
7372 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7373
7374 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7375 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7376
7377#else
7378# error "port me"
7379#endif
7380 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7381 return off;
7382}
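
/* Usage sketch (illustrative only): load the calling-convention argument
   registers and call an assumed helper taking (PVMCPUCC, uint64_t).  The
   helper name and the immediate value are placeholders. */
#if 0
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, uSomeArg /*assumed*/);
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemSomeHelper /*assumed*/);
#endif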
7383
7384
7385/**
7386 * Emits code to load a stack variable into an argument GPR.
7387 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7388 */
7389DECL_FORCE_INLINE_THROW(uint32_t)
7390iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7391 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7392 bool fSpilledVarsInVolatileRegs = false)
7393{
7394 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7395 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7396 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7397
7398 uint8_t const idxRegVar = pVar->idxReg;
7399 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7400 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7401 || !fSpilledVarsInVolatileRegs ))
7402 {
7403 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7404 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7405 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7406 if (!offAddend)
7407 {
7408 if (idxRegArg != idxRegVar)
7409 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7410 }
7411 else
7412 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7413 }
7414 else
7415 {
7416 uint8_t const idxStackSlot = pVar->idxStackSlot;
7417 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7418 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7419 if (offAddend)
7420 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7421 }
7422 return off;
7423}
7424
7425
7426/**
7427 * Emits code to load a stack or immediate variable value into an argument GPR,
7428 * optionally with an addend.
7429 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7430 */
7431DECL_FORCE_INLINE_THROW(uint32_t)
7432iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7433 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7434 bool fSpilledVarsInVolatileRegs = false)
7435{
7436 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7437 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7438 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7439 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7440 else
7441 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7442 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7443 return off;
7444}
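
/* Usage sketch (illustrative only): materialize an assumed stack or immediate
   variable into the second argument register, with the value biased by one. */
#if 0
    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
                                                    idxVarValue /*assumed*/, 1 /*offAddend*/);
#endif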
7445
7446
7447/**
7448 * Emits code to load the variable address into an argument GPR.
7449 *
7450 * This only works for uninitialized and stack variables.
7451 */
7452DECL_FORCE_INLINE_THROW(uint32_t)
7453iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7454 bool fFlushShadows)
7455{
7456 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7457 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7458 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7459 || pVar->enmKind == kIemNativeVarKind_Stack,
7460 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7461 AssertStmt(!pVar->fSimdReg,
7462 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7463
7464 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7465 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7466
7467 uint8_t const idxRegVar = pVar->idxReg;
7468 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7469 {
7470 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7471 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7472 Assert(pVar->idxReg == UINT8_MAX);
7473 }
7474 Assert( pVar->idxStackSlot != UINT8_MAX
7475 && pVar->idxReg == UINT8_MAX);
7476
7477 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7478}
7479
7480
7481#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7482/**
7483 * Emits code to load the variable address into an argument GPR.
7484 *
7485 * This is a special variant intended for SIMD variables only, called solely
7486 * by the TLB miss path in the memory fetch/store code.  There the value is
7487 * passed by reference, so both the register and the stack copy are needed,
7488 * depending on which path is taken (TLB hit vs. miss).
7489 */
7490DECL_FORCE_INLINE_THROW(uint32_t)
7491iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7492 bool fSyncRegWithStack = true)
7493{
7494 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7495 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7496 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7497 || pVar->enmKind == kIemNativeVarKind_Stack,
7498 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7499 AssertStmt(pVar->fSimdReg,
7500 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7501 Assert( pVar->idxStackSlot != UINT8_MAX
7502 && pVar->idxReg != UINT8_MAX);
7503
7504 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7505 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7506
7507 uint8_t const idxRegVar = pVar->idxReg;
7508 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7509 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7510
7511 if (fSyncRegWithStack)
7512 {
7513 if (pVar->cbVar == sizeof(RTUINT128U))
7514 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
7515 else
7516 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
7517 }
7518
7519 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7520}
7521
7522
7523/**
7524 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
7525 *
7526 * This is a special helper called solely by the TLB miss path in the memory
7527 * fetch/store code.  There the value is passed by reference and ends up on
7528 * the stack on a TLB miss, so the stack copy must be synced back into the
7529 * assigned host register afterwards.
7530 */
7531DECL_FORCE_INLINE_THROW(uint32_t)
7532iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
7533{
7534 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7535 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7536 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7537 || pVar->enmKind == kIemNativeVarKind_Stack,
7538 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7539 AssertStmt(pVar->fSimdReg,
7540 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7541 Assert( pVar->idxStackSlot != UINT8_MAX
7542 && pVar->idxReg != UINT8_MAX);
7543
7544 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7545 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7546
7547 uint8_t const idxRegVar = pVar->idxReg;
7548 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7549 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7550
7551 if (pVar->cbVar == sizeof(RTUINT128U))
7552 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
7553 else
7554 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
7555
7556 return off;
7557}
7558
7559
7560/**
7561 * Emits a gprdst = ~gprsrc store.
7562 */
7563DECL_FORCE_INLINE_THROW(uint32_t)
7564iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7565{
7566#ifdef RT_ARCH_AMD64
7567 if (iGprDst != iGprSrc)
7568 {
7569 /* mov gprdst, gprsrc. */
7570 if (f64Bit)
7571 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7572 else
7573 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7574 }
7575
7576 /* not gprdst */
7577 if (f64Bit || iGprDst >= 8)
7578 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7579 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7580 pCodeBuf[off++] = 0xf7;
7581 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7582#elif defined(RT_ARCH_ARM64)
7583 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7584#else
7585# error "port me"
7586#endif
7587 return off;
7588}
7589
7590
7591/**
7592 * Emits a gprdst = ~gprsrc store.
7593 */
7594DECL_INLINE_THROW(uint32_t)
7595iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7596{
7597#ifdef RT_ARCH_AMD64
7598 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7599#elif defined(RT_ARCH_ARM64)
7600 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7601#else
7602# error "port me"
7603#endif
7604 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7605 return off;
7606}
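
/* Usage sketch (illustrative only): compute the 32-bit complement of one
   assumed register into another; bits 63:32 of the destination end up cleared
   on both AMD64 and ARM64. */
#if 0
    off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegDst /*assumed*/, idxRegSrc /*assumed*/, false /*f64Bit*/);
#endif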
7607
7608
7609/**
7610 * Emits a 128-bit vector register store to a VCpu value.
7611 */
7612DECL_FORCE_INLINE_THROW(uint32_t)
7613iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7614{
7615#ifdef RT_ARCH_AMD64
7616 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7617 pCodeBuf[off++] = 0x66;
7618 if (iVecReg >= 8)
7619 pCodeBuf[off++] = X86_OP_REX_R;
7620 pCodeBuf[off++] = 0x0f;
7621 pCodeBuf[off++] = 0x7f;
7622 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7623#elif defined(RT_ARCH_ARM64)
7624 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7625
7626#else
7627# error "port me"
7628#endif
7629 return off;
7630}
7631
7632
7633/**
7634 * Emits a 128-bit vector register store to a VCpu value.
7635 */
7636DECL_INLINE_THROW(uint32_t)
7637iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7638{
7639#ifdef RT_ARCH_AMD64
7640 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7641#elif defined(RT_ARCH_ARM64)
7642 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7643#else
7644# error "port me"
7645#endif
7646 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7647 return off;
7648}
7649
7650
7651/**
7652 * Emits a high 128-bit vector register store to a VCpu value.
7653 */
7654DECL_FORCE_INLINE_THROW(uint32_t)
7655iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7656{
7657#ifdef RT_ARCH_AMD64
7658 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7659 pCodeBuf[off++] = X86_OP_VEX3;
7660 if (iVecReg >= 8)
7661 pCodeBuf[off++] = 0x63;
7662 else
7663 pCodeBuf[off++] = 0xe3;
7664 pCodeBuf[off++] = 0x7d;
7665 pCodeBuf[off++] = 0x39;
7666 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7667 pCodeBuf[off++] = 0x01; /* Immediate */
7668#elif defined(RT_ARCH_ARM64)
7669 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7670#else
7671# error "port me"
7672#endif
7673 return off;
7674}
7675
7676
7677/**
7678 * Emits a high 128-bit vector register store to a VCpu value.
7679 */
7680DECL_INLINE_THROW(uint32_t)
7681iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7682{
7683#ifdef RT_ARCH_AMD64
7684 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7685#elif defined(RT_ARCH_ARM64)
7686 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7687 Assert(!(iVecReg & 0x1));
7688 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7689#else
7690# error "port me"
7691#endif
7692 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7693 return off;
7694}
7695
7696
7697/**
7698 * Emits a 128-bit vector register load of a VCpu value.
7699 */
7700DECL_FORCE_INLINE_THROW(uint32_t)
7701iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7702{
7703#ifdef RT_ARCH_AMD64
7704 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
7705 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7706 if (iVecReg >= 8)
7707 pCodeBuf[off++] = X86_OP_REX_R;
7708 pCodeBuf[off++] = 0x0f;
7709 pCodeBuf[off++] = 0x6f;
7710 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7711#elif defined(RT_ARCH_ARM64)
7712 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7713
7714#else
7715# error "port me"
7716#endif
7717 return off;
7718}
7719
7720
7721/**
7722 * Emits a 128-bit vector register load of a VCpu value.
7723 */
7724DECL_INLINE_THROW(uint32_t)
7725iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7726{
7727#ifdef RT_ARCH_AMD64
7728 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7729#elif defined(RT_ARCH_ARM64)
7730 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7731#else
7732# error "port me"
7733#endif
7734 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7735 return off;
7736}
7737
7738
7739/**
7740 * Emits a 128-bit vector register load of a VCpu value.
7741 */
7742DECL_FORCE_INLINE_THROW(uint32_t)
7743iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7744{
7745#ifdef RT_ARCH_AMD64
7746 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
7747 pCodeBuf[off++] = X86_OP_VEX3;
7748 if (iVecReg >= 8)
7749 pCodeBuf[off++] = 0x63;
7750 else
7751 pCodeBuf[off++] = 0xe3;
7752 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
7753 pCodeBuf[off++] = 0x38;
7754 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7755 pCodeBuf[off++] = 0x01; /* Immediate */
7756#elif defined(RT_ARCH_ARM64)
7757 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7758#else
7759# error "port me"
7760#endif
7761 return off;
7762}
7763
7764
7765/**
7766 * Emits a 128-bit vector register load of a VCpu value.
7767 */
7768DECL_INLINE_THROW(uint32_t)
7769iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7770{
7771#ifdef RT_ARCH_AMD64
7772 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7773#elif defined(RT_ARCH_ARM64)
7774 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7775 Assert(!(iVecReg & 0x1));
7776 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7777#else
7778# error "port me"
7779#endif
7780 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7781 return off;
7782}
7783
7784
7785/**
7786 * Emits a vecdst = vecsrc load.
7787 */
7788DECL_FORCE_INLINE(uint32_t)
7789iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7790{
7791#ifdef RT_ARCH_AMD64
7792 /* movdqu vecdst, vecsrc */
7793 pCodeBuf[off++] = 0xf3;
7794
7795 if ((iVecRegDst | iVecRegSrc) >= 8)
7796 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
7797 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
7798 : X86_OP_REX_R;
7799 pCodeBuf[off++] = 0x0f;
7800 pCodeBuf[off++] = 0x6f;
7801 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7802
7803#elif defined(RT_ARCH_ARM64)
7804 /* mov dst, src; alias for: orr dst, src, src */
7805 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
7806
7807#else
7808# error "port me"
7809#endif
7810 return off;
7811}
7812
7813
7814/**
7815 * Emits a vecdst = vecsrc load, 128-bit.
7816 */
7817DECL_INLINE_THROW(uint32_t)
7818iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7819{
7820#ifdef RT_ARCH_AMD64
7821 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
7822#elif defined(RT_ARCH_ARM64)
7823 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
7824#else
7825# error "port me"
7826#endif
7827 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7828 return off;
7829}
7830
7831
7832/**
7833 * Emits a vecdst = vecsrc load, 256-bit.
7834 */
7835DECL_INLINE_THROW(uint32_t)
7836iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7837{
7838#ifdef RT_ARCH_AMD64
7839 /* vmovdqa ymm, ymm */
7840 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7841 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
7842 {
7843 pbCodeBuf[off++] = X86_OP_VEX3;
7844 pbCodeBuf[off++] = 0x41;
7845 pbCodeBuf[off++] = 0x7d;
7846 pbCodeBuf[off++] = 0x6f;
7847 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7848 }
7849 else
7850 {
7851 pbCodeBuf[off++] = X86_OP_VEX2;
7852 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
7853 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
7854 pbCodeBuf[off++] = iVecRegSrc >= 8
7855 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
7856 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7857 }
7858#elif defined(RT_ARCH_ARM64)
7859 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7860 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
7861 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
7862 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
7863#else
7864# error "port me"
7865#endif
7866 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7867 return off;
7868}
7869
7870
7871/**
7872 * Emits a gprdst = vecsrc[x] load, 64-bit.
7873 */
7874DECL_FORCE_INLINE(uint32_t)
7875iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7876{
7877#ifdef RT_ARCH_AMD64
7878 if (iQWord >= 2)
7879 {
7880 /** @todo Currently not used. */
7881 AssertReleaseFailed();
7882 }
7883 else
7884 {
7885 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
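 /* Note: pextrd and pextrq share opcode 66 0f 3a 16; REX.W selects the 64-bit form. */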
7886 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7887 pCodeBuf[off++] = X86_OP_REX_W
7888 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7889 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7890 pCodeBuf[off++] = 0x0f;
7891 pCodeBuf[off++] = 0x3a;
7892 pCodeBuf[off++] = 0x16;
7893 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7894 pCodeBuf[off++] = iQWord;
7895 }
7896#elif defined(RT_ARCH_ARM64)
7897 /* umov gprdst, vecsrc[iQWord] */
7898 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
7899#else
7900# error "port me"
7901#endif
7902 return off;
7903}
7904
7905
7906/**
7907 * Emits a gprdst = vecsrc[x] load, 64-bit.
7908 */
7909DECL_INLINE_THROW(uint32_t)
7910iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7911{
7912 Assert(iQWord <= 3);
7913
7914#ifdef RT_ARCH_AMD64
7915 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
7916#elif defined(RT_ARCH_ARM64)
7917 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7918 Assert(!(iVecRegSrc & 0x1));
7919 /* Need to access the "high" 128-bit vector register. */
7920 if (iQWord >= 2)
7921 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
7922 else
7923 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
7924#else
7925# error "port me"
7926#endif
7927 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7928 return off;
7929}
7930
7931
7932/**
7933 * Emits a gprdst = vecsrc[x] load, 32-bit.
7934 */
7935DECL_FORCE_INLINE(uint32_t)
7936iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7937{
7938#ifdef RT_ARCH_AMD64
7939 if (iDWord >= 4)
7940 {
7941 /** @todo Currently not used. */
7942 AssertReleaseFailed();
7943 }
7944 else
7945 {
7946 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
7947 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7948 if (iGprDst >= 8 || iVecRegSrc >= 8)
7949 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7950 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7951 pCodeBuf[off++] = 0x0f;
7952 pCodeBuf[off++] = 0x3a;
7953 pCodeBuf[off++] = 0x16;
7954 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7955 pCodeBuf[off++] = iDWord;
7956 }
7957#elif defined(RT_ARCH_ARM64)
7958 /* umov gprdst, vecsrc[iDWord] */
7959 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
7960#else
7961# error "port me"
7962#endif
7963 return off;
7964}
7965
7966
7967/**
7968 * Emits a gprdst = vecsrc[x] load, 32-bit.
7969 */
7970DECL_INLINE_THROW(uint32_t)
7971iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7972{
7973 Assert(iDWord <= 7);
7974
7975#ifdef RT_ARCH_AMD64
7976 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
7977#elif defined(RT_ARCH_ARM64)
7978 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7979 Assert(!(iVecRegSrc & 0x1));
7980 /* Need to access the "high" 128-bit vector register. */
7981 if (iDWord >= 4)
7982 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
7983 else
7984 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
7985#else
7986# error "port me"
7987#endif
7988 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7989 return off;
7990}
7991
7992
7993/**
7994 * Emits a gprdst = vecsrc[x] load, 16-bit.
7995 */
7996DECL_FORCE_INLINE(uint32_t)
7997iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
7998{
7999#ifdef RT_ARCH_AMD64
8000 if (iWord >= 8)
8001 {
8002 /** @todo Currently not used. */
8003 AssertReleaseFailed();
8004 }
8005 else
8006 {
8007 /* pextrw gpr, vecsrc, #iWord */
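 /* Unlike the 0f 3a extract forms, pextrw (0f c5) takes the GPR in the ModRM
 reg field and the vector register in r/m. */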
8008 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8009 if (iGprDst >= 8 || iVecRegSrc >= 8)
8010 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
8011 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
8012 pCodeBuf[off++] = 0x0f;
8013 pCodeBuf[off++] = 0xc5;
8014 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
8015 pCodeBuf[off++] = iWord;
8016 }
8017#elif defined(RT_ARCH_ARM64)
8018 /* umov gprdst, vecsrc[iWord] */
8019 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
8020#else
8021# error "port me"
8022#endif
8023 return off;
8024}
8025
8026
8027/**
8028 * Emits a gprdst = vecsrc[x] load, 16-bit.
8029 */
8030DECL_INLINE_THROW(uint32_t)
8031iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8032{
8033 Assert(iWord <= 15);
8034
8035#ifdef RT_ARCH_AMD64
8036 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
8037#elif defined(RT_ARCH_ARM64)
8038 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8039 Assert(!(iVecRegSrc & 0x1));
8040 /* Need to access the "high" 128-bit vector register. */
8041 if (iWord >= 8)
8042 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
8043 else
8044 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
8045#else
8046# error "port me"
8047#endif
8048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8049 return off;
8050}
8051
8052
8053/**
8054 * Emits a gprdst = vecsrc[x] load, 8-bit.
8055 */
8056DECL_FORCE_INLINE(uint32_t)
8057iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8058{
8059#ifdef RT_ARCH_AMD64
8060 if (iByte >= 16)
8061 {
8062 /** @todo Currently not used. */
8063 AssertReleaseFailed();
8064 }
8065 else
8066 {
8067 /* pextrb gpr, vecsrc, #iByte */
8068 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8069 if (iGprDst >= 8 || iVecRegSrc >= 8)
8070 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8071 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8072 pCodeBuf[off++] = 0x0f;
8073 pCodeBuf[off++] = 0x3a;
8074 pCodeBuf[off++] = 0x14;
8075 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8076 pCodeBuf[off++] = iByte;
8077 }
8078#elif defined(RT_ARCH_ARM64)
8079 /* umov gprdst, vecsrc[iByte] */
8080 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
8081#else
8082# error "port me"
8083#endif
8084 return off;
8085}
8086
8087
8088/**
8089 * Emits a gprdst = vecsrc[x] load, 8-bit.
8090 */
8091DECL_INLINE_THROW(uint32_t)
8092iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8093{
8094 Assert(iByte <= 31);
8095
8096#ifdef RT_ARCH_AMD64
8097 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
8098#elif defined(RT_ARCH_ARM64)
8099 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8100 Assert(!(iVecRegSrc & 0x1));
8101 /* Need to access the "high" 128-bit vector register. */
8102 if (iByte >= 16)
8103 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
8104 else
8105 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
8106#else
8107# error "port me"
8108#endif
8109 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8110 return off;
8111}
8112
8113
8114/**
8115 * Emits a vecdst[x] = gprsrc store, 64-bit.
8116 */
8117DECL_FORCE_INLINE(uint32_t)
8118iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8119{
8120#ifdef RT_ARCH_AMD64
8121 /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
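 /* Note: pinsrd and pinsrq share opcode 66 0f 3a 22; REX.W selects the 64-bit form. */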
8122 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8123 pCodeBuf[off++] = X86_OP_REX_W
8124 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8125 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8126 pCodeBuf[off++] = 0x0f;
8127 pCodeBuf[off++] = 0x3a;
8128 pCodeBuf[off++] = 0x22;
8129 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8130 pCodeBuf[off++] = iQWord;
8131#elif defined(RT_ARCH_ARM64)
8132 /* ins vecdst[iQWord], gpr */
8133 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8134#else
8135# error "port me"
8136#endif
8137 return off;
8138}
8139
8140
8141/**
8142 * Emits a vecdst[x] = gprsrc store, 64-bit.
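 * (Only the low 128 bits are addressable here; hence the iQWord <= 1 assertion.)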
8143 */
8144DECL_INLINE_THROW(uint32_t)
8145iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8146{
8147 Assert(iQWord <= 1);
8148
8149#ifdef RT_ARCH_AMD64
8150 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
8151#elif defined(RT_ARCH_ARM64)
8152 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
8153#else
8154# error "port me"
8155#endif
8156 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8157 return off;
8158}
8159
8160
8161/**
8162 * Emits a vecdst[x] = gprsrc store, 32-bit.
8163 */
8164DECL_FORCE_INLINE(uint32_t)
8165iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8166{
8167#ifdef RT_ARCH_AMD64
8168 /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
8169 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8170 if (iVecRegDst >= 8 || iGprSrc >= 8)
8171 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8172 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8173 pCodeBuf[off++] = 0x0f;
8174 pCodeBuf[off++] = 0x3a;
8175 pCodeBuf[off++] = 0x22;
8176 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8177 pCodeBuf[off++] = iDWord;
8178#elif defined(RT_ARCH_ARM64)
8179 /* ins vecdst[iDWord], gpr */
8180 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
8181#else
8182# error "port me"
8183#endif
8184 return off;
8185}
8186
8187
8188/**
8189 * Emits a vecdst[x] = gprsrc store, 32-bit.
8190 */
8191DECL_INLINE_THROW(uint32_t)
8192iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8193{
8194 Assert(iDWord <= 3);
8195
8196#ifdef RT_ARCH_AMD64
8197 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord);
8198#elif defined(RT_ARCH_ARM64)
8199 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
8200#else
8201# error "port me"
8202#endif
8203 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8204 return off;
8205}
8206
8207
8208/**
8209 * Emits a vecdst.au32[iDWord] = 0 store.
8210 */
8211DECL_FORCE_INLINE(uint32_t)
8212iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8213{
8214 Assert(iDWord <= 7);
8215
8216#ifdef RT_ARCH_AMD64
8217 /*
8218 * xor tmp0, tmp0
8219 * pinsrd xmm, tmp0, iDword
8220 */
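 /* Opcode 33h is xor r32, r/m32; as both operands are tmp0, REX.R and REX.B
 must both be set when the register is >= 8. */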
8221 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
8222 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8223 pCodeBuf[off++] = 0x33;
8224 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
8225 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(&pCodeBuf[off], off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
8226#elif defined(RT_ARCH_ARM64)
8227 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8228 Assert(!(iVecReg & 0x1));
8229 /* ins vecdst[iDWord], wzr */
8230 if (iDWord >= 4)
8231 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
8232 else
8233 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
8234#else
8235# error "port me"
8236#endif
8237 return off;
8238}
8239
8240
8241/**
8242 * Emits a vecdst.au32[iDWord] = 0 store.
8243 */
8244DECL_INLINE_THROW(uint32_t)
8245iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8246{
8247
8248#ifdef RT_ARCH_AMD64
8249 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
8250#elif defined(RT_ARCH_ARM64)
8251 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
8252#else
8253# error "port me"
8254#endif
8255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8256 return off;
8257}
8258
8259
8260/**
8261 * Emits a vecdst[0:127] = 0 store.
8262 */
8263DECL_FORCE_INLINE(uint32_t)
8264iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8265{
8266#ifdef RT_ARCH_AMD64
8267 /* pxor xmm, xmm */
8268 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8269 if (iVecReg >= 8)
8270 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
8271 pCodeBuf[off++] = 0x0f;
8272 pCodeBuf[off++] = 0xef;
8273 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8274#elif defined(RT_ARCH_ARM64)
8275 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8276 Assert(!(iVecReg & 0x1));
8277 /* eor vecreg, vecreg, vecreg */
8278 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8279#else
8280# error "port me"
8281#endif
8282 return off;
8283}
8284
8285
8286/**
8287 * Emits a vecdst[0:127] = 0 store.
8288 */
8289DECL_INLINE_THROW(uint32_t)
8290iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8291{
8292#ifdef RT_ARCH_AMD64
8293 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8294#elif defined(RT_ARCH_ARM64)
8295 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8296#else
8297# error "port me"
8298#endif
8299 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8300 return off;
8301}
8302
8303
8304/**
8305 * Emits a vecdst[128:255] = 0 store.
8306 */
8307DECL_FORCE_INLINE(uint32_t)
8308iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8309{
8310#ifdef RT_ARCH_AMD64
8311 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
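 /* (Any VEX.128 encoded instruction zeroes bits 255:128 of the destination register.) */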
8312 if (iVecReg < 8)
8313 {
8314 pCodeBuf[off++] = X86_OP_VEX2;
8315 pCodeBuf[off++] = 0xf9;
8316 }
8317 else
8318 {
8319 pCodeBuf[off++] = X86_OP_VEX3;
8320 pCodeBuf[off++] = 0x41;
8321 pCodeBuf[off++] = 0x79;
8322 }
8323 pCodeBuf[off++] = 0x6f;
8324 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8325#elif defined(RT_ARCH_ARM64)
8326 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8327 Assert(!(iVecReg & 0x1));
8328 /* eor vecreg, vecreg, vecreg */
8329 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8330#else
8331# error "port me"
8332#endif
8333 return off;
8334}
8335
8336
8337/**
8338 * Emits a vecdst[128:255] = 0 store.
8339 */
8340DECL_INLINE_THROW(uint32_t)
8341iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8342{
8343#ifdef RT_ARCH_AMD64
8344 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
8345#elif defined(RT_ARCH_ARM64)
8346 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8347#else
8348# error "port me"
8349#endif
8350 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8351 return off;
8352}
8353
8354
8355/**
8356 * Emits a vecdst[0:255] = 0 store.
8357 */
8358DECL_FORCE_INLINE(uint32_t)
8359iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8360{
8361#ifdef RT_ARCH_AMD64
8362 /* vpxor ymm, ymm, ymm */
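 /* XORing a register with itself is the customary dependency-breaking zeroing idiom. */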
8363 if (iVecReg < 8)
8364 {
8365 pCodeBuf[off++] = X86_OP_VEX2;
8366 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8367 }
8368 else
8369 {
8370 pCodeBuf[off++] = X86_OP_VEX3;
8371 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
8372 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8373 }
8374 pCodeBuf[off++] = 0xef;
8375 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8376#elif defined(RT_ARCH_ARM64)
8377 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8378 Assert(!(iVecReg & 0x1));
8379 /* eor vecreg, vecreg, vecreg */
8380 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8381 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8382#else
8383# error "port me"
8384#endif
8385 return off;
8386}
8387
8388
8389/**
8390 * Emits a vecdst[0:255] = 0 store.
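 *
 * Usage sketch (illustrative only; the register number is made up):
 * @code
 *     // Zero all 256 bits of host SIMD register 2; ARM64 requires an even
 *     // register number here since a register pair backs the value.
 *     off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, 2);
 * @endcode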
8391 */
8392DECL_INLINE_THROW(uint32_t)
8393iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8394{
8395#ifdef RT_ARCH_AMD64
8396 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8397#elif defined(RT_ARCH_ARM64)
8398 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
8399#else
8400# error "port me"
8401#endif
8402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8403 return off;
8404}
8405
8406
8407/**
8408 * Emits a vecdst = gprsrc broadcast, 8-bit.
8409 */
8410DECL_FORCE_INLINE(uint32_t)
8411iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8412{
8413#ifdef RT_ARCH_AMD64
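 /* Two step approach: first insert the GPR into element 0 of the destination,
 then broadcast that element to the rest, since plain AVX2 vpbroadcastb
 cannot source a GPR directly. */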
8414 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE4.1). */
8415 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8416 if (iVecRegDst >= 8 || iGprSrc >= 8)
8417 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8418 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8419 pCodeBuf[off++] = 0x0f;
8420 pCodeBuf[off++] = 0x3a;
8421 pCodeBuf[off++] = 0x20;
8422 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8423 pCodeBuf[off++] = 0x00;
8424
8425 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
8426 pCodeBuf[off++] = X86_OP_VEX3;
8427 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8428 | 0x02 /* opcode map. */
8429 | ( iVecRegDst >= 8
8430 ? 0
8431 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8432 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8433 pCodeBuf[off++] = 0x78;
8434 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8435#elif defined(RT_ARCH_ARM64)
8436 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8437 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8438
8439 /* dup vecdst, gpr */
8440 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
8441 if (f256Bit)
8442 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
8443#else
8444# error "port me"
8445#endif
8446 return off;
8447}
8448
8449
8450/**
8451 * Emits a vecdst = gprsrc broadcast, 8-bit.
8452 */
8453DECL_INLINE_THROW(uint32_t)
8454iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8455{
8456#ifdef RT_ARCH_AMD64
8457 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8458#elif defined(RT_ARCH_ARM64)
8459 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8460#else
8461# error "port me"
8462#endif
8463 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8464 return off;
8465}
8466
8467
8468/**
8469 * Emits a vecdst = gprsrc broadcast, 16-bit.
8470 */
8471DECL_FORCE_INLINE(uint32_t)
8472iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8473{
8474#ifdef RT_ARCH_AMD64
8475 /* pinsrw vecdst, gpr, #0 */
8476 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8477 if (iVecRegDst >= 8 || iGprSrc >= 8)
8478 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8479 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8480 pCodeBuf[off++] = 0x0f;
8481 pCodeBuf[off++] = 0xc4;
8482 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8483 pCodeBuf[off++] = 0x00;
8484
8485 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
8486 pCodeBuf[off++] = X86_OP_VEX3;
8487 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8488 | 0x02 /* opcode map. */
8489 | ( iVecRegDst >= 8
8490 ? 0
8491 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8492 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8493 pCodeBuf[off++] = 0x79;
8494 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8495#elif defined(RT_ARCH_ARM64)
8496 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8497 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8498
8499 /* dup vecdst, gpr */
8500 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
8501 if (f256Bit)
8502 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
8503#else
8504# error "port me"
8505#endif
8506 return off;
8507}
8508
8509
8510/**
8511 * Emits a vecdst = gprsrc broadcast, 16-bit.
8512 */
8513DECL_INLINE_THROW(uint32_t)
8514iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8515{
8516#ifdef RT_ARCH_AMD64
8517 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8518#elif defined(RT_ARCH_ARM64)
8519 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8520#else
8521# error "port me"
8522#endif
8523 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8524 return off;
8525}
8526
8527
8528/**
8529 * Emits a vecdst = gprsrc broadcast, 32-bit.
8530 */
8531DECL_FORCE_INLINE(uint32_t)
8532iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8533{
8534#ifdef RT_ARCH_AMD64
8535 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
8536 * vbroadcast needs a memory operand or another xmm register to work... */
8537
8538 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
8539 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8540 if (iVecRegDst >= 8 || iGprSrc >= 8)
8541 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8542 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8543 pCodeBuf[off++] = 0x0f;
8544 pCodeBuf[off++] = 0x3a;
8545 pCodeBuf[off++] = 0x22;
8546 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8547 pCodeBuf[off++] = 0x00;
8548
8549 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
8550 pCodeBuf[off++] = X86_OP_VEX3;
8551 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8552 | 0x02 /* opcode map. */
8553 | ( iVecRegDst >= 8
8554 ? 0
8555 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8556 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8557 pCodeBuf[off++] = 0x58;
8558 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8559#elif defined(RT_ARCH_ARM64)
8560 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8561 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8562
8563 /* dup vecdst, gpr */
8564 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
8565 if (f256Bit)
8566 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
8567#else
8568# error "port me"
8569#endif
8570 return off;
8571}
8572
8573
8574/**
8575 * Emits a vecdst = gprsrc broadcast, 32-bit.
8576 */
8577DECL_INLINE_THROW(uint32_t)
8578iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8579{
8580#ifdef RT_ARCH_AMD64
8581 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8582#elif defined(RT_ARCH_ARM64)
8583 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8584#else
8585# error "port me"
8586#endif
8587 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8588 return off;
8589}
8590
8591
8592/**
8593 * Emits a vecdst = gprsrc broadcast, 64-bit.
8594 */
8595DECL_FORCE_INLINE(uint32_t)
8596iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8597{
8598#ifdef RT_ARCH_AMD64
8599 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
8600 * vbroadcast needs a memory operand or another xmm register to work... */
8601
8602 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
8603 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8604 pCodeBuf[off++] = X86_OP_REX_W
8605 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8606 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8607 pCodeBuf[off++] = 0x0f;
8608 pCodeBuf[off++] = 0x3a;
8609 pCodeBuf[off++] = 0x22;
8610 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8611 pCodeBuf[off++] = 0x00;
8612
8613 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
8614 pCodeBuf[off++] = X86_OP_VEX3;
8615 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8616 | 0x02 /* opcode map. */
8617 | ( iVecRegDst >= 8
8618 ? 0
8619 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8620 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8621 pCodeBuf[off++] = 0x59;
8622 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8623#elif defined(RT_ARCH_ARM64)
8624 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8625 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8626
8627 /* dup vecdst, gpr */
8628 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
8629 if (f256Bit)
8630 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
8631#else
8632# error "port me"
8633#endif
8634 return off;
8635}
8636
8637
8638/**
8639 * Emits a vecdst = gprsrc broadcast, 64-bit.
8640 */
8641DECL_INLINE_THROW(uint32_t)
8642iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8643{
8644#ifdef RT_ARCH_AMD64
8645 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
8646#elif defined(RT_ARCH_ARM64)
8647 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8648#else
8649# error "port me"
8650#endif
8651 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8652 return off;
8653}
8654
8655#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8656
8657/** @} */
8658
8659#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
8660