VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@103911

Last change on this file since 103911 was 103911, checked in by vboxsync, 11 months ago

VMM/IEM: Implement support for fetching 128-bit/256-bit values from guest memory and implement native emitters for IEM_MC_FETCH_MEM_U128_ALIGN_SSE()/IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE()/IEM_MC_FETCH_MEM_U128_NO_AC()/IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(), bugref:10614

1/* $Id: IEMN8veRecompilerEmit.h 103911 2024-03-19 09:28:05Z vboxsync $ */
2/** @file
3 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34#include "IEMN8veRecompiler.h"
35
36
37/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
38 * @ingroup grp_iem_n8ve_re
39 * @{
40 */
41
42/**
43 * Emit a simple marker instruction to more easily tell where something starts
44 * in the disassembly.
45 */
46DECL_INLINE_THROW(uint32_t)
47iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
48{
49#ifdef RT_ARCH_AMD64
50 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
51 if (uInfo == 0)
52 {
53 /* nop */
54 pbCodeBuf[off++] = 0x90;
55 }
56 else
57 {
58 /* nop [disp32] */
59 pbCodeBuf[off++] = 0x0f;
60 pbCodeBuf[off++] = 0x1f;
61 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
62 pbCodeBuf[off++] = RT_BYTE1(uInfo);
63 pbCodeBuf[off++] = RT_BYTE2(uInfo);
64 pbCodeBuf[off++] = RT_BYTE3(uInfo);
65 pbCodeBuf[off++] = RT_BYTE4(uInfo);
66 }
67#elif defined(RT_ARCH_ARM64)
68 /* nop */
69 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
70 pu32CodeBuf[off++] = 0xd503201f;
71
72 RT_NOREF(uInfo);
73#else
74# error "port me"
75#endif
76 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
77 return off;
78}
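
/* Worked example (illustrative, not part of the original source): with
   uInfo=0x12345678 the AMD64 path emits 0f 1f 05 78 56 34 12, a multi-byte
   NOP whose disp32 field carries uInfo so the marker value shows up in the
   disassembly; uInfo=0 emits a plain single-byte 90h NOP instead. */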
79
80
81/**
82 * Emit a breakpoint instruction.
83 */
84DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
85{
86#ifdef RT_ARCH_AMD64
87 pCodeBuf[off++] = 0xcc;
88 RT_NOREF(uInfo); /** @todo use multibyte nop for info? */
89
90#elif defined(RT_ARCH_ARM64)
91 pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));
92
93#else
94# error "port me"
95#endif
96 return off;
97}
98
99
100/**
101 * Emit a breakpoint instruction.
102 */
103DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
104{
105#ifdef RT_ARCH_AMD64
106 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
107#elif defined(RT_ARCH_ARM64)
108 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
109#else
110# error "port me"
111#endif
112 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
113 return off;
114}
115
116
117/*********************************************************************************************************************************
118* Loads, Stores and Related Stuff. *
119*********************************************************************************************************************************/
120
121#ifdef RT_ARCH_AMD64
122/**
123 * Common bit of iemNativeEmitLoadGprByGpr and friends.
124 */
125DECL_FORCE_INLINE(uint32_t)
126iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
127{
128 if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
129 {
130 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
131 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
132 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
133 }
134 else if (offDisp == (int8_t)offDisp)
135 {
136 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
137 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
138 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
139 pbCodeBuf[off++] = (uint8_t)offDisp;
140 }
141 else
142 {
143 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
144 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
145 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
146 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
147 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
148 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
149 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
150 }
151 return off;
152}
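
/* Worked example (illustrative): for iGprReg=RAX, iGprBase=R12, offDisp=0x40
   the disp8 path is taken and, since (R12 & 7) == X86_GREG_xSP, a SIB byte is
   required: ModRM=0x44 (mod=01, reg=000, rm=100), SIB=0x24 ([base] only),
   disp8=0x40. The opcode and the REX.B prefix selecting R12 are emitted by
   the caller; this helper only produces the ModRM/SIB/displacement tail. */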
153#endif /* RT_ARCH_AMD64 */
154
155/**
156 * Emits setting a GPR to zero.
157 */
158DECL_INLINE_THROW(uint32_t)
159iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
160{
161#ifdef RT_ARCH_AMD64
162 /* xor gpr32, gpr32 */
163 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
164 if (iGpr >= 8)
165 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
166 pbCodeBuf[off++] = 0x33;
167 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
168
169#elif defined(RT_ARCH_ARM64)
170 /* mov gpr, #0x0 */
171 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
172 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;
173
174#else
175# error "port me"
176#endif
177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
178 return off;
179}
180
181
182/**
183 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
184 * buffer space.
185 *
186 * Max buffer consumption:
187 * - AMD64: 10 instruction bytes.
188 * - ARM64: 4 instruction words (16 bytes).
189 */
190DECL_FORCE_INLINE(uint32_t)
191iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
192{
193#ifdef RT_ARCH_AMD64
194 if (uImm64 == 0)
195 {
196 /* xor gpr, gpr */
197 if (iGpr >= 8)
198 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
199 pCodeBuf[off++] = 0x33;
200 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
201 }
202 else if (uImm64 <= UINT32_MAX)
203 {
204 /* mov gpr, imm32 */
205 if (iGpr >= 8)
206 pCodeBuf[off++] = X86_OP_REX_B;
207 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
208 pCodeBuf[off++] = RT_BYTE1(uImm64);
209 pCodeBuf[off++] = RT_BYTE2(uImm64);
210 pCodeBuf[off++] = RT_BYTE3(uImm64);
211 pCodeBuf[off++] = RT_BYTE4(uImm64);
212 }
213 else if (uImm64 == (uint64_t)(int32_t)uImm64)
214 {
215 /* mov gpr, sx(imm32) */
216 if (iGpr < 8)
217 pCodeBuf[off++] = X86_OP_REX_W;
218 else
219 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
220 pCodeBuf[off++] = 0xc7;
221 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
222 pCodeBuf[off++] = RT_BYTE1(uImm64);
223 pCodeBuf[off++] = RT_BYTE2(uImm64);
224 pCodeBuf[off++] = RT_BYTE3(uImm64);
225 pCodeBuf[off++] = RT_BYTE4(uImm64);
226 }
227 else
228 {
229 /* mov gpr, imm64 */
230 if (iGpr < 8)
231 pCodeBuf[off++] = X86_OP_REX_W;
232 else
233 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
234 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
235 pCodeBuf[off++] = RT_BYTE1(uImm64);
236 pCodeBuf[off++] = RT_BYTE2(uImm64);
237 pCodeBuf[off++] = RT_BYTE3(uImm64);
238 pCodeBuf[off++] = RT_BYTE4(uImm64);
239 pCodeBuf[off++] = RT_BYTE5(uImm64);
240 pCodeBuf[off++] = RT_BYTE6(uImm64);
241 pCodeBuf[off++] = RT_BYTE7(uImm64);
242 pCodeBuf[off++] = RT_BYTE8(uImm64);
243 }
244
245#elif defined(RT_ARCH_ARM64)
246 /*
247 * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
248 * supply remaining bits using 'movk gpr, imm16, lsl #x'.
249 *
250 * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
251 * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
252 * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
253 * after the first non-zero immediate component so we switch to movk for
254 * the remainder.
255 */
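 /* Worked example (illustrative): uImm64=0x0000123400005678 has two non-zero
    half-words, so the sequence is
        movz gpr, #0x5678           ; 0xd2800000 | (0 << 21) | (0x5678 << 5) | gpr
        movk gpr, #0x1234, lsl #32  ; 0xf2800000 | (2 << 21) | (0x1234 << 5) | gpr
    while the all-zero half-words at bits 16..31 and 48..63 are skipped. */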
256 unsigned cZeroHalfWords = !( uImm64 & UINT16_MAX)
257 + !((uImm64 >> 16) & UINT16_MAX)
258 + !((uImm64 >> 32) & UINT16_MAX)
259 + !((uImm64 >> 48) & UINT16_MAX);
260 unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
261 : ( (uImm64 & UINT16_MAX) == UINT16_MAX)
262 + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
263 + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
264 + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
265 if (cFfffHalfWords <= cZeroHalfWords)
266 {
267 uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
268
269 /* movz gpr, imm16 */
270 uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
271 if (uImmPart || cZeroHalfWords == 4)
272 {
273 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
274 fMovBase |= RT_BIT_32(29);
275 }
276 /* mov[z/k] gpr, imm16, lsl #16 */
277 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
278 if (uImmPart)
279 {
280 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
281 fMovBase |= RT_BIT_32(29);
282 }
283 /* mov[z/k] gpr, imm16, lsl #32 */
284 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
285 if (uImmPart)
286 {
287 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
288 fMovBase |= RT_BIT_32(29);
289 }
290 /* mov[z/k] gpr, imm16, lsl #48 */
291 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
292 if (uImmPart)
293 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
294 }
295 else
296 {
297 uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
298
299 /* find the first half-word that isn't UINT16_MAX. */
300 uint32_t const iHwNotFfff = (uImm64 & UINT16_MAX) != UINT16_MAX ? 0
301 : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
302 : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
303
304 /* movn gpr, imm16, lsl #iHwNotFfff*16 */
305 uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
306 pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
307 fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
308 /* movk gpr, imm16 */
309 if (iHwNotFfff != 0)
310 {
311 uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
312 if (uImmPart != UINT32_C(0xffff))
313 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
314 }
315 /* movk gpr, imm16, lsl #16 */
316 if (iHwNotFfff != 1)
317 {
318 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
319 if (uImmPart != UINT32_C(0xffff))
320 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
321 }
322 /* movk gpr, imm16, lsl #32 */
323 if (iHwNotFfff != 2)
324 {
325 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
326 if (uImmPart != UINT32_C(0xffff))
327 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
328 }
329 /* movk gpr, imm16, lsl #48 */
330 if (iHwNotFfff != 3)
331 {
332 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
333 if (uImmPart != UINT32_C(0xffff))
334 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
335 }
336 }
337
338 /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
339 * clang 12.x does that, only to use the 'x' version for the
340 * addressing in the following ldr. */
341
342#else
343# error "port me"
344#endif
345 return off;
346}
347
348
349/**
350 * Emits loading a constant into a 64-bit GPR.
351 */
352DECL_INLINE_THROW(uint32_t)
353iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
354{
355#ifdef RT_ARCH_AMD64
356 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
357#elif defined(RT_ARCH_ARM64)
358 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
359#else
360# error "port me"
361#endif
362 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
363 return off;
364}
365
366
367/**
368 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
369 * buffer space.
370 *
371 * Max buffer consumption:
372 * - AMD64: 6 instruction bytes.
373 * - ARM64: 2 instruction words (8 bytes).
374 *
375 * @note The top 32 bits will be cleared.
376 */
377DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
378{
379#ifdef RT_ARCH_AMD64
380 if (uImm32 == 0)
381 {
382 /* xor gpr, gpr */
383 if (iGpr >= 8)
384 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
385 pCodeBuf[off++] = 0x33;
386 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
387 }
388 else
389 {
390 /* mov gpr, imm32 */
391 if (iGpr >= 8)
392 pCodeBuf[off++] = X86_OP_REX_B;
393 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
394 pCodeBuf[off++] = RT_BYTE1(uImm32);
395 pCodeBuf[off++] = RT_BYTE2(uImm32);
396 pCodeBuf[off++] = RT_BYTE3(uImm32);
397 pCodeBuf[off++] = RT_BYTE4(uImm32);
398 }
399
400#elif defined(RT_ARCH_ARM64)
401 if ((uImm32 >> 16) == 0)
402 /* movz gpr, imm16 */
403 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
404 else if ((uImm32 & UINT32_C(0xffff)) == 0)
405 /* movz gpr, imm16, lsl #16 */
406 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
407 else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
408 /* movn gpr, imm16, lsl #16 */
409 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
410 else if ((uImm32 >> 16) == UINT32_C(0xffff))
411 /* movn gpr, imm16 */
412 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
413 else
414 {
415 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
416 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
417 }
418
419#else
420# error "port me"
421#endif
422 return off;
423}
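
/* Worked example (illustrative): uImm32=0xffff1234 has an all-ones upper
   half-word, so the fourth case applies and a single
       movn gpr, #0xedcb      ; ~0x0000edcb == 0xffff1234
   loads the value; only constants with two arbitrary half-words need the
   final movz+movk pair. */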
424
425
426/**
427 * Emits loading a constant into a 32-bit GPR.
428 * @note The top 32 bits will be cleared.
429 */
430DECL_INLINE_THROW(uint32_t)
431iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
432{
433#ifdef RT_ARCH_AMD64
434 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
435#elif defined(RT_ARCH_ARM64)
436 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
437#else
438# error "port me"
439#endif
440 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
441 return off;
442}
443
444
445/**
446 * Emits loading a constant into an 8-bit GPR.
447 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
448 * only the ARM64 version does that.
449 */
450DECL_INLINE_THROW(uint32_t)
451iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
452{
453#ifdef RT_ARCH_AMD64
454 /* mov gpr, imm8 */
455 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
456 if (iGpr >= 8)
457 pbCodeBuf[off++] = X86_OP_REX_B;
458 else if (iGpr >= 4)
459 pbCodeBuf[off++] = X86_OP_REX;
460 pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
461 pbCodeBuf[off++] = RT_BYTE1(uImm8);
462
463#elif defined(RT_ARCH_ARM64)
464 /* movz gpr, imm16, lsl #0 */
465 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
466 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;
467
468#else
469# error "port me"
470#endif
471 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
472 return off;
473}
474
475
476#ifdef RT_ARCH_AMD64
477/**
478 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
479 */
480DECL_FORCE_INLINE(uint32_t)
481iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
482{
483 if (offVCpu < 128)
484 {
485 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
486 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
487 }
488 else
489 {
490 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
491 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
492 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
493 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
494 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
495 }
496 return off;
497}
498
499#elif defined(RT_ARCH_ARM64)
500
501/**
502 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
503 *
504 * @note Loads can use @a iGprReg for large offsets, stores require a temporary
505 * register (@a iGprTmp).
506 * @note DON'T try this with prefetch.
507 */
508DECL_FORCE_INLINE_THROW(uint32_t)
509iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
510 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
511{
512 /*
513 * There are a couple of ldr variants that take an immediate offset, so
514 * we try to use those if we can; otherwise we have to use a temporary
515 * register to help with the addressing.
516 */
517 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
518 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
519 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
520 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
521 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
522 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
523 else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
524 {
525 /* The offset is too large, so we must load it into a register and use
526 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
527 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
528 if (iGprTmp == UINT8_MAX)
529 iGprTmp = iGprReg;
530 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
531 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
532 }
533 else
534# ifdef IEM_WITH_THROW_CATCH
535 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
536# else
537 AssertReleaseFailedStmt(off = UINT32_MAX);
538# endif
539
540 return off;
541}
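
/* Worked example (illustrative): a 64-bit load with offVCpu=0x2a8 passes the
   first check (0x2a8 < _4K*8 and 8-byte aligned) and encodes directly as
   ldr Xt, [<PVMCPU>, #0x2a8] using the scaled immediate 0x2a8/8 = 0x55; no
   temporary register is touched on that path. */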
542
543/**
544 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
545 */
546DECL_FORCE_INLINE_THROW(uint32_t)
547iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
548 uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
549{
550 /*
551 * There are a couple of ldr variants that take an immediate offset, so
552 * we try to use those if we can; otherwise we have to use a temporary
553 * register to help with the addressing.
554 */
555 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
556 {
557 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
558 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
559 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
560 }
561 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
562 {
563 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
564 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
565 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
566 }
567 else
568 {
569 /* The offset is too large, so we must load it into a register and use
570 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
571 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
572 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
573 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
574 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
575 IEMNATIVE_REG_FIXED_TMP0);
576 }
577 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
578 return off;
579}
580
581#endif /* RT_ARCH_ARM64 */
582
583
584/**
585 * Emits a 64-bit GPR load of a VCpu value.
586 */
587DECL_FORCE_INLINE_THROW(uint32_t)
588iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
589{
590#ifdef RT_ARCH_AMD64
591 /* mov reg64, mem64 */
592 if (iGpr < 8)
593 pCodeBuf[off++] = X86_OP_REX_W;
594 else
595 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
596 pCodeBuf[off++] = 0x8b;
597 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
598
599#elif defined(RT_ARCH_ARM64)
600 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
601
602#else
603# error "port me"
604#endif
605 return off;
606}
607
608
609/**
610 * Emits a 64-bit GPR load of a VCpu value.
611 */
612DECL_INLINE_THROW(uint32_t)
613iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
614{
615#ifdef RT_ARCH_AMD64
616 off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
617 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
618
619#elif defined(RT_ARCH_ARM64)
620 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
621
622#else
623# error "port me"
624#endif
625 return off;
626}
627
628
629/**
630 * Emits a 32-bit GPR load of a VCpu value.
631 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
632 */
633DECL_INLINE_THROW(uint32_t)
634iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
635{
636#ifdef RT_ARCH_AMD64
637 /* mov reg32, mem32 */
638 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
639 if (iGpr >= 8)
640 pbCodeBuf[off++] = X86_OP_REX_R;
641 pbCodeBuf[off++] = 0x8b;
642 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
644
645#elif defined(RT_ARCH_ARM64)
646 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
647
648#else
649# error "port me"
650#endif
651 return off;
652}
653
654
655/**
656 * Emits a 16-bit GPR load of a VCpu value.
657 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
658 */
659DECL_INLINE_THROW(uint32_t)
660iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
661{
662#ifdef RT_ARCH_AMD64
663 /* movzx reg32, mem16 */
664 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
665 if (iGpr >= 8)
666 pbCodeBuf[off++] = X86_OP_REX_R;
667 pbCodeBuf[off++] = 0x0f;
668 pbCodeBuf[off++] = 0xb7;
669 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
670 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
671
672#elif defined(RT_ARCH_ARM64)
673 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
674
675#else
676# error "port me"
677#endif
678 return off;
679}
680
681
682/**
683 * Emits an 8-bit GPR load of a VCpu value.
684 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
685 */
686DECL_INLINE_THROW(uint32_t)
687iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
688{
689#ifdef RT_ARCH_AMD64
690 /* movzx reg32, mem8 */
691 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
692 if (iGpr >= 8)
693 pbCodeBuf[off++] = X86_OP_REX_R;
694 pbCodeBuf[off++] = 0x0f;
695 pbCodeBuf[off++] = 0xb6;
696 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
697 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
698
699#elif defined(RT_ARCH_ARM64)
700 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
701
702#else
703# error "port me"
704#endif
705 return off;
706}
707
708
709/**
710 * Emits a store of a GPR value to a 64-bit VCpu field.
711 */
712DECL_FORCE_INLINE_THROW(uint32_t)
713iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
714 uint8_t iGprTmp = UINT8_MAX)
715{
716#ifdef RT_ARCH_AMD64
717 /* mov mem64, reg64 */
718 if (iGpr < 8)
719 pCodeBuf[off++] = X86_OP_REX_W;
720 else
721 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
722 pCodeBuf[off++] = 0x89;
723 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
724 RT_NOREF(iGprTmp);
725
726#elif defined(RT_ARCH_ARM64)
727 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
728
729#else
730# error "port me"
731#endif
732 return off;
733}
734
735
736/**
737 * Emits a store of a GPR value to a 64-bit VCpu field.
738 */
739DECL_INLINE_THROW(uint32_t)
740iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
741{
742#ifdef RT_ARCH_AMD64
743 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
744#elif defined(RT_ARCH_ARM64)
745 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
746 IEMNATIVE_REG_FIXED_TMP0);
747#else
748# error "port me"
749#endif
750 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
751 return off;
752}
753
754
755/**
756 * Emits a store of a GPR value to a 32-bit VCpu field.
757 */
758DECL_INLINE_THROW(uint32_t)
759iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
760{
761#ifdef RT_ARCH_AMD64
762 /* mov mem32, reg32 */
763 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
764 if (iGpr >= 8)
765 pbCodeBuf[off++] = X86_OP_REX_R;
766 pbCodeBuf[off++] = 0x89;
767 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
769
770#elif defined(RT_ARCH_ARM64)
771 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
772
773#else
774# error "port me"
775#endif
776 return off;
777}
778
779
780/**
781 * Emits a store of a GPR value to a 16-bit VCpu field.
782 */
783DECL_INLINE_THROW(uint32_t)
784iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
785{
786#ifdef RT_ARCH_AMD64
787 /* mov mem16, reg16 */
788 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
789 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
790 if (iGpr >= 8)
791 pbCodeBuf[off++] = X86_OP_REX_R;
792 pbCodeBuf[off++] = 0x89;
793 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
794 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
795
796#elif defined(RT_ARCH_ARM64)
797 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));
798
799#else
800# error "port me"
801#endif
802 return off;
803}
804
805
806/**
807 * Emits a store of a GPR value to an 8-bit VCpu field.
808 */
809DECL_INLINE_THROW(uint32_t)
810iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
811{
812#ifdef RT_ARCH_AMD64
813 /* mov mem8, reg8 */
814 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
815 if (iGpr >= 8)
816 pbCodeBuf[off++] = X86_OP_REX_R;
817 pbCodeBuf[off++] = 0x88;
818 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
820
821#elif defined(RT_ARCH_ARM64)
822 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
823
824#else
825# error "port me"
826#endif
827 return off;
828}
829
830
831/**
832 * Emits a store of an immediate value to a 32-bit VCpu field.
833 *
834 * @note ARM64: Will allocate temporary registers.
835 */
836DECL_FORCE_INLINE_THROW(uint32_t)
837iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
838{
839#ifdef RT_ARCH_AMD64
840 /* mov mem32, imm32 */
841 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
842 pCodeBuf[off++] = 0xc7;
843 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
844 pCodeBuf[off++] = RT_BYTE1(uImm);
845 pCodeBuf[off++] = RT_BYTE2(uImm);
846 pCodeBuf[off++] = RT_BYTE3(uImm);
847 pCodeBuf[off++] = RT_BYTE4(uImm);
848 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
849
850#elif defined(RT_ARCH_ARM64)
851 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
852 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
853 if (idxRegImm != ARMV8_A64_REG_XZR)
854 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
855
856#else
857# error "port me"
858#endif
859 return off;
860}
861
862
863
864/**
865 * Emits a store of an immediate value to a 16-bit VCpu field.
866 *
867 * @note ARM64: @a idxTmp1 is always required! Whether @a idxTmp2 is needed
868 * depends on whether the offset can be encoded as an immediate. The
869 * @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same from CPUMCTX.
870 */
871DECL_FORCE_INLINE_THROW(uint32_t)
872iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
873 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
874{
875#ifdef RT_ARCH_AMD64
876 /* mov mem16, imm16 */
877 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
878 pCodeBuf[off++] = 0xc7;
879 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
880 pCodeBuf[off++] = RT_BYTE1(uImm);
881 pCodeBuf[off++] = RT_BYTE2(uImm);
882 RT_NOREF(idxTmp1, idxTmp2);
883
884#elif defined(RT_ARCH_ARM64)
885 if (idxTmp1 != UINT8_MAX)
886 {
887 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
888 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
889 sizeof(uint16_t), idxTmp2);
890 }
891 else
892# ifdef IEM_WITH_THROW_CATCH
893 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
894# else
895 AssertReleaseFailedStmt(off = UINT32_MAX);
896# endif
897
898#else
899# error "port me"
900#endif
901 return off;
902}
903
904
905/**
906 * Emits a store of an immediate value to a 8-bit VCpu field.
907 */
908DECL_INLINE_THROW(uint32_t)
909iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
910{
911#ifdef RT_ARCH_AMD64
912 /* mov mem8, imm8 */
913 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
914 pbCodeBuf[off++] = 0xc6;
915 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
916 pbCodeBuf[off++] = bImm;
917 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
918
919#elif defined(RT_ARCH_ARM64)
920 /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
921 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
922 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
923 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
924
925#else
926# error "port me"
927#endif
928 return off;
929}
930
931
932/**
933 * Emits a load of the effective address of a VCpu field into a GPR.
934 */
935DECL_INLINE_THROW(uint32_t)
936iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
937{
938#ifdef RT_ARCH_AMD64
939 /* lea gprdst, [rbx + offDisp] */
940 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
941 if (iGprDst < 8)
942 pbCodeBuf[off++] = X86_OP_REX_W;
943 else
944 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
945 pbCodeBuf[off++] = 0x8d;
946 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);
947
948#elif defined(RT_ARCH_ARM64)
949 if (offVCpu < (unsigned)_4K)
950 {
951 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
952 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
953 }
954 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
955 {
956 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
957 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
958 offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
959 }
960 else
961 {
962 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
963 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
964 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
965 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, iGprDst);
966 }
967
968#else
969# error "port me"
970#endif
971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
972 return off;
973}
974
975
976/** This is just a typesafe alternative to RT_UOFFSETOF. */
977DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
978{
979 uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
980 Assert(off < sizeof(VMCPU));
981 return off;
982}
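
/* Usage sketch (the counter member name below is hypothetical):
       uint32_t const offStat = iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomeCounter);
       off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offStat);
   Unlike a raw RT_UOFFSETOF, passing a pointer to anything that isn't a
   STAMCOUNTER member of VMCPU fails to compile. */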
983
984
985/** This is just a typesafe alternative to RT_UOFFSETOF. */
986DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
987{
988 uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
989 Assert(off < sizeof(VMCPU));
990 return off;
991}
992
993
994/**
995 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
996 *
997 * @note The two temp registers are not required for AMD64. ARM64 always
998 * requires the first, and the 2nd is needed if the offset cannot be
999 * encoded as an immediate.
1000 */
1001DECL_FORCE_INLINE(uint32_t)
1002iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1003{
1004#ifdef RT_ARCH_AMD64
1005 /* inc qword [pVCpu + off] */
1006 pCodeBuf[off++] = X86_OP_REX_W;
1007 pCodeBuf[off++] = 0xff;
1008 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1009 RT_NOREF(idxTmp1, idxTmp2);
1010
1011#elif defined(RT_ARCH_ARM64)
1012 /* Determine how we're to access pVCpu first. */
1013 uint32_t const cbData = sizeof(STAMCOUNTER);
1014 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
1015 {
1016 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1017 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1018 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1019 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1020 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1021 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1022 }
1023 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
1024 {
1025 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1026 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1027 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1028 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1029 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1030 }
1031 else
1032 {
1033 /* The offset is too large, so we must load it into a register and use
1034 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
1035 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1036 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1037 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1038 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1039 }
1040
1041#else
1042# error "port me"
1043#endif
1044 return off;
1045}
1046
1047
1048/**
1049 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1050 *
1051 * @note The two temp registers are not required for AMD64. ARM64 always
1052 * requires the first, and the 2nd is needed if the offset cannot be
1053 * encoded as an immediate.
1054 */
1055DECL_FORCE_INLINE(uint32_t)
1056iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1057{
1058#ifdef RT_ARCH_AMD64
1059 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
1060#elif defined(RT_ARCH_ARM64)
1061 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1062#else
1063# error "port me"
1064#endif
1065 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1066 return off;
1067}
1068
1069
1070/**
1071 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1072 *
1073 * @note The two temp registers are not required for AMD64. ARM64 always
1074 * requires the first, and the 2nd is needed if the offset cannot be
1075 * encoded as an immediate.
1076 */
1077DECL_FORCE_INLINE(uint32_t)
1078iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1079{
1080 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1081#ifdef RT_ARCH_AMD64
1082 /* inc dword [pVCpu + offVCpu] */
1083 pCodeBuf[off++] = 0xff;
1084 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1085 RT_NOREF(idxTmp1, idxTmp2);
1086
1087#elif defined(RT_ARCH_ARM64)
1088 /* Determine how we're to access pVCpu first. */
1089 uint32_t const cbData = sizeof(uint32_t);
1090 if (offVCpu < (unsigned)(_4K * cbData))
1091 {
1092 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1093 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1094 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1095 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1096 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1097 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1098 }
1099 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1100 {
1101 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1102 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1103 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1104 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1105 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1106 }
1107 else
1108 {
1109 /* The offset is too large, so we must load it into a register and use
1110 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1111 of the instruction if that'll reduce the constant to 16 bits. */
1112 if (offVCpu / cbData < (unsigned)UINT16_MAX)
1113 {
1114 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
1115 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1116 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1117 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1118 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1119 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1120 }
1121 else
1122 {
1123 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1124 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1125 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1126 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1127 }
1128 }
1129
1130#else
1131# error "port me"
1132#endif
1133 return off;
1134}
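
/* Worked example (illustrative, assuming offVCpu=0x20000 misses both 4K
   windows): 0x20000/4 = 0x8000 fits in 16 bits, so the slow path becomes
       movz wTmp2, #0x8000
       ldr  wTmp1, [<PVMCPU>, xTmp2, lsl #2]
       add  Tmp1, Tmp1, #1   (followed by the matching str)
   instead of materializing the full offset with a 64-bit immediate load. */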
1135
1136
1137/**
1138 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1139 *
1140 * @note The two temp registers are not required for AMD64. ARM64 always
1141 * requires the first, and the 2nd is needed if the offset cannot be
1142 * encoded as an immediate.
1143 */
1144DECL_FORCE_INLINE(uint32_t)
1145iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1146{
1147#ifdef RT_ARCH_AMD64
1148 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
1149#elif defined(RT_ARCH_ARM64)
1150 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1151#else
1152# error "port me"
1153#endif
1154 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1155 return off;
1156}
1157
1158
1159/**
1160 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
1161 *
1162 * @note May allocate temporary registers (not AMD64).
1163 */
1164DECL_FORCE_INLINE(uint32_t)
1165iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1166{
1167 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1168#ifdef RT_ARCH_AMD64
1169 /* or dword [pVCpu + offVCpu], imm8/32 */
1170 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1171 if (fMask < 0x80)
1172 {
1173 pCodeBuf[off++] = 0x83;
1174 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1175 pCodeBuf[off++] = (uint8_t)fMask;
1176 }
1177 else
1178 {
1179 pCodeBuf[off++] = 0x81;
1180 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1181 pCodeBuf[off++] = RT_BYTE1(fMask);
1182 pCodeBuf[off++] = RT_BYTE2(fMask);
1183 pCodeBuf[off++] = RT_BYTE3(fMask);
1184 pCodeBuf[off++] = RT_BYTE4(fMask);
1185 }
1186
1187#elif defined(RT_ARCH_ARM64)
1188 /* If the constant is unwieldy we'll need a register to hold it as well. */
1189 uint32_t uImmSizeLen, uImmRotate;
1190 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1191 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1192
1193 /* We need a temp register for holding the member value we're modifying. */
1194 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1195
1196 /* Determine how we're to access pVCpu first. */
1197 uint32_t const cbData = sizeof(uint32_t);
1198 if (offVCpu < (unsigned)(_4K * cbData))
1199 {
1200 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1201 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1202 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1203 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1204 if (idxTmpMask == UINT8_MAX)
1205 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1206 else
1207 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1208 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1209 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1210 }
1211 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1212 {
1213 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1214 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1215 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1216 if (idxTmpMask == UINT8_MAX)
1217 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1218 else
1219 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1220 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1221 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1222 }
1223 else
1224 {
1225 /* The offset is too large, so we must load it into a register and use
1226 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1227 of the instruction if that'll reduce the constant to 16 bits. */
1228 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1229 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1230 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1231 if (fShifted)
1232 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1233 else
1234 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1235
1236 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1237 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1238
1239 if (idxTmpMask == UINT8_MAX)
1240 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1241 else
1242 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1243
1244 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1245 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1246 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1247 }
1248 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1249 if (idxTmpMask != UINT8_MAX)
1250 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1251
1252#else
1253# error "port me"
1254#endif
1255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1256 return off;
1257}
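
/* Worked example (illustrative): on AMD64 fMask=0x40 takes the short
   'or dword [pVCpu+offVCpu], imm8' form (0x83 /1 ib); masks >= 0x80 must use
   the 0x81 /1 id form because the imm8 variant sign-extends its operand. */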
1258
1259
1260/**
1261 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1262 *
1263 * @note May allocate temporary registers (not AMD64).
1264 */
1265DECL_FORCE_INLINE(uint32_t)
1266iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1267{
1268 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1269#ifdef RT_ARCH_AMD64
1270 /* and dword [pVCpu + offVCpu], imm8/32 */
1271 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1272 if (fMask < 0x80)
1273 {
1274 pCodeBuf[off++] = 0x83;
1275 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1276 pCodeBuf[off++] = (uint8_t)fMask;
1277 }
1278 else
1279 {
1280 pCodeBuf[off++] = 0x81;
1281 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1282 pCodeBuf[off++] = RT_BYTE1(fMask);
1283 pCodeBuf[off++] = RT_BYTE2(fMask);
1284 pCodeBuf[off++] = RT_BYTE3(fMask);
1285 pCodeBuf[off++] = RT_BYTE4(fMask);
1286 }
1287
1288#elif defined(RT_ARCH_ARM64)
1289 /* If the constant is unwieldy we'll need a register to hold it as well. */
1290 uint32_t uImmSizeLen, uImmRotate;
1291 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1292 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1293
1294 /* We need a temp register for holding the member value we're modifying. */
1295 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1296
1297 /* Determine how we're to access pVCpu first. */
1298 uint32_t const cbData = sizeof(uint32_t);
1299 if (offVCpu < (unsigned)(_4K * cbData))
1300 {
1301 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1302 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1303 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1304 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1305 if (idxTmpMask == UINT8_MAX)
1306 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1307 else
1308 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1309 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1310 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1311 }
1312 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1313 {
1314 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1315 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1316 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1317 if (idxTmpMask == UINT8_MAX)
1318 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1319 else
1320 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1321 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1322 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1323 }
1324 else
1325 {
1326 /* The offset is too large, so we must load it into a register and use
1327 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1328 of the instruction if that'll reduce the constant to 16 bits. */
1329 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1330 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1331 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1332 if (fShifted)
1333 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1334 else
1335 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1336
1337 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1338 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1339
1340 if (idxTmpMask == UINT8_MAX)
1341 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1342 else
1343 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1344
1345 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1346 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1347 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1348 }
1349 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1350 if (idxTmpMask != UINT8_MAX)
1351 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1352
1353#else
1354# error "port me"
1355#endif
1356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1357 return off;
1358}
1359
1360
1361/**
1362 * Emits a gprdst = gprsrc load.
1363 */
1364DECL_FORCE_INLINE(uint32_t)
1365iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1366{
1367#ifdef RT_ARCH_AMD64
1368 /* mov gprdst, gprsrc */
1369 if ((iGprDst | iGprSrc) >= 8)
1370 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1371 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1372 : X86_OP_REX_W | X86_OP_REX_R;
1373 else
1374 pCodeBuf[off++] = X86_OP_REX_W;
1375 pCodeBuf[off++] = 0x8b;
1376 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1377
1378#elif defined(RT_ARCH_ARM64)
1379 /* mov dst, src; alias for: orr dst, xzr, src */
1380 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1381
1382#else
1383# error "port me"
1384#endif
1385 return off;
1386}
1387
1388
1389/**
1390 * Emits a gprdst = gprsrc load.
1391 */
1392DECL_INLINE_THROW(uint32_t)
1393iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1394{
1395#ifdef RT_ARCH_AMD64
1396 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1397#elif defined(RT_ARCH_ARM64)
1398 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1399#else
1400# error "port me"
1401#endif
1402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1403 return off;
1404}
1405
1406
1407/**
1408 * Emits a gprdst = gprsrc[31:0] load.
1409 * @note Bits 63 thru 32 are cleared.
1410 */
1411DECL_FORCE_INLINE(uint32_t)
1412iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1413{
1414#ifdef RT_ARCH_AMD64
1415 /* mov gprdst, gprsrc */
1416 if ((iGprDst | iGprSrc) >= 8)
1417 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1418 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1419 : X86_OP_REX_R;
1420 pCodeBuf[off++] = 0x8b;
1421 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1422
1423#elif defined(RT_ARCH_ARM64)
1424 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1425 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1426
1427#else
1428# error "port me"
1429#endif
1430 return off;
1431}
1432
1433
1434/**
1435 * Emits a gprdst = gprsrc[31:0] load.
1436 * @note Bits 63 thru 32 are cleared.
1437 */
1438DECL_INLINE_THROW(uint32_t)
1439iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1440{
1441#ifdef RT_ARCH_AMD64
1442 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1443#elif defined(RT_ARCH_ARM64)
1444 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1445#else
1446# error "port me"
1447#endif
1448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1449 return off;
1450}
1451
1452
1453/**
1454 * Emits a gprdst = gprsrc[15:0] load.
1455 * @note Bits 63 thru 16 are cleared.
1456 */
1457DECL_INLINE_THROW(uint32_t)
1458iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1459{
1460#ifdef RT_ARCH_AMD64
1461 /* movzx Gv,Ew */
1462 if ((iGprDst | iGprSrc) >= 8)
1463 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1464 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1465 : X86_OP_REX_R;
1466 pCodeBuf[off++] = 0x0f;
1467 pCodeBuf[off++] = 0xb7;
1468 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1469
1470#elif defined(RT_ARCH_ARM64)
1471 /* and gprdst, gprsrc, #0xffff */
1472# if 1
1473 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1474 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1475# else
1476 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1477 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1478# endif
1479
1480#else
1481# error "port me"
1482#endif
1483 return off;
1484}
1485
1486
1487/**
1488 * Emits a gprdst = gprsrc[15:0] load.
1489 * @note Bits 63 thru 16 are cleared.
1490 */
1491DECL_INLINE_THROW(uint32_t)
1492iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1493{
1494#ifdef RT_ARCH_AMD64
1495 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1496#elif defined(RT_ARCH_ARM64)
1497 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1498#else
1499# error "port me"
1500#endif
1501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1502 return off;
1503}
1504
1505
1506/**
1507 * Emits a gprdst = gprsrc[7:0] load.
1508 * @note Bits 63 thru 8 are cleared.
1509 */
1510DECL_FORCE_INLINE(uint32_t)
1511iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1512{
1513#ifdef RT_ARCH_AMD64
1514 /* movzx Gv,Eb */
1515 if (iGprDst >= 8 || iGprSrc >= 8)
1516 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1517 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1518 : X86_OP_REX_R;
1519 else if (iGprSrc >= 4)
1520 pCodeBuf[off++] = X86_OP_REX;
1521 pCodeBuf[off++] = 0x0f;
1522 pCodeBuf[off++] = 0xb6;
1523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1524
1525#elif defined(RT_ARCH_ARM64)
1526 /* and gprdst, gprsrc, #0xff */
1527 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1528 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1529
1530#else
1531# error "port me"
1532#endif
1533 return off;
1534}
1535
1536
1537/**
1538 * Emits a gprdst = gprsrc[7:0] load.
1539 * @note Bits 63 thru 8 are cleared.
1540 */
1541DECL_INLINE_THROW(uint32_t)
1542iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1543{
1544#ifdef RT_ARCH_AMD64
1545 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1546#elif defined(RT_ARCH_ARM64)
1547 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1548#else
1549# error "port me"
1550#endif
1551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1552 return off;
1553}
1554
1555
1556/**
1557 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1558 * @note Bits 63 thru 8 are cleared.
1559 */
1560DECL_INLINE_THROW(uint32_t)
1561iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1562{
1563#ifdef RT_ARCH_AMD64
1564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1565
1566 /* movzx Gv,Ew */
1567 if ((iGprDst | iGprSrc) >= 8)
1568 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1569 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1570 : X86_OP_REX_R;
1571 pbCodeBuf[off++] = 0x0f;
1572 pbCodeBuf[off++] = 0xb7;
1573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1574
1575 /* shr Ev,8 */
1576 if (iGprDst >= 8)
1577 pbCodeBuf[off++] = X86_OP_REX_B;
1578 pbCodeBuf[off++] = 0xc1;
1579 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1580 pbCodeBuf[off++] = 8;
1581
1582#elif defined(RT_ARCH_ARM64)
1583 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1585 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1586
1587#else
1588# error "port me"
1589#endif
1590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1591 return off;
1592}
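
/* Worked example for the high-byte extraction above: with iGprSrc holding
   0x1234, the AMD64 path first zero-extends the word (dst = 0x1234) and then
   shifts right by 8 (dst = 0x12); the ARM64 'ubfx dst, src, #8, #8' pulls out
   the same byte in one instruction. */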
1593
1594
1595/**
1596 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1597 */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1600{
1601#ifdef RT_ARCH_AMD64
1602 /* movsxd r64, r/m32 */
1603 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1604 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1605 pbCodeBuf[off++] = 0x63;
1606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1607
1608#elif defined(RT_ARCH_ARM64)
1609 /* sxtw dst, src */
1610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1611 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1612
1613#else
1614# error "port me"
1615#endif
1616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1617 return off;
1618}
1619
1620
1621/**
1622 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1623 */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1626{
1627#ifdef RT_ARCH_AMD64
1628 /* movsx r64, r/m16 */
1629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xbf;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635#elif defined(RT_ARCH_ARM64)
1636 /* sxth dst, src */
1637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1638 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1639
1640#else
1641# error "port me"
1642#endif
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644 return off;
1645}
1646
1647
1648/**
1649 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1650 */
1651DECL_INLINE_THROW(uint32_t)
1652iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1653{
1654#ifdef RT_ARCH_AMD64
1655 /* movsx r32, r/m16 */
1656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1657 if (iGprDst >= 8 || iGprSrc >= 8)
1658 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1659 pbCodeBuf[off++] = 0x0f;
1660 pbCodeBuf[off++] = 0xbf;
1661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 /* sxth dst32, src */
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1666 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1667
1668#else
1669# error "port me"
1670#endif
1671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1672 return off;
1673}
1674
1675
1676/**
1677 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1678 */
1679DECL_INLINE_THROW(uint32_t)
1680iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /* movsx r64, r/m8 */
1684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1686 pbCodeBuf[off++] = 0x0f;
1687 pbCodeBuf[off++] = 0xbe;
1688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1689
1690#elif defined(RT_ARCH_ARM64)
1691 /* sxtb dst, src */
1692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1693 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1694
1695#else
1696# error "port me"
1697#endif
1698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1699 return off;
1700}
1701
1702
1703/**
1704 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1705 * @note Bits 63 thru 32 are cleared.
1706 */
1707DECL_INLINE_THROW(uint32_t)
1708iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1709{
1710#ifdef RT_ARCH_AMD64
1711 /* movsx r32, r/m8 */
1712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1713 if (iGprDst >= 8 || iGprSrc >= 8)
1714 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1715 else if (iGprSrc >= 4)
1716 pbCodeBuf[off++] = X86_OP_REX;
1717 pbCodeBuf[off++] = 0x0f;
1718 pbCodeBuf[off++] = 0xbe;
1719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 /* sxtb dst32, src32 */
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1724 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1725
1726#else
1727# error "port me"
1728#endif
1729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1730 return off;
1731}
1732
1733
1734/**
1735 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1736 * @note Bits 63 thru 16 are cleared.
1737 */
1738DECL_INLINE_THROW(uint32_t)
1739iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1740{
1741#ifdef RT_ARCH_AMD64
1742 /* movsx r16, r/m8 */
1743 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1744 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1745 if (iGprDst >= 8 || iGprSrc >= 8)
1746 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1747 else if (iGprSrc >= 4)
1748 pbCodeBuf[off++] = X86_OP_REX;
1749 pbCodeBuf[off++] = 0x0f;
1750 pbCodeBuf[off++] = 0xbe;
1751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1752
1753 /* movzx r32, r/m16 */
1754 if (iGprDst >= 8)
1755 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1756 pbCodeBuf[off++] = 0x0f;
1757 pbCodeBuf[off++] = 0xb7;
1758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1759
1760#elif defined(RT_ARCH_ARM64)
1761 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1763 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1764 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1766
1767#else
1768# error "port me"
1769#endif
1770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1771 return off;
1772}
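
/* Worked example: for iGprSrc[7:0] = 0x80 the sign extension produces
   0xffffff80 in the low 32 bits, and the subsequent 0xffff mask leaves
   0x0000ff80, i.e. the 16-bit two's complement representation of -128 with
   all higher bits clear. */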
1773
1774
1775/**
1776 * Emits a gprdst = gprsrc + addend load.
1777 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1778 */
1779#ifdef RT_ARCH_AMD64
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1782 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1783{
1784 Assert(iAddend != 0);
1785
1786 /* lea gprdst, [gprsrc + iAddend] */
1787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1789 pbCodeBuf[off++] = 0x8d;
1790 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1792 return off;
1793}
1794
1795#elif defined(RT_ARCH_ARM64)
1796DECL_INLINE_THROW(uint32_t)
1797iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1798 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1799{
1800 if ((uint64_t)iAddend < 4096)
1801 {
1802 /* add dst, src, uimm12 */
1803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1805 }
1806 else if ((uint64_t)-iAddend < 4096)
1807 {
1808 /* sub dst, src, uimm12 */
1809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1811 }
1812 else
1813 {
1814 Assert(iGprSrc != iGprDst);
1815 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1818 }
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820 return off;
1821}
1822#else
1823# error "port me"
1824#endif
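
/* Path selection sketch for the ARM64 variant above: iAddend = 4095 fits the
   add-immediate form directly, iAddend = -32 becomes 'sub dst, src, #32', and
   anything outside +/-4095, e.g. 0x12345, is first materialized in iGprDst
   and then added to iGprSrc; that is why that path asserts
   iGprSrc != iGprDst. */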
1825
1826/**
1827 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1828 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1829 */
1830#ifdef RT_ARCH_AMD64
1831DECL_INLINE_THROW(uint32_t)
1832iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1833 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1834#else
1835DECL_INLINE_THROW(uint32_t)
1836iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1837 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1838#endif
1839{
1840 if (iAddend != 0)
1841 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1842 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1843}
1844
1845
1846/**
1847 * Emits a gprdst = gprsrc32 + addend load.
1848 * @note Bits 63 thru 32 are cleared.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1852 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1853{
1854 Assert(iAddend != 0);
1855
1856#ifdef RT_ARCH_AMD64
1857 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1860 if ((iGprDst | iGprSrc) >= 8)
1861 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1862 pbCodeBuf[off++] = 0x8d;
1863 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1864
1865#elif defined(RT_ARCH_ARM64)
1866 if ((uint32_t)iAddend < 4096)
1867 {
1868 /* add dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1871 }
1872 else if ((uint32_t)-iAddend < 4096)
1873 {
1874 /* sub dst, src, uimm12 */
1875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1877 }
1878 else
1879 {
1880 Assert(iGprSrc != iGprDst);
1881 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1884 }
1885
1886#else
1887# error "port me"
1888#endif
1889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1890 return off;
1891}
1892
1893
1894/**
1895 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1896 */
1897DECL_INLINE_THROW(uint32_t)
1898iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1899 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1900{
1901 if (iAddend != 0)
1902 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1903 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1904}
1905
1906
1907/**
1908 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1909 * destination.
1910 */
1911DECL_FORCE_INLINE(uint32_t)
1912iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1913{
1914#ifdef RT_ARCH_AMD64
1915 /* mov reg16, r/m16 */
1916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1917 if (idxDst >= 8 || idxSrc >= 8)
1918 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1919 pCodeBuf[off++] = 0x8b;
1920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1921
1922#elif defined(RT_ARCH_ARM64)
1923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1924 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1925
1926#else
1927# error "Port me!"
1928#endif
1929 return off;
1930}
1931
1932
1933/**
1934 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1935 * destination.
1936 */
1937DECL_INLINE_THROW(uint32_t)
1938iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1939{
1940#ifdef RT_ARCH_AMD64
1941 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1942#elif defined(RT_ARCH_ARM64)
1943 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1944#else
1945# error "Port me!"
1946#endif
1947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1948 return off;
1949}
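/* Worked example for the 16-bit merge: idxDst = 0x11112222 and
   idxSrc = 0xaaaabbbb yields idxDst = 0x1111bbbb on both hosts; the AMD64
   16-bit mov leaves bits 63:16 untouched and the ARM64 bfi only replaces the
   inserted bit field. */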
1950
1951
1952#ifdef RT_ARCH_AMD64
1953/**
1954 * Common bit of iemNativeEmitLoadGprByBp and friends.
1955 */
1956DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1957 PIEMRECOMPILERSTATE pReNativeAssert)
1958{
1959 if (offDisp < 128 && offDisp >= -128)
1960 {
1961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1962 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1963 }
1964 else
1965 {
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1967 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1968 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1969 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1970 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1971 }
1972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1973 return off;
1974}
1975#elif defined(RT_ARCH_ARM64)
1976/**
1977 * Common bit of iemNativeEmitLoadGprByBp and friends.
1978 */
1979DECL_FORCE_INLINE_THROW(uint32_t)
1980iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1981 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1982{
1983 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1984 {
1985 /* str w/ unsigned imm12 (scaled) */
1986 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1988 }
1989 else if (offDisp >= -256 && offDisp <= 256)
1990 {
1991 /* stur w/ signed imm9 (unscaled) */
1992 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1993 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1994 }
1995 else
1996 {
1997 /* Use temporary indexing register. */
1998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2001 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2002 }
2003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2004 return off;
2005}
2006#endif
2007
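/* Worked encoding examples for the BP addressing helpers above (nothing here
   is emitted verbatim): on AMD64 a displacement of -40 with iGprReg = 0
   encodes as ModRM 0x45 followed by the disp8 byte 0xd8, while -640 needs
   ModRM 0x85 plus a 32-bit displacement.  On ARM64 an aligned positive
   offset such as 0x28 with cbData = 8 uses the scaled form
   'ldr Xt, [x29, #0x28]', i.e. imm12 = 5. */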
2008
2009/**
2010 * Emits a 64-bit GPR load instruction with a BP relative source address.
2011 */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2014{
2015#ifdef RT_ARCH_AMD64
2016 /* mov gprdst, qword [rbp + offDisp] */
2017 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2018 if (iGprDst < 8)
2019 pbCodeBuf[off++] = X86_OP_REX_W;
2020 else
2021 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2022 pbCodeBuf[off++] = 0x8b;
2023 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2024
2025#elif defined(RT_ARCH_ARM64)
2026 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2027
2028#else
2029# error "port me"
2030#endif
2031}
2032
2033
2034/**
2035 * Emits a 32-bit GPR load instruction with a BP relative source address.
2036 * @note Bits 63 thru 32 of the GPR will be cleared.
2037 */
2038DECL_INLINE_THROW(uint32_t)
2039iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2040{
2041#ifdef RT_ARCH_AMD64
2042 /* mov gprdst, dword [rbp + offDisp] */
2043 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2044 if (iGprDst >= 8)
2045 pbCodeBuf[off++] = X86_OP_REX_R;
2046 pbCodeBuf[off++] = 0x8b;
2047 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2048
2049#elif defined(RT_ARCH_ARM64)
2050 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2051
2052#else
2053# error "port me"
2054#endif
2055}
2056
2057
2058/**
2059 * Emits a 16-bit GPR load instruction with a BP relative source address.
2060 * @note Bits 63 thru 16 of the GPR will be cleared.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2064{
2065#ifdef RT_ARCH_AMD64
2066 /* movzx gprdst, word [rbp + offDisp] */
2067 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2068 if (iGprDst >= 8)
2069 pbCodeBuf[off++] = X86_OP_REX_R;
2070 pbCodeBuf[off++] = 0x0f;
2071 pbCodeBuf[off++] = 0xb7;
2072 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2073
2074#elif defined(RT_ARCH_ARM64)
2075 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2076
2077#else
2078# error "port me"
2079#endif
2080}
2081
2082
2083/**
2084 * Emits an 8-bit GPR load instruction with a BP relative source address.
2085 * @note Bits 63 thru 8 of the GPR will be cleared.
2086 */
2087DECL_INLINE_THROW(uint32_t)
2088iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2089{
2090#ifdef RT_ARCH_AMD64
2091 /* movzx gprdst, byte [rbp + offDisp] */
2092 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2093 if (iGprDst >= 8)
2094 pbCodeBuf[off++] = X86_OP_REX_R;
2095 pbCodeBuf[off++] = 0x0f;
2096 pbCodeBuf[off++] = 0xb6;
2097 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2098
2099#elif defined(RT_ARCH_ARM64)
2100 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2101
2102#else
2103# error "port me"
2104#endif
2105}
2106
2107
2108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2109/**
2110 * Emits a 128-bit vector register load instruction with a BP relative source address.
2111 */
2112DECL_FORCE_INLINE_THROW(uint32_t)
2113iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2114{
2115#ifdef RT_ARCH_AMD64
2116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2117
2118 /* movdqu reg128, mem128 */
2119 pbCodeBuf[off++] = 0xf3;
2120 if (iVecRegDst >= 8)
2121 pbCodeBuf[off++] = X86_OP_REX_R;
2122 pbCodeBuf[off++] = 0x0f;
2123 pbCodeBuf[off++] = 0x6f;
2124 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2125#elif defined(RT_ARCH_ARM64)
2126 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2127#else
2128# error "port me"
2129#endif
2130}
2131
2132
2133/**
2134 * Emits a 256-bit vector register load instruction with a BP relative source address.
2135 */
2136DECL_FORCE_INLINE_THROW(uint32_t)
2137iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2138{
2139#ifdef RT_ARCH_AMD64
2140 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2141
2142 /* vmovdqu reg256, mem256 */
2143 pbCodeBuf[off++] = X86_OP_VEX2;
2144 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2145 pbCodeBuf[off++] = 0x6f;
2146 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2147#elif defined(RT_ARCH_ARM64)
2148 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2149 Assert(!(iVecRegDst & 0x1));
2150 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2151 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2152#else
2153# error "port me"
2154#endif
2155}
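
/* Example: with iVecRegDst = 2 the ARM64 path above loads q2 from
   [bp + offDisp] and q3 from [bp + offDisp + 16]; the even-register assertion
   is what makes such register pairs usable as one 256-bit value. */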
2156
2157#endif
2158
2159
2160/**
2161 * Emits a load effective address to a GPR with a BP relative source address.
2162 */
2163DECL_INLINE_THROW(uint32_t)
2164iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2165{
2166#ifdef RT_ARCH_AMD64
2167 /* lea gprdst, [rbp + offDisp] */
2168 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2169 if (iGprDst < 8)
2170 pbCodeBuf[off++] = X86_OP_REX_W;
2171 else
2172 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2173 pbCodeBuf[off++] = 0x8d;
2174 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2175
2176#elif defined(RT_ARCH_ARM64)
2177 if ((uint32_t)offDisp < (unsigned)_4K)
2178 {
2179 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2181 }
2182 else if ((uint32_t)-offDisp < (unsigned)_4K)
2183 {
2184 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2185 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2186 }
2187 else
2188 {
2189 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2190 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2191 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2192 if (offDisp >= 0)
2193 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2194 else
2195 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2196 }
2197
2198#else
2199# error "port me"
2200#endif
2201
2202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2203 return off;
2204}
2205
2206
2207/**
2208 * Emits a 64-bit GPR store with a BP relative destination address.
2209 *
2210 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2211 */
2212DECL_INLINE_THROW(uint32_t)
2213iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2214{
2215#ifdef RT_ARCH_AMD64
2216 /* mov qword [rbp + offDisp], gprsrc */
2217 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2218 if (iGprSrc < 8)
2219 pbCodeBuf[off++] = X86_OP_REX_W;
2220 else
2221 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2222 pbCodeBuf[off++] = 0x89;
2223 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2224
2225#elif defined(RT_ARCH_ARM64)
2226 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2227 {
2228 /* str w/ unsigned imm12 (scaled) */
2229 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2230 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2231 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2232 }
2233 else if (offDisp >= -256 && offDisp <= 256)
2234 {
2235 /* stur w/ signed imm9 (unscaled) */
2236 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2237 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2238 }
2239 else if ((uint32_t)-offDisp < (unsigned)_4K)
2240 {
2241 /* Use temporary indexing register w/ sub uimm12. */
2242 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2243 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2244 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2245 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2246 }
2247 else
2248 {
2249 /* Use temporary indexing register. */
2250 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2251 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2252 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2253 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2254 }
2255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2256 return off;
2257
2258#else
2259# error "Port me!"
2260#endif
2261}
2262
2263
2264/**
2265 * Emits a 64-bit immediate store with a BP relative destination address.
2266 *
2267 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2268 */
2269DECL_INLINE_THROW(uint32_t)
2270iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2271{
2272#ifdef RT_ARCH_AMD64
2273 if ((int64_t)uImm64 == (int32_t)uImm64)
2274 {
2275 /* mov qword [rbp + offDisp], imm32 - sign extended */
2276 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2277 pbCodeBuf[off++] = X86_OP_REX_W;
2278 pbCodeBuf[off++] = 0xc7;
2279 if (offDisp < 128 && offDisp >= -128)
2280 {
2281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2282 pbCodeBuf[off++] = (uint8_t)offDisp;
2283 }
2284 else
2285 {
2286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2287 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2288 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2289 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2290 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2291 }
2292 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2293 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2294 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2295 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2297 return off;
2298 }
2299#endif
2300
2301 /* Load tmp0, imm64; Store tmp to bp+disp. */
2302 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2303 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2304}
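
/* The (int64_t)uImm64 == (int32_t)uImm64 test above accepts exactly the
   values representable as a sign-extended imm32: 0xffffffff80000000 stores
   with a single 'mov qword [rbp+disp], 0x80000000', whereas
   0x0000000080000000 fails the test and is routed through TMP0. */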
2305
2306
2307#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2308/**
2309 * Emits a 128-bit vector register store with a BP relative destination address.
2310 *
2311 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2312 */
2313DECL_INLINE_THROW(uint32_t)
2314iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2315{
2316#ifdef RT_ARCH_AMD64
2317 /* movdqu [rbp + offDisp], vecsrc */
2318 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2319 pbCodeBuf[off++] = 0xf3;
2320 if (iVecRegSrc >= 8)
2321 pbCodeBuf[off++] = X86_OP_REX_R;
2322 pbCodeBuf[off++] = 0x0f;
2323 pbCodeBuf[off++] = 0x7f;
2324 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2325
2326#elif defined(RT_ARCH_ARM64)
2327 if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2328 {
2329 /* str w/ unsigned imm12 (scaled) */
2330 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2331 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2332 ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2333 }
2334 else if (offDisp >= -256 && offDisp <= 256)
2335 {
2336 /* stur w/ signed imm9 (unscaled) */
2337 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2338 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2339 }
2340 else if ((uint32_t)-offDisp < (unsigned)_4K)
2341 {
2342 /* Use temporary indexing register w/ sub uimm12. */
2343 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2344 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2345 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2346 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2347 }
2348 else
2349 {
2350 /* Use temporary indexing register. */
2351 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2352 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2353 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2354 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2355 }
2356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2357 return off;
2358
2359#else
2360# error "Port me!"
2361#endif
2362}
2363
2364
2365/**
2366 * Emits a 256-bit vector register store with a BP relative destination address.
2367 *
2368 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2369 */
2370DECL_INLINE_THROW(uint32_t)
2371iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2372{
2373#ifdef RT_ARCH_AMD64
2374 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2375
2376 /* vmovdqu mem256, reg256 */
2377 pbCodeBuf[off++] = X86_OP_VEX2;
2378 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2379 pbCodeBuf[off++] = 0x7f;
2380 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2381#elif defined(RT_ARCH_ARM64)
2382 Assert(!(iVecRegSrc & 0x1));
2383 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2384 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2385#else
2386# error "Port me!"
2387#endif
2388}
2389#endif
2390
2391#if defined(RT_ARCH_ARM64)
2392
2393/**
2394 * Common bit of iemNativeEmitLoadGprByGprU64Ex and friends.
2395 *
2396 * @note Odd and large @a offDisp values require a temporary, unless it's a
2397 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2398 * caller does not heed this.
2399 *
2400 * @note DON'T try this with prefetch.
2401 */
2402DECL_FORCE_INLINE_THROW(uint32_t)
2403iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2404 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2405{
2406 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2407 {
2408 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2409 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2410 }
2411 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2412 && iGprReg != iGprBase)
2413 || iGprTmp != UINT8_MAX)
2414 {
2415 /* The offset is too large, so we must load it into a register and use
2416 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2417 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2418 if (iGprTmp == UINT8_MAX)
2419 iGprTmp = iGprReg;
2420 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2421 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2422 }
2423 else
2424# ifdef IEM_WITH_THROW_CATCH
2425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2426# else
2427 AssertReleaseFailedStmt(off = UINT32_MAX);
2428# endif
2429 return off;
2430}
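
/* Dispatch sketch for the helper above, dword case (cbData = 8):
   offDisp = 0x40 is aligned and in range, so it encodes directly as
   'ldr Xt, [Xbase, #0x40]'; a misaligned offDisp = 0x42 must go via a
   register, 'mov Xtmp, #0x42' + 'ldr Xt, [Xbase, Xtmp]'.  When no iGprTmp is
   supplied, a load reuses iGprReg as that temporary, which is why iGprReg
   must then differ from iGprBase; a store in the same situation asserts. */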
2431
2432/**
2433 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2434 */
2435DECL_FORCE_INLINE_THROW(uint32_t)
2436iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2437 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2438{
2439 /*
2440 * There are a couple of ldr variants that take an immediate offset, so
2441 * try to use those if we can; otherwise we have to use a temporary
2442 * register to help with the addressing.
2443 */
2444 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2445 {
2446 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2447 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2448 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2449 }
2450 else
2451 {
2452 /* The offset is too large, so we must load it into a register and use
2453 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2454 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2455 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2456
2457 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2458 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2459
2460 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2461 }
2462 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2463 return off;
2464}
2465
2466#endif /* RT_ARCH_ARM64 */
2467
2468/**
2469 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2470 *
2471 * @note ARM64: Misaligned @a offDisp values and values not in the
2472 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2473 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2474 * does not heed this.
2475 */
2476DECL_FORCE_INLINE_THROW(uint32_t)
2477iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2478 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2479{
2480#ifdef RT_ARCH_AMD64
2481 /* mov reg64, mem64 */
2482 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2483 pCodeBuf[off++] = 0x8b;
2484 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2485 RT_NOREF(iGprTmp);
2486
2487#elif defined(RT_ARCH_ARM64)
2488 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2489 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2490
2491#else
2492# error "port me"
2493#endif
2494 return off;
2495}
2496
2497
2498/**
2499 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2500 */
2501DECL_INLINE_THROW(uint32_t)
2502iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2503{
2504#ifdef RT_ARCH_AMD64
2505 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2507
2508#elif defined(RT_ARCH_ARM64)
2509 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2510
2511#else
2512# error "port me"
2513#endif
2514 return off;
2515}
2516
2517
2518/**
2519 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2520 *
2521 * @note ARM64: Misaligned @a offDisp values and values not in the
2522 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2523 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2524 * caller does not heed this.
2525 *
2526 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2527 */
2528DECL_FORCE_INLINE_THROW(uint32_t)
2529iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2530 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2531{
2532#ifdef RT_ARCH_AMD64
2533 /* mov reg32, mem32 */
2534 if (iGprDst >= 8 || iGprBase >= 8)
2535 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2536 pCodeBuf[off++] = 0x8b;
2537 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2538 RT_NOREF(iGprTmp);
2539
2540#elif defined(RT_ARCH_ARM64)
2541 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2542 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2543
2544#else
2545# error "port me"
2546#endif
2547 return off;
2548}
2549
2550
2551/**
2552 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2553 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2554 */
2555DECL_INLINE_THROW(uint32_t)
2556iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2557{
2558#ifdef RT_ARCH_AMD64
2559 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2560 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2561
2562#elif defined(RT_ARCH_ARM64)
2563 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2564
2565#else
2566# error "port me"
2567#endif
2568 return off;
2569}
2570
2571
2572/**
2573 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2574 * sign-extending the value to 64 bits.
2575 *
2576 * @note ARM64: Misaligned @a offDisp values and values not in the
2577 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2578 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2579 * caller does not heed this.
2580 */
2581DECL_FORCE_INLINE_THROW(uint32_t)
2582iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2583 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2584{
2585#ifdef RT_ARCH_AMD64
2586 /* movsxd reg64, mem32 */
2587 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2588 pCodeBuf[off++] = 0x63;
2589 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2590 RT_NOREF(iGprTmp);
2591
2592#elif defined(RT_ARCH_ARM64)
2593 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2594 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2595
2596#else
2597# error "port me"
2598#endif
2599 return off;
2600}
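
/* Example of why the signed variant matters: a 32-bit field holding
   0x80000000 loads as 0xffffffff80000000 here, whereas the plain U32 loader
   above zero-extends the same field to 0x0000000080000000. */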
2601
2602
2603/**
2604 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2605 *
2606 * @note ARM64: Misaligned @a offDisp values and values not in the
2607 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2608 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2609 * caller does not heed this.
2610 *
2611 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2612 */
2613DECL_FORCE_INLINE_THROW(uint32_t)
2614iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2615 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2616{
2617#ifdef RT_ARCH_AMD64
2618 /* movzx reg32, mem16 */
2619 if (iGprDst >= 8 || iGprBase >= 8)
2620 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2621 pCodeBuf[off++] = 0x0f;
2622 pCodeBuf[off++] = 0xb7;
2623 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2624 RT_NOREF(iGprTmp);
2625
2626#elif defined(RT_ARCH_ARM64)
2627 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2628 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2629
2630#else
2631# error "port me"
2632#endif
2633 return off;
2634}
2635
2636
2637/**
2638 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2639 * sign-extending the value to 64 bits.
2640 *
2641 * @note ARM64: Misaligned @a offDisp values and values not in the
2642 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2643 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2644 * caller does not heed this.
2645 */
2646DECL_FORCE_INLINE_THROW(uint32_t)
2647iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2648 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2649{
2650#ifdef RT_ARCH_AMD64
2651 /* movsx reg64, mem16 */
2652 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2653 pCodeBuf[off++] = 0x0f;
2654 pCodeBuf[off++] = 0xbf;
2655 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2656 RT_NOREF(iGprTmp);
2657
2658#elif defined(RT_ARCH_ARM64)
2659 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2660 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2661
2662#else
2663# error "port me"
2664#endif
2665 return off;
2666}
2667
2668
2669/**
2670 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2671 * sign-extending the value to 32 bits.
2672 *
2673 * @note ARM64: Misaligned @a offDisp values and values not in the
2674 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2675 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2676 * caller does not heed this.
2677 *
2678 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2679 */
2680DECL_FORCE_INLINE_THROW(uint32_t)
2681iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2682 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2683{
2684#ifdef RT_ARCH_AMD64
2685 /* movsx reg32, mem16 */
2686 if (iGprDst >= 8 || iGprBase >= 8)
2687 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2688 pCodeBuf[off++] = 0x0f;
2689 pCodeBuf[off++] = 0xbf;
2690 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2691 RT_NOREF(iGprTmp);
2692
2693#elif defined(RT_ARCH_ARM64)
2694 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2695 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2696
2697#else
2698# error "port me"
2699#endif
2700 return off;
2701}
2702
2703
2704/**
2705 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2706 *
2707 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2708 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2709 * same. Will assert / throw if caller does not heed this.
2710 *
2711 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2712 */
2713DECL_FORCE_INLINE_THROW(uint32_t)
2714iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2715 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2716{
2717#ifdef RT_ARCH_AMD64
2718 /* movzx reg32, mem8 */
2719 if (iGprDst >= 8 || iGprBase >= 8)
2720 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2721 pCodeBuf[off++] = 0x0f;
2722 pCodeBuf[off++] = 0xb6;
2723 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2724 RT_NOREF(iGprTmp);
2725
2726#elif defined(RT_ARCH_ARM64)
2727 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2728 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2729
2730#else
2731# error "port me"
2732#endif
2733 return off;
2734}
2735
2736
2737/**
2738 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2739 * sign-extending the value to 64 bits.
2740 *
2741 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2742 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2743 * same. Will assert / throw if caller does not heed this.
2744 */
2745DECL_FORCE_INLINE_THROW(uint32_t)
2746iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2747 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2748{
2749#ifdef RT_ARCH_AMD64
2750 /* movsx reg64, mem8 */
2751 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2752 pCodeBuf[off++] = 0x0f;
2753 pCodeBuf[off++] = 0xbe;
2754 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2755 RT_NOREF(iGprTmp);
2756
2757#elif defined(RT_ARCH_ARM64)
2758 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2759 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2760
2761#else
2762# error "port me"
2763#endif
2764 return off;
2765}
2766
2767
2768/**
2769 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2770 * sign-extending the value to 32 bits.
2771 *
2772 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2773 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2774 * same. Will assert / throw if caller does not heed this.
2775 *
2776 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2777 */
2778DECL_FORCE_INLINE_THROW(uint32_t)
2779iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2780 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2781{
2782#ifdef RT_ARCH_AMD64
2783 /* movsx reg32, mem8 */
2784 if (iGprDst >= 8 || iGprBase >= 8)
2785 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2786 pCodeBuf[off++] = 0x0f;
2787 pCodeBuf[off++] = 0xbe;
2788 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2789 RT_NOREF(iGprTmp);
2790
2791#elif defined(RT_ARCH_ARM64)
2792 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2793 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2794
2795#else
2796# error "port me"
2797#endif
2798 return off;
2799}
2800
2801
2802/**
2803 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2804 * sign-extending the value to 16 bits.
2805 *
2806 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2807 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2808 * same. Will assert / throw if caller does not heed this.
2809 *
2810 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2811 */
2812DECL_FORCE_INLINE_THROW(uint32_t)
2813iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2814 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2815{
2816#ifdef RT_ARCH_AMD64
2817 /* movsx reg32, mem8 */
2818 if (iGprDst >= 8 || iGprBase >= 8)
2819 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2820 pCodeBuf[off++] = 0x0f;
2821 pCodeBuf[off++] = 0xbe;
2822 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2823# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2824 /* and reg32, 0xffff */
2825 if (iGprDst >= 8)
2826 pCodeBuf[off++] = X86_OP_REX_B;
2827 pCodeBuf[off++] = 0x81;
2828 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2829 pCodeBuf[off++] = 0xff;
2830 pCodeBuf[off++] = 0xff;
2831 pCodeBuf[off++] = 0;
2832 pCodeBuf[off++] = 0;
2833# else
2834 /* movzx reg32, reg16 */
2835 if (iGprDst >= 8)
2836 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2837 pCodeBuf[off++] = 0x0f;
2838 pCodeBuf[off++] = 0xb7;
2839 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2840# endif
2841 RT_NOREF(iGprTmp);
2842
2843#elif defined(RT_ARCH_ARM64)
2844 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2845 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2846 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2847 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
2848
2849#else
2850# error "port me"
2851#endif
2852 return off;
2853}
2854
2855
2856#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2857/**
2858 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2859 *
2860 * @note ARM64: Misaligned @a offDisp values and values not in the
2861 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2862 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2863 * does not heed this.
2864 */
2865DECL_FORCE_INLINE_THROW(uint32_t)
2866iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2867 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2868{
2869#ifdef RT_ARCH_AMD64
2870 /* movdqu reg128, mem128 */
2871 pCodeBuf[off++] = 0xf3;
2872 if (iVecRegDst >= 8 || iGprBase >= 8)
2873 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2874 pCodeBuf[off++] = 0x0f;
2875 pCodeBuf[off++] = 0x6f;
2876 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
2877 RT_NOREF(iGprTmp);
2878
2879#elif defined(RT_ARCH_ARM64)
2880 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
2881 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
2882
2883#else
2884# error "port me"
2885#endif
2886 return off;
2887}
2888
2889
2890/**
2891 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2892 */
2893DECL_INLINE_THROW(uint32_t)
2894iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
2895{
2896#ifdef RT_ARCH_AMD64
2897 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecRegDst, iGprBase, offDisp);
2898 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2899
2900#elif defined(RT_ARCH_ARM64)
2901 off = iemNativeEmitGprByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2902
2903#else
2904# error "port me"
2905#endif
2906 return off;
2907}
2908#endif
2909
2910
2911/**
2912 * Emits a 64-bit GPR store via a GPR base address with a displacement.
2913 *
2914 * @note ARM64: Misaligned @a offDisp values and values not in the
2915 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2916 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2917 * does not heed this.
2918 */
2919DECL_FORCE_INLINE_THROW(uint32_t)
2920iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2921 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2922{
2923#ifdef RT_ARCH_AMD64
2924 /* mov mem64, reg64 */
2925 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2926 pCodeBuf[off++] = 0x89;
2927 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2928 RT_NOREF(iGprTmp);
2929
2930#elif defined(RT_ARCH_ARM64)
2931 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2932 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
2933
2934#else
2935# error "port me"
2936#endif
2937 return off;
2938}
2939
2940
2941/**
2942 * Emits a 32-bit GPR store via a GPR base address with a displacement.
2943 *
2944 * @note ARM64: Misaligned @a offDisp values and values not in the
2945 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
2946 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2947 * does not heed this.
2948 */
2949DECL_FORCE_INLINE_THROW(uint32_t)
2950iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2951 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2952{
2953#ifdef RT_ARCH_AMD64
2954 /* mov mem32, reg32 */
2955 if (iGprSrc >= 8 || iGprBase >= 8)
2956 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2957 pCodeBuf[off++] = 0x89;
2958 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2959 RT_NOREF(iGprTmp);
2960
2961#elif defined(RT_ARCH_ARM64)
2962 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2963 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
2964
2965#else
2966# error "port me"
2967#endif
2968 return off;
2969}
2970
2971
2972/**
2973 * Emits a 16-bit GPR store via a GPR base address with a displacement.
2974 *
2975 * @note ARM64: Misaligned @a offDisp values and values not in the
2976 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
2977 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2978 * does not heed this.
2979 */
2980DECL_FORCE_INLINE_THROW(uint32_t)
2981iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2982 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2983{
2984#ifdef RT_ARCH_AMD64
2985 /* mov mem16, reg16 */
2986 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2987 if (iGprSrc >= 8 || iGprBase >= 8)
2988 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2989 pCodeBuf[off++] = 0x89;
2990 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2991 RT_NOREF(iGprTmp);
2992
2993#elif defined(RT_ARCH_ARM64)
2994 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2995 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
2996
2997#else
2998# error "port me"
2999#endif
3000 return off;
3001}
3002
3003
3004/**
3005 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3006 *
3007 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3008 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3009 * same. Will assert / throw if caller does not heed this.
3010 */
3011DECL_FORCE_INLINE_THROW(uint32_t)
3012iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3013 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3014{
3015#ifdef RT_ARCH_AMD64
3016 /* mov mem8, reg8 */
3017 if (iGprSrc >= 8 || iGprBase >= 8)
3018 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3019 else if (iGprSrc >= 4)
3020 pCodeBuf[off++] = X86_OP_REX;
3021 pCodeBuf[off++] = 0x88;
3022 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3023 RT_NOREF(iGprTmp);
3024
3025#elif defined(RT_ARCH_ARM64)
3026 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3027 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3028
3029#else
3030# error "port me"
3031#endif
3032 return off;
3033}
3034
3035
3036/**
3037 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3038 *
3039 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0); on
3040 * AMD64 it depends on the immediate value.
3041 *
3042 * @note ARM64: Misaligned @a offDisp values and values not in the
3043 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp) if
3044 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3045 * does not heed this.
3046 */
3047DECL_FORCE_INLINE_THROW(uint32_t)
3048iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3049 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3050{
3051#ifdef RT_ARCH_AMD64
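    /* Note: there is no 'mov m64, imm64' encoding; the imm32 form below
       sign-extends, so it only covers values representable as int32_t. */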
3052 if ((int32_t)uImm == (int64_t)uImm)
3053 {
3054 /* mov mem64, imm32 (sign-extended) */
3055 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3056 pCodeBuf[off++] = 0xc7;
3057 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3058 pCodeBuf[off++] = RT_BYTE1(uImm);
3059 pCodeBuf[off++] = RT_BYTE2(uImm);
3060 pCodeBuf[off++] = RT_BYTE3(uImm);
3061 pCodeBuf[off++] = RT_BYTE4(uImm);
3062 }
3063 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3064 {
3065 /* require temporary register. */
3066 if (iGprImmTmp == UINT8_MAX)
3067 iGprImmTmp = iGprTmp;
3068 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3069 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3070 }
3071 else
3072# ifdef IEM_WITH_THROW_CATCH
3073 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3074# else
3075 AssertReleaseFailedStmt(off = UINT32_MAX);
3076# endif
3077
3078#elif defined(RT_ARCH_ARM64)
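    /* Zero is free on ARM64: XZR reads as zero, so uImm=0 needs no immediate load. */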
3079 if (uImm == 0)
3080 iGprImmTmp = ARMV8_A64_REG_XZR;
3081 else
3082 {
3083 Assert(iGprImmTmp < 31);
3084 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3085 }
3086 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3087
3088#else
3089# error "port me"
3090#endif
3091 return off;
3092}
3093
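/*
 * Illustrative usage sketch (not from the original source): store a 64-bit
 * constant at [base + 16].  The register indexes (idxRegBase, idxRegImmTmp,
 * idxRegTmp) are hypothetical; real callers obtain them from the register
 * allocator.
 *
 *     off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x1122334455667788),
 *                                          idxRegBase, idxRegImmTmp, 16, idxRegTmp);
 */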
3094
3095/**
3096 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3097 *
3098 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3099 *
3100 * @note ARM64: Misaligned @a offDisp values and values not in the
3101 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp) if
3102 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3103 * does not heed this.
3104 */
3105DECL_FORCE_INLINE_THROW(uint32_t)
3106iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3107 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3108{
3109#ifdef RT_ARCH_AMD64
3110 /* mov mem32, imm32 */
3111 if (iGprBase >= 8)
3112 pCodeBuf[off++] = X86_OP_REX_B;
3113 pCodeBuf[off++] = 0xc7;
3114 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3115 pCodeBuf[off++] = RT_BYTE1(uImm);
3116 pCodeBuf[off++] = RT_BYTE2(uImm);
3117 pCodeBuf[off++] = RT_BYTE3(uImm);
3118 pCodeBuf[off++] = RT_BYTE4(uImm);
3119 RT_NOREF(iGprImmTmp, iGprTmp);
3120
3121#elif defined(RT_ARCH_ARM64)
3123 if (uImm == 0)
3124 iGprImmTmp = ARMV8_A64_REG_XZR;
3125 else
3126 {
3127 Assert(iGprImmTmp < 31);
3128 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3129 }
3130 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3131 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3132
3133#else
3134# error "port me"
3135#endif
3136 return off;
3137}
3138
3139
3140/**
3141 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3142 *
3143 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3144 *
3145 * @note ARM64: Misaligned @a offDisp values and values not in the
3146 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp) if
3147 * @a iGprImmTmp and @a iGprBase are the same. Will assert / throw if caller
3148 * does not heed this.
3149 */
3150DECL_FORCE_INLINE_THROW(uint32_t)
3151iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3152 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3153{
3154#ifdef RT_ARCH_AMD64
3155 /* mov mem16, imm16 */
3156 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3157 if (iGprBase >= 8)
3158 pCodeBuf[off++] = X86_OP_REX_B;
3159 pCodeBuf[off++] = 0xc7;
3160 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3161 pCodeBuf[off++] = RT_BYTE1(uImm);
3162 pCodeBuf[off++] = RT_BYTE2(uImm);
3163 RT_NOREF(iGprImmTmp, iGprTmp);
3164
3165#elif defined(RT_ARCH_ARM64)
3166 if (uImm == 0)
3167 iGprImmTmp = ARMV8_A64_REG_XZR;
3168 else
3169 {
3170 Assert(iGprImmTmp < 31);
3171 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3172 }
3173 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3174 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3175
3176#else
3177# error "port me"
3178#endif
3179 return off;
3180}
3181
3182
3183/**
3184 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3185 *
3186 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3187 *
3188 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
3189 * temporary register (@a iGprTmp) if @a iGprImmTmp and @a iGprBase are the
3190 * same. Will assert / throw if caller does not heed this.
3191 */
3192DECL_FORCE_INLINE_THROW(uint32_t)
3193iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3194 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3195{
3196#ifdef RT_ARCH_AMD64
3197 /* mov mem8, imm8 */
3199 if (iGprBase >= 8)
3200 pCodeBuf[off++] = X86_OP_REX_B;
3201 pCodeBuf[off++] = 0xc6;
3202 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3203 pCodeBuf[off++] = uImm;
3204 RT_NOREF(iGprImmTmp, iGprTmp);
3205
3206#elif defined(RT_ARCH_ARM64)
3207 if (uImm == 0)
3208 iGprImmTmp = ARMV8_A64_REG_XZR;
3209 else
3210 {
3211 Assert(iGprImmTmp < 31);
3212 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3213 }
3214 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3215 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3216
3217#else
3218# error "port me"
3219#endif
3220 return off;
3221}
3222
3223
3224
3225/*********************************************************************************************************************************
3226* Subtraction and Additions *
3227*********************************************************************************************************************************/
3228
3229/**
3230 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3231 * @note The AMD64 version sets flags.
3232 */
3233DECL_INLINE_THROW(uint32_t)
3234iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3235{
3236#if defined(RT_ARCH_AMD64)
3237 /* sub Gv,Ev */
3238 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3239 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3240 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3241 pbCodeBuf[off++] = 0x2b;
3242 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3243
3244#elif defined(RT_ARCH_ARM64)
3245 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3246 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3247
3248#else
3249# error "Port me"
3250#endif
3251 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3252 return off;
3253}
3254
3255
3256/**
3257 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3258 * @note The AMD64 version sets flags.
3259 */
3260DECL_FORCE_INLINE(uint32_t)
3261iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3262{
3263#if defined(RT_ARCH_AMD64)
3264 /* sub Gv,Ev */
3265 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3266 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3267 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3268 pCodeBuf[off++] = 0x2b;
3269 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3270
3271#elif defined(RT_ARCH_ARM64)
3272 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3273
3274#else
3275# error "Port me"
3276#endif
3277 return off;
3278}
3279
3280
3281/**
3282 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3283 * @note The AMD64 version sets flags.
3284 */
3285DECL_INLINE_THROW(uint32_t)
3286iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3287{
3288#if defined(RT_ARCH_AMD64)
3289 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3290#elif defined(RT_ARCH_ARM64)
3291 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3292#else
3293# error "Port me"
3294#endif
3295 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3296 return off;
3297}
3298
3299
3300/**
3301 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3302 *
3303 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3304 *
3305 * @note Larger constants will require a temporary register. Failing to specify
3306 * one when needed will trigger fatal assertion / throw.
3307 */
3308DECL_FORCE_INLINE_THROW(uint32_t)
3309iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3310 uint8_t iGprTmp = UINT8_MAX)
3311{
3312#ifdef RT_ARCH_AMD64
3313 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3314 if (iSubtrahend == 1)
3315 {
3316 /* dec r/m64 */
3317 pCodeBuf[off++] = 0xff;
3318 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3319 }
3320 else if (iSubtrahend == -1)
3321 {
3322 /* inc r/m64 */
3323 pCodeBuf[off++] = 0xff;
3324 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3325 }
3326 else if ((int8_t)iSubtrahend == iSubtrahend)
3327 {
3328 /* sub r/m64, imm8 */
3329 pCodeBuf[off++] = 0x83;
3330 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3331 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3332 }
3333 else if ((int32_t)iSubtrahend == iSubtrahend)
3334 {
3335 /* sub r/m64, imm32 */
3336 pCodeBuf[off++] = 0x81;
3337 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3338 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3339 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3340 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3341 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3342 }
3343 else if (iGprTmp != UINT8_MAX)
3344 {
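        /* Note: the REX.W prefix was already emitted at the top of the function,
           so back up one byte ('off - 1') before loading the immediate. */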
3345 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3346 /* sub r/m64, r64 */
3347 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3348 pCodeBuf[off++] = 0x29;
3349 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3350 }
3351 else
3352# ifdef IEM_WITH_THROW_CATCH
3353 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3354# else
3355 AssertReleaseFailedStmt(off = UINT32_MAX);
3356# endif
3357
3358#elif defined(RT_ARCH_ARM64)
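    /* ARM64 ADD/SUB (immediate) encodes a 12-bit unsigned immediate, optionally
       shifted left by 12 bits; negative subtrahends are handled by switching
       between SUB and ADD. */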
3359 uint64_t const uAbsSubtrahend = (uint64_t)RT_ABS(iSubtrahend);
3360 if (uAbsSubtrahend < 4096)
3361 {
3362 if (iSubtrahend >= 0)
3363 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsSubtrahend);
3364 else
3365 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsSubtrahend);
3366 }
3367 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3368 {
3369 if (iSubtrahend >= 0)
3370 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsSubtrahend >> 12,
3371 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3372 else
3373 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsSubtrahend >> 12,
3374 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3375 }
3376 else if (iGprTmp != UINT8_MAX)
3377 {
3378 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3379 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3380 }
3381 else
3382# ifdef IEM_WITH_THROW_CATCH
3383 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3384# else
3385 AssertReleaseFailedStmt(off = UINT32_MAX);
3386# endif
3387
3388#else
3389# error "Port me"
3390#endif
3391 return off;
3392}
3393
3394
3395/**
3396 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3397 *
3398 * @note Larger constants will require a temporary register. Failing to specify
3399 * one when needed will trigger fatal assertion / throw.
3400 */
3401DECL_INLINE_THROW(uint32_t)
3402iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3403 uint8_t iGprTmp = UINT8_MAX)
3405{
3406#ifdef RT_ARCH_AMD64
3407 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3408#elif defined(RT_ARCH_ARM64)
3409 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3410#else
3411# error "Port me"
3412#endif
3413 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3414 return off;
3415}
3416
3417
3418/**
3419 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3420 *
3421 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3422 *
3423 * @note ARM64: Larger constants will require a temporary register. Failing to
3424 * specify one when needed will trigger fatal assertion / throw.
3425 */
3426DECL_FORCE_INLINE_THROW(uint32_t)
3427iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3428 uint8_t iGprTmp = UINT8_MAX)
3429{
3430#ifdef RT_ARCH_AMD64
3431 if (iGprDst >= 8)
3432 pCodeBuf[off++] = X86_OP_REX_B;
3433 if (iSubtrahend == 1)
3434 {
3435 /* dec r/m32 */
3436 pCodeBuf[off++] = 0xff;
3437 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3438 }
3439 else if (iSubtrahend == -1)
3440 {
3441 /* inc r/m32 */
3442 pCodeBuf[off++] = 0xff;
3443 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3444 }
3445 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3446 {
3447 /* sub r/m32, imm8 */
3448 pCodeBuf[off++] = 0x83;
3449 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3450 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3451 }
3452 else
3453 {
3454 /* sub r/m32, imm32 */
3455 pCodeBuf[off++] = 0x81;
3456 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3457 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3458 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3459 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3460 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3461 }
3462 RT_NOREF(iGprTmp);
3463
3464#elif defined(RT_ARCH_ARM64)
3465 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3466 if (uAbsSubtrahend < 4096)
3467 {
3468 if (iSubtrahend >= 0)
3469 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3470 else
3471 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3472 }
3473 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3474 {
3475 if (iSubtrahend >= 0)
3476 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3477 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3478 else
3479 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3480 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3481 }
3482 else if (iGprTmp != UINT8_MAX)
3483 {
3484 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3485 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3486 }
3487 else
3488# ifdef IEM_WITH_THROW_CATCH
3489 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3490# else
3491 AssertReleaseFailedStmt(off = UINT32_MAX);
3492# endif
3493
3494#else
3495# error "Port me"
3496#endif
3497 return off;
3498}
3499
3500
3501/**
3502 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3503 *
3504 * @note ARM64: Larger constants will require a temporary register. Failing to
3505 * specify one when needed will trigger fatal assertion / throw.
3506 */
3507DECL_INLINE_THROW(uint32_t)
3508iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3509 uint8_t iGprTmp = UINT8_MAX)
3511{
3512#ifdef RT_ARCH_AMD64
3513 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3514#elif defined(RT_ARCH_ARM64)
3515 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3516#else
3517# error "Port me"
3518#endif
3519 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3520 return off;
3521}
3522
3523
3524/**
3525 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3526 *
3527 * This will optimize using DEC/INC/whatever and ARM64 will not set flags,
3528 * so it is not suitable as a base for conditional jumps.
3529 *
3530 * @note AMD64: Will only update the lower 16 bits of the register.
3531 * @note ARM64: Will update the entire register.
3532 * @note ARM64: Larger constants will require a temporary register. Failing to
3533 * specify one when needed will trigger fatal assertion / throw.
3534 */
3535DECL_FORCE_INLINE_THROW(uint32_t)
3536iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3537 uint8_t iGprTmp = UINT8_MAX)
3538{
3539#ifdef RT_ARCH_AMD64
3540 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3541 if (iGprDst >= 8)
3542 pCodeBuf[off++] = X86_OP_REX_B;
3543 if (iSubtrahend == 1)
3544 {
3545 /* dec r/m16 */
3546 pCodeBuf[off++] = 0xff;
3547 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3548 }
3549 else if (iSubtrahend == -1)
3550 {
3551 /* inc r/m16 */
3552 pCodeBuf[off++] = 0xff;
3553 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3554 }
3555 else if ((int8_t)iSubtrahend == iSubtrahend)
3556 {
3557 /* sub r/m16, imm8 */
3558 pCodeBuf[off++] = 0x83;
3559 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3560 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3561 }
3562 else
3563 {
3564 /* sub r/m16, imm16 */
3565 pCodeBuf[off++] = 0x81;
3566 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3567 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3568 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3569 }
3570 RT_NOREF(iGprTmp);
3571
3572#elif defined(RT_ARCH_ARM64)
3573 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3574 if (uAbsSubtrahend < 4096)
3575 {
3576 if (iSubtrahend >= 0)
3577 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3578 else
3579 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3580 }
3581 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3582 {
3583 if (iSubtrahend >= 0)
3584 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3585 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3586 else
3587 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3588 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3589 }
3590 else if (iGprTmp != UINT8_MAX)
3591 {
3592 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3593 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3594 }
3595 else
3596# ifdef IEM_WITH_THROW_CATCH
3597 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3598# else
3599 AssertReleaseFailedStmt(off = UINT32_MAX);
3600# endif
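    /* Clear bits 16 thru 63: imms=15/immr=0 encodes the 0xffff bitmask,
       leaving the 16-bit result zero-extended into the full register. */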
3601 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3602
3603#else
3604# error "Port me"
3605#endif
3606 return off;
3607}
3608
3609
3610/**
3611 * Emits adding a 64-bit GPR to another, storing the result in the first.
3612 * @note The AMD64 version sets flags.
3613 */
3614DECL_FORCE_INLINE(uint32_t)
3615iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3616{
3617#if defined(RT_ARCH_AMD64)
3618 /* add Gv,Ev */
3619 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3620 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3621 pCodeBuf[off++] = 0x03;
3622 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3623
3624#elif defined(RT_ARCH_ARM64)
3625 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3626
3627#else
3628# error "Port me"
3629#endif
3630 return off;
3631}
3632
3633
3634/**
3635 * Emits adding a 64-bit GPR to another, storing the result in the first.
3636 * @note The AMD64 version sets flags.
3637 */
3638DECL_INLINE_THROW(uint32_t)
3639iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3640{
3641#if defined(RT_ARCH_AMD64)
3642 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3643#elif defined(RT_ARCH_ARM64)
3644 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3645#else
3646# error "Port me"
3647#endif
3648 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3649 return off;
3650}
3651
3652
3653/**
3654 * Emits adding a 32-bit GPR to another, storing the result in the first.
3655 * @note The AMD64 version sets flags.
3656 */
3657DECL_FORCE_INLINE(uint32_t)
3658iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3659{
3660#if defined(RT_ARCH_AMD64)
3661 /* add Gv,Ev */
3662 if (iGprDst >= 8 || iGprAddend >= 8)
3663 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3664 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3665 pCodeBuf[off++] = 0x03;
3666 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3667
3668#elif defined(RT_ARCH_ARM64)
3669 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3670
3671#else
3672# error "Port me"
3673#endif
3674 return off;
3675}
3676
3677
3678/**
3679 * Emits adding a 32-bit GPR to another, storing the result in the first.
3680 * @note The AMD64 version sets flags.
3681 */
3682DECL_INLINE_THROW(uint32_t)
3683iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3684{
3685#if defined(RT_ARCH_AMD64)
3686 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3687#elif defined(RT_ARCH_ARM64)
3688 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3689#else
3690# error "Port me"
3691#endif
3692 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3693 return off;
3694}
3695
3696
3697/**
3698 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3699 */
3700DECL_INLINE_THROW(uint32_t)
3701iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3702{
3703#if defined(RT_ARCH_AMD64)
3704 /* add or inc */
3705 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3706 if (iImm8 != 1)
3707 {
3708 pCodeBuf[off++] = 0x83;
3709 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3710 pCodeBuf[off++] = (uint8_t)iImm8;
3711 }
3712 else
3713 {
3714 pCodeBuf[off++] = 0xff;
3715 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3716 }
3717
3718#elif defined(RT_ARCH_ARM64)
3719 if (iImm8 >= 0)
3720 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3721 else
3722 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3723
3724#else
3725# error "Port me"
3726#endif
3727 return off;
3728}
3729
3730
3731/**
3732 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3733 */
3734DECL_INLINE_THROW(uint32_t)
3735iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3736{
3737#if defined(RT_ARCH_AMD64)
3738 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3739#elif defined(RT_ARCH_ARM64)
3740 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3741#else
3742# error "Port me"
3743#endif
3744 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3745 return off;
3746}
3747
3748
3749/**
3750 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3751 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3752 */
3753DECL_FORCE_INLINE(uint32_t)
3754iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3755{
3756#if defined(RT_ARCH_AMD64)
3757 /* add or inc */
3758 if (iGprDst >= 8)
3759 pCodeBuf[off++] = X86_OP_REX_B;
3760 if (iImm8 != 1)
3761 {
3762 pCodeBuf[off++] = 0x83;
3763 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3764 pCodeBuf[off++] = (uint8_t)iImm8;
3765 }
3766 else
3767 {
3768 pCodeBuf[off++] = 0xff;
3769 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3770 }
3771
3772#elif defined(RT_ARCH_ARM64)
3773 if (iImm8 >= 0)
3774 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
3775 else
3776 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
3777
3778#else
3779# error "Port me"
3780#endif
3781 return off;
3782}
3783
3784
3785/**
3786 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3787 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3788 */
3789DECL_INLINE_THROW(uint32_t)
3790iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3791{
3792#if defined(RT_ARCH_AMD64)
3793 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3794#elif defined(RT_ARCH_ARM64)
3795 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3796#else
3797# error "Port me"
3798#endif
3799 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3800 return off;
3801}
3802
3803
3804/**
3805 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3806 *
3807 * @note Will assert / throw if @a iGprTmp is not specified when needed.
3808 */
3809DECL_FORCE_INLINE_THROW(uint32_t)
3810iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
3811{
3812#if defined(RT_ARCH_AMD64)
3813 if ((int8_t)iAddend == iAddend)
3814 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3815
3816 if ((int32_t)iAddend == iAddend)
3817 {
3818 /* add grp, imm32 */
3819 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3820 pCodeBuf[off++] = 0x81;
3821 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3822 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3823 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3824 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3825 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3826 }
3827 else if (iGprTmp != UINT8_MAX)
3828 {
3829 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3830
3831 /* add dst, tmpreg */
3832 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3833 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
3834 pCodeBuf[off++] = 0x03;
3835 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
3836 }
3837 else
3838# ifdef IEM_WITH_THROW_CATCH
3839 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3840# else
3841 AssertReleaseFailedStmt(off = UINT32_MAX);
3842# endif
3843
3844#elif defined(RT_ARCH_ARM64)
3845 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
3846 if (uAbsAddend < 4096)
3847 {
3848 if (iAddend >= 0)
3849 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3850 else
3851 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3852 }
3853 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3854 {
3855 if (iAddend >= 0)
3856 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3857 true /*f64Bit*/, true /*fShift12*/);
3858 else
3859 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3860 true /*f64Bit*/, true /*fShift12*/);
3861 }
3862 else if (iGprTmp != UINT8_MAX)
3863 {
3864 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3865 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
3866 }
3867 else
3868# ifdef IEM_WITH_THROW_CATCH
3869 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3870# else
3871 AssertReleaseFailedStmt(off = UINT32_MAX);
3872# endif
3873
3874#else
3875# error "Port me"
3876#endif
3877 return off;
3878}
3879
3880
3881/**
3882 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3883 */
3884DECL_INLINE_THROW(uint32_t)
3885iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
3886{
3887#if defined(RT_ARCH_AMD64)
3888 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3889 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
3890
3891 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
3892 {
3893 /* add grp, imm32 */
3894 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3895 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3896 pbCodeBuf[off++] = 0x81;
3897 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3898 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3899 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3900 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3901 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3902 }
3903 else
3904 {
3905 /* Best to use a temporary register to deal with this in the simplest way: */
3906 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3907
3908 /* add dst, tmpreg */
3909 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3910 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3911 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
3912 pbCodeBuf[off++] = 0x03;
3913 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
3914
3915 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3916 }
3917
3918#elif defined(RT_ARCH_ARM64)
3919 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
3920 {
3921 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3922 if (iAddend >= 0)
3923 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
3924 else
3925 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
3926 }
3927 else
3928 {
3929 /* Use temporary register for the immediate. */
3930 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3931
3932 /* add gprdst, gprdst, tmpreg */
3933 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3934 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
3935
3936 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3937 }
3938
3939#else
3940# error "Port me"
3941#endif
3942 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3943 return off;
3944}
3945
3946
3947/**
3948 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
3949 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3950 * @note For ARM64 the iAddend value must be in the range 0x000..0xfff,
3951 * or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
3952 * the lower 12 bits always zero). The negative ranges are also allowed,
3953 * making it behave like a subtraction. If the constant does not conform,
3954 * the function will assert / throw.
3955 */
3956DECL_FORCE_INLINE_THROW(uint32_t)
3957iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
3958{
3959#if defined(RT_ARCH_AMD64)
3960 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3961 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3962
3963 /* add grp, imm32 */
3964 if (iGprDst >= 8)
3965 pCodeBuf[off++] = X86_OP_REX_B;
3966 pCodeBuf[off++] = 0x81;
3967 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3968 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3969 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3970 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3971 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3972
3973#elif defined(RT_ARCH_ARM64)
3974 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
3975 if (uAbsAddend <= 0xfff)
3976 {
3977 if (iAddend >= 0)
3978 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3979 else
3980 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3981 }
3982 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3983 {
3984 if (iAddend >= 0)
3985 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3986 false /*f64Bit*/, true /*fShift12*/);
3987 else
3988 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3989 false /*f64Bit*/, true /*fShift12*/);
3990 }
3991 else
3992# ifdef IEM_WITH_THROW_CATCH
3993 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3994# else
3995 AssertReleaseFailedStmt(off = UINT32_MAX);
3996# endif
3997
3998#else
3999# error "Port me"
4000#endif
4001 return off;
4002}
4003
4004
4005/**
4006 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4007 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4008 */
4009DECL_INLINE_THROW(uint32_t)
4010iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4011{
4012#if defined(RT_ARCH_AMD64)
4013 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4014
4015#elif defined(RT_ARCH_ARM64)
4016 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4017 {
4018 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4019 if (iAddend >= 0)
4020 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
4021 else
4022 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
4023 }
4024 else
4025 {
4026 /* Use temporary register for the immediate. */
4027 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
4028
4029 /* add gprdst, gprdst, tmpreg */
4030 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4031 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4032
4033 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4034 }
4035
4036#else
4037# error "Port me"
4038#endif
4039 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4040 return off;
4041}
4042
4043
4044/**
4045 * Emits a 16-bit GPR add with a signed immediate addend.
4046 *
4047 * This will optimize using INC/DEC/whatever and ARM64 will not set flags,
4048 * so it is not suitable as a base for conditional jumps.
4049 *
4050 * @note AMD64: Will only update the lower 16 bits of the register.
4051 * @note ARM64: Will update the entire register.
4052 * @note ARM64: Larger constants will require a temporary register. Failing to
4053 * specify one when needed will trigger fatal assertion / throw.
4054 * @sa iemNativeEmitSubGpr16ImmEx
4055 */
4056DECL_FORCE_INLINE_THROW(uint32_t)
4057iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
4058 uint8_t iGprTmp = UINT8_MAX)
4059{
4060#ifdef RT_ARCH_AMD64
4061 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4062 if (iGprDst >= 8)
4063 pCodeBuf[off++] = X86_OP_REX_B;
4064 if (iAddend == 1)
4065 {
4066 /* inc r/m16 */
4067 pCodeBuf[off++] = 0xff;
4068 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4069 }
4070 else if (iAddend == -1)
4071 {
4072 /* dec r/m16 */
4073 pCodeBuf[off++] = 0xff;
4074 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4075 }
4076 else if ((int8_t)iAddend == iAddend)
4077 {
4078 /* add r/m16, imm8 */
4079 pCodeBuf[off++] = 0x83;
4080 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4081 pCodeBuf[off++] = (uint8_t)iAddend;
4082 }
4083 else
4084 {
4085 /* add r/m16, imm16 */
4086 pCodeBuf[off++] = 0x81;
4087 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4088 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4089 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4090 }
4091 RT_NOREF(iGprTmp);
4092
4093#elif defined(RT_ARCH_ARM64)
4094 uint32_t uAbsAddend = RT_ABS(iAddend);
4095 if (uAbsAddend < 4096)
4096 {
4097 if (iAddend >= 0)
4098 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4099 else
4100 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4101 }
4102 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4103 {
4104 if (iAddend >= 0)
4105 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4106 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4107 else
4108 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4109 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4110 }
4111 else if (iGprTmp != UINT8_MAX)
4112 {
4113 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
4114 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4115 }
4116 else
4117# ifdef IEM_WITH_THROW_CATCH
4118 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4119# else
4120 AssertReleaseFailedStmt(off = UINT32_MAX);
4121# endif
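    /* Same trailing mask as iemNativeEmitSubGpr16ImmEx: AND with the 0xffff
       bitmask (imms=15/immr=0) to clear bits 16 thru 63 again. */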
4122 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4123
4124#else
4125# error "Port me"
4126#endif
4127 return off;
4128}
4129
4130
4131
4132/**
4133 * Adds two 64-bit GPRs together, storing the result in a third register.
4134 */
4135DECL_FORCE_INLINE(uint32_t)
4136iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4137{
4138#ifdef RT_ARCH_AMD64
4139 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4140 {
4141 /** @todo consider LEA */
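        /* (An 'lea iGprDst, [iGprAddend1 + iGprAddend2]' could fold the
           mov+add pair below into one instruction; sketch only, not what is
           emitted here.) */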
4142 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4143 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4144 }
4145 else
4146 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4147
4148#elif defined(RT_ARCH_ARM64)
4149 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4150
4151#else
4152# error "Port me!"
4153#endif
4154 return off;
4155}
4156
4157
4158
4159/**
4160 * Adds two 32-bit GPRs together, storing the result in a third register.
4161 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4162 */
4163DECL_FORCE_INLINE(uint32_t)
4164iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4165{
4166#ifdef RT_ARCH_AMD64
4167 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4168 {
4169 /** @todo consider LEA */
4170 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4171 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4172 }
4173 else
4174 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4175
4176#elif defined(RT_ARCH_ARM64)
4177 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4178
4179#else
4180# error "Port me!"
4181#endif
4182 return off;
4183}
4184
4185
4186/**
4187 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4188 * third register.
4189 *
4190 * @note The ARM64 version does not work for non-trivial constants if the
4191 * two registers are the same. Will assert / throw exception.
4192 */
4193DECL_FORCE_INLINE_THROW(uint32_t)
4194iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4195{
4196#ifdef RT_ARCH_AMD64
4197 /** @todo consider LEA */
4198 if ((int8_t)iImmAddend == iImmAddend)
4199 {
4200 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4201 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4202 }
4203 else
4204 {
4205 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4206 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4207 }
4208
4209#elif defined(RT_ARCH_ARM64)
4210 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4211 if (uAbsImmAddend < 4096)
4212 {
4213 if (iImmAddend >= 0)
4214 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4215 else
4216 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4217 }
4218 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4219 {
4220 if (iImmAddend >= 0)
4221 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4222 else
4223 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4224 }
4225 else if (iGprDst != iGprAddend)
4226 {
4227 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4228 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4229 }
4230 else
4231# ifdef IEM_WITH_THROW_CATCH
4232 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4233# else
4234 AssertReleaseFailedStmt(off = UINT32_MAX);
4235# endif
4236
4237#else
4238# error "Port me!"
4239#endif
4240 return off;
4241}
4242
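/*
 * Illustrative usage sketch (not from the original source): dst = src + 0x123
 * in a single ADD on both hosts; the register indexes are hypothetical.
 *
 *     off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegDst, idxRegSrc, 0x123);
 */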
4243
4244/**
4245 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4246 * third register.
4247 *
4248 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4249 *
4250 * @note The ARM64 version does not work for non-trivial constants if the
4251 * two registers are the same. Will assert / throw exception.
4252 */
4253DECL_FORCE_INLINE_THROW(uint32_t)
4254iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4255{
4256#ifdef RT_ARCH_AMD64
4257 /** @todo consider LEA */
4258 if ((int8_t)iImmAddend == iImmAddend)
4259 {
4260 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4261 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4262 }
4263 else
4264 {
4265 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4266 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4267 }
4268
4269#elif defined(RT_ARCH_ARM64)
4270 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4271 if (uAbsImmAddend < 4096)
4272 {
4273 if (iImmAddend >= 0)
4274 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4275 else
4276 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4277 }
4278 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4279 {
4280 if (iImmAddend >= 0)
4281 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4282 else
4283 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4284 }
4285 else if (iGprDst != iGprAddend)
4286 {
4287 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4288 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4289 }
4290 else
4291# ifdef IEM_WITH_THROW_CATCH
4292 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4293# else
4294 AssertReleaseFailedStmt(off = UINT32_MAX);
4295# endif
4296
4297#else
4298# error "Port me!"
4299#endif
4300 return off;
4301}
4302
4303
4304/*********************************************************************************************************************************
4305* Unary Operations *
4306*********************************************************************************************************************************/
4307
4308/**
4309 * Emits code for two's complement negation of a 64-bit GPR.
4310 */
4311DECL_FORCE_INLINE_THROW(uint32_t)
4312iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4313{
4314#if defined(RT_ARCH_AMD64)
4315 /* neg Ev */
4316 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4317 pCodeBuf[off++] = 0xf7;
4318 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4319
4320#elif defined(RT_ARCH_ARM64)
4321 /* sub dst, xzr, dst */
4322 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4323
4324#else
4325# error "Port me"
4326#endif
4327 return off;
4328}
4329
4330
4331/**
4332 * Emits code for two's complement negation of a 64-bit GPR.
4333 */
4334DECL_INLINE_THROW(uint32_t)
4335iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4336{
4337#if defined(RT_ARCH_AMD64)
4338 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4339#elif defined(RT_ARCH_ARM64)
4340 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4341#else
4342# error "Port me"
4343#endif
4344 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4345 return off;
4346}
4347
4348
4349/**
4350 * Emits code for two's complement negation of a 32-bit GPR.
4351 * @note Bits 32 thru 63 are set to zero.
4352 */
4353DECL_FORCE_INLINE_THROW(uint32_t)
4354iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4355{
4356#if defined(RT_ARCH_AMD64)
4357 /* neg Ev */
4358 if (iGprDst >= 8)
4359 pCodeBuf[off++] = X86_OP_REX_B;
4360 pCodeBuf[off++] = 0xf7;
4361 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4362
4363#elif defined(RT_ARCH_ARM64)
4364 /* sub dst, xzr, dst */
4365 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4366
4367#else
4368# error "Port me"
4369#endif
4370 return off;
4371}
4372
4373
4374/**
4375 * Emits code for two's complement negation of a 32-bit GPR.
4376 * @note Bits 32 thru 63 are set to zero.
4377 */
4378DECL_INLINE_THROW(uint32_t)
4379iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4380{
4381#if defined(RT_ARCH_AMD64)
4382 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4383#elif defined(RT_ARCH_ARM64)
4384 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4385#else
4386# error "Port me"
4387#endif
4388 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4389 return off;
4390}
4391
4392
4393
4394/*********************************************************************************************************************************
4395* Bit Operations *
4396*********************************************************************************************************************************/
4397
4398/**
4399 * Emits code for clearing bits 16 thru 63 in the GPR.
4400 */
4401DECL_INLINE_THROW(uint32_t)
4402iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4403{
4404#if defined(RT_ARCH_AMD64)
4405 /* movzx Gv,Ew */
4406 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4407 if (iGprDst >= 8)
4408 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4409 pbCodeBuf[off++] = 0x0f;
4410 pbCodeBuf[off++] = 0xb7;
4411 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
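    /* Note: writing a 32-bit destination implicitly zeroes bits 32 thru 63 on
       x86-64, so this single movzx clears everything above bit 15. */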
4412
4413#elif defined(RT_ARCH_ARM64)
4414 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4415# if 1
4416 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4417# else
4418 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4419 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4420# endif
4421#else
4422# error "Port me"
4423#endif
4424 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4425 return off;
4426}
4427
4428
4429/**
4430 * Emits code for AND'ing two 64-bit GPRs.
4431 *
4432 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4433 * and ARM64 hosts.
4434 */
4435DECL_FORCE_INLINE(uint32_t)
4436iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4437{
4438#if defined(RT_ARCH_AMD64)
4439 /* and Gv, Ev */
4440 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4441 pCodeBuf[off++] = 0x23;
4442 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4443 RT_NOREF(fSetFlags);
4444
4445#elif defined(RT_ARCH_ARM64)
4446 if (!fSetFlags)
4447 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4448 else
4449 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4450
4451#else
4452# error "Port me"
4453#endif
4454 return off;
4455}
4456
4457
4458/**
4459 * Emits code for AND'ing two 64-bit GPRs.
4460 *
4461 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4462 * and ARM64 hosts.
4463 */
4464DECL_INLINE_THROW(uint32_t)
4465iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4466{
4467#if defined(RT_ARCH_AMD64)
4468 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4469#elif defined(RT_ARCH_ARM64)
4470 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4471#else
4472# error "Port me"
4473#endif
4474 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4475 return off;
4476}
4477
4478
4479/**
4480 * Emits code for AND'ing two 32-bit GPRs.
4481 */
4482DECL_FORCE_INLINE(uint32_t)
4483iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4484{
4485#if defined(RT_ARCH_AMD64)
4486 /* and Gv, Ev */
4487 if (iGprDst >= 8 || iGprSrc >= 8)
4488 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4489 pCodeBuf[off++] = 0x23;
4490 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4491 RT_NOREF(fSetFlags);
4492
4493#elif defined(RT_ARCH_ARM64)
4494 if (!fSetFlags)
4495 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4496 else
4497 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4498
4499#else
4500# error "Port me"
4501#endif
4502 return off;
4503}
4504
4505
4506/**
4507 * Emits code for AND'ing two 32-bit GPRs.
4508 */
4509DECL_INLINE_THROW(uint32_t)
4510iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4511{
4512#if defined(RT_ARCH_AMD64)
4513 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4514#elif defined(RT_ARCH_ARM64)
4515 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4516#else
4517# error "Port me"
4518#endif
4519 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4520 return off;
4521}
4522
4523
4524/**
4525 * Emits code for AND'ing a 64-bit GPR with a constant.
4526 *
4527 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4528 * and ARM64 hosts.
4529 */
4530DECL_INLINE_THROW(uint32_t)
4531iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4532{
4533#if defined(RT_ARCH_AMD64)
4534 if ((int64_t)uImm == (int8_t)uImm)
4535 {
4536 /* and Ev, imm8 */
4537 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4538 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4539 pbCodeBuf[off++] = 0x83;
4540 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4541 pbCodeBuf[off++] = (uint8_t)uImm;
4542 }
4543 else if ((int64_t)uImm == (int32_t)uImm)
4544 {
4545 /* and Ev, imm32 */
4546 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4547 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4548 pbCodeBuf[off++] = 0x81;
4549 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4550 pbCodeBuf[off++] = RT_BYTE1(uImm);
4551 pbCodeBuf[off++] = RT_BYTE2(uImm);
4552 pbCodeBuf[off++] = RT_BYTE3(uImm);
4553 pbCodeBuf[off++] = RT_BYTE4(uImm);
4554 }
4555 else
4556 {
4557 /* Use temporary register for the 64-bit immediate. */
4558 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4559 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4560 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4561 }
4562 RT_NOREF(fSetFlags);
4563
4564#elif defined(RT_ARCH_ARM64)
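    /* ARM64 logical-immediate operands must be repeating bit patterns (encoded
       as N:immr:imms); the conversion below fails for anything else, in which
       case a temporary register has to hold the constant. */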
4565 uint32_t uImmR = 0;
4566 uint32_t uImmNandS = 0;
4567 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4568 {
4569 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4570 if (!fSetFlags)
4571 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4572 else
4573 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4574 }
4575 else
4576 {
4577 /* Use temporary register for the 64-bit immediate. */
4578 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4579 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4580 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4581 }
4582
4583#else
4584# error "Port me"
4585#endif
4586 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4587 return off;
4588}
4589
4590
4591/**
4592 * Emits code for AND'ing a 32-bit GPR with a constant.
4593 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4594 * @note For ARM64 this only supports @a uImm values that can be expressed using
4595 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4596 * make sure this is possible!
4597 */
4598DECL_FORCE_INLINE_THROW(uint32_t)
4599iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4600{
4601#if defined(RT_ARCH_AMD64)
4602 /* and Ev, imm */
4603 if (iGprDst >= 8)
4604 pCodeBuf[off++] = X86_OP_REX_B;
4605 if ((int32_t)uImm == (int8_t)uImm)
4606 {
4607 pCodeBuf[off++] = 0x83;
4608 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4609 pCodeBuf[off++] = (uint8_t)uImm;
4610 }
4611 else
4612 {
4613 pCodeBuf[off++] = 0x81;
4614 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4615 pCodeBuf[off++] = RT_BYTE1(uImm);
4616 pCodeBuf[off++] = RT_BYTE2(uImm);
4617 pCodeBuf[off++] = RT_BYTE3(uImm);
4618 pCodeBuf[off++] = RT_BYTE4(uImm);
4619 }
4620 RT_NOREF(fSetFlags);
4621
4622#elif defined(RT_ARCH_ARM64)
4623 uint32_t uImmR = 0;
4624 uint32_t uImmNandS = 0;
4625 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4626 {
4627 if (!fSetFlags)
4628 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4629 else
4630 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4631 }
4632 else
4633# ifdef IEM_WITH_THROW_CATCH
4634 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4635# else
4636 AssertReleaseFailedStmt(off = UINT32_MAX);
4637# endif
4638
4639#else
4640# error "Port me"
4641#endif
4642 return off;
4643}
4644
4645
4646/**
4647 * Emits code for AND'ing a 32-bit GPR with a constant.
4648 *
4649 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4650 */
4651DECL_INLINE_THROW(uint32_t)
4652iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4653{
4654#if defined(RT_ARCH_AMD64)
4655 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4656
4657#elif defined(RT_ARCH_ARM64)
4658 uint32_t uImmR = 0;
4659 uint32_t uImmNandS = 0;
4660 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4661 {
4662 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4663 if (!fSetFlags)
4664 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4665 else
4666 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4667 }
4668 else
4669 {
4670        /* Use temporary register for the immediate. */
4671 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4672 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4673 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4674 }
4675
4676#else
4677# error "Port me"
4678#endif
4679 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4680 return off;
4681}
4682
4683
4684/**
4685 * Emits code for AND'ing a 64-bit GPR with a constant.
4686 *
4687 * @note For ARM64 any complicated immediate without an AND/ANDS compatible
4688 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
4689 * are the same.
4690 */
4691DECL_FORCE_INLINE_THROW(uint32_t)
4692iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4693 bool fSetFlags = false)
4694{
4695#if defined(RT_ARCH_AMD64)
4696 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4697 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4698 RT_NOREF(fSetFlags);
4699
4700#elif defined(RT_ARCH_ARM64)
4701 uint32_t uImmR = 0;
4702 uint32_t uImmNandS = 0;
4703 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4704 {
4705 if (!fSetFlags)
4706 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4707 else
4708 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4709 }
4710 else if (iGprDst != iGprSrc)
4711 {
4712 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4713 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4714 }
4715 else
4716# ifdef IEM_WITH_THROW_CATCH
4717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4718# else
4719 AssertReleaseFailedStmt(off = UINT32_MAX);
4720# endif
4721
4722#else
4723# error "Port me"
4724#endif
4725 return off;
4726}
4727
4728/**
4729 * Emits code for AND'ing a 32-bit GPR with a constant.
4730 *
4731 * @note For ARM64 any complicated immediate without an AND/ANDS compatible
4732 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
4733 * are the same.
4734 *
4735 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4736 */
4737DECL_FORCE_INLINE_THROW(uint32_t)
4738iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4739 bool fSetFlags = false)
4740{
4741#if defined(RT_ARCH_AMD64)
4742 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4743 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4744 RT_NOREF(fSetFlags);
4745
4746#elif defined(RT_ARCH_ARM64)
4747 uint32_t uImmR = 0;
4748 uint32_t uImmNandS = 0;
4749 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4750 {
4751 if (!fSetFlags)
4752 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4753 else
4754 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4755 }
4756 else if (iGprDst != iGprSrc)
4757 {
4758 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4759 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4760 }
4761 else
4762# ifdef IEM_WITH_THROW_CATCH
4763 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4764# else
4765 AssertReleaseFailedStmt(off = UINT32_MAX);
4766# endif
4767
4768#else
4769# error "Port me"
4770#endif
4771 return off;
4772}
4773
4774
4775/**
4776 * Emits code for OR'ing two 64-bit GPRs.
4777 */
4778DECL_FORCE_INLINE(uint32_t)
4779iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4780{
4781#if defined(RT_ARCH_AMD64)
4782 /* or Gv, Ev */
4783 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4784 pCodeBuf[off++] = 0x0b;
4785 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4786
4787#elif defined(RT_ARCH_ARM64)
4788 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
4789
4790#else
4791# error "Port me"
4792#endif
4793 return off;
4794}
4795
4796
4797/**
4798 * Emits code for OR'ing two 64-bit GPRs.
4799 */
4800DECL_INLINE_THROW(uint32_t)
4801iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4802{
4803#if defined(RT_ARCH_AMD64)
4804 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4805#elif defined(RT_ARCH_ARM64)
4806 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4807#else
4808# error "Port me"
4809#endif
4810 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4811 return off;
4812}
4813
4814
4815/**
4816 * Emits code for OR'ing two 32-bit GPRs.
4817 * @note Bits 63:32 of the destination GPR will be cleared.
4818 */
4819DECL_FORCE_INLINE(uint32_t)
4820iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4821{
4822#if defined(RT_ARCH_AMD64)
4823 /* or Gv, Ev */
4824 if (iGprDst >= 8 || iGprSrc >= 8)
4825 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4826 pCodeBuf[off++] = 0x0b;
4827 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4828
4829#elif defined(RT_ARCH_ARM64)
4830 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4831
4832#else
4833# error "Port me"
4834#endif
4835 return off;
4836}
4837
4838
4839/**
4840 * Emits code for OR'ing two 32-bit GPRs.
4841 * @note Bits 63:32 of the destination GPR will be cleared.
4842 */
4843DECL_INLINE_THROW(uint32_t)
4844iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4845{
4846#if defined(RT_ARCH_AMD64)
4847 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4848#elif defined(RT_ARCH_ARM64)
4849 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4850#else
4851# error "Port me"
4852#endif
4853 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4854 return off;
4855}
4856
4857
4858/**
4859 * Emits code for OR'ing a 64-bit GPRs with a constant.
4860 */
4861DECL_INLINE_THROW(uint32_t)
4862iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
4863{
4864#if defined(RT_ARCH_AMD64)
4865 if ((int64_t)uImm == (int8_t)uImm)
4866 {
4867 /* or Ev, imm8 */
4868 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4869 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4870 pbCodeBuf[off++] = 0x83;
4871 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4872 pbCodeBuf[off++] = (uint8_t)uImm;
4873 }
4874 else if ((int64_t)uImm == (int32_t)uImm)
4875 {
4876 /* or Ev, imm32 */
4877 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4878 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4879 pbCodeBuf[off++] = 0x81;
4880 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4881 pbCodeBuf[off++] = RT_BYTE1(uImm);
4882 pbCodeBuf[off++] = RT_BYTE2(uImm);
4883 pbCodeBuf[off++] = RT_BYTE3(uImm);
4884 pbCodeBuf[off++] = RT_BYTE4(uImm);
4885 }
4886 else
4887 {
4888 /* Use temporary register for the 64-bit immediate. */
4889 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4890 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
4891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4892 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4893 }
4894
4895#elif defined(RT_ARCH_ARM64)
4896 uint32_t uImmR = 0;
4897 uint32_t uImmNandS = 0;
4898 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4899 {
4900 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4901 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
4902 }
4903 else
4904 {
4905 /* Use temporary register for the 64-bit immediate. */
4906 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4907 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
4908 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4909 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4910 }
4911
4912#else
4913# error "Port me"
4914#endif
4915 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4916 return off;
4917}
4918
4919
4920/**
4921 * Emits code for OR'ing a 32-bit GPR with a constant.
4922 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4923 * @note For ARM64 this only supports @a uImm values that can be expressed using
4924 * the two 6-bit immediates of the ORR instruction. The caller must make
4925 * sure this is possible!
4926 */
4927DECL_FORCE_INLINE_THROW(uint32_t)
4928iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4929{
4930#if defined(RT_ARCH_AMD64)
4931 /* or Ev, imm */
4932 if (iGprDst >= 8)
4933 pCodeBuf[off++] = X86_OP_REX_B;
4934 if ((int32_t)uImm == (int8_t)uImm)
4935 {
4936 pCodeBuf[off++] = 0x83;
4937 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4938 pCodeBuf[off++] = (uint8_t)uImm;
4939 }
4940 else
4941 {
4942 pCodeBuf[off++] = 0x81;
4943 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4944 pCodeBuf[off++] = RT_BYTE1(uImm);
4945 pCodeBuf[off++] = RT_BYTE2(uImm);
4946 pCodeBuf[off++] = RT_BYTE3(uImm);
4947 pCodeBuf[off++] = RT_BYTE4(uImm);
4948 }
4949
4950#elif defined(RT_ARCH_ARM64)
4951 uint32_t uImmR = 0;
4952 uint32_t uImmNandS = 0;
4953 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4954 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4955 else
4956# ifdef IEM_WITH_THROW_CATCH
4957 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4958# else
4959 AssertReleaseFailedStmt(off = UINT32_MAX);
4960# endif
4961
4962#else
4963# error "Port me"
4964#endif
4965 return off;
4966}
4967
4968
4969/**
4970 * Emits code for OR'ing a 32-bit GPR with a constant.
4971 *
4972 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4973 */
4974DECL_INLINE_THROW(uint32_t)
4975iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4976{
4977#if defined(RT_ARCH_AMD64)
4978 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
4979
4980#elif defined(RT_ARCH_ARM64)
4981 uint32_t uImmR = 0;
4982 uint32_t uImmNandS = 0;
4983 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4984 {
4985 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4986 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4987 }
4988 else
4989 {
4990        /* Use temporary register for the immediate. */
4991 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4992 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
4993 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4994 }
4995
4996#else
4997# error "Port me"
4998#endif
4999 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5000 return off;
5001}
5002
5003
5004
5005/**
5006 * ORs two 64-bit GPRs together, storing the result in a third register.
5007 */
5008DECL_FORCE_INLINE(uint32_t)
5009iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5010{
5011#ifdef RT_ARCH_AMD64
5012 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5013 {
5014 /** @todo consider LEA */
5015 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5016 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5017 }
5018 else
5019 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5020
5021#elif defined(RT_ARCH_ARM64)
5022 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5023
5024#else
5025# error "Port me!"
5026#endif
5027 return off;
5028}
5029
5030
5031
5032/**
5033 * ORs two 32-bit GPRs together, storing the result in a third register.
5034 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5035 */
5036DECL_FORCE_INLINE(uint32_t)
5037iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5038{
5039#ifdef RT_ARCH_AMD64
5040 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5041 {
5042 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5043 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5044 }
5045 else
5046 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5047
5048#elif defined(RT_ARCH_ARM64)
5049 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5050
5051#else
5052# error "Port me!"
5053#endif
5054 return off;
5055}
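/*
 * Note how the AMD64 paths in the two helpers above synthesize a three
 * operand OR from mov+or, picking the operand order so a source aliasing
 * the destination is never clobbered; ARM64 ORR is natively three operand,
 * so a single instruction always suffices there.
 */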
5056
5057
5058/**
5059 * Emits code for XOR'ing two 64-bit GPRs.
5060 */
5061DECL_INLINE_THROW(uint32_t)
5062iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5063{
5064#if defined(RT_ARCH_AMD64)
5065    /* xor Gv, Ev */
5066 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5067 pCodeBuf[off++] = 0x33;
5068 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5069
5070#elif defined(RT_ARCH_ARM64)
5071 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5072
5073#else
5074# error "Port me"
5075#endif
5076 return off;
5077}
5078
5079
5080/**
5081 * Emits code for XOR'ing two 64-bit GPRs.
5082 */
5083DECL_INLINE_THROW(uint32_t)
5084iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5085{
5086#if defined(RT_ARCH_AMD64)
5087 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5088#elif defined(RT_ARCH_ARM64)
5089 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5090#else
5091# error "Port me"
5092#endif
5093 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5094 return off;
5095}
5096
5097
5098/**
5099 * Emits code for XOR'ing two 32-bit GPRs.
5100 */
5101DECL_INLINE_THROW(uint32_t)
5102iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5103{
5104#if defined(RT_ARCH_AMD64)
5105    /* xor Gv, Ev */
5106 if (iGprDst >= 8 || iGprSrc >= 8)
5107 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5108 pCodeBuf[off++] = 0x33;
5109 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5110
5111#elif defined(RT_ARCH_ARM64)
5112 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5113
5114#else
5115# error "Port me"
5116#endif
5117 return off;
5118}
5119
5120
5121/**
5122 * Emits code for XOR'ing two 32-bit GPRs.
5123 */
5124DECL_INLINE_THROW(uint32_t)
5125iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5126{
5127#if defined(RT_ARCH_AMD64)
5128 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5129#elif defined(RT_ARCH_ARM64)
5130 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5131#else
5132# error "Port me"
5133#endif
5134 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5135 return off;
5136}
5137
5138
5139/**
5140 * Emits code for XOR'ing a 32-bit GPR with a constant.
5141 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5142 * @note For ARM64 this only supports @a uImm values that can be expressed using
5143 * the two 6-bit immediates of the EOR instructions. The caller must make
5144 * sure this is possible!
5145 */
5146DECL_FORCE_INLINE_THROW(uint32_t)
5147iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5148{
5149#if defined(RT_ARCH_AMD64)
5150    /* xor Ev, imm */
5151 if (iGprDst >= 8)
5152 pCodeBuf[off++] = X86_OP_REX_B;
5153 if ((int32_t)uImm == (int8_t)uImm)
5154 {
5155 pCodeBuf[off++] = 0x83;
5156 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5157 pCodeBuf[off++] = (uint8_t)uImm;
5158 }
5159 else
5160 {
5161 pCodeBuf[off++] = 0x81;
5162 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5163 pCodeBuf[off++] = RT_BYTE1(uImm);
5164 pCodeBuf[off++] = RT_BYTE2(uImm);
5165 pCodeBuf[off++] = RT_BYTE3(uImm);
5166 pCodeBuf[off++] = RT_BYTE4(uImm);
5167 }
5168
5169#elif defined(RT_ARCH_ARM64)
5170 uint32_t uImmR = 0;
5171 uint32_t uImmNandS = 0;
5172 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5173 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5174 else
5175# ifdef IEM_WITH_THROW_CATCH
5176 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5177# else
5178 AssertReleaseFailedStmt(off = UINT32_MAX);
5179# endif
5180
5181#else
5182# error "Port me"
5183#endif
5184 return off;
5185}
5186
5187
5188/*********************************************************************************************************************************
5189* Shifting *
5190*********************************************************************************************************************************/
5191
5192/**
5193 * Emits code for shifting a GPR a fixed number of bits to the left.
5194 */
5195DECL_FORCE_INLINE(uint32_t)
5196iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5197{
5198 Assert(cShift > 0 && cShift < 64);
5199
5200#if defined(RT_ARCH_AMD64)
5201 /* shl dst, cShift */
5202 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5203 if (cShift != 1)
5204 {
5205 pCodeBuf[off++] = 0xc1;
5206 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5207 pCodeBuf[off++] = cShift;
5208 }
5209 else
5210 {
5211 pCodeBuf[off++] = 0xd1;
5212 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5213 }
5214
5215#elif defined(RT_ARCH_ARM64)
5216 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5217
5218#else
5219# error "Port me"
5220#endif
5221 return off;
5222}
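/*
 * For reference, the shift-by-one special case above picks the one byte
 * shorter 0xd1 encoding; the two forms are architecturally equivalent:
 *
 *      shl rax, 1      -> 48 d1 e0
 *      shl rax, 1      -> 48 c1 e0 01   (same effect, wastes a byte)
 */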
5223
5224
5225/**
5226 * Emits code for shifting a GPR a fixed number of bits to the left.
5227 */
5228DECL_INLINE_THROW(uint32_t)
5229iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5230{
5231#if defined(RT_ARCH_AMD64)
5232 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5233#elif defined(RT_ARCH_ARM64)
5234 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5235#else
5236# error "Port me"
5237#endif
5238 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5239 return off;
5240}
5241
5242
5243/**
5244 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5245 */
5246DECL_FORCE_INLINE(uint32_t)
5247iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5248{
5249 Assert(cShift > 0 && cShift < 32);
5250
5251#if defined(RT_ARCH_AMD64)
5252 /* shl dst, cShift */
5253 if (iGprDst >= 8)
5254 pCodeBuf[off++] = X86_OP_REX_B;
5255 if (cShift != 1)
5256 {
5257 pCodeBuf[off++] = 0xc1;
5258 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5259 pCodeBuf[off++] = cShift;
5260 }
5261 else
5262 {
5263 pCodeBuf[off++] = 0xd1;
5264 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5265 }
5266
5267#elif defined(RT_ARCH_ARM64)
5268 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5269
5270#else
5271# error "Port me"
5272#endif
5273 return off;
5274}
5275
5276
5277/**
5278 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5279 */
5280DECL_INLINE_THROW(uint32_t)
5281iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5282{
5283#if defined(RT_ARCH_AMD64)
5284 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5285#elif defined(RT_ARCH_ARM64)
5286 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5287#else
5288# error "Port me"
5289#endif
5290 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5291 return off;
5292}
5293
5294
5295/**
5296 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5297 */
5298DECL_FORCE_INLINE(uint32_t)
5299iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5300{
5301 Assert(cShift > 0 && cShift < 64);
5302
5303#if defined(RT_ARCH_AMD64)
5304 /* shr dst, cShift */
5305 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5306 if (cShift != 1)
5307 {
5308 pCodeBuf[off++] = 0xc1;
5309 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5310 pCodeBuf[off++] = cShift;
5311 }
5312 else
5313 {
5314 pCodeBuf[off++] = 0xd1;
5315 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5316 }
5317
5318#elif defined(RT_ARCH_ARM64)
5319 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5320
5321#else
5322# error "Port me"
5323#endif
5324 return off;
5325}
5326
5327
5328/**
5329 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5330 */
5331DECL_INLINE_THROW(uint32_t)
5332iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5333{
5334#if defined(RT_ARCH_AMD64)
5335 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5336#elif defined(RT_ARCH_ARM64)
5337 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5338#else
5339# error "Port me"
5340#endif
5341 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5342 return off;
5343}
5344
5345
5346/**
5347 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5348 * right.
5349 */
5350DECL_FORCE_INLINE(uint32_t)
5351iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5352{
5353 Assert(cShift > 0 && cShift < 32);
5354
5355#if defined(RT_ARCH_AMD64)
5356 /* shr dst, cShift */
5357 if (iGprDst >= 8)
5358 pCodeBuf[off++] = X86_OP_REX_B;
5359 if (cShift != 1)
5360 {
5361 pCodeBuf[off++] = 0xc1;
5362 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5363 pCodeBuf[off++] = cShift;
5364 }
5365 else
5366 {
5367 pCodeBuf[off++] = 0xd1;
5368 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5369 }
5370
5371#elif defined(RT_ARCH_ARM64)
5372 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5373
5374#else
5375# error "Port me"
5376#endif
5377 return off;
5378}
5379
5380
5381/**
5382 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5383 * right.
5384 */
5385DECL_INLINE_THROW(uint32_t)
5386iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5387{
5388#if defined(RT_ARCH_AMD64)
5389 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5390#elif defined(RT_ARCH_ARM64)
5391 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5392#else
5393# error "Port me"
5394#endif
5395 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5396 return off;
5397}
5398
5399
5400/**
5401 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5402 * right and assigning it to a different GPR.
5403 */
5404DECL_INLINE_THROW(uint32_t)
5405iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5406{
5407 Assert(cShift > 0); Assert(cShift < 32);
5408#if defined(RT_ARCH_AMD64)
5409 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5410 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5411
5412#elif defined(RT_ARCH_ARM64)
5413 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5414
5415#else
5416# error "Port me"
5417#endif
5418 return off;
5419}
5420
5421
5422/**
5423 * Emits code for rotating a GPR a fixed number of bits to the left.
5424 */
5425DECL_FORCE_INLINE(uint32_t)
5426iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5427{
5428 Assert(cShift > 0 && cShift < 64);
5429
5430#if defined(RT_ARCH_AMD64)
5431 /* rol dst, cShift */
5432 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5433 if (cShift != 1)
5434 {
5435 pCodeBuf[off++] = 0xc1;
5436 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5437 pCodeBuf[off++] = cShift;
5438 }
5439 else
5440 {
5441 pCodeBuf[off++] = 0xd1;
5442 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5443 }
5444
5445#elif defined(RT_ARCH_ARM64)
5446 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5447
5448#else
5449# error "Port me"
5450#endif
5451 return off;
5452}
5453
5454
5455#if defined(RT_ARCH_AMD64)
5456/**
5457 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5458 */
5459DECL_FORCE_INLINE(uint32_t)
5460iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5461{
5462 Assert(cShift > 0 && cShift < 32);
5463
5464 /* rcl dst, cShift */
5465 if (iGprDst >= 8)
5466 pCodeBuf[off++] = X86_OP_REX_B;
5467 if (cShift != 1)
5468 {
5469 pCodeBuf[off++] = 0xc1;
5470 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5471 pCodeBuf[off++] = cShift;
5472 }
5473 else
5474 {
5475 pCodeBuf[off++] = 0xd1;
5476 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5477 }
5478
5479 return off;
5480}
5481#endif /* RT_ARCH_AMD64 */
5482
5483
5484
5485/**
5486 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5487 * @note On ARM64 bits 63:32 of the destination GPR will be cleared; the AMD64 16-bit rotate leaves bits 63:16 unchanged.
5488 */
5489DECL_FORCE_INLINE(uint32_t)
5490iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5491{
5492#if defined(RT_ARCH_AMD64)
5493 /*
5494 * There is no bswap r16 on x86 (the encoding exists but does not work).
5495 * So just use a rol (gcc -O2 is doing that).
5496 *
5497 * rol r16, 0x8
5498 */
5499 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5500 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5501 if (iGpr >= 8)
5502 pbCodeBuf[off++] = X86_OP_REX_B;
5503 pbCodeBuf[off++] = 0xc1;
5504 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5505 pbCodeBuf[off++] = 0x08;
5506#elif defined(RT_ARCH_ARM64)
5507 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5508
5509 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5510#else
5511# error "Port me"
5512#endif
5513
5514 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5515 return off;
5516}
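/*
 * Worked example of the rol based swap above, with the value in ax:
 *
 *      ax = 0x1234; rol ax, 8  ->  ax = 0x3412
 *
 * Since this is a 16-bit operation, AMD64 leaves bits 63:16 of the full
 * register alone; the ARM64 REV16 path writes a zero-extended 32-bit result.
 */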
5517
5518
5519/**
5520 * Emits code for reversing the byte order in a 32-bit GPR.
5521 * @note Bits 63:32 of the destination GPR will be cleared.
5522 */
5523DECL_FORCE_INLINE(uint32_t)
5524iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5525{
5526#if defined(RT_ARCH_AMD64)
5527 /* bswap r32 */
5528 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5529
5530 if (iGpr >= 8)
5531 pbCodeBuf[off++] = X86_OP_REX_B;
5532 pbCodeBuf[off++] = 0x0f;
5533 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5534#elif defined(RT_ARCH_ARM64)
5535 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5536
5537 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5538#else
5539# error "Port me"
5540#endif
5541
5542 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5543 return off;
5544}
5545
5546
5547/**
5548 * Emits code for reversing the byte order in a 64-bit GPR.
5549 */
5550DECL_FORCE_INLINE(uint32_t)
5551iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5552{
5553#if defined(RT_ARCH_AMD64)
5554 /* bswap r64 */
5555 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5556
5557 if (iGpr >= 8)
5558 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5559 else
5560 pbCodeBuf[off++] = X86_OP_REX_W;
5561 pbCodeBuf[off++] = 0x0f;
5562 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5563#elif defined(RT_ARCH_ARM64)
5564 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5565
5566 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5567#else
5568# error "Port me"
5569#endif
5570
5571 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5572 return off;
5573}
5574
5575
5576/*********************************************************************************************************************************
5577* Compare and Testing *
5578*********************************************************************************************************************************/
5579
5580
5581#ifdef RT_ARCH_ARM64
5582/**
5583 * Emits an ARM64 compare instruction.
5584 */
5585DECL_INLINE_THROW(uint32_t)
5586iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5587 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5588{
5589 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5590 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5591 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5592 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5593 return off;
5594}
5595#endif
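/*
 * What gets emitted above is SUBS XZR, Xn, Xm{, shift}: CMP is simply the
 * assembler alias for a flag-setting subtract that discards its result into
 * the zero register.
 */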
5596
5597
5598/**
5599 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5600 * with conditional instructions.
5601 */
5602DECL_FORCE_INLINE(uint32_t)
5603iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5604{
5605#ifdef RT_ARCH_AMD64
5606 /* cmp Gv, Ev */
5607 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5608 pCodeBuf[off++] = 0x3b;
5609 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5610
5611#elif defined(RT_ARCH_ARM64)
5612 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
5613
5614#else
5615# error "Port me!"
5616#endif
5617 return off;
5618}
5619
5620
5621/**
5622 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5623 * with conditional instructions.
5624 */
5625DECL_INLINE_THROW(uint32_t)
5626iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5627{
5628#ifdef RT_ARCH_AMD64
5629 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5630#elif defined(RT_ARCH_ARM64)
5631 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5632#else
5633# error "Port me!"
5634#endif
5635 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5636 return off;
5637}
5638
5639
5640/**
5641 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5642 * with conditional instructions.
5643 */
5644DECL_FORCE_INLINE(uint32_t)
5645iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5646{
5647#ifdef RT_ARCH_AMD64
5648 /* cmp Gv, Ev */
5649 if (iGprLeft >= 8 || iGprRight >= 8)
5650 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5651 pCodeBuf[off++] = 0x3b;
5652 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5653
5654#elif defined(RT_ARCH_ARM64)
5655 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
5656
5657#else
5658# error "Port me!"
5659#endif
5660 return off;
5661}
5662
5663
5664/**
5665 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5666 * with conditional instructions.
5667 */
5668DECL_INLINE_THROW(uint32_t)
5669iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5670{
5671#ifdef RT_ARCH_AMD64
5672 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5673#elif defined(RT_ARCH_ARM64)
5674 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5675#else
5676# error "Port me!"
5677#endif
5678 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5679 return off;
5680}
5681
5682
5683/**
5684 * Emits a compare of a 64-bit GPR with a constant value, setting status
5685 * flags/whatever for use with conditional instructions.
5686 */
5687DECL_INLINE_THROW(uint32_t)
5688iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
5689{
5690#ifdef RT_ARCH_AMD64
5691    if (uImm <= UINT32_C(0x7f)) /* the imm8 of the 0x83 form is sign-extended */
5692 {
5693 /* cmp Ev, Ib */
5694 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5695 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5696 pbCodeBuf[off++] = 0x83;
5697 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5698 pbCodeBuf[off++] = (uint8_t)uImm;
5699 }
5700 else if ((int64_t)uImm == (int32_t)uImm)
5701 {
5702 /* cmp Ev, imm */
5703 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5704 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5705 pbCodeBuf[off++] = 0x81;
5706 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5707 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5708 pbCodeBuf[off++] = RT_BYTE1(uImm);
5709 pbCodeBuf[off++] = RT_BYTE2(uImm);
5710 pbCodeBuf[off++] = RT_BYTE3(uImm);
5711 pbCodeBuf[off++] = RT_BYTE4(uImm);
5712 }
5713 else
5714 {
5715 /* Use temporary register for the immediate. */
5716 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5717 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5718 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5719 }
5720
5721#elif defined(RT_ARCH_ARM64)
5722    /** @todo guess there are clever things we can do here... */
5723 if (uImm < _4K)
5724 {
5725 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5726 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5727 true /*64Bit*/, true /*fSetFlags*/);
5728 }
5729 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5730 {
5731 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5732 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5733 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5734 }
5735 else
5736 {
5737 /* Use temporary register for the immediate. */
5738 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5739 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5740 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5741 }
5742
5743#else
5744# error "Port me!"
5745#endif
5746
5747 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5748 return off;
5749}
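/*
 * For reference, the ARM64 CMP (SUBS) immediate form used above takes a
 * 12-bit unsigned immediate, optionally shifted left by 12 bits, so without
 * a temporary register only these constants are reachable:
 *
 *      0x0..0xfff                      - plain imm12
 *      0x1000..0xfff000, 4K aligned    - imm12 with LSL #12
 */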
5750
5751
5752/**
5753 * Emits a compare of a 32-bit GPR with a constant value, setting status
5754 * flags/whatever for use with conditional instructions.
5755 *
5756 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5757 * value shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the
5758 * lower 12 bits all zero). Will release assert or throw an exception if
5759 * the caller violates this restriction.
5760 */
5761DECL_FORCE_INLINE_THROW(uint32_t)
5762iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5763{
5764#ifdef RT_ARCH_AMD64
5765 if (iGprLeft >= 8)
5766 pCodeBuf[off++] = X86_OP_REX_B;
5767 if (uImm <= UINT32_C(0x7f))
5768 {
5769 /* cmp Ev, Ib */
5770 pCodeBuf[off++] = 0x83;
5771 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5772 pCodeBuf[off++] = (uint8_t)uImm;
5773 }
5774 else
5775 {
5776 /* cmp Ev, imm */
5777 pCodeBuf[off++] = 0x81;
5778 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5779 pCodeBuf[off++] = RT_BYTE1(uImm);
5780 pCodeBuf[off++] = RT_BYTE2(uImm);
5781 pCodeBuf[off++] = RT_BYTE3(uImm);
5782 pCodeBuf[off++] = RT_BYTE4(uImm);
5783 }
5784
5785#elif defined(RT_ARCH_ARM64)
5786    /** @todo guess there are clever things we can do here... */
5787 if (uImm < _4K)
5788 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5789 false /*64Bit*/, true /*fSetFlags*/);
5790 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5791        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5792 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5793 else
5794# ifdef IEM_WITH_THROW_CATCH
5795 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5796# else
5797 AssertReleaseFailedStmt(off = UINT32_MAX);
5798# endif
5799
5800#else
5801# error "Port me!"
5802#endif
5803 return off;
5804}
5805
5806
5807/**
5808 * Emits a compare of a 32-bit GPR with a constant value, setting status
5809 * flags/whatever for use with conditional instructions.
5810 */
5811DECL_INLINE_THROW(uint32_t)
5812iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5813{
5814#ifdef RT_ARCH_AMD64
5815 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
5816
5817#elif defined(RT_ARCH_ARM64)
5818    /** @todo guess there are clever things we can do here... */
5819 if (uImm < _4K)
5820 {
5821 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5822 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5823 false /*64Bit*/, true /*fSetFlags*/);
5824 }
5825 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5826 {
5827 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5828        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5829 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5830 }
5831 else
5832 {
5833 /* Use temporary register for the immediate. */
5834 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5835 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
5836 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5837 }
5838
5839#else
5840# error "Port me!"
5841#endif
5842
5843 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5844 return off;
5845}
5846
5847
5848/**
5849 * Emits a compare of a 16-bit GPR with a constant value, setting status
5850 * flags/whatever for use with conditional instructions.
5851 *
5852 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
5853 * 16-bit value from @a iGprLeft.
5854 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5855 * value shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the
5856 * lower 12 bits all zero). Will release assert or throw an exception if
5857 * the caller violates this restriction.
5858 */
5859DECL_FORCE_INLINE_THROW(uint32_t)
5860iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
5861 uint8_t idxTmpReg = UINT8_MAX)
5862{
5863#ifdef RT_ARCH_AMD64
5864 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5865 if (iGprLeft >= 8)
5866 pCodeBuf[off++] = X86_OP_REX_B;
5867 if (uImm <= UINT32_C(0x7f))
5868 {
5869 /* cmp Ev, Ib */
5870 pCodeBuf[off++] = 0x83;
5871 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5872 pCodeBuf[off++] = (uint8_t)uImm;
5873 }
5874 else
5875 {
5876 /* cmp Ev, imm */
5877 pCodeBuf[off++] = 0x81;
5878 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5879 pCodeBuf[off++] = RT_BYTE1(uImm);
5880 pCodeBuf[off++] = RT_BYTE2(uImm);
5881 }
5882 RT_NOREF(idxTmpReg);
5883
5884#elif defined(RT_ARCH_ARM64)
5885# ifdef IEM_WITH_THROW_CATCH
5886 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5887# else
5888 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
5889# endif
5890 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
5891 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
5892 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
5893
5894#else
5895# error "Port me!"
5896#endif
5897 return off;
5898}
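/*
 * The AND immediate above is the logical immediate pair immS=15/immR=0,
 * which decodes to a run of 16 set bits (0x0000ffff, cf. the assertion), so
 * the subsequent compare only sees the low 16 bits of iGprLeft.
 */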
5899
5900
5901/**
5902 * Emits a compare of a 16-bit GPR with a constant value, setting status
5903 * flags/whatever for use with conditional instructions.
5904 *
5905 * @note ARM64: Helper register is required (idxTmpReg).
5906 */
5907DECL_INLINE_THROW(uint32_t)
5908iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
5909 uint8_t idxTmpReg = UINT8_MAX)
5910{
5911#ifdef RT_ARCH_AMD64
5912 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
5913#elif defined(RT_ARCH_ARM64)
5914 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
5915#else
5916# error "Port me!"
5917#endif
5918 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5919 return off;
5920}
5921
5922
5923
5924/*********************************************************************************************************************************
5925* Branching *
5926*********************************************************************************************************************************/
5927
5928/**
5929 * Emits a JMP rel32 / B imm26 to the given label.
5930 */
5931DECL_FORCE_INLINE_THROW(uint32_t)
5932iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
5933{
5934 Assert(idxLabel < pReNative->cLabels);
5935
5936#ifdef RT_ARCH_AMD64
5937 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
5938 {
5939 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
5940 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
5941 {
5942 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
5943 pCodeBuf[off++] = (uint8_t)offRel;
5944 }
5945 else
5946 {
5947 offRel -= 3;
5948 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
5949 pCodeBuf[off++] = RT_BYTE1(offRel);
5950 pCodeBuf[off++] = RT_BYTE2(offRel);
5951 pCodeBuf[off++] = RT_BYTE3(offRel);
5952 pCodeBuf[off++] = RT_BYTE4(offRel);
5953 }
5954 }
5955 else
5956 {
5957 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
5958 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
5959 pCodeBuf[off++] = 0xfe;
5960 pCodeBuf[off++] = 0xff;
5961 pCodeBuf[off++] = 0xff;
5962 pCodeBuf[off++] = 0xff;
5963 }
5964 pCodeBuf[off++] = 0xcc; /* int3 poison */
5965
5966#elif defined(RT_ARCH_ARM64)
5967 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
5968 pCodeBuf[off++] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
5969 else
5970 {
5971 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
5972 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
5973 }
5974
5975#else
5976# error "Port me!"
5977#endif
5978 return off;
5979}
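/*
 * In the unresolved label paths above the branch is emitted with a dummy
 * displacement (0xfffffffe on AMD64, -1 on ARM64) and recorded via
 * iemNativeAddFixup(), to be patched once the label gets defined. The
 * trailing int3 on AMD64 sits after an unconditional jmp and thus only
 * serves as poison against stray execution.
 */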
5980
5981
5982/**
5983 * Emits a JMP rel32 / B imm26 to the given label.
5984 */
5985DECL_INLINE_THROW(uint32_t)
5986iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5987{
5988#ifdef RT_ARCH_AMD64
5989 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
5990#elif defined(RT_ARCH_ARM64)
5991 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
5992#else
5993# error "Port me!"
5994#endif
5995 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5996 return off;
5997}
5998
5999
6000/**
6001 * Emits a JMP rel32 / B imm26 to a new undefined label.
6002 */
6003DECL_INLINE_THROW(uint32_t)
6004iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6005{
6006 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6007 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6008}
6009
6010/** Condition type. */
6011#ifdef RT_ARCH_AMD64
6012typedef enum IEMNATIVEINSTRCOND : uint8_t
6013{
6014 kIemNativeInstrCond_o = 0,
6015 kIemNativeInstrCond_no,
6016 kIemNativeInstrCond_c,
6017 kIemNativeInstrCond_nc,
6018 kIemNativeInstrCond_e,
6019 kIemNativeInstrCond_ne,
6020 kIemNativeInstrCond_be,
6021 kIemNativeInstrCond_nbe,
6022 kIemNativeInstrCond_s,
6023 kIemNativeInstrCond_ns,
6024 kIemNativeInstrCond_p,
6025 kIemNativeInstrCond_np,
6026 kIemNativeInstrCond_l,
6027 kIemNativeInstrCond_nl,
6028 kIemNativeInstrCond_le,
6029 kIemNativeInstrCond_nle
6030} IEMNATIVEINSTRCOND;
6031#elif defined(RT_ARCH_ARM64)
6032typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6033# define kIemNativeInstrCond_o todo_conditional_codes
6034# define kIemNativeInstrCond_no todo_conditional_codes
6035# define kIemNativeInstrCond_c todo_conditional_codes
6036# define kIemNativeInstrCond_nc todo_conditional_codes
6037# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6038# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6039# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6040# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6041# define kIemNativeInstrCond_s todo_conditional_codes
6042# define kIemNativeInstrCond_ns todo_conditional_codes
6043# define kIemNativeInstrCond_p todo_conditional_codes
6044# define kIemNativeInstrCond_np todo_conditional_codes
6045# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6046# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6047# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6048# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6049#else
6050# error "Port me!"
6051#endif
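/*
 * On AMD64 the enum values above are deliberately the raw x86 condition
 * code nibbles, which is why the emitters below can form the opcodes
 * directly as (cc | 0x70) for Jcc rel8 and 0x0f, (cc | 0x80) for Jcc rel32.
 */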
6052
6053
6054/**
6055 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6056 */
6057DECL_FORCE_INLINE_THROW(uint32_t)
6058iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6059 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6060{
6061 Assert(idxLabel < pReNative->cLabels);
6062
6063 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6064#ifdef RT_ARCH_AMD64
6065 if (offLabel >= off)
6066 {
6067 /* jcc rel32 */
6068 pCodeBuf[off++] = 0x0f;
6069 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6070 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6071 pCodeBuf[off++] = 0x00;
6072 pCodeBuf[off++] = 0x00;
6073 pCodeBuf[off++] = 0x00;
6074 pCodeBuf[off++] = 0x00;
6075 }
6076 else
6077 {
6078 int32_t offDisp = offLabel - (off + 2);
6079 if ((int8_t)offDisp == offDisp)
6080 {
6081 /* jcc rel8 */
6082 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6083 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6084 }
6085 else
6086 {
6087 /* jcc rel32 */
6088 offDisp -= 4;
6089 pCodeBuf[off++] = 0x0f;
6090 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6091 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6092 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6093 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6094 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6095 }
6096 }
6097
6098#elif defined(RT_ARCH_ARM64)
6099 if (offLabel >= off)
6100 {
6101 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6102 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6103 }
6104 else
6105 {
6106 Assert(off - offLabel <= 0x3ffffU);
6107 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6108 }
6109
6110#else
6111# error "Port me!"
6112#endif
6113 return off;
6114}
6115
6116
6117/**
6118 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6119 */
6120DECL_INLINE_THROW(uint32_t)
6121iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6122{
6123#ifdef RT_ARCH_AMD64
6124 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6125#elif defined(RT_ARCH_ARM64)
6126 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6127#else
6128# error "Port me!"
6129#endif
6130 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6131 return off;
6132}
6133
6134
6135/**
6136 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6137 */
6138DECL_INLINE_THROW(uint32_t)
6139iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6140 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6141{
6142 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6143 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6144}
6145
6146
6147/**
6148 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6149 */
6150DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6151{
6152#ifdef RT_ARCH_AMD64
6153 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6154#elif defined(RT_ARCH_ARM64)
6155 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6156#else
6157# error "Port me!"
6158#endif
6159}
6160
6161/**
6162 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6163 */
6164DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6165 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6166{
6167#ifdef RT_ARCH_AMD64
6168 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6169#elif defined(RT_ARCH_ARM64)
6170 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6171#else
6172# error "Port me!"
6173#endif
6174}
6175
6176
6177/**
6178 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6179 */
6180DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6181{
6182#ifdef RT_ARCH_AMD64
6183 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6184#elif defined(RT_ARCH_ARM64)
6185 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6186#else
6187# error "Port me!"
6188#endif
6189}
6190
6191/**
6192 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6193 */
6194DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6195 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6196{
6197#ifdef RT_ARCH_AMD64
6198 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6199#elif defined(RT_ARCH_ARM64)
6200 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6201#else
6202# error "Port me!"
6203#endif
6204}
6205
6206
6207/**
6208 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6209 */
6210DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6211{
6212#ifdef RT_ARCH_AMD64
6213 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6214#elif defined(RT_ARCH_ARM64)
6215 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6216#else
6217# error "Port me!"
6218#endif
6219}
6220
6221/**
6222 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6223 */
6224DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6225 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6226{
6227#ifdef RT_ARCH_AMD64
6228 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6229#elif defined(RT_ARCH_ARM64)
6230 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6231#else
6232# error "Port me!"
6233#endif
6234}
6235
6236
6237/**
6238 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6239 */
6240DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6241{
6242#ifdef RT_ARCH_AMD64
6243 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6244#elif defined(RT_ARCH_ARM64)
6245 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6246#else
6247# error "Port me!"
6248#endif
6249}
6250
6251/**
6252 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6253 */
6254DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6255 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6256{
6257#ifdef RT_ARCH_AMD64
6258 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6259#elif defined(RT_ARCH_ARM64)
6260 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6261#else
6262# error "Port me!"
6263#endif
6264}
6265
6266
6267/**
6268 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6269 */
6270DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6271{
6272#ifdef RT_ARCH_AMD64
6273 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6274#elif defined(RT_ARCH_ARM64)
6275 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6276#else
6277# error "Port me!"
6278#endif
6279}
6280
6281 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6283 */
6284DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6285 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6286{
6287#ifdef RT_ARCH_AMD64
6288 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6289#elif defined(RT_ARCH_ARM64)
6290 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6291#else
6292# error "Port me!"
6293#endif
6294}
6295
6296
6297/**
6298 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6299 *
6300 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6301 *
6302 * Only use hardcoded jumps forward when emitting for exactly one
6303 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6304 * the right target address on all platforms!
6305 *
6306 * Please also note that on x86 it is necessary to pass off + 256 or
6307 * higher for @a offTarget if one believes the intervening code is more
6308 * than 127 bytes long.
6309 */
6310DECL_FORCE_INLINE(uint32_t)
6311iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6312{
6313#ifdef RT_ARCH_AMD64
6314 /* jcc rel8 / rel32 */
6315 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6316 if (offDisp < 128 && offDisp >= -128)
6317 {
6318 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6319 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6320 }
6321 else
6322 {
6323 offDisp -= 4;
6324 pCodeBuf[off++] = 0x0f;
6325 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6326 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6327 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6328 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6329 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6330 }
6331
6332#elif defined(RT_ARCH_ARM64)
6333 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6334
6335#else
6336# error "Port me!"
6337#endif
6338 return off;
6339}
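/*
 * A note on the displacement math above: x86 displacements are relative to
 * the end of the branch instruction, hence the (off + 2) for the two byte
 * rel8 form and the extra adjustment by 4 once the six byte rel32 form
 * (two opcode bytes plus four displacement bytes) is chosen. The ARM64
 * B.cc encodes the distance from the branch instruction itself, counted in
 * instructions.
 */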
6340
6341
6342/**
6343 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6344 *
6345 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6346 *
6347 * Only use hardcoded jumps forward when emitting for exactly one
6348 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6349 * the right target address on all platforms!
6350 *
6351 * Please also note that on x86 it is necessary to pass off + 256 or
6352 * higher for @a offTarget if one believes the intervening code is more
6353 * than 127 bytes long.
6354 */
6355DECL_INLINE_THROW(uint32_t)
6356iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6357{
6358#ifdef RT_ARCH_AMD64
6359 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6360#elif defined(RT_ARCH_ARM64)
6361 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6362#else
6363# error "Port me!"
6364#endif
6365 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6366 return off;
6367}
6368
6369
6370/**
6371 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6372 *
6373 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6374 */
6375DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6376{
6377#ifdef RT_ARCH_AMD64
6378 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6379#elif defined(RT_ARCH_ARM64)
6380 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6381#else
6382# error "Port me!"
6383#endif
6384}
6385
6386
6387/**
6388 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6389 *
6390 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6391 */
6392DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6393{
6394#ifdef RT_ARCH_AMD64
6395 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6396#elif defined(RT_ARCH_ARM64)
6397 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6398#else
6399# error "Port me!"
6400#endif
6401}
6402
6403
6404/**
6405 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6406 *
6407 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6408 */
6409DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6410{
6411#ifdef RT_ARCH_AMD64
6412 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6413#elif defined(RT_ARCH_ARM64)
6414 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6415#else
6416# error "Port me!"
6417#endif
6418}
6419
6420
6421/**
6422 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6423 *
6424 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6425 */
6426DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6427{
6428#ifdef RT_ARCH_AMD64
6429 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6430#elif defined(RT_ARCH_ARM64)
6431 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6432#else
6433# error "Port me!"
6434#endif
6435}
6436
6437
6438/**
6439 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6440 *
6441 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6442 */
6443DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6444{
6445#ifdef RT_ARCH_AMD64
6446 /* jmp rel8 or rel32 */
6447    int32_t offDisp = (int32_t)(offTarget - (off + 2));
6448 if (offDisp < 128 && offDisp >= -128)
6449 {
6450 pCodeBuf[off++] = 0xeb;
6451 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6452 }
6453 else
6454 {
6455 offDisp -= 3;
6456 pCodeBuf[off++] = 0xe9;
6457 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6458 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6459 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6460 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6461 }
6462
6463#elif defined(RT_ARCH_ARM64)
6464 pCodeBuf[off++] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6465
6466#else
6467# error "Port me!"
6468#endif
6469 return off;
6470}
6471
6472
6473/**
6474 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6475 *
6476 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6477 */
6478DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6479{
6480#ifdef RT_ARCH_AMD64
6481 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6482#elif defined(RT_ARCH_ARM64)
6483 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6484#else
6485# error "Port me!"
6486#endif
6487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6488 return off;
6489}
6490
6491
6492/**
6493 * Fixes up a conditional jump to a fixed label.
6494 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6495 * iemNativeEmitJzToFixed, ...
6496 */
6497DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6498{
6499#ifdef RT_ARCH_AMD64
6500 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6501 uint8_t const bOpcode = pbCodeBuf[offFixup];
6502 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6503 {
6504 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6505 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6506 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6507 }
6508 else
6509 {
6510 if (bOpcode != 0x0f)
6511 Assert(bOpcode == 0xe9);
6512 else
6513 {
6514 offFixup += 1;
6515 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
6516 }
6517 uint32_t const offRel32 = offTarget - (offFixup + 5);
6518 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6519 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6520 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6521 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6522 }
6523
6524#elif defined(RT_ARCH_ARM64)
6525 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6526 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6527 {
6528 /* B.COND + BC.COND */
6529 int32_t const offDisp = offTarget - offFixup;
6530 Assert(offDisp >= -262144 && offDisp < 262144);
6531 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6532 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6533 }
6534 else
6535 {
6536 /* B imm26 */
6537 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6538 int32_t const offDisp = offTarget - offFixup;
6539 Assert(offDisp >= -33554432 && offDisp < 33554432);
6540 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6541 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6542 }
6543
6544#else
6545# error "Port me!"
6546#endif
6547}
6548
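/*
 * Illustrative usage sketch (not part of the emitter API): emitting a forward
 * conditional jump whose target is not known yet and patching it afterwards.
 * Passing off + 256 as the placeholder target forces the rel32 encoding on
 * AMD64, as per the notes on iemNativeEmitJccToFixed() above.
 *
 *      uint32_t const offFixup = off;
 *      off = iemNativeEmitJzToFixed(pReNative, off, off + 256);
 *      // ... emit the code being jumped across ...
 *      iemNativeFixupFixedJump(pReNative, offFixup, off);
 */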
6549
6550#ifdef RT_ARCH_AMD64
6551/**
6552 * For doing bt on a register.
6553 */
6554DECL_INLINE_THROW(uint32_t)
6555iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6556{
6557 Assert(iBitNo < 64);
6558 /* bt Ev, imm8 */
6559 if (iBitNo >= 32)
6560 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6561 else if (iGprSrc >= 8)
6562 pCodeBuf[off++] = X86_OP_REX_B;
6563 pCodeBuf[off++] = 0x0f;
6564 pCodeBuf[off++] = 0xba;
6565 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6566 pCodeBuf[off++] = iBitNo;
6567 return off;
6568}
6569#endif /* RT_ARCH_AMD64 */
6570
6571
6572/**
6573 * Internal helper, don't call directly.
6574 */
6575DECL_INLINE_THROW(uint32_t)
6576iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6577 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6578{
6579 Assert(iBitNo < 64);
6580#ifdef RT_ARCH_AMD64
6581 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6582 if (iBitNo < 8)
6583 {
6584 /* test Eb, imm8 */
6585 if (iGprSrc >= 4)
6586 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6587 pbCodeBuf[off++] = 0xf6;
6588 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6589 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6590 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6591 }
6592 else
6593 {
6594 /* bt Ev, imm8 */
6595 if (iBitNo >= 32)
6596 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6597 else if (iGprSrc >= 8)
6598 pbCodeBuf[off++] = X86_OP_REX_B;
6599 pbCodeBuf[off++] = 0x0f;
6600 pbCodeBuf[off++] = 0xba;
6601 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6602 pbCodeBuf[off++] = iBitNo;
6603 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6604 }
6605
6606#elif defined(RT_ARCH_ARM64)
6607    /* Use the TBZ/TBNZ instructions here when we can, TST + B.cond otherwise. */
6608 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6609 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
6610 {
6611 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
6612 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
6613 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
6614 //if (offLabel == UINT32_MAX)
6615 {
6616 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6617 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6618 }
6619 //else
6620 //{
6621 // RT_BREAKPOINT();
6622 // Assert(off - offLabel <= 0x1fffU);
6623 // pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
6624 //
6625 //}
6626 }
6627 else
6628 {
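        /* Single-bit test via a TST immediate (alias of ANDS xzr): the
           immr/imms pair (0x40, (64 - iBitNo) & 63) encodes the mask
           RT_BIT_64(iBitNo), which the assertion below double-checks. */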
6629 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
6630 pu32CodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
6631 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6632 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
6633 }
6634
6635#else
6636# error "Port me!"
6637#endif
6638 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6639 return off;
6640}
6641
6642
6643/**
6644 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
6645 * @a iGprSrc.
6646 *
6647 * @note On ARM64 the range is only +/-8191 instructions.
6648 */
6649DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6650 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6651{
6652 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
6653}
6654
6655
6656/**
6657 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
6658 * _set_ in @a iGprSrc.
6659 *
6660 * @note On ARM64 the range is only +/-8191 instructions.
6661 */
6662DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6663 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6664{
6665 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
6666}
6667
6668
6669/**
6670 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
6671 * flags accordingly.
6672 */
6673DECL_INLINE_THROW(uint32_t)
6674iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
6675{
6676 Assert(fBits != 0);
6677#ifdef RT_ARCH_AMD64
6678
6679 if (fBits >= UINT32_MAX)
6680 {
6681 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6682
6683 /* test Ev,Gv */
6684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
6686 pbCodeBuf[off++] = 0x85;
6687        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
6688
6689 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6690 }
6691 else if (fBits <= UINT32_MAX)
6692 {
6693 /* test Eb, imm8 or test Ev, imm32 */
6694 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6695 if (fBits <= UINT8_MAX)
6696 {
6697 if (iGprSrc >= 4)
6698 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6699 pbCodeBuf[off++] = 0xf6;
6700 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6701 pbCodeBuf[off++] = (uint8_t)fBits;
6702 }
6703 else
6704 {
6705 if (iGprSrc >= 8)
6706 pbCodeBuf[off++] = X86_OP_REX_B;
6707 pbCodeBuf[off++] = 0xf7;
6708 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6709 pbCodeBuf[off++] = RT_BYTE1(fBits);
6710 pbCodeBuf[off++] = RT_BYTE2(fBits);
6711 pbCodeBuf[off++] = RT_BYTE3(fBits);
6712 pbCodeBuf[off++] = RT_BYTE4(fBits);
6713 }
6714 }
6715 /** @todo implement me. */
6716 else
6717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
6718
6719#elif defined(RT_ARCH_ARM64)
6720 uint32_t uImmR = 0;
6721 uint32_t uImmNandS = 0;
6722 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
6723 {
6724 /* ands xzr, iGprSrc, #fBits */
6725 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6726 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
6727 }
6728 else
6729 {
6730 /* ands xzr, iGprSrc, iTmpReg */
6731 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6732 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6733 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
6734 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6735 }
6736
6737#else
6738# error "Port me!"
6739#endif
6740 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6741 return off;
6742}
6743
6744
6745/**
6746 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
6747 * @a iGprSrc, setting CPU flags accordingly.
6748 *
6749 * @note For ARM64 this only supports @a fBits values that can be expressed
6750 * using the two 6-bit immediates of the ANDS instruction. The caller
6751 * must make sure this is possible!
6752 */
6753DECL_FORCE_INLINE_THROW(uint32_t)
6754iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
6755{
6756 Assert(fBits != 0);
6757
6758#ifdef RT_ARCH_AMD64
6759 if (fBits <= UINT8_MAX)
6760 {
6761 /* test Eb, imm8 */
6762 if (iGprSrc >= 4)
6763 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6764 pCodeBuf[off++] = 0xf6;
6765 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6766 pCodeBuf[off++] = (uint8_t)fBits;
6767 }
6768 else
6769 {
6770 /* test Ev, imm32 */
6771 if (iGprSrc >= 8)
6772 pCodeBuf[off++] = X86_OP_REX_B;
6773 pCodeBuf[off++] = 0xf7;
6774 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6775 pCodeBuf[off++] = RT_BYTE1(fBits);
6776 pCodeBuf[off++] = RT_BYTE2(fBits);
6777 pCodeBuf[off++] = RT_BYTE3(fBits);
6778 pCodeBuf[off++] = RT_BYTE4(fBits);
6779 }
6780
6781#elif defined(RT_ARCH_ARM64)
6782 /* ands xzr, src, #fBits */
6783 uint32_t uImmR = 0;
6784 uint32_t uImmNandS = 0;
6785 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6786 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6787 else
6788# ifdef IEM_WITH_THROW_CATCH
6789 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6790# else
6791 AssertReleaseFailedStmt(off = UINT32_MAX);
6792# endif
6793
6794#else
6795# error "Port me!"
6796#endif
6797 return off;
6798}
6799
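/*
 * Note on the ARM64 restriction above (informal): the logical-immediate
 * encoding used by ANDS can only express repeating patterns of a rotated
 * contiguous run of ones.  So a mask like 0x0ff0 converts fine, while e.g.
 * 0x5 (binary 101) does not and would hit the failure path:
 *
 *      uint32_t uImmR = 0, uImmNandS = 0;
 *      bool const fOk1 = Armv8A64ConvertMask32ToImmRImmS(UINT32_C(0x0ff0), &uImmNandS, &uImmR); // true
 *      bool const fOk2 = Armv8A64ConvertMask32ToImmRImmS(UINT32_C(0x0005), &uImmNandS, &uImmR); // false
 */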
6800
6801
6802/**
6803 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6804 * @a iGprSrc, setting CPU flags accordingly.
6805 *
6806 * @note For ARM64 this only supports @a fBits values that can be expressed
6807 * using the two 6-bit immediates of the ANDS instruction. The caller
6808 * must make sure this is possible!
6809 */
6810DECL_FORCE_INLINE_THROW(uint32_t)
6811iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6812{
6813 Assert(fBits != 0);
6814
6815#ifdef RT_ARCH_AMD64
6816 /* test Eb, imm8 */
6817 if (iGprSrc >= 4)
6818 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6819 pCodeBuf[off++] = 0xf6;
6820 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6821 pCodeBuf[off++] = fBits;
6822
6823#elif defined(RT_ARCH_ARM64)
6824 /* ands xzr, src, #fBits */
6825 uint32_t uImmR = 0;
6826 uint32_t uImmNandS = 0;
6827 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6828 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6829 else
6830# ifdef IEM_WITH_THROW_CATCH
6831 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6832# else
6833 AssertReleaseFailedStmt(off = UINT32_MAX);
6834# endif
6835
6836#else
6837# error "Port me!"
6838#endif
6839 return off;
6840}
6841
6842
6843/**
6844 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6845 * @a iGprSrc, setting CPU flags accordingly.
6846 */
6847DECL_INLINE_THROW(uint32_t)
6848iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6849{
6850 Assert(fBits != 0);
6851
6852#ifdef RT_ARCH_AMD64
6853 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
6854
6855#elif defined(RT_ARCH_ARM64)
6856 /* ands xzr, src, [tmp|#imm] */
6857 uint32_t uImmR = 0;
6858 uint32_t uImmNandS = 0;
6859 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6860 {
6861 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6862 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6863 }
6864 else
6865 {
6866 /* Use temporary register for the 64-bit immediate. */
6867 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6868 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6869 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
6870 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6871 }
6872
6873#else
6874# error "Port me!"
6875#endif
6876 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6877 return off;
6878}
6879
6880
6881/**
6882 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
6883 * are set in @a iGprSrc.
6884 */
6885DECL_INLINE_THROW(uint32_t)
6886iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6887 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
6888{
6889 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
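    /* Single-bit masks are rejected by the assertion above; presumably the
       dedicated iemNativeEmitTestBitInGprAndJmpToLabelIfSet() helper is meant
       to be used for those instead. */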
6890
6891 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
6892 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6893
6894 return off;
6895}
6896
6897
6898/**
6899 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
6900 * are set in @a iGprSrc.
6901 */
6902DECL_INLINE_THROW(uint32_t)
6903iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6904 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
6905{
6906 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
6907
6908 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
6909 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6910
6911 return off;
6912}
6913
6914
6915/**
6916 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6917 *
6918 * The operand size is given by @a f64Bit.
6919 */
6920DECL_FORCE_INLINE_THROW(uint32_t)
6921iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6922 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
6923{
6924 Assert(idxLabel < pReNative->cLabels);
6925
6926#ifdef RT_ARCH_AMD64
6927 /* test reg32,reg32 / test reg64,reg64 */
6928 if (f64Bit)
6929 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
6930 else if (iGprSrc >= 8)
6931 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
6932 pCodeBuf[off++] = 0x85;
6933 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
6934
6935 /* jnz idxLabel */
6936 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
6937 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6938
6939#elif defined(RT_ARCH_ARM64)
6940 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6941 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
6942 iGprSrc, f64Bit);
6943 else
6944 {
6945 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6946 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
6947 }
6948
6949#else
6950# error "Port me!"
6951#endif
6952 return off;
6953}
6954
6955
6956/**
6957 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6958 *
6959 * The operand size is given by @a f64Bit.
6960 */
6961DECL_FORCE_INLINE_THROW(uint32_t)
6962iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6963 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
6964{
6965#ifdef RT_ARCH_AMD64
6966 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
6967 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
6968#elif defined(RT_ARCH_ARM64)
6969 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
6970 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
6971#else
6972# error "Port me!"
6973#endif
6974 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6975 return off;
6976}
6977
6978
6979/* if (Gpr1 == 0) Jmp idxLabel; */
6980
6981/**
6982 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6983 *
6984 * The operand size is given by @a f64Bit.
6985 */
6986DECL_FORCE_INLINE_THROW(uint32_t)
6987iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6988 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6989{
6990 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
6991 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
6992}
6993
6994
6995/**
6996 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6997 *
6998 * The operand size is given by @a f64Bit.
6999 */
7000DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7001 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7002{
7003 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7004}
7005
7006
7007/**
7008 * Emits code that jumps to a new label if @a iGprSrc is zero.
7009 *
7010 * The operand size is given by @a f64Bit.
7011 */
7012DECL_INLINE_THROW(uint32_t)
7013iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7014 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7015{
7016 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7017 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7018}
7019
7020
7021/* if (Gpr1 != 0) Jmp idxLabel; */
7022
7023/**
7024 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7025 *
7026 * The operand size is given by @a f64Bit.
7027 */
7028DECL_FORCE_INLINE_THROW(uint32_t)
7029iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7030 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7031{
7032 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7033 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7034}
7035
7036
7037/**
7038 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7039 *
7040 * The operand size is given by @a f64Bit.
7041 */
7042DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7043 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7044{
7045 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7046}
7047
7048
7049/**
7050 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7051 *
7052 * The operand size is given by @a f64Bit.
7053 */
7054DECL_INLINE_THROW(uint32_t)
7055iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7056 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7057{
7058 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7059 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7060}
7061
7062
7063/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7064
7065/**
7066 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7067 * differs.
7068 */
7069DECL_INLINE_THROW(uint32_t)
7070iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7071 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7072{
7073 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7074 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7075 return off;
7076}
7077
7078
7079/**
7080 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differs.
7081 */
7082DECL_INLINE_THROW(uint32_t)
7083iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7084 uint8_t iGprLeft, uint8_t iGprRight,
7085 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7086{
7087 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7088 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7089}
7090
7091
7092/* if (Gpr != Imm) Jmp idxLabel; */
7093
7094/**
7095 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7096 */
7097DECL_INLINE_THROW(uint32_t)
7098iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7099 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7100{
7101 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7102 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7103 return off;
7104}
7105
7106
7107/**
7108 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7109 */
7110DECL_INLINE_THROW(uint32_t)
7111iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7112 uint8_t iGprSrc, uint64_t uImm,
7113 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7114{
7115 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7116 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7117}
7118
7119
7120/**
7121 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7122 * @a uImm.
7123 */
7124DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7125 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7126{
7127 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7128 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7129 return off;
7130}
7131
7132
7133/**
7134 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7135 * @a uImm.
7136 */
7137DECL_INLINE_THROW(uint32_t)
7138iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7139 uint8_t iGprSrc, uint32_t uImm,
7140 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7141{
7142 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7143 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7144}
7145
7146
7147/**
7148 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7149 * @a uImm.
7150 */
7151DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7152 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7153{
7154 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7155 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7156 return off;
7157}
7158
7159
7160/**
7161 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7162 * @a uImm.
7163 */
7164DECL_INLINE_THROW(uint32_t)
7165iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7166 uint8_t iGprSrc, uint16_t uImm,
7167 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7168{
7169 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7170 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7171}
7172
7173
7174/* if (Gpr == Imm) Jmp idxLabel; */
7175
7176/**
7177 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7178 */
7179DECL_INLINE_THROW(uint32_t)
7180iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7181 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7182{
7183 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7184 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7185 return off;
7186}
7187
7188
7189/**
7190 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7191 */
7192DECL_INLINE_THROW(uint32_t)
7193iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7194 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7195{
7196 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7197 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7198}
7199
7200
7201/**
7202 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7203 */
7204DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7205 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7206{
7207 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7208 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7209 return off;
7210}
7211
7212
7213/**
7214 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7215 */
7216DECL_INLINE_THROW(uint32_t)
7217iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7218 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7219{
7220 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7221 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7222}
7223
7224
7225/**
7226 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7227 *
7228 * @note ARM64: Helper register is required (idxTmpReg).
7229 */
7230DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7231 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7232 uint8_t idxTmpReg = UINT8_MAX)
7233{
7234 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7235 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7236 return off;
7237}
7238
7239
7240/**
7241 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7242 *
7243 * @note ARM64: Helper register is required (idxTmpReg).
7244 */
7245DECL_INLINE_THROW(uint32_t)
7246iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7247 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7248 uint8_t idxTmpReg = UINT8_MAX)
7249{
7250 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7251 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7252}
7253
7254
7255/*********************************************************************************************************************************
7256* Calls. *
7257*********************************************************************************************************************************/
7258
7259/**
7260 * Emits a call to a 64-bit address.
7261 */
7262DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7263{
7264#ifdef RT_ARCH_AMD64
7265 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7266
7267 /* call rax */
7268 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7269 pbCodeBuf[off++] = 0xff;
7270 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7271
7272#elif defined(RT_ARCH_ARM64)
7273 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7274
7275 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7276 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7277
7278#else
7279# error "port me"
7280#endif
7281 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7282 return off;
7283}
7284
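/*
 * Illustrative usage sketch; pfnHelper is a stand-in for any C helper with
 * the host calling convention:
 *
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 */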
7285
7286/**
7287 * Emits code to load a stack variable into an argument GPR.
7288 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7289 */
7290DECL_FORCE_INLINE_THROW(uint32_t)
7291iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7292 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7293 bool fSpilledVarsInVolatileRegs = false)
7294{
7295 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7296 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7297 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7298
7299 uint8_t const idxRegVar = pVar->idxReg;
7300 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7301 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7302 || !fSpilledVarsInVolatileRegs ))
7303 {
7304 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7305 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7307 if (!offAddend)
7308 {
7309 if (idxRegArg != idxRegVar)
7310 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7311 }
7312 else
7313 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7314 }
7315 else
7316 {
7317 uint8_t const idxStackSlot = pVar->idxStackSlot;
7318 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7319 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7320 if (offAddend)
7321 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7322 }
7323 return off;
7324}
7325
7326
7327/**
7328 * Emits code to load a stack or immediate variable value into an argument GPR,
7329 * optionally with an addend.
7330 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7331 */
7332DECL_FORCE_INLINE_THROW(uint32_t)
7333iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7334 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7335 bool fSpilledVarsInVolatileRegs = false)
7336{
7337 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7338 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7339 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7340 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7341 else
7342 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7343 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7344 return off;
7345}
7346
7347
7348/**
7349 * Emits code to load the variable address into an argument GPR.
7350 *
7351 * This only works for uninitialized and stack variables.
7352 */
7353DECL_FORCE_INLINE_THROW(uint32_t)
7354iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7355 bool fFlushShadows)
7356{
7357 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7358 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7359 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7360 || pVar->enmKind == kIemNativeVarKind_Stack,
7361 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7362 AssertStmt(!pVar->fSimdReg,
7363 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7364
7365 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7366 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7367
7368 uint8_t const idxRegVar = pVar->idxReg;
7369 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7370 {
7371 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7372 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7373 Assert(pVar->idxReg == UINT8_MAX);
7374 }
7375 Assert( pVar->idxStackSlot != UINT8_MAX
7376 && pVar->idxReg == UINT8_MAX);
7377
7378 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7379}
7380
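/*
 * Illustrative sketch of passing a stack variable by reference (assuming
 * IEMNATIVE_CALL_ARG1_GREG names an argument register of the host calling
 * convention and pfnHelper a suitable C helper):
 *
 *      off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
 *                                                idxVar, true); // fFlushShadows
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 */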
7381
7382#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7383/**
7384 * Emits code to load the variable address into an argument GPR.
7385 *
7386 * This is a special variant intended for SIMD variables only. It is only
7387 * called by the TLB miss path in the memory fetch/store code, because there
7388 * the value is passed by reference and both the register and the stack copy
7389 * are needed, depending on which path is taken (TLB hit vs. miss).
7390 */
7391DECL_FORCE_INLINE_THROW(uint32_t)
7392iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7393 bool fSyncRegWithStack = true)
7394{
7395 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7396 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7397 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7398 || pVar->enmKind == kIemNativeVarKind_Stack,
7399 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7400 AssertStmt(pVar->fSimdReg,
7401 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7402 Assert( pVar->idxStackSlot != UINT8_MAX
7403 && pVar->idxReg != UINT8_MAX);
7404
7405 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7406 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7407
7408 uint8_t const idxRegVar = pVar->idxReg;
7409 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7410 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7411
7412 if (fSyncRegWithStack)
7413 {
7414 if (pVar->cbVar == sizeof(RTUINT128U))
7415 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
7416 else
7417 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
7418 }
7419
7420 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7421}
7422
7423
7424/**
7425 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
7426 *
7427 * This is a special helper that is only called by the TLB miss path in the
7428 * memory fetch/store code, because there the value is passed by reference
7429 * and the stack copy must be synced back into the assigned host register
7430 * after a TLB miss has left the updated value on the stack.
7431 */
7432DECL_FORCE_INLINE_THROW(uint32_t)
7433iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
7434{
7435 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7436 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7437 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7438 || pVar->enmKind == kIemNativeVarKind_Stack,
7439 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7440 AssertStmt(pVar->fSimdReg,
7441 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7442 Assert( pVar->idxStackSlot != UINT8_MAX
7443 && pVar->idxReg != UINT8_MAX);
7444
7445 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7446 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7447
7448 uint8_t const idxRegVar = pVar->idxReg;
7449 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7450 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7451
7452 if (pVar->cbVar == sizeof(RTUINT128U))
7453 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
7454 else
7455 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
7456
7457 return off;
7458}
7459
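/*
 * Informal sketch of the TLB miss protocol for a by-reference SIMD variable
 * (register and helper names as assumed above):
 *
 *      // Before the helper call: sync the host register to the stack copy
 *      // and pass the address of the latter.
 *      off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off,
 *                                                                IEMNATIVE_CALL_ARG1_GREG, idxVar);
 *      // ... call the memory helper ...
 *      // Afterwards: reload the host SIMD register from the updated stack copy.
 *      off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVar);
 */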
7460
7461/**
7462 * Emits a gprdst = ~gprsrc store.
7463 */
7464DECL_FORCE_INLINE_THROW(uint32_t)
7465iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7466{
7467#ifdef RT_ARCH_AMD64
7468 if (iGprDst != iGprSrc)
7469 {
7470 /* mov gprdst, gprsrc. */
7471 if (f64Bit)
7472 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7473 else
7474 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7475 }
7476
7477 /* not gprdst */
7478 if (f64Bit || iGprDst >= 8)
7479 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7480 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7481 pCodeBuf[off++] = 0xf7;
7482 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7483#elif defined(RT_ARCH_ARM64)
7484 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7485#else
7486# error "port me"
7487#endif
7488 return off;
7489}
7490
7491
7492/**
7493 * Emits a gprdst = ~gprsrc store.
7494 */
7495DECL_INLINE_THROW(uint32_t)
7496iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7497{
7498#ifdef RT_ARCH_AMD64
7499 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7500#elif defined(RT_ARCH_ARM64)
7501 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7502#else
7503# error "port me"
7504#endif
7505 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7506 return off;
7507}
7508
7509
7510/**
7511 * Emits a 128-bit vector register store to a VCpu value.
7512 */
7513DECL_FORCE_INLINE_THROW(uint32_t)
7514iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7515{
7516#ifdef RT_ARCH_AMD64
7517 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7518 pCodeBuf[off++] = 0x66;
7519 if (iVecReg >= 8)
7520 pCodeBuf[off++] = X86_OP_REX_R;
7521 pCodeBuf[off++] = 0x0f;
7522 pCodeBuf[off++] = 0x7f;
7523 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7524#elif defined(RT_ARCH_ARM64)
7525 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7526
7527#else
7528# error "port me"
7529#endif
7530 return off;
7531}
7532
7533
7534/**
7535 * Emits a 128-bit vector register store to a VCpu value.
7536 */
7537DECL_INLINE_THROW(uint32_t)
7538iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7539{
7540#ifdef RT_ARCH_AMD64
7541 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7542#elif defined(RT_ARCH_ARM64)
7543 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7544#else
7545# error "port me"
7546#endif
7547 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7548 return off;
7549}
7550
7551
7552/**
7553 * Emits a high 128-bit vector register store to a VCpu value.
7554 */
7555DECL_FORCE_INLINE_THROW(uint32_t)
7556iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7557{
7558#ifdef RT_ARCH_AMD64
7559 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7560 pCodeBuf[off++] = X86_OP_VEX3;
7561 if (iVecReg >= 8)
7562 pCodeBuf[off++] = 0x63;
7563 else
7564 pCodeBuf[off++] = 0xe3;
7565 pCodeBuf[off++] = 0x7d;
7566 pCodeBuf[off++] = 0x39;
7567 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7568 pCodeBuf[off++] = 0x01; /* Immediate */
7569#elif defined(RT_ARCH_ARM64)
7570 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7571#else
7572# error "port me"
7573#endif
7574 return off;
7575}
7576
7577
7578/**
7579 * Emits a high 128-bit vector register store to a VCpu value.
7580 */
7581DECL_INLINE_THROW(uint32_t)
7582iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7583{
7584#ifdef RT_ARCH_AMD64
7585 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7586#elif defined(RT_ARCH_ARM64)
7587 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7588 Assert(!(iVecReg & 0x1));
7589 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7590#else
7591# error "port me"
7592#endif
7593 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7594 return off;
7595}
7596
7597
7598/**
7599 * Emits a 128-bit vector register load of a VCpu value.
7600 */
7601DECL_FORCE_INLINE_THROW(uint32_t)
7602iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7603{
7604#ifdef RT_ARCH_AMD64
7605 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
7606 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7607 if (iVecReg >= 8)
7608 pCodeBuf[off++] = X86_OP_REX_R;
7609 pCodeBuf[off++] = 0x0f;
7610 pCodeBuf[off++] = 0x6f;
7611 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7612#elif defined(RT_ARCH_ARM64)
7613 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7614
7615#else
7616# error "port me"
7617#endif
7618 return off;
7619}
7620
7621
7622/**
7623 * Emits a 128-bit vector register load of a VCpu value.
7624 */
7625DECL_INLINE_THROW(uint32_t)
7626iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7627{
7628#ifdef RT_ARCH_AMD64
7629 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7630#elif defined(RT_ARCH_ARM64)
7631 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7632#else
7633# error "port me"
7634#endif
7635 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7636 return off;
7637}
7638
7639
7640/**
7641 * Emits a 128-bit vector register load of a VCpu value.
7642 */
7643DECL_FORCE_INLINE_THROW(uint32_t)
7644iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7645{
7646#ifdef RT_ARCH_AMD64
7647 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
7648 pCodeBuf[off++] = X86_OP_VEX3;
7649 if (iVecReg >= 8)
7650 pCodeBuf[off++] = 0x63;
7651 else
7652 pCodeBuf[off++] = 0xe3;
7653 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
7654 pCodeBuf[off++] = 0x38;
7655 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7656 pCodeBuf[off++] = 0x01; /* Immediate */
7657#elif defined(RT_ARCH_ARM64)
7658 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7659#else
7660# error "port me"
7661#endif
7662 return off;
7663}
7664
7665
7666/**
7667 * Emits a high 128-bit vector register load of a VCpu value.
7668 */
7669DECL_INLINE_THROW(uint32_t)
7670iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7671{
7672#ifdef RT_ARCH_AMD64
7673 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7674#elif defined(RT_ARCH_ARM64)
7675 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7676 Assert(!(iVecReg & 0x1));
7677 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7678#else
7679# error "port me"
7680#endif
7681 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7682 return off;
7683}
7684
7685
7686/**
7687 * Emits a vecdst = vecsrc load.
7688 */
7689DECL_FORCE_INLINE(uint32_t)
7690iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7691{
7692#ifdef RT_ARCH_AMD64
7693 /* movdqu vecdst, vecsrc */
7694 pCodeBuf[off++] = 0xf3;
7695
7696 if ((iVecRegDst | iVecRegSrc) >= 8)
7697 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
7698 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
7699 : X86_OP_REX_R;
7700 pCodeBuf[off++] = 0x0f;
7701 pCodeBuf[off++] = 0x6f;
7702 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7703
7704#elif defined(RT_ARCH_ARM64)
7705 /* mov dst, src; alias for: orr dst, src, src */
7706 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
7707
7708#else
7709# error "port me"
7710#endif
7711 return off;
7712}
7713
7714
7715/**
7716 * Emits a vecdst = vecsrc load, 128-bit.
7717 */
7718DECL_INLINE_THROW(uint32_t)
7719iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7720{
7721#ifdef RT_ARCH_AMD64
7722 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
7723#elif defined(RT_ARCH_ARM64)
7724 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
7725#else
7726# error "port me"
7727#endif
7728 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7729 return off;
7730}
7731
7732
7733/**
7734 * Emits a vecdst = vecsrc load, 256-bit.
7735 */
7736DECL_INLINE_THROW(uint32_t)
7737iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7738{
7739#ifdef RT_ARCH_AMD64
7740 /* vmovdqa ymm, ymm */
7741 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7742 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
7743 {
7744 pbCodeBuf[off++] = X86_OP_VEX3;
7745 pbCodeBuf[off++] = 0x41;
7746 pbCodeBuf[off++] = 0x7d;
7747 pbCodeBuf[off++] = 0x6f;
7748 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7749 }
7750 else
7751 {
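        /* The two-byte VEX prefix can only extend the ModRM.reg field, so
           when the source register needs extending the store form (0x7f) is
           used instead, swapping the operands into encodable positions. */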
7752 pbCodeBuf[off++] = X86_OP_VEX2;
7753 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
7754 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
7755 pbCodeBuf[off++] = iVecRegSrc >= 8
7756 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
7757 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7758 }
7759#elif defined(RT_ARCH_ARM64)
7760 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7761 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
7762 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
7763 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
7764#else
7765# error "port me"
7766#endif
7767 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7768 return off;
7769}
7770
7771
7772/**
7773 * Emits a gprdst = vecsrc[x] load, 64-bit.
7774 */
7775DECL_FORCE_INLINE(uint32_t)
7776iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7777{
7778#ifdef RT_ARCH_AMD64
7779 if (iQWord >= 2)
7780 {
7781 /** @todo Currently not used. */
7782 AssertReleaseFailed();
7783 }
7784 else
7785 {
7786 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
7787 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7788 pCodeBuf[off++] = X86_OP_REX_W
7789 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7790 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7791 pCodeBuf[off++] = 0x0f;
7792 pCodeBuf[off++] = 0x3a;
7793 pCodeBuf[off++] = 0x16;
7794 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7795 pCodeBuf[off++] = iQWord;
7796 }
7797#elif defined(RT_ARCH_ARM64)
7798 /* umov gprdst, vecsrc[iQWord] */
7799 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
7800#else
7801# error "port me"
7802#endif
7803 return off;
7804}
7805
7806
7807/**
7808 * Emits a gprdst = vecsrc[x] load, 64-bit.
7809 */
7810DECL_INLINE_THROW(uint32_t)
7811iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7812{
7813 Assert(iQWord <= 3);
7814
7815#ifdef RT_ARCH_AMD64
7816 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
7817#elif defined(RT_ARCH_ARM64)
7818 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7819 Assert(!(iVecRegSrc & 0x1));
7820 /* Need to access the "high" 128-bit vector register. */
7821 if (iQWord >= 2)
7822 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
7823 else
7824 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
7825#else
7826# error "port me"
7827#endif
7828 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7829 return off;
7830}
7831
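/*
 * Illustrative: fetching the third qword (bits 191:128) of a 256-bit value.
 * On ARM64 this transparently addresses the odd register of the adjacent
 * pair; on AMD64 qwords 2 and 3 are not implemented yet (see above).
 *
 *      off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGprDst, idxVecRegSrc, 2);
 */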
7832
7833/**
7834 * Emits a gprdst = vecsrc[x] load, 32-bit.
7835 */
7836DECL_FORCE_INLINE(uint32_t)
7837iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7838{
7839#ifdef RT_ARCH_AMD64
7840 if (iDWord >= 4)
7841 {
7842 /** @todo Currently not used. */
7843 AssertReleaseFailed();
7844 }
7845 else
7846 {
7847 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
7848 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7849 if (iGprDst >= 8 || iVecRegSrc >= 8)
7850 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7851 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7852 pCodeBuf[off++] = 0x0f;
7853 pCodeBuf[off++] = 0x3a;
7854 pCodeBuf[off++] = 0x16;
7855 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7856 pCodeBuf[off++] = iDWord;
7857 }
7858#elif defined(RT_ARCH_ARM64)
7859 /* umov gprdst, vecsrc[iDWord] */
7860 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
7861#else
7862# error "port me"
7863#endif
7864 return off;
7865}
7866
7867
7868/**
7869 * Emits a gprdst = vecsrc[x] load, 32-bit.
7870 */
7871DECL_INLINE_THROW(uint32_t)
7872iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7873{
7874 Assert(iDWord <= 7);
7875
7876#ifdef RT_ARCH_AMD64
7877 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
7878#elif defined(RT_ARCH_ARM64)
7879 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7880 Assert(!(iVecRegSrc & 0x1));
7881 /* Need to access the "high" 128-bit vector register. */
7882 if (iDWord >= 4)
7883 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
7884 else
7885 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
7886#else
7887# error "port me"
7888#endif
7889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7890 return off;
7891}
7892
7893
7894/**
7895 * Emits a gprdst = vecsrc[x] load, 16-bit.
7896 */
7897DECL_FORCE_INLINE(uint32_t)
7898iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
7899{
7900#ifdef RT_ARCH_AMD64
7901 if (iWord >= 8)
7902 {
7903 /** @todo Currently not used. */
7904 AssertReleaseFailed();
7905 }
7906 else
7907 {
7908 /* pextrw gpr, vecsrc, #iWord */
7909 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7910 if (iGprDst >= 8 || iVecRegSrc >= 8)
7911 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
7912 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
7913 pCodeBuf[off++] = 0x0f;
7914 pCodeBuf[off++] = 0xc5;
7915 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
7916 pCodeBuf[off++] = iWord;
7917 }
7918#elif defined(RT_ARCH_ARM64)
7919 /* umov gprdst, vecsrc[iWord] */
7920 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
7921#else
7922# error "port me"
7923#endif
7924 return off;
7925}
7926
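/* Note that pextrw predates SSE4.1 and encodes its operands the other way
 * round from pextrb/d/q: the GPR goes in ModRM.reg and the XMM register in
 * ModRM.rm, hence the swapped REX bit selection above. Sketch for
 * iGprDst=EAX(0), iVecRegSrc=XMM1(1), iWord=1:
 *      66 0F C5 C1 01   -> pextrw eax, xmm1, 1
 */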
7927
7928/**
7929 * Emits a gprdst = vecsrc[x] load, 16-bit.
7930 */
7931DECL_INLINE_THROW(uint32_t)
7932iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
7933{
7934    Assert(iWord <= 15);
7935
7936#ifdef RT_ARCH_AMD64
7937 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
7938#elif defined(RT_ARCH_ARM64)
7939 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7940 Assert(!(iVecRegSrc & 0x1));
7941 /* Need to access the "high" 128-bit vector register. */
7942 if (iWord >= 8)
7943 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
7944 else
7945 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
7946#else
7947# error "port me"
7948#endif
7949 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7950 return off;
7951}
7952
7953
7954/**
7955 * Emits a gprdst = vecsrc[x] load, 8-bit.
7956 */
7957DECL_FORCE_INLINE(uint32_t)
7958iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
7959{
7960#ifdef RT_ARCH_AMD64
7961 if (iByte >= 16)
7962 {
7963 /** @todo Currently not used. */
7964 AssertReleaseFailed();
7965 }
7966 else
7967 {
7968 /* pextrb gpr, vecsrc, #iByte */
7969 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7970 if (iGprDst >= 8 || iVecRegSrc >= 8)
7971 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7972 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7973 pCodeBuf[off++] = 0x0f;
7974 pCodeBuf[off++] = 0x3a;
7975 pCodeBuf[off++] = 0x14;
7976 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7977 pCodeBuf[off++] = iByte;
7978 }
7979#elif defined(RT_ARCH_ARM64)
7980 /* umov gprdst, vecsrc[iByte] */
7981 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
7982#else
7983# error "port me"
7984#endif
7985 return off;
7986}
7987
7988
7989/**
7990 * Emits a gprdst = vecsrc[x] load, 8-bit.
7991 */
7992DECL_INLINE_THROW(uint32_t)
7993iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
7994{
7995    Assert(iByte <= 31);
7996
7997#ifdef RT_ARCH_AMD64
7998 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
7999#elif defined(RT_ARCH_ARM64)
8000 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8001 Assert(!(iVecRegSrc & 0x1));
8002 /* Need to access the "high" 128-bit vector register. */
8003 if (iByte >= 16)
8004 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
8005 else
8006 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
8007#else
8008# error "port me"
8009#endif
8010 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8011 return off;
8012}
8013
8014
8015/**
8016 * Emits a vecdst[x] = gprsrc store, 64-bit.
8017 */
8018DECL_FORCE_INLINE(uint32_t)
8019iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8020{
8021#ifdef RT_ARCH_AMD64
8022    /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
8023 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8024 pCodeBuf[off++] = X86_OP_REX_W
8025 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8026 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8027 pCodeBuf[off++] = 0x0f;
8028 pCodeBuf[off++] = 0x3a;
8029 pCodeBuf[off++] = 0x22;
8030 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8031 pCodeBuf[off++] = iQWord;
8032#elif defined(RT_ARCH_ARM64)
8033    /* ins vecdst.d[iQWord], gpr */
8034 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8035#else
8036# error "port me"
8037#endif
8038 return off;
8039}
8040
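/* Byte-level sketch for iVecRegDst=XMM1(1), iGprSrc=RAX(0), iQWord=1:
 *      AMD64: 66 48 0F 3A 22 C8 01   -> pinsrq xmm1, rax, 1
 *      ARM64: ins v1.d[1], x0
 */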
8041
8042/**
8043 * Emits a vecdst[x] = gprsrc store, 64-bit.
8044 */
8045DECL_INLINE_THROW(uint32_t)
8046iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8047{
8048 Assert(iQWord <= 1);
8049
8050#ifdef RT_ARCH_AMD64
8051 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
8052#elif defined(RT_ARCH_ARM64)
8053 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
8054#else
8055# error "port me"
8056#endif
8057 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8058 return off;
8059}
8060
8061
8062/**
8063 * Emits a vecdst[x] = gprsrc store, 32-bit.
8064 */
8065DECL_FORCE_INLINE(uint32_t)
8066iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8067{
8068#ifdef RT_ARCH_AMD64
8069    /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
8070 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8071 if (iVecRegDst >= 8 || iGprSrc >= 8)
8072 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8073 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8074 pCodeBuf[off++] = 0x0f;
8075 pCodeBuf[off++] = 0x3a;
8076 pCodeBuf[off++] = 0x22;
8077 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8078 pCodeBuf[off++] = iDWord;
8079#elif defined(RT_ARCH_ARM64)
8080    /* ins vecdst.s[iDWord], gpr */
8081 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
8082#else
8083# error "port me"
8084#endif
8085 return off;
8086}
8087
8088
8089/**
8090 * Emits a vecdst[x] = gprsrc store, 32-bit.
8091 */
8092DECL_INLINE_THROW(uint32_t)
8093iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8094{
8095 Assert(iDWord <= 3);
8096
8097#ifdef RT_ARCH_AMD64
8098 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord);
8099#elif defined(RT_ARCH_ARM64)
8100 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
8101#else
8102# error "port me"
8103#endif
8104 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8105 return off;
8106}
8107
8108
8109/**
8110 * Emits a vecdst.au32[iDWord] = 0 store.
8111 */
8112DECL_FORCE_INLINE(uint32_t)
8113iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8114{
8115 Assert(iDWord <= 7);
8116
8117#ifdef RT_ARCH_AMD64
8118 /*
8119 * xor tmp0, tmp0
8120     * pinsrd xmm, tmp0, iDWord
8121 */
8122 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
8123 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8124 pCodeBuf[off++] = 0x33;
8125 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
8126    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
8127#elif defined(RT_ARCH_ARM64)
8128 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8129 Assert(!(iVecReg & 0x1));
8130    /* ins vecdst.s[iDWord], wzr */
8131 if (iDWord >= 4)
8132 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
8133 else
8134 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
8135#else
8136# error "port me"
8137#endif
8138 return off;
8139}
8140
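/* Sketch of the AMD64 sequence above, assuming (hypothetically) that
 * IEMNATIVE_REG_FIXED_TMP0 is r11 and that iVecReg=XMM0(0), iDWord=2:
 *      45 33 DB                -> xor    r11d, r11d
 *      66 41 0F 3A 22 C3 02    -> pinsrd xmm0, r11d, 2
 */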
8141
8142/**
8143 * Emits a vecdst.au32[iDWord] = 0 store.
8144 */
8145DECL_INLINE_THROW(uint32_t)
8146iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8147{
8148
8149#ifdef RT_ARCH_AMD64
8150 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
8151#elif defined(RT_ARCH_ARM64)
8152 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
8153#else
8154# error "port me"
8155#endif
8156 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8157 return off;
8158}
8159
8160
8161/**
8162 * Emits a vecdst[0:127] = 0 store.
8163 */
8164DECL_FORCE_INLINE(uint32_t)
8165iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8166{
8167#ifdef RT_ARCH_AMD64
8168 /* pxor xmm, xmm */
8169 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8170 if (iVecReg >= 8)
8171 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
8172 pCodeBuf[off++] = 0x0f;
8173 pCodeBuf[off++] = 0xef;
8174 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8175#elif defined(RT_ARCH_ARM64)
8176 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8177 Assert(!(iVecReg & 0x1));
8178 /* eor vecreg, vecreg, vecreg */
8179 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8180#else
8181# error "port me"
8182#endif
8183 return off;
8184}
8185
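/* Note that the legacy-encoded pxor above writes only bits 0:127 and, on
 * AVX-capable hosts, leaves bits 128:255 of the corresponding ymm register
 * untouched - exactly the "low 128 bits only" semantics wanted here.
 * Sketch for iVecReg=XMM0(0):
 *      66 0F EF C0   -> pxor xmm0, xmm0
 */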
8186
8187/**
8188 * Emits a vecdst[0:127] = 0 store.
8189 */
8190DECL_INLINE_THROW(uint32_t)
8191iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8192{
8193#ifdef RT_ARCH_AMD64
8194 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8195#elif defined(RT_ARCH_ARM64)
8196 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8197#else
8198# error "port me"
8199#endif
8200 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8201 return off;
8202}
8203
8204
8205/**
8206 * Emits a vecdst[128:255] = 0 store.
8207 */
8208DECL_FORCE_INLINE(uint32_t)
8209iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8210{
8211#ifdef RT_ARCH_AMD64
8212 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
8213 if (iVecReg < 8)
8214 {
8215 pCodeBuf[off++] = X86_OP_VEX2;
8216 pCodeBuf[off++] = 0xf9;
8217 }
8218 else
8219 {
8220 pCodeBuf[off++] = X86_OP_VEX3;
8221 pCodeBuf[off++] = 0x41;
8222 pCodeBuf[off++] = 0x79;
8223 }
8224 pCodeBuf[off++] = 0x6f;
8225 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8226#elif defined(RT_ARCH_ARM64)
8227 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8228 Assert(!(iVecReg & 0x1));
8229 /* eor vecreg, vecreg, vecreg */
8230 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8231#else
8232# error "port me"
8233#endif
8234 return off;
8235}
8236
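/* This relies on the VEX.128 rule that a VEX-encoded 128-bit instruction
 * zeroes bits 255:128 of the destination ymm register, so moving the low
 * half onto itself clears just the high half. Sketch for iVecReg=XMM0(0):
 *      C5 F9 6F C0   -> vmovdqa xmm0, xmm0
 */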
8237
8238/**
8239 * Emits a vecdst[128:255] = 0 store.
8240 */
8241DECL_INLINE_THROW(uint32_t)
8242iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8243{
8244#ifdef RT_ARCH_AMD64
8245 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
8246#elif defined(RT_ARCH_ARM64)
8247 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8248#else
8249# error "port me"
8250#endif
8251 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8252 return off;
8253}
8254
8255
8256/**
8257 * Emits a vecdst[0:255] = 0 store.
8258 */
8259DECL_FORCE_INLINE(uint32_t)
8260iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8261{
8262#ifdef RT_ARCH_AMD64
8263 /* vpxor ymm, ymm, ymm */
8264 if (iVecReg < 8)
8265 {
8266 pCodeBuf[off++] = X86_OP_VEX2;
8267 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8268 }
8269 else
8270 {
8271 pCodeBuf[off++] = X86_OP_VEX3;
8272 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
8273 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8274 }
8275 pCodeBuf[off++] = 0xef;
8276 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8277#elif defined(RT_ARCH_ARM64)
8278 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8279 Assert(!(iVecReg & 0x1));
8280 /* eor vecreg, vecreg, vecreg */
8281 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8282 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8283#else
8284# error "port me"
8285#endif
8286 return off;
8287}
8288
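/* Sketch for iVecReg=0:
 *      AMD64: C5 FD EF C0   -> vpxor ymm0, ymm0, ymm0
 *      ARM64: eor v0.16b, v0.16b, v0.16b
 *             eor v1.16b, v1.16b, v1.16b
 */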
8289
8290/**
8291 * Emits a vecdst[0:255] = 0 store.
8292 */
8293DECL_INLINE_THROW(uint32_t)
8294iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8295{
8296#ifdef RT_ARCH_AMD64
8297 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8298#elif defined(RT_ARCH_ARM64)
8299 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
8300#else
8301# error "port me"
8302#endif
8303 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8304 return off;
8305}
8306
8307
8308/**
8309 * Emits a vecdst = gprsrc broadcast, 8-bit.
8310 */
8311DECL_FORCE_INLINE(uint32_t)
8312iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8313{
8314#ifdef RT_ARCH_AMD64
8315 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE 4.1) */
8316 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8317 if (iVecRegDst >= 8 || iGprSrc >= 8)
8318 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8319 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8320 pCodeBuf[off++] = 0x0f;
8321 pCodeBuf[off++] = 0x3a;
8322 pCodeBuf[off++] = 0x20;
8323 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8324 pCodeBuf[off++] = 0x00;
8325
8326 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
8327 pCodeBuf[off++] = X86_OP_VEX3;
8328 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8329 | 0x02 /* opcode map. */
8330 | ( iVecRegDst >= 8
8331 ? 0
8332 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8333 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8334 pCodeBuf[off++] = 0x78;
8335 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8336#elif defined(RT_ARCH_ARM64)
8337 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8338 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8339
8340 /* dup vecsrc, gpr */
8341 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
8342 if (f256Bit)
8343 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
8344#else
8345# error "port me"
8346#endif
8347 return off;
8348}
8349
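/* Byte-level sketch of the two-step AMD64 sequence above for
 * iVecRegDst=XMM0(0), iGprSrc=EAX(0), f256Bit=false:
 *      66 0F 3A 20 C0 00   -> pinsrb       xmm0, eax, 0
 *      C4 E2 79 78 C0      -> vpbroadcastb xmm0, xmm0
 */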
8350
8351/**
8352 * Emits a vecdst = gprsrc broadcast, 8-bit.
8353 */
8354DECL_INLINE_THROW(uint32_t)
8355iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8356{
8357#ifdef RT_ARCH_AMD64
8358 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8359#elif defined(RT_ARCH_ARM64)
8360 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8361#else
8362# error "port me"
8363#endif
8364 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8365 return off;
8366}
8367
8368
8369/**
8370 * Emits a vecdst = gprsrc broadcast, 16-bit.
8371 */
8372DECL_FORCE_INLINE(uint32_t)
8373iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8374{
8375#ifdef RT_ARCH_AMD64
8376 /* pinsrw vecdst, gpr, #0 */
8377 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8378 if (iVecRegDst >= 8 || iGprSrc >= 8)
8379 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8380 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8381 pCodeBuf[off++] = 0x0f;
8382 pCodeBuf[off++] = 0xc4;
8383 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8384 pCodeBuf[off++] = 0x00;
8385
8386    /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
8387 pCodeBuf[off++] = X86_OP_VEX3;
8388 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8389 | 0x02 /* opcode map. */
8390 | ( iVecRegDst >= 8
8391 ? 0
8392 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8393 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8394 pCodeBuf[off++] = 0x79;
8395 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8396#elif defined(RT_ARCH_ARM64)
8397 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8398 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8399
8400 /* dup vecsrc, gpr */
8401 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
8402 if (f256Bit)
8403 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
8404#else
8405# error "port me"
8406#endif
8407 return off;
8408}
8409
8410
8411/**
8412 * Emits a vecdst = gprsrc broadcast, 16-bit.
8413 */
8414DECL_INLINE_THROW(uint32_t)
8415iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8416{
8417#ifdef RT_ARCH_AMD64
8418 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8419#elif defined(RT_ARCH_ARM64)
8420 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8421#else
8422# error "port me"
8423#endif
8424 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8425 return off;
8426}
8427
8428
8429/**
8430 * Emits a vecdst = gprsrc broadcast, 32-bit.
8431 */
8432DECL_FORCE_INLINE(uint32_t)
8433iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8434{
8435#ifdef RT_ARCH_AMD64
8436 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
8437 * vbroadcast needs a memory operand or another xmm register to work... */
8438
8439 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
8440 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8441 if (iVecRegDst >= 8 || iGprSrc >= 8)
8442 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8443 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8444 pCodeBuf[off++] = 0x0f;
8445 pCodeBuf[off++] = 0x3a;
8446 pCodeBuf[off++] = 0x22;
8447 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8448 pCodeBuf[off++] = 0x00;
8449
8450 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
8451 pCodeBuf[off++] = X86_OP_VEX3;
8452 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8453 | 0x02 /* opcode map. */
8454 | ( iVecRegDst >= 8
8455 ? 0
8456 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8457 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8458 pCodeBuf[off++] = 0x58;
8459 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8460#elif defined(RT_ARCH_ARM64)
8461 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8462 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8463
8464 /* dup vecsrc, gpr */
8465 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
8466 if (f256Bit)
8467 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
8468#else
8469# error "port me"
8470#endif
8471 return off;
8472}
8473
8474
8475/**
8476 * Emits a vecdst = gprsrc broadcast, 32-bit.
8477 */
8478DECL_INLINE_THROW(uint32_t)
8479iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8480{
8481#ifdef RT_ARCH_AMD64
8482 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8483#elif defined(RT_ARCH_ARM64)
8484 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8485#else
8486# error "port me"
8487#endif
8488 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8489 return off;
8490}
8491
8492
8493/**
8494 * Emits a vecdst = gprsrc broadcast, 64-bit.
8495 */
8496DECL_FORCE_INLINE(uint32_t)
8497iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8498{
8499#ifdef RT_ARCH_AMD64
8500 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
8501 * vbroadcast needs a memory operand or another xmm register to work... */
8502
8503 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
8504 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8505 pCodeBuf[off++] = X86_OP_REX_W
8506 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8507 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8508 pCodeBuf[off++] = 0x0f;
8509 pCodeBuf[off++] = 0x3a;
8510 pCodeBuf[off++] = 0x22;
8511 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8512 pCodeBuf[off++] = 0x00;
8513
8514 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
8515 pCodeBuf[off++] = X86_OP_VEX3;
8516 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8517 | 0x02 /* opcode map. */
8518 | ( iVecRegDst >= 8
8519 ? 0
8520 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8521 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8522 pCodeBuf[off++] = 0x59;
8523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8524#elif defined(RT_ARCH_ARM64)
8525 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8526 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8527
8528 /* dup vecsrc, gpr */
8529 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
8530 if (f256Bit)
8531 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
8532#else
8533# error "port me"
8534#endif
8535 return off;
8536}
8537
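/* Sketch for iVecRegDst=XMM0(0), iGprSrc=RAX(0), f256Bit=true:
 *      66 48 0F 3A 22 C0 00   -> pinsrq       xmm0, rax, 0
 *      C4 E2 7D 59 C0         -> vpbroadcastq ymm0, xmm0
 */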
8538
8539/**
8540 * Emits a vecdst = gprsrc broadcast, 64-bit.
8541 */
8542DECL_INLINE_THROW(uint32_t)
8543iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8544{
8545#ifdef RT_ARCH_AMD64
8546 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
8547#elif defined(RT_ARCH_ARM64)
8548 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8549#else
8550# error "port me"
8551#endif
8552 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8553 return off;
8554}
8555
8556#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8557
8558/** @} */
8559
8560#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
8561