1/* $Id: IEMN8veRecompilerEmit.h 103973 2024-03-20 17:10:28Z vboxsync $ */
2/** @file
3 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34#include "IEMN8veRecompiler.h"
35
36
37/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
38 * @ingroup grp_iem_n8ve_re
39 * @{
40 */
41
42/**
43 * Emit a simple marker instruction to more easily tell where something starts
44 * in the disassembly.
45 */
46DECL_INLINE_THROW(uint32_t)
47iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
48{
49#ifdef RT_ARCH_AMD64
50 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
51 if (uInfo == 0)
52 {
53 /* nop */
54 pbCodeBuf[off++] = 0x90;
55 }
56 else
57 {
58 /* nop [disp32] */
59 pbCodeBuf[off++] = 0x0f;
60 pbCodeBuf[off++] = 0x1f;
61 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
62 pbCodeBuf[off++] = RT_BYTE1(uInfo);
63 pbCodeBuf[off++] = RT_BYTE2(uInfo);
64 pbCodeBuf[off++] = RT_BYTE3(uInfo);
65 pbCodeBuf[off++] = RT_BYTE4(uInfo);
66 }
67#elif defined(RT_ARCH_ARM64)
68 /* nop */
69 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
70 pu32CodeBuf[off++] = 0xd503201f;
71
72 RT_NOREF(uInfo);
73#else
74# error "port me"
75#endif
76 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
77 return off;
78}
79
80
81/**
82 * Emit a breakpoint instruction.
83 */
84DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
85{
86#ifdef RT_ARCH_AMD64
87 pCodeBuf[off++] = 0xcc;
88 RT_NOREF(uInfo); /** @todo use multibyte nop for info? */
89
90#elif defined(RT_ARCH_ARM64)
91 pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));
92
93#else
94# error "port me"
95#endif
96 return off;
97}
98
99
100/**
101 * Emit a breakpoint instruction.
102 */
103DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
104{
105#ifdef RT_ARCH_AMD64
106 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
107#elif defined(RT_ARCH_ARM64)
108 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
109#else
110# error "port me"
111#endif
112 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
113 return off;
114}
115
116
117/*********************************************************************************************************************************
118* Loads, Stores and Related Stuff. *
119*********************************************************************************************************************************/
120
121#ifdef RT_ARCH_AMD64
122/**
123 * Common bit of iemNativeEmitLoadGprByGpr and friends.
124 */
125DECL_FORCE_INLINE(uint32_t)
126iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
127{
128 if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
129 {
130 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
131 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
132 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
133 }
134 else if (offDisp == (int8_t)offDisp)
135 {
136 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
137 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
138 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
139 pbCodeBuf[off++] = (uint8_t)offDisp;
140 }
141 else
142 {
143 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
144 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
145 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
146 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
147 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
148 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
149 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
150 }
151 return off;
152}
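/* For example, with iGprReg = 0 (RAX), iGprBase = 4 (RSP) and offDisp = 8 this emits
   modrm 0x44 (mod=1, reg=0, rm=4), SIB 0x24 ([rsp], no index) and the disp8 0x08,
   i.e. the tail of 'mov rax, [rsp+8]' once the caller has emitted REX.W + 0x8b. */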
153#endif /* RT_ARCH_AMD64 */
154
155/**
156 * Emits setting a GPR to zero.
157 */
158DECL_INLINE_THROW(uint32_t)
159iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
160{
161#ifdef RT_ARCH_AMD64
162 /* xor gpr32, gpr32 */
163 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
164 if (iGpr >= 8)
165 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
166 pbCodeBuf[off++] = 0x33;
167 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
168
169#elif defined(RT_ARCH_ARM64)
170 /* mov gpr, #0x0 */
171 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
172 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;
173
174#else
175# error "port me"
176#endif
177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
178 return off;
179}
180
181
182/**
183 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
184 * buffer space.
185 *
186 * Max buffer consumption:
187 * - AMD64: 10 instruction bytes.
188 * - ARM64: 4 instruction words (16 bytes).
189 */
190DECL_FORCE_INLINE(uint32_t)
191iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
192{
193#ifdef RT_ARCH_AMD64
194 if (uImm64 == 0)
195 {
196 /* xor gpr, gpr */
197 if (iGpr >= 8)
198 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
199 pCodeBuf[off++] = 0x33;
200 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
201 }
202 else if (uImm64 <= UINT32_MAX)
203 {
204 /* mov gpr, imm32 */
205 if (iGpr >= 8)
206 pCodeBuf[off++] = X86_OP_REX_B;
207 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
208 pCodeBuf[off++] = RT_BYTE1(uImm64);
209 pCodeBuf[off++] = RT_BYTE2(uImm64);
210 pCodeBuf[off++] = RT_BYTE3(uImm64);
211 pCodeBuf[off++] = RT_BYTE4(uImm64);
212 }
213 else if (uImm64 == (uint64_t)(int32_t)uImm64)
214 {
215 /* mov gpr, sx(imm32) */
216 if (iGpr < 8)
217 pCodeBuf[off++] = X86_OP_REX_W;
218 else
219 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
220 pCodeBuf[off++] = 0xc7;
221 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
222 pCodeBuf[off++] = RT_BYTE1(uImm64);
223 pCodeBuf[off++] = RT_BYTE2(uImm64);
224 pCodeBuf[off++] = RT_BYTE3(uImm64);
225 pCodeBuf[off++] = RT_BYTE4(uImm64);
226 }
227 else
228 {
229 /* mov gpr, imm64 */
230 if (iGpr < 8)
231 pCodeBuf[off++] = X86_OP_REX_W;
232 else
233 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
234 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
235 pCodeBuf[off++] = RT_BYTE1(uImm64);
236 pCodeBuf[off++] = RT_BYTE2(uImm64);
237 pCodeBuf[off++] = RT_BYTE3(uImm64);
238 pCodeBuf[off++] = RT_BYTE4(uImm64);
239 pCodeBuf[off++] = RT_BYTE5(uImm64);
240 pCodeBuf[off++] = RT_BYTE6(uImm64);
241 pCodeBuf[off++] = RT_BYTE7(uImm64);
242 pCodeBuf[off++] = RT_BYTE8(uImm64);
243 }
244
245#elif defined(RT_ARCH_ARM64)
246 /*
247 * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
248 * supply the remaining bits using 'movk gpr, imm16, lsl #x'.
249 *
250 * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
251 * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
252 * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
253 * after the first non-zero immediate component, switching to movk for
254 * the remainder.
255 */
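/* For example, per the selection logic below: uImm64 = 0x000000010000f0f0 has two
   zero half-words, so the movz path is taken and only two instructions are emitted,
   'movz gpr, #0xf0f0' followed by 'movk gpr, #0x1, lsl #32'; an all-ones dominated
   value like 0xffffffffffff1234 takes the movn path and needs just 'movn gpr, #0xedcb'. */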
256 unsigned cZeroHalfWords = !( uImm64 & UINT16_MAX)
257 + !((uImm64 >> 16) & UINT16_MAX)
258 + !((uImm64 >> 32) & UINT16_MAX)
259 + !((uImm64 >> 48) & UINT16_MAX);
260 unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
261 : ( (uImm64 & UINT16_MAX) == UINT16_MAX)
262 + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
263 + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
264 + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
265 if (cFfffHalfWords <= cZeroHalfWords)
266 {
267 uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
268
269 /* movz gpr, imm16 */
270 uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
271 if (uImmPart || cZeroHalfWords == 4)
272 {
273 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
274 fMovBase |= RT_BIT_32(29);
275 }
276 /* mov[z/k] gpr, imm16, lsl #16 */
277 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
278 if (uImmPart)
279 {
280 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
281 fMovBase |= RT_BIT_32(29);
282 }
283 /* mov[z/k] gpr, imm16, lsl #32 */
284 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
285 if (uImmPart)
286 {
287 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
288 fMovBase |= RT_BIT_32(29);
289 }
290 /* mov[z/k] gpr, imm16, lsl #48 */
291 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
292 if (uImmPart)
293 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
294 }
295 else
296 {
297 uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
298
299 /* find the first half-word that isn't UINT16_MAX. */
300 uint32_t const iHwNotFfff = (uImm64 & UINT16_MAX) != UINT16_MAX ? 0
301 : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
302 : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
303
304 /* movn gpr, imm16, lsl #iHwNotFfff*16 */
305 uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
306 pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
307 fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
308 /* movk gpr, imm16 */
309 if (iHwNotFfff != 0)
310 {
311 uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
312 if (uImmPart != UINT32_C(0xffff))
313 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
314 }
315 /* movk gpr, imm16, lsl #16 */
316 if (iHwNotFfff != 1)
317 {
318 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
319 if (uImmPart != UINT32_C(0xffff))
320 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
321 }
322 /* movk gpr, imm16, lsl #32 */
323 if (iHwNotFfff != 2)
324 {
325 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
326 if (uImmPart != UINT32_C(0xffff))
327 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
328 }
329 /* movk gpr, imm16, lsl #48 */
330 if (iHwNotFfff != 3)
331 {
332 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
333 if (uImmPart != UINT32_C(0xffff))
334 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
335 }
336 }
337
338 /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
339 * clang 12.x does that, only to use the 'x' version for the
340 * addressing in the following ldr. */
341
342#else
343# error "port me"
344#endif
345 return off;
346}
347
348
349/**
350 * Emits loading a constant into a 64-bit GPR
351 */
352DECL_INLINE_THROW(uint32_t)
353iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
354{
355#ifdef RT_ARCH_AMD64
356 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
357#elif defined(RT_ARCH_ARM64)
358 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
359#else
360# error "port me"
361#endif
362 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
363 return off;
364}
365
366
367/**
368 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
369 * buffer space.
370 *
371 * Max buffer consumption:
372 * - AMD64: 6 instruction bytes.
373 * - ARM64: 2 instruction words (8 bytes).
374 *
375 * @note The top 32 bits will be cleared.
376 */
377DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
378{
379#ifdef RT_ARCH_AMD64
380 if (uImm32 == 0)
381 {
382 /* xor gpr, gpr */
383 if (iGpr >= 8)
384 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
385 pCodeBuf[off++] = 0x33;
386 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
387 }
388 else
389 {
390 /* mov gpr, imm32 */
391 if (iGpr >= 8)
392 pCodeBuf[off++] = X86_OP_REX_B;
393 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
394 pCodeBuf[off++] = RT_BYTE1(uImm32);
395 pCodeBuf[off++] = RT_BYTE2(uImm32);
396 pCodeBuf[off++] = RT_BYTE3(uImm32);
397 pCodeBuf[off++] = RT_BYTE4(uImm32);
398 }
399
400#elif defined(RT_ARCH_ARM64)
401 if ((uImm32 >> 16) == 0)
402 /* movz gpr, imm16 */
403 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
404 else if ((uImm32 & UINT32_C(0xffff)) == 0)
405 /* movz gpr, imm16, lsl #16 */
406 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
407 else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
408 /* movn gpr, imm16, lsl #16 */
409 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
410 else if ((uImm32 >> 16) == UINT32_C(0xffff))
411 /* movn gpr, imm16 */
412 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
413 else
414 {
415 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
416 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
417 }
418
419#else
420# error "port me"
421#endif
422 return off;
423}
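/* For example: 0x0000f0f0 becomes a single movz, 0xffff1234 a single movn (the stored
   immediate being ~0xffff1234 = 0xedcb), while a value like 0x12345678 needs the
   movz + movk pair. */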
424
425
426/**
427 * Emits loading a constant into a 32-bit GPR.
428 * @note The top 32 bits will be cleared.
429 */
430DECL_INLINE_THROW(uint32_t)
431iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
432{
433#ifdef RT_ARCH_AMD64
434 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
435#elif defined(RT_ARCH_ARM64)
436 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
437#else
438# error "port me"
439#endif
440 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
441 return off;
442}
443
444
445/**
446 * Emits loading a constant into an 8-bit GPR.
447 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
448 * only the ARM64 version does that.
449 */
450DECL_INLINE_THROW(uint32_t)
451iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
452{
453#ifdef RT_ARCH_AMD64
454 /* mov gpr, imm8 */
455 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
456 if (iGpr >= 8)
457 pbCodeBuf[off++] = X86_OP_REX_B;
458 else if (iGpr >= 4)
459 pbCodeBuf[off++] = X86_OP_REX;
460 pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
461 pbCodeBuf[off++] = RT_BYTE1(uImm8);
462
463#elif defined(RT_ARCH_ARM64)
464 /* movz gpr, imm16, lsl #0 */
465 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
466 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;
467
468#else
469# error "port me"
470#endif
471 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
472 return off;
473}
474
475
476#ifdef RT_ARCH_AMD64
477/**
478 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
479 */
480DECL_FORCE_INLINE(uint32_t)
481iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
482{
483 if (offVCpu < 128)
484 {
485 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
486 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
487 }
488 else
489 {
490 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
491 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
492 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
493 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
494 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
495 }
496 return off;
497}
498
499#elif defined(RT_ARCH_ARM64)
500
501/**
502 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
503 *
504 * @note Loads can use @a iGprReg for large offsets; stores require a temporary
505 * register (@a iGprTmp).
506 * @note DON'T try this with prefetch.
507 */
508DECL_FORCE_INLINE_THROW(uint32_t)
509iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
510 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
511{
512 /*
513 * There are a couple of ldr variants that take an immediate offset, so
514 * we try to use those if we can; otherwise we have to use a temporary
515 * register to help with the addressing.
516 */
517 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
518 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
519 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
520 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
521 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
522 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
523 else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
524 {
525 /* The offset is too large, so we must load it into a register and use
526 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
527 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
528 if (iGprTmp == UINT8_MAX)
529 iGprTmp = iGprReg;
530 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
531 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
532 }
533 else
534# ifdef IEM_WITH_THROW_CATCH
535 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
536# else
537 AssertReleaseFailedStmt(off = UINT32_MAX);
538# endif
539
540 return off;
541}
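/* For example, with cbData == 8 an aligned offVCpu below 32 KiB fits the scaled imm12
   form relative to IEMNATIVE_REG_FIXED_PVMCPU; a somewhat larger offset may still fit
   when rebased on IEMNATIVE_REG_FIXED_PCPUMCTX; anything beyond that loads offVCpu into
   a register (iGprTmp, or iGprReg itself for loads) and uses the register-index form. */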
542
543/**
544 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
545 */
546DECL_FORCE_INLINE_THROW(uint32_t)
547iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
548 uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
549{
550 /*
551 * There are a couple of ldr variants that take an immediate offset, so
552 * we try to use those if we can; otherwise we have to use a temporary
553 * register to help with the addressing.
554 */
555 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
556 {
557 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
558 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
559 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
560 }
561 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
562 {
563 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
564 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
565 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
566 }
567 else
568 {
569 /* The offset is too large, so we must load it into a register and use
570 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
571 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
572 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
573 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
574 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
575 IEMNATIVE_REG_FIXED_TMP0);
576 }
577 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
578 return off;
579}
580
581#endif /* RT_ARCH_ARM64 */
582
583
584/**
585 * Emits a 64-bit GPR load of a VCpu value.
586 */
587DECL_FORCE_INLINE_THROW(uint32_t)
588iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
589{
590#ifdef RT_ARCH_AMD64
591 /* mov reg64, mem64 */
592 if (iGpr < 8)
593 pCodeBuf[off++] = X86_OP_REX_W;
594 else
595 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
596 pCodeBuf[off++] = 0x8b;
597 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
598
599#elif defined(RT_ARCH_ARM64)
600 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
601
602#else
603# error "port me"
604#endif
605 return off;
606}
607
608
609/**
610 * Emits a 64-bit GPR load of a VCpu value.
611 */
612DECL_INLINE_THROW(uint32_t)
613iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
614{
615#ifdef RT_ARCH_AMD64
616 off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
617 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
618
619#elif defined(RT_ARCH_ARM64)
620 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
621
622#else
623# error "port me"
624#endif
625 return off;
626}
627
628
629/**
630 * Emits a 32-bit GPR load of a VCpu value.
631 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
632 */
633DECL_INLINE_THROW(uint32_t)
634iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
635{
636#ifdef RT_ARCH_AMD64
637 /* mov reg32, mem32 */
638 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
639 if (iGpr >= 8)
640 pbCodeBuf[off++] = X86_OP_REX_R;
641 pbCodeBuf[off++] = 0x8b;
642 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
644
645#elif defined(RT_ARCH_ARM64)
646 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
647
648#else
649# error "port me"
650#endif
651 return off;
652}
653
654
655/**
656 * Emits a 16-bit GPR load of a VCpu value.
657 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
658 */
659DECL_INLINE_THROW(uint32_t)
660iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
661{
662#ifdef RT_ARCH_AMD64
663 /* movzx reg32, mem16 */
664 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
665 if (iGpr >= 8)
666 pbCodeBuf[off++] = X86_OP_REX_R;
667 pbCodeBuf[off++] = 0x0f;
668 pbCodeBuf[off++] = 0xb7;
669 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
670 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
671
672#elif defined(RT_ARCH_ARM64)
673 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
674
675#else
676# error "port me"
677#endif
678 return off;
679}
680
681
682/**
683 * Emits an 8-bit GPR load of a VCpu value.
684 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
685 */
686DECL_INLINE_THROW(uint32_t)
687iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
688{
689#ifdef RT_ARCH_AMD64
690 /* movzx reg32, mem8 */
691 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
692 if (iGpr >= 8)
693 pbCodeBuf[off++] = X86_OP_REX_R;
694 pbCodeBuf[off++] = 0x0f;
695 pbCodeBuf[off++] = 0xb6;
696 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
697 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
698
699#elif defined(RT_ARCH_ARM64)
700 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
701
702#else
703# error "port me"
704#endif
705 return off;
706}
707
708
709/**
710 * Emits a store of a GPR value to a 64-bit VCpu field.
711 */
712DECL_FORCE_INLINE_THROW(uint32_t)
713iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
714 uint8_t iGprTmp = UINT8_MAX)
715{
716#ifdef RT_ARCH_AMD64
717 /* mov mem64, reg64 */
718 if (iGpr < 8)
719 pCodeBuf[off++] = X86_OP_REX_W;
720 else
721 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
722 pCodeBuf[off++] = 0x89;
723 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
724 RT_NOREF(iGprTmp);
725
726#elif defined(RT_ARCH_ARM64)
727 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
728
729#else
730# error "port me"
731#endif
732 return off;
733}
734
735
736/**
737 * Emits a store of a GPR value to a 64-bit VCpu field.
738 */
739DECL_INLINE_THROW(uint32_t)
740iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
741{
742#ifdef RT_ARCH_AMD64
743 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
744#elif defined(RT_ARCH_ARM64)
745 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
746 IEMNATIVE_REG_FIXED_TMP0);
747#else
748# error "port me"
749#endif
750 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
751 return off;
752}
753
754
755/**
756 * Emits a store of a GPR value to a 32-bit VCpu field.
757 */
758DECL_INLINE_THROW(uint32_t)
759iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
760{
761#ifdef RT_ARCH_AMD64
762 /* mov mem32, reg32 */
763 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
764 if (iGpr >= 8)
765 pbCodeBuf[off++] = X86_OP_REX_R;
766 pbCodeBuf[off++] = 0x89;
767 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
769
770#elif defined(RT_ARCH_ARM64)
771 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
772
773#else
774# error "port me"
775#endif
776 return off;
777}
778
779
780/**
781 * Emits a store of a GPR value to a 16-bit VCpu field.
782 */
783DECL_INLINE_THROW(uint32_t)
784iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
785{
786#ifdef RT_ARCH_AMD64
787 /* mov mem16, reg16 */
788 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
789 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
790 if (iGpr >= 8)
791 pbCodeBuf[off++] = X86_OP_REX_R;
792 pbCodeBuf[off++] = 0x89;
793 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
794 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
795
796#elif defined(RT_ARCH_ARM64)
797 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));
798
799#else
800# error "port me"
801#endif
802 return off;
803}
804
805
806/**
807 * Emits a store of a GPR value to an 8-bit VCpu field.
808 */
809DECL_INLINE_THROW(uint32_t)
810iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
811{
812#ifdef RT_ARCH_AMD64
813 /* mov mem8, reg8 */
814 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
815 if (iGpr >= 8)
816 pbCodeBuf[off++] = X86_OP_REX_R;
817 pbCodeBuf[off++] = 0x88;
818 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
820
821#elif defined(RT_ARCH_ARM64)
822 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
823
824#else
825# error "port me"
826#endif
827 return off;
828}
829
830
831/**
832 * Emits a store of an immediate value to a 32-bit VCpu field.
833 *
834 * @note ARM64: Will allocate temporary registers.
835 */
836DECL_FORCE_INLINE_THROW(uint32_t)
837iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
838{
839#ifdef RT_ARCH_AMD64
840 /* mov mem32, imm32 */
841 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
842 pCodeBuf[off++] = 0xc7;
843 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
844 pCodeBuf[off++] = RT_BYTE1(uImm);
845 pCodeBuf[off++] = RT_BYTE2(uImm);
846 pCodeBuf[off++] = RT_BYTE3(uImm);
847 pCodeBuf[off++] = RT_BYTE4(uImm);
848 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
849
850#elif defined(RT_ARCH_ARM64)
851 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
852 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
853 if (idxRegImm != ARMV8_A64_REG_XZR)
854 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
855
856#else
857# error "port me"
858#endif
859 return off;
860}
861
862
863
864/**
865 * Emits a store of an immediate value to a 16-bit VCpu field.
866 *
867 * @note ARM64: @a idxTmp1 is always required! Whether @a idxTmp2 is needed depends
868 * on whether the offset can be encoded as an immediate. The @a offVCpu immediate
869 * range is 0..8190 bytes from VMCPU and the same from CPUMCPU.
870 */
871DECL_FORCE_INLINE_THROW(uint32_t)
872iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
873 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
874{
875#ifdef RT_ARCH_AMD64
876 /* mov mem16, imm16 */
877 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
878 pCodeBuf[off++] = 0xc7;
879 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
880 pCodeBuf[off++] = RT_BYTE1(uImm);
881 pCodeBuf[off++] = RT_BYTE2(uImm);
882 RT_NOREF(idxTmp1, idxTmp2);
883
884#elif defined(RT_ARCH_ARM64)
885 if (idxTmp1 != UINT8_MAX)
886 {
887 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
888 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
889 sizeof(uint16_t), idxTmp2);
890 }
891 else
892# ifdef IEM_WITH_THROW_CATCH
893 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
894# else
895 AssertReleaseFailedStmt(off = UINT32_MAX);
896# endif
897
898#else
899# error "port me"
900#endif
901 return off;
902}
903
904
905/**
906 * Emits a store of an immediate value to an 8-bit VCpu field.
907 */
908DECL_INLINE_THROW(uint32_t)
909iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
910{
911#ifdef RT_ARCH_AMD64
912 /* mov mem8, imm8 */
913 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
914 pbCodeBuf[off++] = 0xc6;
915 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
916 pbCodeBuf[off++] = bImm;
917 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
918
919#elif defined(RT_ARCH_ARM64)
920 /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
921 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
922 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
923 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
924
925#else
926# error "port me"
927#endif
928 return off;
929}
930
931
932/**
933 * Emits a load of the effective address of a VCpu field into a GPR.
934 */
935DECL_INLINE_THROW(uint32_t)
936iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
937{
938#ifdef RT_ARCH_AMD64
939 /* lea gprdst, [rbx + offDisp] */
940 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
941 if (iGprDst < 8)
942 pbCodeBuf[off++] = X86_OP_REX_W;
943 else
944 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
945 pbCodeBuf[off++] = 0x8d;
946 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);
947
948#elif defined(RT_ARCH_ARM64)
949 if (offVCpu < (unsigned)_4K)
950 {
951 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
952 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
953 }
954 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
955 {
956 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
957 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
958 offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
959 }
960 else
961 {
962 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
963 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
964 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
965 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, iGprDst);
966 }
967
968#else
969# error "port me"
970#endif
971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
972 return off;
973}
974
975
976/** This is just a typesafe alternative to RT_UOFFSETOF. */
977DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
978{
979 uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
980 Assert(off < sizeof(VMCPU));
981 return off;
982}
983
984
985/** This is just a typesafe alternative to RT_UOFFSETOF. */
986DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
987{
988 uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
989 Assert(off < sizeof(VMCPU));
990 return off;
991}
992
993
994/**
995 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
996 *
997 * @note The two temp registers are not required for AMD64. ARM64 always
998 * requires the first, and the 2nd is needed if the offset cannot be
999 * encoded as an immediate.
1000 */
1001DECL_FORCE_INLINE(uint32_t)
1002iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1003{
1004#ifdef RT_ARCH_AMD64
1005 /* inc qword [pVCpu + off] */
1006 pCodeBuf[off++] = X86_OP_REX_W;
1007 pCodeBuf[off++] = 0xff;
1008 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1009 RT_NOREF(idxTmp1, idxTmp2);
1010
1011#elif defined(RT_ARCH_ARM64)
1012 /* Determine how we're to access pVCpu first. */
1013 uint32_t const cbData = sizeof(STAMCOUNTER);
1014 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
1015 {
1016 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1017 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1018 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1019 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1020 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1021 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1022 }
1023 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
1024 {
1025 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1026 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1027 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1028 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1029 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1030 }
1031 else
1032 {
1033 /* The offset is too large, so we must load it into a register and use
1034 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
1035 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1036 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1037 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1038 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1039 }
1040
1041#else
1042# error "port me"
1043#endif
1044 return off;
1045}
1046
1047
1048/**
1049 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1050 *
1051 * @note The two temp registers are not required for AMD64. ARM64 always
1052 * requires the first, and the 2nd is needed if the offset cannot be
1053 * encoded as an immediate.
1054 */
1055DECL_FORCE_INLINE(uint32_t)
1056iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1057{
1058#ifdef RT_ARCH_AMD64
1059 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
1060#elif defined(RT_ARCH_ARM64)
1061 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1062#else
1063# error "port me"
1064#endif
1065 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1066 return off;
1067}
1068
1069
1070/**
1071 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1072 *
1073 * @note The two temp registers are not required for AMD64. ARM64 always
1074 * requires the first, and the 2nd is needed if the offset cannot be
1075 * encoded as an immediate.
1076 */
1077DECL_FORCE_INLINE(uint32_t)
1078iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1079{
1080 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1081#ifdef RT_ARCH_AMD64
1082 /* inc dword [pVCpu + offVCpu] */
1083 pCodeBuf[off++] = 0xff;
1084 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1085 RT_NOREF(idxTmp1, idxTmp2);
1086
1087#elif defined(RT_ARCH_ARM64)
1088 /* Determine how we're to access pVCpu first. */
1089 uint32_t const cbData = sizeof(uint32_t);
1090 if (offVCpu < (unsigned)(_4K * cbData))
1091 {
1092 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1093 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1094 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1095 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1096 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1097 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1098 }
1099 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1100 {
1101 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1102 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1103 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1104 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1105 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1106 }
1107 else
1108 {
1109 /* The offset is too large, so we must load it into a register and use
1110 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL #2' feature
1111 of the instruction if that'll reduce the constant to 16 bits. */
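/* For example, offVCpu == 0x20000 is too large for the scaled imm12 forms above, but
   0x20000 / 4 == 0x8000 fits in a single movz, so the index register gets 0x8000 and
   the accesses below scale it back up via the 'lsl #2' register-index form. */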
1112 if (offVCpu / cbData < (unsigned)UINT16_MAX)
1113 {
1114 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
1115 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1116 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1117 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1118 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1119 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1120 }
1121 else
1122 {
1123 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1124 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1125 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1126 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1127 }
1128 }
1129
1130#else
1131# error "port me"
1132#endif
1133 return off;
1134}
1135
1136
1137/**
1138 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1139 *
1140 * @note The two temp registers are not required for AMD64. ARM64 always
1141 * requires the first, and the 2nd is needed if the offset cannot be
1142 * encoded as an immediate.
1143 */
1144DECL_FORCE_INLINE(uint32_t)
1145iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1146{
1147#ifdef RT_ARCH_AMD64
1148 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
1149#elif defined(RT_ARCH_ARM64)
1150 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1151#else
1152# error "port me"
1153#endif
1154 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1155 return off;
1156}
1157
1158
1159/**
1160 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
1161 *
1162 * @note May allocate temporary registers (not AMD64).
1163 */
1164DECL_FORCE_INLINE(uint32_t)
1165iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1166{
1167 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1168#ifdef RT_ARCH_AMD64
1169 /* or dword [pVCpu + offVCpu], imm8/32 */
1170 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1171 if (fMask < 0x80)
1172 {
1173 pCodeBuf[off++] = 0x83;
1174 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1175 pCodeBuf[off++] = (uint8_t)fMask;
1176 }
1177 else
1178 {
1179 pCodeBuf[off++] = 0x81;
1180 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1181 pCodeBuf[off++] = RT_BYTE1(fMask);
1182 pCodeBuf[off++] = RT_BYTE2(fMask);
1183 pCodeBuf[off++] = RT_BYTE3(fMask);
1184 pCodeBuf[off++] = RT_BYTE4(fMask);
1185 }
1186
1187#elif defined(RT_ARCH_ARM64)
1188 /* If the constant is unwieldy we'll need a register to hold it as well. */
1189 uint32_t uImmSizeLen, uImmRotate;
1190 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1191 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
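/* For instance, a contiguous mask like 0x00ff0000 encodes as an ARM64 logical
   immediate and needs no extra register, whereas a scattered mask like 0x00120034
   does not and is loaded into a temporary via iemNativeRegAllocTmpImm. */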
1192
1193 /* We need a temp register for holding the member value we're modifying. */
1194 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1195
1196 /* Determine how we're to access pVCpu first. */
1197 uint32_t const cbData = sizeof(uint32_t);
1198 if (offVCpu < (unsigned)(_4K * cbData))
1199 {
1200 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1201 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1202 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1203 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1204 if (idxTmpMask == UINT8_MAX)
1205 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1206 else
1207 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1208 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1209 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1210 }
1211 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1212 {
1213 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1214 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1215 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1216 if (idxTmpMask == UINT8_MAX)
1217 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1218 else
1219 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1220 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1221 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1222 }
1223 else
1224 {
1225 /* The offset is too large, so we must load it into a register and use
1226 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL #2' feature
1227 of the instruction if that'll reduce the constant to 16 bits. */
1228 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1229 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1230 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1231 if (fShifted)
1232 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1233 else
1234 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1235
1236 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1237 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1238
1239 if (idxTmpMask == UINT8_MAX)
1240 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1241 else
1242 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1243
1244 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1245 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1246 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1247 }
1248 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1249 if (idxTmpMask != UINT8_MAX)
1250 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1251
1252#else
1253# error "port me"
1254#endif
1255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1256 return off;
1257}
1258
1259
1260/**
1261 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1262 *
1263 * @note May allocate temporary registers (not AMD64).
1264 */
1265DECL_FORCE_INLINE(uint32_t)
1266iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1267{
1268 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1269#ifdef RT_ARCH_AMD64
1270 /* and dword [pVCpu + offVCpu], imm8/32 */
1271 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1272 if (fMask < 0x80)
1273 {
1274 pCodeBuf[off++] = 0x83;
1275 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1276 pCodeBuf[off++] = (uint8_t)fMask;
1277 }
1278 else
1279 {
1280 pCodeBuf[off++] = 0x81;
1281 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1282 pCodeBuf[off++] = RT_BYTE1(fMask);
1283 pCodeBuf[off++] = RT_BYTE2(fMask);
1284 pCodeBuf[off++] = RT_BYTE3(fMask);
1285 pCodeBuf[off++] = RT_BYTE4(fMask);
1286 }
1287
1288#elif defined(RT_ARCH_ARM64)
1289 /* If the constant is unwieldy we'll need a register to hold it as well. */
1290 uint32_t uImmSizeLen, uImmRotate;
1291 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1292 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1293
1294 /* We need a temp register for holding the member value we're modifying. */
1295 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1296
1297 /* Determine how we're to access pVCpu first. */
1298 uint32_t const cbData = sizeof(uint32_t);
1299 if (offVCpu < (unsigned)(_4K * cbData))
1300 {
1301 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1302 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1303 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1304 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1305 if (idxTmpMask == UINT8_MAX)
1306 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1307 else
1308 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1309 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1310 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1311 }
1312 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1313 {
1314 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1315 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1316 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1317 if (idxTmpMask == UINT8_MAX)
1318 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1319 else
1320 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1321 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1322 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1323 }
1324 else
1325 {
1326 /* The offset is too large, so we must load it into a register and use
1327 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL #2' feature
1328 of the instruction if that'll reduce the constant to 16 bits. */
1329 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1330 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1331 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1332 if (fShifted)
1333 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1334 else
1335 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1336
1337 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1338 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1339
1340 if (idxTmpMask == UINT8_MAX)
1341 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1342 else
1343 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1344
1345 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1346 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1347 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1348 }
1349 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1350 if (idxTmpMask != UINT8_MAX)
1351 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1352
1353#else
1354# error "port me"
1355#endif
1356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1357 return off;
1358}
1359
1360
1361/**
1362 * Emits a gprdst = gprsrc load.
1363 */
1364DECL_FORCE_INLINE(uint32_t)
1365iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1366{
1367#ifdef RT_ARCH_AMD64
1368 /* mov gprdst, gprsrc */
1369 if ((iGprDst | iGprSrc) >= 8)
1370 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1371 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1372 : X86_OP_REX_W | X86_OP_REX_R;
1373 else
1374 pCodeBuf[off++] = X86_OP_REX_W;
1375 pCodeBuf[off++] = 0x8b;
1376 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1377
1378#elif defined(RT_ARCH_ARM64)
1379 /* mov dst, src; alias for: orr dst, xzr, src */
1380 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1381
1382#else
1383# error "port me"
1384#endif
1385 return off;
1386}
1387
1388
1389/**
1390 * Emits a gprdst = gprsrc load.
1391 */
1392DECL_INLINE_THROW(uint32_t)
1393iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1394{
1395#ifdef RT_ARCH_AMD64
1396 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1397#elif defined(RT_ARCH_ARM64)
1398 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1399#else
1400# error "port me"
1401#endif
1402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1403 return off;
1404}
1405
1406
1407/**
1408 * Emits a gprdst = gprsrc[31:0] load.
1409 * @note Bits 63 thru 32 are cleared.
1410 */
1411DECL_FORCE_INLINE(uint32_t)
1412iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1413{
1414#ifdef RT_ARCH_AMD64
1415 /* mov gprdst, gprsrc */
1416 if ((iGprDst | iGprSrc) >= 8)
1417 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1418 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1419 : X86_OP_REX_R;
1420 pCodeBuf[off++] = 0x8b;
1421 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1422
1423#elif defined(RT_ARCH_ARM64)
1424 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1425 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1426
1427#else
1428# error "port me"
1429#endif
1430 return off;
1431}
1432
1433
1434/**
1435 * Emits a gprdst = gprsrc[31:0] load.
1436 * @note Bits 63 thru 32 are cleared.
1437 */
1438DECL_INLINE_THROW(uint32_t)
1439iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1440{
1441#ifdef RT_ARCH_AMD64
1442 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1443#elif defined(RT_ARCH_ARM64)
1444 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1445#else
1446# error "port me"
1447#endif
1448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1449 return off;
1450}
1451
1452
1453/**
1454 * Emits a gprdst = gprsrc[15:0] load.
1455 * @note Bits 63 thru 16 are cleared.
1456 */
1457DECL_INLINE_THROW(uint32_t)
1458iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1459{
1460#ifdef RT_ARCH_AMD64
1461 /* movzx Gv,Ew */
1462 if ((iGprDst | iGprSrc) >= 8)
1463 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1464 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1465 : X86_OP_REX_R;
1466 pCodeBuf[off++] = 0x0f;
1467 pCodeBuf[off++] = 0xb7;
1468 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1469
1470#elif defined(RT_ARCH_ARM64)
1471 /* and gprdst, gprsrc, #0xffff */
1472# if 1
1473 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1474 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1475# else
1476 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1477 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1478# endif
1479
1480#else
1481# error "port me"
1482#endif
1483 return off;
1484}
1485
1486
1487/**
1488 * Emits a gprdst = gprsrc[15:0] load.
1489 * @note Bits 63 thru 16 are cleared.
1490 */
1491DECL_INLINE_THROW(uint32_t)
1492iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1493{
1494#ifdef RT_ARCH_AMD64
1495 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1496#elif defined(RT_ARCH_ARM64)
1497 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1498#else
1499# error "port me"
1500#endif
1501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1502 return off;
1503}
1504
1505
1506/**
1507 * Emits a gprdst = gprsrc[7:0] load.
1508 * @note Bits 63 thru 8 are cleared.
1509 */
1510DECL_FORCE_INLINE(uint32_t)
1511iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1512{
1513#ifdef RT_ARCH_AMD64
1514 /* movzx Gv,Eb */
1515 if (iGprDst >= 8 || iGprSrc >= 8)
1516 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1517 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1518 : X86_OP_REX_R;
1519 else if (iGprSrc >= 4)
1520 pCodeBuf[off++] = X86_OP_REX;
1521 pCodeBuf[off++] = 0x0f;
1522 pCodeBuf[off++] = 0xb6;
1523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1524
1525#elif defined(RT_ARCH_ARM64)
1526 /* and gprdst, gprsrc, #0xff */
1527 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1528 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1529
1530#else
1531# error "port me"
1532#endif
1533 return off;
1534}
1535
1536
1537/**
1538 * Emits a gprdst = gprsrc[7:0] load.
1539 * @note Bits 63 thru 8 are cleared.
1540 */
1541DECL_INLINE_THROW(uint32_t)
1542iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1543{
1544#ifdef RT_ARCH_AMD64
1545 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1546#elif defined(RT_ARCH_ARM64)
1547 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1548#else
1549# error "port me"
1550#endif
1551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1552 return off;
1553}
1554
1555
1556/**
1557 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1558 * @note Bits 63 thru 8 are cleared.
1559 */
1560DECL_INLINE_THROW(uint32_t)
1561iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1562{
1563#ifdef RT_ARCH_AMD64
1564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1565
1566 /* movzx Gv,Ew */
1567 if ((iGprDst | iGprSrc) >= 8)
1568 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1569 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1570 : X86_OP_REX_R;
1571 pbCodeBuf[off++] = 0x0f;
1572 pbCodeBuf[off++] = 0xb7;
1573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1574
1575 /* shr Ev,8 */
1576 if (iGprDst >= 8)
1577 pbCodeBuf[off++] = X86_OP_REX_B;
1578 pbCodeBuf[off++] = 0xc1;
1579 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1580 pbCodeBuf[off++] = 8;
1581
1582#elif defined(RT_ARCH_ARM64)
1583 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1585 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1586
1587#else
1588# error "port me"
1589#endif
1590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1591 return off;
1592}
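/*
 * Usage sketch (illustration only, not compiled): splitting a 16-bit value
 * into its low and high bytes with the two emitters above.  The register
 * indices and the surrounding recompiler state are hypothetical; the point is
 * merely that each emitter returns the updated instruction buffer offset,
 * which the caller threads into the next emit.
 */
#if 0
static uint32_t iemNativeEmitExampleSplitU16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                             uint8_t iGprLo, uint8_t iGprHi, uint8_t iGprSrc)
{
    off = iemNativeEmitLoadGprFromGpr8(pReNative, off, iGprLo, iGprSrc);   /* iGprLo = iGprSrc[7:0],  upper bits cleared */
    off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, iGprHi, iGprSrc); /* iGprHi = iGprSrc[15:8], upper bits cleared */
    return off;
}
#endif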
1593
1594
1595/**
1596 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1597 */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1600{
1601#ifdef RT_ARCH_AMD64
1602 /* movsxd r64, r/m32 */
1603 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1604 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1605 pbCodeBuf[off++] = 0x63;
1606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1607
1608#elif defined(RT_ARCH_ARM64)
1609 /* sxtw dst, src */
1610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1611 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1612
1613#else
1614# error "port me"
1615#endif
1616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1617 return off;
1618}
1619
1620
1621/**
1622 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1623 */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1626{
1627#ifdef RT_ARCH_AMD64
1628 /* movsx r64, r/m16 */
1629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xbf;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635#elif defined(RT_ARCH_ARM64)
1636 /* sxth dst, src */
1637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1638 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1639
1640#else
1641# error "port me"
1642#endif
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644 return off;
1645}
1646
1647
1648/**
1649 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1650 */
1651DECL_INLINE_THROW(uint32_t)
1652iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1653{
1654#ifdef RT_ARCH_AMD64
1655 /* movsx r64, r/m16 */
1656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1657 if (iGprDst >= 8 || iGprSrc >= 8)
1658 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1659 pbCodeBuf[off++] = 0x0f;
1660 pbCodeBuf[off++] = 0xbf;
1661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 /* sxth dst32, src */
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1666 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1667
1668#else
1669# error "port me"
1670#endif
1671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1672 return off;
1673}
1674
1675
1676/**
1677 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1678 */
1679DECL_INLINE_THROW(uint32_t)
1680iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /* movsx r64, r/m8 */
1684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1686 pbCodeBuf[off++] = 0x0f;
1687 pbCodeBuf[off++] = 0xbe;
1688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1689
1690#elif defined(RT_ARCH_ARM64)
1691 /* sxtb dst, src */
1692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1693 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1694
1695#else
1696# error "port me"
1697#endif
1698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1699 return off;
1700}
1701
1702
1703/**
1704 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1705 * @note Bits 63 thru 32 are cleared.
1706 */
1707DECL_INLINE_THROW(uint32_t)
1708iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1709{
1710#ifdef RT_ARCH_AMD64
1711 /* movsx r32, r/m8 */
1712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1713 if (iGprDst >= 8 || iGprSrc >= 8)
1714 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1715 else if (iGprSrc >= 4)
1716 pbCodeBuf[off++] = X86_OP_REX;
1717 pbCodeBuf[off++] = 0x0f;
1718 pbCodeBuf[off++] = 0xbe;
1719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 /* sxtb dst32, src32 */
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1724 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1725
1726#else
1727# error "port me"
1728#endif
1729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1730 return off;
1731}
1732
1733
1734/**
1735 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1736 * @note Bits 63 thru 16 are cleared.
1737 */
1738DECL_INLINE_THROW(uint32_t)
1739iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1740{
1741#ifdef RT_ARCH_AMD64
1742 /* movsx r16, r/m8 */
1743 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1744 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1745 if (iGprDst >= 8 || iGprSrc >= 8)
1746 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1747 else if (iGprSrc >= 4)
1748 pbCodeBuf[off++] = X86_OP_REX;
1749 pbCodeBuf[off++] = 0x0f;
1750 pbCodeBuf[off++] = 0xbe;
1751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1752
1753 /* movzx r32, r/m16 */
1754 if (iGprDst >= 8)
1755 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1756 pbCodeBuf[off++] = 0x0f;
1757 pbCodeBuf[off++] = 0xb7;
1758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1759
1760#elif defined(RT_ARCH_ARM64)
1761 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1763 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1764 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1766
1767#else
1768# error "port me"
1769#endif
1770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1771 return off;
1772}
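/*
 * Note on the ARM64 logical immediates asserted above (illustration only):
 * with immR = 0, the small immS values used in this file simply encode a run
 * of immS + 1 consecutive one-bits starting at bit 0, so immS = 7 yields 0xff
 * and immS = 15 yields 0xffff.  The asserts cross-check this against
 * Armv8A64ConvertImmRImmS2Mask32().  The helper below is a hypothetical
 * restatement of that rule for the masks that actually appear here, not part
 * of the emitter API.
 */
#if 0
static uint32_t iemNativeExampleMaskFromImmS(unsigned uImmS) /* assumes immR = 0 and uImmS < 31 */
{
    return (UINT32_C(1) << (uImmS + 1)) - 1; /* uImmS=7 -> 0xff, uImmS=15 -> 0xffff */
}
#endif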
1773
1774
1775/**
1776 * Emits a gprdst = gprsrc + addend load.
1777 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1778 */
1779#ifdef RT_ARCH_AMD64
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1782 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1783{
1784 Assert(iAddend != 0);
1785
1786 /* lea gprdst, [gprsrc + iAddend] */
1787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1789 pbCodeBuf[off++] = 0x8d;
1790 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1792 return off;
1793}
1794
1795#elif defined(RT_ARCH_ARM64)
1796DECL_INLINE_THROW(uint32_t)
1797iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1798 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1799{
1800 if ((uint32_t)iAddend < 4096)
1801 {
1802 /* add dst, src, uimm12 */
1803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1805 }
1806 else if ((uint32_t)-iAddend < 4096)
1807 {
1808 /* sub dst, src, uimm12 */
1809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1811 }
1812 else
1813 {
1814 Assert(iGprSrc != iGprDst);
1815 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1818 }
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820 return off;
1821}
1822#else
1823# error "port me"
1824#endif
1825
1826/**
1827 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1828 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1829 */
1830#ifdef RT_ARCH_AMD64
1831DECL_INLINE_THROW(uint32_t)
1832iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1833 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1834#else
1835DECL_INLINE_THROW(uint32_t)
1836iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1837 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1838#endif
1839{
1840 if (iAddend != 0)
1841 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1842 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1843}
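/*
 * Usage sketch (illustration only, not compiled): computing dst = base + disp
 * where the displacement comes from the caller and may legitimately be zero.
 * The MaybeZero variant above then degenerates into a plain register copy
 * instead of tripping the iAddend != 0 assertion.  Register indices are
 * hypothetical.
 */
#if 0
static uint32_t iemNativeEmitExampleAddDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                            uint8_t iGprDst, uint8_t iGprBase, int32_t cbDisp)
{
    return iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, iGprDst, iGprBase, cbDisp);
}
#endif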
1844
1845
1846/**
1847 * Emits a gprdst = gprsrc32 + addend load.
1848 * @note Bits 63 thru 32 are cleared.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1852 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1853{
1854 Assert(iAddend != 0);
1855
1856#ifdef RT_ARCH_AMD64
1857 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1860 if ((iGprDst | iGprSrc) >= 8)
1861 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1862 pbCodeBuf[off++] = 0x8d;
1863 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1864
1865#elif defined(RT_ARCH_ARM64)
1866 if ((uint32_t)iAddend < 4096)
1867 {
1868 /* add dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1871 }
1872 else if ((uint32_t)-iAddend < 4096)
1873 {
1874 /* sub dst, src, uimm12 */
1875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1877 }
1878 else
1879 {
1880 Assert(iGprSrc != iGprDst);
1881 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1884 }
1885
1886#else
1887# error "port me"
1888#endif
1889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1890 return off;
1891}
1892
1893
1894/**
1895 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1896 */
1897DECL_INLINE_THROW(uint32_t)
1898iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1899 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1900{
1901 if (iAddend != 0)
1902 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1903 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1904}
1905
1906
1907/**
1908 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1909 * destination.
1910 */
1911DECL_FORCE_INLINE(uint32_t)
1912iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1913{
1914#ifdef RT_ARCH_AMD64
1915 /* mov reg16, r/m16 */
1916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1917 if (idxDst >= 8 || idxSrc >= 8)
1918 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1919 pCodeBuf[off++] = 0x8b;
1920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1921
1922#elif defined(RT_ARCH_ARM64)
1923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1924 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1925
1926#else
1927# error "Port me!"
1928#endif
1929 return off;
1930}
1931
1932
1933/**
1934 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1935 * destination.
1936 */
1937DECL_INLINE_THROW(uint32_t)
1938iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1939{
1940#ifdef RT_ARCH_AMD64
1941 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1942#elif defined(RT_ARCH_ARM64)
1943 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1944#else
1945# error "Port me!"
1946#endif
1947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1948 return off;
1949}
1950
1951
1952#ifdef RT_ARCH_AMD64
1953/**
1954 * Common bit of iemNativeEmitLoadGprByBp and friends.
1955 */
1956DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1957 PIEMRECOMPILERSTATE pReNativeAssert)
1958{
1959 if (offDisp < 128 && offDisp >= -128)
1960 {
1961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1962 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1963 }
1964 else
1965 {
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1967 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1968 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1969 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1970 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1971 }
1972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1973 return off;
1974}
1975#elif defined(RT_ARCH_ARM64)
1976/**
1977 * Common bit of iemNativeEmitLoadGprByBp and friends.
1978 */
1979DECL_FORCE_INLINE_THROW(uint32_t)
1980iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1981 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1982{
1983 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1984 {
1985 /* str w/ unsigned imm12 (scaled) */
1986 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1988 }
1989 else if (offDisp >= -256 && offDisp <= 256)
1990 {
1991 /* stur w/ signed imm9 (unscaled) */
1992 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1993 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1994 }
1995 else
1996 {
1997 /* Use temporary indexing register. */
1998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2001 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2002 }
2003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2004 return off;
2005}
2006#endif
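/*
 * Illustration only (not compiled): the addressing strategy the ARM64 helper
 * above picks for a given BP-relative displacement.  This merely restates its
 * if/else chain in isolation and is not part of the emitter API.
 */
#if 0
typedef enum EXAMPLEBPSTRATEGY
{
    kExampleBp_ScaledUImm12,  /* ldr/str with unsigned 12-bit immediate, scaled by the access size */
    kExampleBp_UnscaledImm9,  /* ldur/stur with signed 9-bit byte offset */
    kExampleBp_TmpRegIndex    /* load the offset into IEMNATIVE_REG_FIXED_TMP0 and use register indexing */
} EXAMPLEBPSTRATEGY;

static EXAMPLEBPSTRATEGY iemNativeExamplePickBpStrategy(int32_t offDisp, unsigned cbData)
{
    if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
        return kExampleBp_ScaledUImm12;
    if (offDisp >= -256 && offDisp <= 256)
        return kExampleBp_UnscaledImm9;
    return kExampleBp_TmpRegIndex;
}
#endif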
2007
2008
2009/**
2010 * Emits a 64-bit GPR load instruction with a BP relative source address.
2011 */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2014{
2015#ifdef RT_ARCH_AMD64
2016 /* mov gprdst, qword [rbp + offDisp] */
2017 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2018 if (iGprDst < 8)
2019 pbCodeBuf[off++] = X86_OP_REX_W;
2020 else
2021 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2022 pbCodeBuf[off++] = 0x8b;
2023 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2024
2025#elif defined(RT_ARCH_ARM64)
2026 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2027
2028#else
2029# error "port me"
2030#endif
2031}
2032
2033
2034/**
2035 * Emits a 32-bit GPR load instruction with a BP relative source address.
2036 * @note Bits 63 thru 32 of the GPR will be cleared.
2037 */
2038DECL_INLINE_THROW(uint32_t)
2039iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2040{
2041#ifdef RT_ARCH_AMD64
2042 /* mov gprdst, dword [rbp + offDisp] */
2043 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2044 if (iGprDst >= 8)
2045 pbCodeBuf[off++] = X86_OP_REX_R;
2046 pbCodeBuf[off++] = 0x8b;
2047 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2048
2049#elif defined(RT_ARCH_ARM64)
2050 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2051
2052#else
2053# error "port me"
2054#endif
2055}
2056
2057
2058/**
2059 * Emits a 16-bit GPR load instruction with a BP relative source address.
2060 * @note Bits 63 thru 16 of the GPR will be cleared.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2064{
2065#ifdef RT_ARCH_AMD64
2066 /* movzx gprdst, word [rbp + offDisp] */
2067 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2068 if (iGprDst >= 8)
2069 pbCodeBuf[off++] = X86_OP_REX_R;
2070 pbCodeBuf[off++] = 0x0f;
2071 pbCodeBuf[off++] = 0xb7;
2072 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2073
2074#elif defined(RT_ARCH_ARM64)
2075 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2076
2077#else
2078# error "port me"
2079#endif
2080}
2081
2082
2083/**
2084 * Emits an 8-bit GPR load instruction with a BP relative source address.
2085 * @note Bits 63 thru 8 of the GPR will be cleared.
2086 */
2087DECL_INLINE_THROW(uint32_t)
2088iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2089{
2090#ifdef RT_ARCH_AMD64
2091 /* movzx gprdst, byte [rbp + offDisp] */
2092 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2093 if (iGprDst >= 8)
2094 pbCodeBuf[off++] = X86_OP_REX_R;
2095 pbCodeBuf[off++] = 0x0f;
2096 pbCodeBuf[off++] = 0xb6;
2097 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2098
2099#elif defined(RT_ARCH_ARM64)
2100 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2101
2102#else
2103# error "port me"
2104#endif
2105}
2106
2107
2108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2109/**
2110 * Emits a 128-bit vector register load instruction with a BP relative source address.
2111 */
2112DECL_FORCE_INLINE_THROW(uint32_t)
2113iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2114{
2115#ifdef RT_ARCH_AMD64
2116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2117
2118 /* movdqu reg128, mem128 */
2119 pbCodeBuf[off++] = 0xf3;
2120 if (iVecRegDst >= 8)
2121 pbCodeBuf[off++] = X86_OP_REX_R;
2122 pbCodeBuf[off++] = 0x0f;
2123 pbCodeBuf[off++] = 0x6f;
2124 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2125#elif defined(RT_ARCH_ARM64)
2126 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2127#else
2128# error "port me"
2129#endif
2130}
2131
2132
2133/**
2134 * Emits a 256-bit vector register load instruction with a BP relative source address.
2135 */
2136DECL_FORCE_INLINE_THROW(uint32_t)
2137iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2138{
2139#ifdef RT_ARCH_AMD64
2140 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2141
2142 /* vmovdqu reg256, mem256 */
2143 pbCodeBuf[off++] = X86_OP_VEX2;
2144 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2145 pbCodeBuf[off++] = 0x6f;
2146 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2147#elif defined(RT_ARCH_ARM64)
2148 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2149 Assert(!(iVecRegDst & 0x1));
2150 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2151 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2152#else
2153# error "port me"
2154#endif
2155}
2156
2157#endif
2158
2159
2160/**
2161 * Emits a load effective address to a GPR with a BP relative source address.
2162 */
2163DECL_INLINE_THROW(uint32_t)
2164iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2165{
2166#ifdef RT_ARCH_AMD64
2167 /* lea gprdst, [rbp + offDisp] */
2168 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2169 if (iGprDst < 8)
2170 pbCodeBuf[off++] = X86_OP_REX_W;
2171 else
2172 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2173 pbCodeBuf[off++] = 0x8d;
2174 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2175
2176#elif defined(RT_ARCH_ARM64)
2177 if ((uint32_t)offDisp < (unsigned)_4K)
2178 {
2179 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2181 }
2182 else if ((uint32_t)-offDisp < (unsigned)_4K)
2183 {
2184 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2185 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2186 }
2187 else
2188 {
2189 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2190 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2191 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2192 if (offDisp >= 0)
2193 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2194 else
2195 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2196 }
2197
2198#else
2199# error "port me"
2200#endif
2201
2202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2203 return off;
2204}
2205
2206
2207/**
2208 * Emits a 64-bit GPR store with a BP relative destination address.
2209 *
2210 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2211 */
2212DECL_INLINE_THROW(uint32_t)
2213iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2214{
2215#ifdef RT_ARCH_AMD64
2216 /* mov qword [rbp + offDisp], gprdst */
2217 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2218 if (iGprSrc < 8)
2219 pbCodeBuf[off++] = X86_OP_REX_W;
2220 else
2221 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2222 pbCodeBuf[off++] = 0x89;
2223 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2224
2225#elif defined(RT_ARCH_ARM64)
2226 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2227 {
2228 /* str w/ unsigned imm12 (scaled) */
2229 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2230 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2231 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2232 }
2233 else if (offDisp >= -256 && offDisp <= 256)
2234 {
2235 /* stur w/ signed imm9 (unscaled) */
2236 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2237 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2238 }
2239 else if ((uint32_t)-offDisp < (unsigned)_4K)
2240 {
2241 /* Use temporary indexing register w/ sub uimm12. */
2242 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2243 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2244 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2245 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2246 }
2247 else
2248 {
2249 /* Use temporary indexing register. */
2250 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2251 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2252 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2253 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2254 }
2255 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2256 return off;
2257
2258#else
2259# error "Port me!"
2260#endif
2261}
2262
2263
2264/**
2265 * Emits a 64-bit immediate store with a BP relative destination address.
2266 *
2267 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2268 */
2269DECL_INLINE_THROW(uint32_t)
2270iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2271{
2272#ifdef RT_ARCH_AMD64
2273 if ((int64_t)uImm64 == (int32_t)uImm64)
2274 {
2275 /* mov qword [rbp + offDisp], imm32 - sign extended */
2276 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2277 pbCodeBuf[off++] = X86_OP_REX_W;
2278 pbCodeBuf[off++] = 0xc7;
2279 if (offDisp < 128 && offDisp >= -128)
2280 {
2281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2282 pbCodeBuf[off++] = (uint8_t)offDisp;
2283 }
2284 else
2285 {
2286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2287 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2288 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2289 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2290 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2291 }
2292 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2293 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2294 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2295 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2297 return off;
2298 }
2299#endif
2300
2301 /* Load tmp0, imm64; Store tmp to bp+disp. */
2302 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2303 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2304}
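/*
 * Illustration only (not compiled): the AMD64 shortcut above applies whenever
 * the 64-bit immediate survives a round trip through int32_t, i.e. it is
 * representable as a sign-extended 32-bit value.  For example,
 * UINT64_C(0xffffffff80000000) takes the single mov qword [rbp+disp], imm32
 * path, while UINT64_C(0x0000000100000000) does not and falls back to loading
 * IEMNATIVE_REG_FIXED_TMP0 first.  The predicate below just restates the test.
 */
#if 0
static bool iemNativeExampleFitsSignExtendedImm32(uint64_t uImm64)
{
    return (int64_t)uImm64 == (int32_t)uImm64;
}
#endif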
2305
2306
2307#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2308/**
2309 * Emits a 128-bit vector register store with a BP relative destination address.
2310 *
2311 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2312 */
2313DECL_INLINE_THROW(uint32_t)
2314iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2315{
2316#ifdef RT_ARCH_AMD64
2317 /* movdqu [rbp + offDisp], vecsrc */
2318 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2319 pbCodeBuf[off++] = 0xf3;
2320 if (iVecRegSrc >= 8)
2321 pbCodeBuf[off++] = X86_OP_REX_R;
2322 pbCodeBuf[off++] = 0x0f;
2323 pbCodeBuf[off++] = 0x7f;
2324 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2325
2326#elif defined(RT_ARCH_ARM64)
2327 if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2328 {
2329 /* str w/ unsigned imm12 (scaled) */
2330 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2331 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2332 ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2333 }
2334 else if (offDisp >= -256 && offDisp <= 256)
2335 {
2336 /* stur w/ signed imm9 (unscaled) */
2337 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2338 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2339 }
2340 else if ((uint32_t)-offDisp < (unsigned)_4K)
2341 {
2342 /* Use temporary indexing register w/ sub uimm12. */
2343 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2344 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2345 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2346 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2347 }
2348 else
2349 {
2350 /* Use temporary indexing register. */
2351 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2352 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2353 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2354 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2355 }
2356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2357 return off;
2358
2359#else
2360# error "Port me!"
2361#endif
2362}
2363
2364
2365/**
2366 * Emits a 256-bit vector register store with a BP relative destination address.
2367 *
2368 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2369 */
2370DECL_INLINE_THROW(uint32_t)
2371iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2372{
2373#ifdef RT_ARCH_AMD64
2374 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2375
2376 /* vmovdqu mem256, reg256 */
2377 pbCodeBuf[off++] = X86_OP_VEX2;
2378 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2379 pbCodeBuf[off++] = 0x7f;
2380 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2381#elif defined(RT_ARCH_ARM64)
2382 Assert(!(iVecRegSrc & 0x1));
2383 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2384 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2385#else
2386# error "Port me!"
2387#endif
2388}
2389#endif
2390
2391#if defined(RT_ARCH_ARM64)
2392
2393/**
2394 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2395 *
2396 * @note Odd and large @a offDisp values require a temporary, unless it's a
2397 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2398 * caller does not heed this.
2399 *
2400 * @note DON'T try this with prefetch.
2401 */
2402DECL_FORCE_INLINE_THROW(uint32_t)
2403iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2404 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2405{
2406 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2407 {
2408 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2409 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2410 }
2411 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2412 && iGprReg != iGprBase)
2413 || iGprTmp != UINT8_MAX)
2414 {
2415 /* The offset is too large, so we must load it into a register and use
2416 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2417 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2418 if (iGprTmp == UINT8_MAX)
2419 iGprTmp = iGprReg;
2420 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2421 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2422 }
2423 else
2424# ifdef IEM_WITH_THROW_CATCH
2425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2426# else
2427 AssertReleaseFailedStmt(off = UINT32_MAX);
2428# endif
2429 return off;
2430}
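/*
 * Illustration only (not compiled): when the Ex helper above needs the caller
 * to pass @a iGprTmp.  Displacements that fit the scaled unsigned 12-bit
 * immediate need no scratch register; anything else needs one, except for
 * loads where the destination differs from the base register, in which case
 * the destination doubles as the scratch.  This just restates the branches of
 * iemNativeEmitGprByGprLdStEx.
 */
#if 0
static bool iemNativeExampleGprByGprNeedsTmp(int32_t offDisp, unsigned cbData, bool fStore, bool fRegEqualsBase)
{
    if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
        return false; /* encodable as ldr/str [base, #imm12, scaled] */
    return fStore || fRegEqualsBase;
}
#endif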
2431
2432/**
2433 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2434 */
2435DECL_FORCE_INLINE_THROW(uint32_t)
2436iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2437 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2438{
2439 /*
2440 * There are a couple of ldr variants that take an immediate offset, so
2441 * try to use those if we can; otherwise we have to use the temporary
2442 * register to help with the addressing.
2443 */
2444 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2445 {
2446 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2447 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2448 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2449 }
2450 else
2451 {
2452 /* The offset is too large, so we must load it into a register and use
2453 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2454 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2455 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2456
2457 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2458 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2459
2460 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2461 }
2462 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2463 return off;
2464}
2465
2466# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2467/**
2468 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2469 *
2470 * @note Odd and large @a offDisp values require a temporary, unless it's a
2471 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2472 * caller does not heed this.
2473 *
2474 * @note DON'T try this with prefetch.
2475 */
2476DECL_FORCE_INLINE_THROW(uint32_t)
2477iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2478 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2479{
2480 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2481 {
2482 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2483 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2484 }
2485 else if ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2486 || iGprTmp != UINT8_MAX)
2487 {
2488 /* The offset is too large, so we must load it into a register and use
2489 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2490 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2491 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2492 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2493 }
2494 else
2495# ifdef IEM_WITH_THROW_CATCH
2496 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2497# else
2498 AssertReleaseFailedStmt(off = UINT32_MAX);
2499# endif
2500 return off;
2501}
2502# endif
2503
2504
2505/**
2506 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2507 */
2508DECL_FORCE_INLINE_THROW(uint32_t)
2509iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2510 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2511{
2512 /*
2513 * There are a couple of ldr variants that take an immediate offset, so
2514 * try to use those if we can; otherwise we have to use the temporary
2515 * register to help with the addressing.
2516 */
2517 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2518 {
2519 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2520 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2521 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2522 }
2523 else
2524 {
2525 /* The offset is too large, so we must load it into a register and use
2526 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2527 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2528 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2529
2530 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2531 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2532
2533 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2534 }
2535 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2536 return off;
2537}
2538#endif /* RT_ARCH_ARM64 */
2539
2540/**
2541 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2542 *
2543 * @note ARM64: Misaligned @a offDisp values and values not in the
2544 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2545 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2546 * does not heed this.
2547 */
2548DECL_FORCE_INLINE_THROW(uint32_t)
2549iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2550 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2551{
2552#ifdef RT_ARCH_AMD64
2553 /* mov reg64, mem64 */
2554 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2555 pCodeBuf[off++] = 0x8b;
2556 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2557 RT_NOREF(iGprTmp);
2558
2559#elif defined(RT_ARCH_ARM64)
2560 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2561 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2562
2563#else
2564# error "port me"
2565#endif
2566 return off;
2567}
2568
2569
2570/**
2571 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2572 */
2573DECL_INLINE_THROW(uint32_t)
2574iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2575{
2576#ifdef RT_ARCH_AMD64
2577 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2578 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2579
2580#elif defined(RT_ARCH_ARM64)
2581 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2582
2583#else
2584# error "port me"
2585#endif
2586 return off;
2587}
2588
2589
2590/**
2591 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2592 *
2593 * @note ARM64: Misaligned @a offDisp values and values not in the
2594 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2595 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2596 * caller does not heed this.
2597 *
2598 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2599 */
2600DECL_FORCE_INLINE_THROW(uint32_t)
2601iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2602 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2603{
2604#ifdef RT_ARCH_AMD64
2605 /* mov reg32, mem32 */
2606 if (iGprDst >= 8 || iGprBase >= 8)
2607 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2608 pCodeBuf[off++] = 0x8b;
2609 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2610 RT_NOREF(iGprTmp);
2611
2612#elif defined(RT_ARCH_ARM64)
2613 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2614 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2615
2616#else
2617# error "port me"
2618#endif
2619 return off;
2620}
2621
2622
2623/**
2624 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2625 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2626 */
2627DECL_INLINE_THROW(uint32_t)
2628iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2629{
2630#ifdef RT_ARCH_AMD64
2631 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2633
2634#elif defined(RT_ARCH_ARM64)
2635 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2636
2637#else
2638# error "port me"
2639#endif
2640 return off;
2641}
2642
2643
2644/**
2645 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2646 * sign-extending the value to 64 bits.
2647 *
2648 * @note ARM64: Misaligned @a offDisp values and values not in the
2649 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2650 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2651 * caller does not heed this.
2652 */
2653DECL_FORCE_INLINE_THROW(uint32_t)
2654iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2655 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2656{
2657#ifdef RT_ARCH_AMD64
2658 /* movsxd reg64, mem32 */
2659 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2660 pCodeBuf[off++] = 0x63;
2661 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2662 RT_NOREF(iGprTmp);
2663
2664#elif defined(RT_ARCH_ARM64)
2665 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2666 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2667
2668#else
2669# error "port me"
2670#endif
2671 return off;
2672}
2673
2674
2675/**
2676 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2677 *
2678 * @note ARM64: Misaligned @a offDisp values and values not in the
2679 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2680 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2681 * caller does not heed this.
2682 *
2683 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2684 */
2685DECL_FORCE_INLINE_THROW(uint32_t)
2686iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2687 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2688{
2689#ifdef RT_ARCH_AMD64
2690 /* movzx reg32, mem16 */
2691 if (iGprDst >= 8 || iGprBase >= 8)
2692 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2693 pCodeBuf[off++] = 0x0f;
2694 pCodeBuf[off++] = 0xb7;
2695 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2696 RT_NOREF(iGprTmp);
2697
2698#elif defined(RT_ARCH_ARM64)
2699 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2700 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2701
2702#else
2703# error "port me"
2704#endif
2705 return off;
2706}
2707
2708
2709/**
2710 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2711 * sign-extending the value to 64 bits.
2712 *
2713 * @note ARM64: Misaligned @a offDisp values and values not in the
2714 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2715 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2716 * caller does not heed this.
2717 */
2718DECL_FORCE_INLINE_THROW(uint32_t)
2719iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2720 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2721{
2722#ifdef RT_ARCH_AMD64
2723 /* movsx reg64, mem16 */
2724 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2725 pCodeBuf[off++] = 0x0f;
2726 pCodeBuf[off++] = 0xbf;
2727 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2728 RT_NOREF(iGprTmp);
2729
2730#elif defined(RT_ARCH_ARM64)
2731 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2732 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2733
2734#else
2735# error "port me"
2736#endif
2737 return off;
2738}
2739
2740
2741/**
2742 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2743 * sign-extending the value to 32 bits.
2744 *
2745 * @note ARM64: Misaligned @a offDisp values and values not in the
2746 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2747 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2748 * caller does not heed this.
2749 *
2750 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2751 */
2752DECL_FORCE_INLINE_THROW(uint32_t)
2753iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2754 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2755{
2756#ifdef RT_ARCH_AMD64
2757 /* movsx reg32, mem16 */
2758 if (iGprDst >= 8 || iGprBase >= 8)
2759 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2760 pCodeBuf[off++] = 0x0f;
2761 pCodeBuf[off++] = 0xbf;
2762 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2763 RT_NOREF(iGprTmp);
2764
2765#elif defined(RT_ARCH_ARM64)
2766 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2767 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2768
2769#else
2770# error "port me"
2771#endif
2772 return off;
2773}
2774
2775
2776/**
2777 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2778 *
2779 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2780 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2781 * same. Will assert / throw if caller does not heed this.
2782 *
2783 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2784 */
2785DECL_FORCE_INLINE_THROW(uint32_t)
2786iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2787 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2788{
2789#ifdef RT_ARCH_AMD64
2790 /* movzx reg32, mem8 */
2791 if (iGprDst >= 8 || iGprBase >= 8)
2792 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2793 pCodeBuf[off++] = 0x0f;
2794 pCodeBuf[off++] = 0xb6;
2795 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2796 RT_NOREF(iGprTmp);
2797
2798#elif defined(RT_ARCH_ARM64)
2799 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2800 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2801
2802#else
2803# error "port me"
2804#endif
2805 return off;
2806}
2807
2808
2809/**
2810 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2811 * sign-extending the value to 64 bits.
2812 *
2813 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2814 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2815 * same. Will assert / throw if caller does not heed this.
2816 */
2817DECL_FORCE_INLINE_THROW(uint32_t)
2818iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2819 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2820{
2821#ifdef RT_ARCH_AMD64
2822 /* movsx reg64, mem8 */
2823 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2824 pCodeBuf[off++] = 0x0f;
2825 pCodeBuf[off++] = 0xbe;
2826 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2827 RT_NOREF(iGprTmp);
2828
2829#elif defined(RT_ARCH_ARM64)
2830 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2831 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2832
2833#else
2834# error "port me"
2835#endif
2836 return off;
2837}
2838
2839
2840/**
2841 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2842 * sign-extending the value to 32 bits.
2843 *
2844 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2845 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2846 * same. Will assert / throw if caller does not heed this.
2847 *
2848 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2849 */
2850DECL_FORCE_INLINE_THROW(uint32_t)
2851iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2852 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2853{
2854#ifdef RT_ARCH_AMD64
2855 /* movsx reg32, mem8 */
2856 if (iGprDst >= 8 || iGprBase >= 8)
2857 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2858 pCodeBuf[off++] = 0x0f;
2859 pCodeBuf[off++] = 0xbe;
2860 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2861 RT_NOREF(iGprTmp);
2862
2863#elif defined(RT_ARCH_ARM64)
2864 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2865 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2866
2867#else
2868# error "port me"
2869#endif
2870 return off;
2871}
2872
2873
2874/**
2875 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2876 * sign-extending the value to 16 bits.
2877 *
2878 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
2879 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2880 * same. Will assert / throw if caller does not heed this.
2881 *
2882 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2883 */
2884DECL_FORCE_INLINE_THROW(uint32_t)
2885iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2886 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2887{
2888#ifdef RT_ARCH_AMD64
2889 /* movsx reg32, mem8 */
2890 if (iGprDst >= 8 || iGprBase >= 8)
2891 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2892 pCodeBuf[off++] = 0x0f;
2893 pCodeBuf[off++] = 0xbe;
2894 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2895# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2896 /* and reg32, 0xffff */
2897 if (iGprDst >= 8)
2898 pCodeBuf[off++] = X86_OP_REX_B;
2899 pCodeBuf[off++] = 0x81;
2900 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2901 pCodeBuf[off++] = 0xff;
2902 pCodeBuf[off++] = 0xff;
2903 pCodeBuf[off++] = 0;
2904 pCodeBuf[off++] = 0;
2905# else
2906 /* movzx reg32, reg16 */
2907 if (iGprDst >= 8)
2908 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2909 pCodeBuf[off++] = 0x0f;
2910 pCodeBuf[off++] = 0xb7;
2911 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2912# endif
2913 RT_NOREF(iGprTmp);
2914
2915#elif defined(RT_ARCH_ARM64)
2916 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2917 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2918 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2919 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
2920
2921#else
2922# error "port me"
2923#endif
2924 return off;
2925}
2926
2927
2928#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2929/**
2930 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2931 *
2932 * @note ARM64: Misaligned @a offDisp values and values not in the
2933 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2934 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2935 * does not heed this.
2936 */
2937DECL_FORCE_INLINE_THROW(uint32_t)
2938iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2939 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2940{
2941#ifdef RT_ARCH_AMD64
2942 /* movdqu reg128, mem128 */
2943 pCodeBuf[off++] = 0xf3;
2944 if (iVecRegDst >= 8 || iGprBase >= 8)
2945 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2946 pCodeBuf[off++] = 0x0f;
2947 pCodeBuf[off++] = 0x6f;
2948 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
2949 RT_NOREF(iGprTmp);
2950
2951#elif defined(RT_ARCH_ARM64)
2952 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
2953 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
2954
2955#else
2956# error "port me"
2957#endif
2958 return off;
2959}
2960
2961
2962/**
2963 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2964 */
2965DECL_INLINE_THROW(uint32_t)
2966iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
2967{
2968#ifdef RT_ARCH_AMD64
2969 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
2970 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2971
2972#elif defined(RT_ARCH_ARM64)
2973 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2974
2975#else
2976# error "port me"
2977#endif
2978 return off;
2979}
2980
2981
2982/**
2983 * Emits a 256-bit vector register load via a GPR base address with a displacement.
2984 *
2985 * @note ARM64: Misaligned @a offDisp values and values not in the
2986 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2987 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2988 * does not heed this.
2989 */
2990DECL_FORCE_INLINE_THROW(uint32_t)
2991iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2992 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2993{
2994#ifdef RT_ARCH_AMD64
2995 /* vmovdqu reg256, mem256 */
2996 pCodeBuf[off++] = X86_OP_VEX3;
2997 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
2998 | X86_OP_VEX3_BYTE1_X
2999 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3000 | UINT8_C(0x01);
3001 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3002 pCodeBuf[off++] = 0x6f;
3003 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3004 RT_NOREF(iGprTmp);
3005
3006#elif defined(RT_ARCH_ARM64)
3007 Assert(!(iVecRegDst & 0x1));
3008 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3009 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3010 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3011 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3012#else
3013# error "port me"
3014#endif
3015 return off;
3016}
3017
3018
3019/**
3020 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3021 */
3022DECL_INLINE_THROW(uint32_t)
3023iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3024{
3025#ifdef RT_ARCH_AMD64
3026    off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3028
3029#elif defined(RT_ARCH_ARM64)
3030 Assert(!(iVecRegDst & 0x1));
3031 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3032 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3033 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3034 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3035
3036#else
3037# error "port me"
3038#endif
3039 return off;
3040}
3041#endif
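
/*
 * Illustrative usage sketch for the 256-bit load above (not part of the emitter API).
 * On ARM64 the value is split over two consecutive 128-bit NEON registers, so the
 * destination index is assumed to be even.  The names idxVecRegYmm, idxRegBase and
 * the 0x40 displacement below are hypothetical and only show the calling pattern:
 *
 *      Assert(!(idxVecRegYmm & 1));
 *      off = iemNativeEmitLoadVecRegByGprU256(pReNative, off, idxVecRegYmm, idxRegBase, 0x40);
 *      // ... operate on the value ...
 *      off = iemNativeEmitStoreVecRegByGprU256(pReNative, off, idxVecRegYmm, idxRegBase, 0x40);
 */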
3042
3043
3044/**
3045 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3046 *
3047 * @note ARM64: Misaligned @a offDisp values and values not in the
3048 *       -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3049 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3050 * does not heed this.
3051 */
3052DECL_FORCE_INLINE_THROW(uint32_t)
3053iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3054 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3055{
3056#ifdef RT_ARCH_AMD64
3057 /* mov mem64, reg64 */
3058 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3059 pCodeBuf[off++] = 0x89;
3060 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3061 RT_NOREF(iGprTmp);
3062
3063#elif defined(RT_ARCH_ARM64)
3064 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3065 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3066
3067#else
3068# error "port me"
3069#endif
3070 return off;
3071}
3072
3073
3074/**
3075 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3076 *
3077 * @note ARM64: Misaligned @a offDisp values and values not in the
3078 *       -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3079 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3080 * does not heed this.
3081 */
3082DECL_FORCE_INLINE_THROW(uint32_t)
3083iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3084 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3085{
3086#ifdef RT_ARCH_AMD64
3087 /* mov mem32, reg32 */
3088 if (iGprSrc >= 8 || iGprBase >= 8)
3089 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3090 pCodeBuf[off++] = 0x89;
3091 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3092 RT_NOREF(iGprTmp);
3093
3094#elif defined(RT_ARCH_ARM64)
3095 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3096 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3097
3098#else
3099# error "port me"
3100#endif
3101 return off;
3102}
3103
3104
3105/**
3106 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3107 *
3108 * @note ARM64: Misaligned @a offDisp values and values not in the
3109 *       -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3110 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3111 * does not heed this.
3112 */
3113DECL_FORCE_INLINE_THROW(uint32_t)
3114iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3115 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3116{
3117#ifdef RT_ARCH_AMD64
3118 /* mov mem16, reg16 */
3119 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3120 if (iGprSrc >= 8 || iGprBase >= 8)
3121 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3122 pCodeBuf[off++] = 0x89;
3123 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3124 RT_NOREF(iGprTmp);
3125
3126#elif defined(RT_ARCH_ARM64)
3127 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3128 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3129
3130#else
3131# error "port me"
3132#endif
3133 return off;
3134}
3135
3136
3137/**
3138 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3139 *
3140 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3141 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3142 * same. Will assert / throw if caller does not heed this.
3143 */
3144DECL_FORCE_INLINE_THROW(uint32_t)
3145iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3146 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3147{
3148#ifdef RT_ARCH_AMD64
3149 /* mov mem8, reg8 */
3150 if (iGprSrc >= 8 || iGprBase >= 8)
3151 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3152 else if (iGprSrc >= 4)
3153 pCodeBuf[off++] = X86_OP_REX;
3154 pCodeBuf[off++] = 0x88;
3155 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3156 RT_NOREF(iGprTmp);
3157
3158#elif defined(RT_ARCH_ARM64)
3159 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3160 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3161
3162#else
3163# error "port me"
3164#endif
3165 return off;
3166}
3167
3168
3169/**
3170 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3171 *
3172 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0), on
3173 * AMD64 it depends on the immediate value.
3174 *
3175 * @note ARM64: Misaligned @a offDisp values and values not in the
3176 *       -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3177 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3178 * does not heed this.
3179 */
3180DECL_FORCE_INLINE_THROW(uint32_t)
3181iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3182 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3183{
3184#ifdef RT_ARCH_AMD64
3185 if ((int32_t)uImm == (int64_t)uImm)
3186 {
3187 /* mov mem64, imm32 (sign-extended) */
3188 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3189 pCodeBuf[off++] = 0xc7;
3190 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3191 pCodeBuf[off++] = RT_BYTE1(uImm);
3192 pCodeBuf[off++] = RT_BYTE2(uImm);
3193 pCodeBuf[off++] = RT_BYTE3(uImm);
3194 pCodeBuf[off++] = RT_BYTE4(uImm);
3195 }
3196 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3197 {
3198 /* require temporary register. */
3199 if (iGprImmTmp == UINT8_MAX)
3200 iGprImmTmp = iGprTmp;
3201 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3202 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3203 }
3204 else
3205# ifdef IEM_WITH_THROW_CATCH
3206 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3207# else
3208 AssertReleaseFailedStmt(off = UINT32_MAX);
3209# endif
3210
3211#elif defined(RT_ARCH_ARM64)
3212 if (uImm == 0)
3213 iGprImmTmp = ARMV8_A64_REG_XZR;
3214 else
3215 {
3216 Assert(iGprImmTmp < 31);
3217 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3218 }
3219 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3220
3221#else
3222# error "port me"
3223#endif
3224 return off;
3225}
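
/*
 * Illustrative usage sketch (not part of the emitter API): a 64-bit immediate that
 * does not fit a sign-extended imm32 needs @a iGprImmTmp on AMD64 as well as on
 * ARM64 (unless it is zero), so a caller would typically allocate a temporary
 * register first.  The names idxRegBase and idxRegTmp are hypothetical and the
 * buffer size is only a generous guess for this sketch:
 *
 *      uint8_t const   idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *      PIEMNATIVEINSTR pCodeBuf  = iemNativeInstrBufEnsure(pReNative, off, 32);
 *      off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x123456789abcdef0),
 *                                           idxRegBase, idxRegTmp, 8 /*offDisp*/);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 *      iemNativeRegFreeTmp(pReNative, idxRegTmp);
 */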
3226
3227
3228/**
3229 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3230 *
3231 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3232 *
3233 * @note ARM64: Misaligned @a offDisp values and values not in the
3234 *       -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3235 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3236 * does not heed this.
3237 */
3238DECL_FORCE_INLINE_THROW(uint32_t)
3239iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3240 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3241{
3242#ifdef RT_ARCH_AMD64
3243 /* mov mem32, imm32 */
3244 if (iGprBase >= 8)
3245 pCodeBuf[off++] = X86_OP_REX_B;
3246 pCodeBuf[off++] = 0xc7;
3247 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3248 pCodeBuf[off++] = RT_BYTE1(uImm);
3249 pCodeBuf[off++] = RT_BYTE2(uImm);
3250 pCodeBuf[off++] = RT_BYTE3(uImm);
3251 pCodeBuf[off++] = RT_BYTE4(uImm);
3252 RT_NOREF(iGprImmTmp, iGprTmp);
3253
3254#elif defined(RT_ARCH_ARM64)
3256 if (uImm == 0)
3257 iGprImmTmp = ARMV8_A64_REG_XZR;
3258 else
3259 {
3260 Assert(iGprImmTmp < 31);
3261 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3262 }
3263 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3264 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3265
3266#else
3267# error "port me"
3268#endif
3269 return off;
3270}
3271
3272
3273/**
3274 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3275 *
3276 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3277 *
3278 * @note ARM64: Misaligned @a offDisp values and values not in the
3279 *       -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3280 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3281 * does not heed this.
3282 */
3283DECL_FORCE_INLINE_THROW(uint32_t)
3284iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3285 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3286{
3287#ifdef RT_ARCH_AMD64
3288 /* mov mem16, imm16 */
3289 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3290 if (iGprBase >= 8)
3291 pCodeBuf[off++] = X86_OP_REX_B;
3292 pCodeBuf[off++] = 0xc7;
3293 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3294 pCodeBuf[off++] = RT_BYTE1(uImm);
3295 pCodeBuf[off++] = RT_BYTE2(uImm);
3296 RT_NOREF(iGprImmTmp, iGprTmp);
3297
3298#elif defined(RT_ARCH_ARM64)
3299 if (uImm == 0)
3300 iGprImmTmp = ARMV8_A64_REG_XZR;
3301 else
3302 {
3303 Assert(iGprImmTmp < 31);
3304 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3305 }
3306 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3307 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3308
3309#else
3310# error "port me"
3311#endif
3312 return off;
3313}
3314
3315
3316/**
3317 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3318 *
3319 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3320 *
3321 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3322 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3323 * same. Will assert / throw if caller does not heed this.
3324 */
3325DECL_FORCE_INLINE_THROW(uint32_t)
3326iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3327 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3328{
3329#ifdef RT_ARCH_AMD64
3330 /* mov mem8, imm8 */
3331 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3332 if (iGprBase >= 8)
3333 pCodeBuf[off++] = X86_OP_REX_B;
3334 pCodeBuf[off++] = 0xc6;
3335 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3336 pCodeBuf[off++] = uImm;
3337 RT_NOREF(iGprImmTmp, iGprTmp);
3338
3339#elif defined(RT_ARCH_ARM64)
3340 if (uImm == 0)
3341 iGprImmTmp = ARMV8_A64_REG_XZR;
3342 else
3343 {
3344 Assert(iGprImmTmp < 31);
3345 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3346 }
3347 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3348 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3349
3350#else
3351# error "port me"
3352#endif
3353 return off;
3354}
3355
3356
3357#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3358/**
3359 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3360 *
3361 * @note ARM64: Misaligned @a offDisp values and values not in the
3362 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3363 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3364 * does not heed this.
3365 */
3366DECL_FORCE_INLINE_THROW(uint32_t)
3367iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3368 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3369{
3370#ifdef RT_ARCH_AMD64
3371 /* movdqu mem128, reg128 */
3372 pCodeBuf[off++] = 0xf3;
3373 if (iVecRegDst >= 8 || iGprBase >= 8)
3374 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3375 pCodeBuf[off++] = 0x0f;
3376 pCodeBuf[off++] = 0x7f;
3377 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3378 RT_NOREF(iGprTmp);
3379
3380#elif defined(RT_ARCH_ARM64)
3381 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3382 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3383
3384#else
3385# error "port me"
3386#endif
3387 return off;
3388}
3389
3390
3391/**
3392 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3393 */
3394DECL_INLINE_THROW(uint32_t)
3395iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3396{
3397#ifdef RT_ARCH_AMD64
3398 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3400
3401#elif defined(RT_ARCH_ARM64)
3402 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3403
3404#else
3405# error "port me"
3406#endif
3407 return off;
3408}
3409
3410
3411/**
3412 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3413 *
3414 * @note ARM64: Misaligned @a offDisp values and values not in the
3415 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3416 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3417 * does not heed this.
3418 */
3419DECL_FORCE_INLINE_THROW(uint32_t)
3420iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3421 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3422{
3423#ifdef RT_ARCH_AMD64
3424 /* vmovdqu mem256, reg256 */
3425 pCodeBuf[off++] = X86_OP_VEX3;
3426 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3427 | X86_OP_VEX3_BYTE1_X
3428 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3429 | UINT8_C(0x01);
3430 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3431 pCodeBuf[off++] = 0x7f;
3432 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3433 RT_NOREF(iGprTmp);
3434
3435#elif defined(RT_ARCH_ARM64)
3436 Assert(!(iVecRegDst & 0x1));
3437 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3438 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3439 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3440 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3441#else
3442# error "port me"
3443#endif
3444 return off;
3445}
3446
3447
3448/**
3449 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3450 */
3451DECL_INLINE_THROW(uint32_t)
3452iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3453{
3454#ifdef RT_ARCH_AMD64
3455 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3457
3458#elif defined(RT_ARCH_ARM64)
3459 Assert(!(iVecRegDst & 0x1));
3460 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3461 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3462 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3463 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3464
3465#else
3466# error "port me"
3467#endif
3468 return off;
3469}
3470#endif
3471
3472
3473
3474/*********************************************************************************************************************************
3475* Subtraction and Additions *
3476*********************************************************************************************************************************/
3477
3478/**
3479 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3480 * @note The AMD64 version sets flags.
3481 */
3482DECL_INLINE_THROW(uint32_t)
3483iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3484{
3485#if defined(RT_ARCH_AMD64)
3486 /* sub Gv,Ev */
3487 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3488 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3489 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3490 pbCodeBuf[off++] = 0x2b;
3491 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3492
3493#elif defined(RT_ARCH_ARM64)
3494 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3495 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3496
3497#else
3498# error "Port me"
3499#endif
3500 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3501 return off;
3502}
3503
3504
3505/**
3506 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3507 * @note The AMD64 version sets flags.
3508 */
3509DECL_FORCE_INLINE(uint32_t)
3510iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3511{
3512#if defined(RT_ARCH_AMD64)
3513 /* sub Gv,Ev */
3514 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3515 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3516 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3517 pCodeBuf[off++] = 0x2b;
3518 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3519
3520#elif defined(RT_ARCH_ARM64)
3521 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3522
3523#else
3524# error "Port me"
3525#endif
3526 return off;
3527}
3528
3529
3530/**
3531 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3532 * @note The AMD64 version sets flags.
3533 */
3534DECL_INLINE_THROW(uint32_t)
3535iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3536{
3537#if defined(RT_ARCH_AMD64)
3538 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3539#elif defined(RT_ARCH_ARM64)
3540 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3541#else
3542# error "Port me"
3543#endif
3544 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3545 return off;
3546}
3547
3548
3549/**
3550 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3551 *
3552 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3553 *
3554 * @note Larger constants will require a temporary register. Failing to specify
3555 * one when needed will trigger fatal assertion / throw.
3556 */
3557DECL_FORCE_INLINE_THROW(uint32_t)
3558iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3559 uint8_t iGprTmp = UINT8_MAX)
3560{
3561#ifdef RT_ARCH_AMD64
3562 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3563 if (iSubtrahend == 1)
3564 {
3565 /* dec r/m64 */
3566 pCodeBuf[off++] = 0xff;
3567 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3568 }
3569 else if (iSubtrahend == -1)
3570 {
3571 /* inc r/m64 */
3572 pCodeBuf[off++] = 0xff;
3573 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3574 }
3575 else if ((int8_t)iSubtrahend == iSubtrahend)
3576 {
3577 /* sub r/m64, imm8 */
3578 pCodeBuf[off++] = 0x83;
3579 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3580 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3581 }
3582 else if ((int32_t)iSubtrahend == iSubtrahend)
3583 {
3584 /* sub r/m64, imm32 */
3585 pCodeBuf[off++] = 0x81;
3586 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3587 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3588 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3589 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3590 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3591 }
3592 else if (iGprTmp != UINT8_MAX)
3593 {
3594 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3595 /* sub r/m64, r64 */
3596 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3597 pCodeBuf[off++] = 0x29;
3598 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3599 }
3600 else
3601# ifdef IEM_WITH_THROW_CATCH
3602 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3603# else
3604 AssertReleaseFailedStmt(off = UINT32_MAX);
3605# endif
3606
3607#elif defined(RT_ARCH_ARM64)
3608 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3609 if (uAbsSubtrahend < 4096)
3610 {
3611 if (iSubtrahend >= 0)
3612 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3613 else
3614 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3615 }
3616 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3617 {
3618 if (iSubtrahend >= 0)
3619 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3620 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3621 else
3622 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3623 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3624 }
3625 else if (iGprTmp != UINT8_MAX)
3626 {
3627 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3628 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3629 }
3630 else
3631# ifdef IEM_WITH_THROW_CATCH
3632 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3633# else
3634 AssertReleaseFailedStmt(off = UINT32_MAX);
3635# endif
3636
3637#else
3638# error "Port me"
3639#endif
3640 return off;
3641}
3642
3643
3644/**
3645 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3646 *
3647 * @note Larger constants will require a temporary register. Failing to specify
3648 * one when needed will trigger fatal assertion / throw.
3649 */
3650DECL_INLINE_THROW(uint32_t)
3651iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3652 uint8_t iGprTmp = UINT8_MAX)
3653
3654{
3655#ifdef RT_ARCH_AMD64
3656 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3657#elif defined(RT_ARCH_ARM64)
3658 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3659#else
3660# error "Port me"
3661#endif
3662 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3663 return off;
3664}
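
/*
 * Illustrative usage sketch (not part of the emitter API): small subtrahends are
 * encoded directly, while larger ones need a caller supplied temporary GPR.  The
 * names idxRegCount and idxRegTmp are hypothetical:
 *
 *      off = iemNativeEmitSubGprImm(pReNative, off, idxRegCount, 1);   // dec on AMD64, sub #1 on ARM64
 *
 *      uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitSubGprImm(pReNative, off, idxRegCount, INT64_C(0x123456789), idxRegTmp);
 *      iemNativeRegFreeTmp(pReNative, idxRegTmp);
 */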
3665
3666
3667/**
3668 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3669 *
3670 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3671 *
3672 * @note ARM64: Larger constants will require a temporary register. Failing to
3673 * specify one when needed will trigger fatal assertion / throw.
3674 */
3675DECL_FORCE_INLINE_THROW(uint32_t)
3676iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3677 uint8_t iGprTmp = UINT8_MAX)
3678{
3679#ifdef RT_ARCH_AMD64
3680 if (iGprDst >= 8)
3681 pCodeBuf[off++] = X86_OP_REX_B;
3682 if (iSubtrahend == 1)
3683 {
3684 /* dec r/m32 */
3685 pCodeBuf[off++] = 0xff;
3686 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3687 }
3688 else if (iSubtrahend == -1)
3689 {
3690 /* inc r/m32 */
3691 pCodeBuf[off++] = 0xff;
3692 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3693 }
3694 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3695 {
3696 /* sub r/m32, imm8 */
3697 pCodeBuf[off++] = 0x83;
3698 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3699 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3700 }
3701 else
3702 {
3703 /* sub r/m32, imm32 */
3704 pCodeBuf[off++] = 0x81;
3705 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3706 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3707 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3708 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3709 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3710 }
3711 RT_NOREF(iGprTmp);
3712
3713#elif defined(RT_ARCH_ARM64)
3714 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3715 if (uAbsSubtrahend < 4096)
3716 {
3717 if (iSubtrahend >= 0)
3718 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3719 else
3720 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3721 }
3722 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3723 {
3724 if (iSubtrahend >= 0)
3725 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3726 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3727 else
3728 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3729 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3730 }
3731 else if (iGprTmp != UINT8_MAX)
3732 {
3733 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3734 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3735 }
3736 else
3737# ifdef IEM_WITH_THROW_CATCH
3738 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3739# else
3740 AssertReleaseFailedStmt(off = UINT32_MAX);
3741# endif
3742
3743#else
3744# error "Port me"
3745#endif
3746 return off;
3747}
3748
3749
3750/**
3751 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3752 *
3753 * @note ARM64: Larger constants will require a temporary register. Failing to
3754 * specify one when needed will trigger fatal assertion / throw.
3755 */
3756DECL_INLINE_THROW(uint32_t)
3757iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3758 uint8_t iGprTmp = UINT8_MAX)
3759
3760{
3761#ifdef RT_ARCH_AMD64
3762    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3763#elif defined(RT_ARCH_ARM64)
3764    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3765#else
3766# error "Port me"
3767#endif
3768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3769 return off;
3770}
3771
3772
3773/**
3774 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3775 *
3776 * This will optimize using DEC/INC/whatever and ARM64 will not set flags,
3777 * so not suitable as a base for conditional jumps.
3778 *
3779 * @note AMD64: Will only update the lower 16 bits of the register.
3780 * @note ARM64: Will update the entire register.
3781 * @note ARM64: Larger constants will require a temporary register. Failing to
3782 * specify one when needed will trigger fatal assertion / throw.
3783 */
3784DECL_FORCE_INLINE_THROW(uint32_t)
3785iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3786 uint8_t iGprTmp = UINT8_MAX)
3787{
3788#ifdef RT_ARCH_AMD64
3789 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3790 if (iGprDst >= 8)
3791 pCodeBuf[off++] = X86_OP_REX_B;
3792 if (iSubtrahend == 1)
3793 {
3794 /* dec r/m16 */
3795 pCodeBuf[off++] = 0xff;
3796 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3797 }
3798 else if (iSubtrahend == -1)
3799 {
3800 /* inc r/m16 */
3801 pCodeBuf[off++] = 0xff;
3802 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3803 }
3804 else if ((int8_t)iSubtrahend == iSubtrahend)
3805 {
3806 /* sub r/m16, imm8 */
3807 pCodeBuf[off++] = 0x83;
3808 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3809 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3810 }
3811 else
3812 {
3813 /* sub r/m16, imm16 */
3814 pCodeBuf[off++] = 0x81;
3815 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3816 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3817 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3818 }
3819 RT_NOREF(iGprTmp);
3820
3821#elif defined(RT_ARCH_ARM64)
3822 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3823 if (uAbsSubtrahend < 4096)
3824 {
3825 if (iSubtrahend >= 0)
3826 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3827 else
3828 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3829 }
3830 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3831 {
3832 if (iSubtrahend >= 0)
3833 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3834 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3835 else
3836 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3837 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3838 }
3839 else if (iGprTmp != UINT8_MAX)
3840 {
3841 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3842 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3843 }
3844 else
3845# ifdef IEM_WITH_THROW_CATCH
3846 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3847# else
3848 AssertReleaseFailedStmt(off = UINT32_MAX);
3849# endif
3850 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3851
3852#else
3853# error "Port me"
3854#endif
3855 return off;
3856}
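
/*
 * Illustrative note (not part of the emitter API): the AMD64/ARM64 difference
 * documented above matters to callers.  Assuming a hypothetical register idxReg
 * holding 0x0000000088881234:
 *
 *      off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxReg, 0x34);
 *
 * leaves 0x0000000088881200 in the register on AMD64 (only the low 16 bits are
 * touched) but 0x0000000000001200 on ARM64 (the result is masked to 16 bits), so
 * code that later reads bits 16 and up must not depend on them.
 */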
3857
3858
3859/**
3860 * Emits adding a 64-bit GPR to another, storing the result in the first.
3861 * @note The AMD64 version sets flags.
3862 */
3863DECL_FORCE_INLINE(uint32_t)
3864iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3865{
3866#if defined(RT_ARCH_AMD64)
3867 /* add Gv,Ev */
3868 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3869 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3870 pCodeBuf[off++] = 0x03;
3871 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3872
3873#elif defined(RT_ARCH_ARM64)
3874 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3875
3876#else
3877# error "Port me"
3878#endif
3879 return off;
3880}
3881
3882
3883/**
3884 * Emits adding a 64-bit GPR to another, storing the result in the first.
3885 * @note The AMD64 version sets flags.
3886 */
3887DECL_INLINE_THROW(uint32_t)
3888iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3889{
3890#if defined(RT_ARCH_AMD64)
3891 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3892#elif defined(RT_ARCH_ARM64)
3893 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3894#else
3895# error "Port me"
3896#endif
3897 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3898 return off;
3899}
3900
3901
3902/**
3903 * Emits adding a 32-bit GPR to another, storing the result in the first.
3904 * @note The AMD64 version sets flags.
3905 */
3906DECL_FORCE_INLINE(uint32_t)
3907iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3908{
3909#if defined(RT_ARCH_AMD64)
3910 /* add Gv,Ev */
3911 if (iGprDst >= 8 || iGprAddend >= 8)
3912 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3913 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3914 pCodeBuf[off++] = 0x03;
3915 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3916
3917#elif defined(RT_ARCH_ARM64)
3918 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3919
3920#else
3921# error "Port me"
3922#endif
3923 return off;
3924}
3925
3926
3927/**
3928 * Emits adding a 32-bit GPR to another, storing the result in the first.
3929 * @note The AMD64 version sets flags.
3930 */
3931DECL_INLINE_THROW(uint32_t)
3932iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3933{
3934#if defined(RT_ARCH_AMD64)
3935 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3936#elif defined(RT_ARCH_ARM64)
3937 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3938#else
3939# error "Port me"
3940#endif
3941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3942 return off;
3943}
3944
3945
3946/**
3947 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3948 */
3949DECL_INLINE_THROW(uint32_t)
3950iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3951{
3952#if defined(RT_ARCH_AMD64)
3953 /* add or inc */
3954 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3955 if (iImm8 != 1)
3956 {
3957 pCodeBuf[off++] = 0x83;
3958 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3959 pCodeBuf[off++] = (uint8_t)iImm8;
3960 }
3961 else
3962 {
3963 pCodeBuf[off++] = 0xff;
3964 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3965 }
3966
3967#elif defined(RT_ARCH_ARM64)
3968 if (iImm8 >= 0)
3969 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3970 else
3971 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3972
3973#else
3974# error "Port me"
3975#endif
3976 return off;
3977}
3978
3979
3980/**
3981 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3982 */
3983DECL_INLINE_THROW(uint32_t)
3984iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3985{
3986#if defined(RT_ARCH_AMD64)
3987 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3988#elif defined(RT_ARCH_ARM64)
3989 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3990#else
3991# error "Port me"
3992#endif
3993 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3994 return off;
3995}
3996
3997
3998/**
3999 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4000 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4001 */
4002DECL_FORCE_INLINE(uint32_t)
4003iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4004{
4005#if defined(RT_ARCH_AMD64)
4006 /* add or inc */
4007 if (iGprDst >= 8)
4008 pCodeBuf[off++] = X86_OP_REX_B;
4009 if (iImm8 != 1)
4010 {
4011 pCodeBuf[off++] = 0x83;
4012 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4013 pCodeBuf[off++] = (uint8_t)iImm8;
4014 }
4015 else
4016 {
4017 pCodeBuf[off++] = 0xff;
4018 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4019 }
4020
4021#elif defined(RT_ARCH_ARM64)
4022 if (iImm8 >= 0)
4023 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4024 else
4025 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4026
4027#else
4028# error "Port me"
4029#endif
4030 return off;
4031}
4032
4033
4034/**
4035 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4036 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4037 */
4038DECL_INLINE_THROW(uint32_t)
4039iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4040{
4041#if defined(RT_ARCH_AMD64)
4042 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4043#elif defined(RT_ARCH_ARM64)
4044 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4045#else
4046# error "Port me"
4047#endif
4048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4049 return off;
4050}
4051
4052
4053/**
4054 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4055 *
4056 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4057 */
4058DECL_FORCE_INLINE_THROW(uint32_t)
4059iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4060{
4061#if defined(RT_ARCH_AMD64)
4062 if ((int8_t)iAddend == iAddend)
4063 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4064
4065 if ((int32_t)iAddend == iAddend)
4066 {
4067 /* add grp, imm32 */
4068 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4069 pCodeBuf[off++] = 0x81;
4070 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4071 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4072 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4073 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4074 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4075 }
4076 else if (iGprTmp != UINT8_MAX)
4077 {
4078 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4079
4080 /* add dst, tmpreg */
4081 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4082 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4083 pCodeBuf[off++] = 0x03;
4084 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4085 }
4086 else
4087# ifdef IEM_WITH_THROW_CATCH
4088 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4089# else
4090 AssertReleaseFailedStmt(off = UINT32_MAX);
4091# endif
4092
4093#elif defined(RT_ARCH_ARM64)
4094 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4095 if (uAbsAddend < 4096)
4096 {
4097 if (iAddend >= 0)
4098 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4099 else
4100 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4101 }
4102 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4103 {
4104 if (iAddend >= 0)
4105 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4106 true /*f64Bit*/, true /*fShift12*/);
4107 else
4108 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4109 true /*f64Bit*/, true /*fShift12*/);
4110 }
4111 else if (iGprTmp != UINT8_MAX)
4112 {
4113 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4114 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4115 }
4116 else
4117# ifdef IEM_WITH_THROW_CATCH
4118 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4119# else
4120 AssertReleaseFailedStmt(off = UINT32_MAX);
4121# endif
4122
4123#else
4124# error "Port me"
4125#endif
4126 return off;
4127}
4128
4129
4130/**
4131 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4132 */
4133DECL_INLINE_THROW(uint32_t)
4134iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4135{
4136#if defined(RT_ARCH_AMD64)
4137 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4138 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4139
4140 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4141 {
4142 /* add grp, imm32 */
4143 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4144 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4145 pbCodeBuf[off++] = 0x81;
4146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4147 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4148 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4149 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4150 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4151 }
4152 else
4153 {
4154 /* Best to use a temporary register to deal with this in the simplest way: */
4155 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4156
4157 /* add dst, tmpreg */
4158 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4159 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4160 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4161 pbCodeBuf[off++] = 0x03;
4162 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4163
4164 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4165 }
4166
4167#elif defined(RT_ARCH_ARM64)
4168 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4169 {
4170 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4171 if (iAddend >= 0)
4172 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
4173 else
4174 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
4175 }
4176 else
4177 {
4178 /* Use temporary register for the immediate. */
4179 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4180
4181 /* add gprdst, gprdst, tmpreg */
4182 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4183 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
4184
4185 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4186 }
4187
4188#else
4189# error "Port me"
4190#endif
4191 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4192 return off;
4193}
4194
4195
4196/**
4197 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4198 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4199 * @note For ARM64 the iAddend value must be in the range 0x000..0xfff,
4200 * or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
4201 * the lower 12 bits always zero). The negative ranges are also allowed,
4202 *       making it behave like a subtraction. If the constant does not conform,
4203 *       the function will assert / throw.
4204 */
4205DECL_FORCE_INLINE_THROW(uint32_t)
4206iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4207{
4208#if defined(RT_ARCH_AMD64)
4209 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4210 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4211
4212 /* add grp, imm32 */
4213 if (iGprDst >= 8)
4214 pCodeBuf[off++] = X86_OP_REX_B;
4215 pCodeBuf[off++] = 0x81;
4216 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4217 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4218 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4219 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4220 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4221
4222#elif defined(RT_ARCH_ARM64)
4223 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4224 if (uAbsAddend <= 0xfff)
4225 {
4226 if (iAddend >= 0)
4227 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4228 else
4229 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4230 }
4231 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4232 {
4233 if (iAddend >= 0)
4234 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4235 false /*f64Bit*/, true /*fShift12*/);
4236 else
4237 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4238 false /*f64Bit*/, true /*fShift12*/);
4239 }
4240 else
4241# ifdef IEM_WITH_THROW_CATCH
4242 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4243# else
4244 AssertReleaseFailedStmt(off = UINT32_MAX);
4245# endif
4246
4247#else
4248# error "Port me"
4249#endif
4250 return off;
4251}
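
/*
 * Illustrative note (not part of the emitter API): on ARM64 only the ADD/SUB
 * immediate encodings are accepted here, i.e. an absolute value of at most 0xfff,
 * optionally shifted left by 12 bits.  Hypothetical examples (idxReg is made up):
 *
 *      off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxReg, 0xfff);    // ok: plain imm12
 *      off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxReg, 0x5000);   // ok: 0x5 shifted left by 12
 *      off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxReg, -0x30);    // ok: emitted as a subtraction
 *      off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxReg, 0x1001);   // asserts / throws on ARM64
 */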
4252
4253
4254/**
4255 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4256 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4257 */
4258DECL_INLINE_THROW(uint32_t)
4259iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4260{
4261#if defined(RT_ARCH_AMD64)
4262 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4263
4264#elif defined(RT_ARCH_ARM64)
4265 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4266 {
4267 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4268 if (iAddend >= 0)
4269 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
4270 else
4271 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
4272 }
4273 else
4274 {
4275 /* Use temporary register for the immediate. */
4276 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
4277
4278 /* add gprdst, gprdst, tmpreg */
4279 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4280 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4281
4282 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4283 }
4284
4285#else
4286# error "Port me"
4287#endif
4288 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4289 return off;
4290}
4291
4292
4293/**
4294 * Emits a 16-bit GPR add with a signed immediate addend.
4295 *
4296 * This will optimize using INC/DEC/whatever and ARM64 will not set flags,
4297 * so not suitable as a base for conditional jumps.
4298 *
4299 * @note AMD64: Will only update the lower 16 bits of the register.
4300 * @note ARM64: Will update the entire register.
4301 * @note ARM64: Larger constants will require a temporary register. Failing to
4302 * specify one when needed will trigger fatal assertion / throw.
4303 * @sa iemNativeEmitSubGpr16ImmEx
4304 */
4305DECL_FORCE_INLINE_THROW(uint32_t)
4306iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
4307 uint8_t iGprTmp = UINT8_MAX)
4308{
4309#ifdef RT_ARCH_AMD64
4310 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4311 if (iGprDst >= 8)
4312 pCodeBuf[off++] = X86_OP_REX_B;
4313 if (iAddend == 1)
4314 {
4315 /* inc r/m16 */
4316 pCodeBuf[off++] = 0xff;
4317 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4318 }
4319 else if (iAddend == -1)
4320 {
4321 /* dec r/m16 */
4322 pCodeBuf[off++] = 0xff;
4323 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4324 }
4325 else if ((int8_t)iAddend == iAddend)
4326 {
4327 /* add r/m16, imm8 */
4328 pCodeBuf[off++] = 0x83;
4329 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4330 pCodeBuf[off++] = (uint8_t)iAddend;
4331 }
4332 else
4333 {
4334 /* add r/m16, imm16 */
4335 pCodeBuf[off++] = 0x81;
4336 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4337 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4338 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4339 }
4340 RT_NOREF(iGprTmp);
4341
4342#elif defined(RT_ARCH_ARM64)
4343 uint32_t uAbsAddend = RT_ABS(iAddend);
4344 if (uAbsAddend < 4096)
4345 {
4346 if (iAddend >= 0)
4347 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4348 else
4349 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4350 }
4351 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4352 {
4353 if (iAddend >= 0)
4354 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4355 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4356 else
4357 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4358 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4359 }
4360 else if (iGprTmp != UINT8_MAX)
4361 {
4362 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
4363 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4364 }
4365 else
4366# ifdef IEM_WITH_THROW_CATCH
4367 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4368# else
4369 AssertReleaseFailedStmt(off = UINT32_MAX);
4370# endif
4371 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4372
4373#else
4374# error "Port me"
4375#endif
4376 return off;
4377}
4378
4379
4380
4381/**
4382 * Adds two 64-bit GPRs together, storing the result in a third register.
4383 */
4384DECL_FORCE_INLINE(uint32_t)
4385iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4386{
4387#ifdef RT_ARCH_AMD64
4388 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4389 {
4390 /** @todo consider LEA */
4391 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4392 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4393 }
4394 else
4395 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4396
4397#elif defined(RT_ARCH_ARM64)
4398 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4399
4400#else
4401# error "Port me!"
4402#endif
4403 return off;
4404}
4405
4406
4407
4408/**
4409 * Adds two 32-bit GPRs together, storing the result in a third register.
4410 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4411 */
4412DECL_FORCE_INLINE(uint32_t)
4413iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4414{
4415#ifdef RT_ARCH_AMD64
4416 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4417 {
4418 /** @todo consider LEA */
4419 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4420 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4421 }
4422 else
4423 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4424
4425#elif defined(RT_ARCH_ARM64)
4426 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4427
4428#else
4429# error "Port me!"
4430#endif
4431 return off;
4432}
4433
4434
4435/**
4436 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4437 * third register.
4438 *
4439 * @note The ARM64 version does not work for non-trivial constants if the
4440 * two registers are the same. Will assert / throw exception.
4441 */
4442DECL_FORCE_INLINE_THROW(uint32_t)
4443iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4444{
4445#ifdef RT_ARCH_AMD64
4446 /** @todo consider LEA */
4447 if ((int8_t)iImmAddend == iImmAddend)
4448 {
4449 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4450 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4451 }
4452 else
4453 {
4454 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4455 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4456 }
4457
4458#elif defined(RT_ARCH_ARM64)
4459 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4460 if (uAbsImmAddend < 4096)
4461 {
4462 if (iImmAddend >= 0)
4463 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4464 else
4465 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4466 }
4467 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4468 {
4469 if (iImmAddend >= 0)
4470            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4471        else
4472            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4473 }
4474 else if (iGprDst != iGprAddend)
4475 {
4476 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4477 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4478 }
4479 else
4480# ifdef IEM_WITH_THROW_CATCH
4481 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4482# else
4483 AssertReleaseFailedStmt(off = UINT32_MAX);
4484# endif
4485
4486#else
4487# error "Port me!"
4488#endif
4489 return off;
4490}
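
/*
 * Illustrative note (not part of the emitter API): a constant that is not an
 * ADD/SUB immediate is first materialized in @a iGprDst, so in that case the
 * destination must differ from @a iGprAddend (the ARM64 path asserts / throws
 * otherwise).  Hypothetical usage with made-up register names:
 *
 *      off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegDst, idxRegSrc, 0x12345678); // needs idxRegDst != idxRegSrc
 *      off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegDst, idxRegDst, 0x10);       // fine, single add #imm12
 */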
4491
4492
4493/**
4494 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4495 * third register.
4496 *
4497 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4498 *
4499 * @note The ARM64 version does not work for non-trivial constants if the
4500 * two registers are the same. Will assert / throw exception.
4501 */
4502DECL_FORCE_INLINE_THROW(uint32_t)
4503iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4504{
4505#ifdef RT_ARCH_AMD64
4506 /** @todo consider LEA */
4507 if ((int8_t)iImmAddend == iImmAddend)
4508 {
4509 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4510 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4511 }
4512 else
4513 {
4514 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4515        off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4516 }
4517
4518#elif defined(RT_ARCH_ARM64)
4519 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4520 if (uAbsImmAddend < 4096)
4521 {
4522 if (iImmAddend >= 0)
4523 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4524 else
4525 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4526 }
4527 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4528 {
4529 if (iImmAddend >= 0)
4530            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4531        else
4532            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4533 }
4534 else if (iGprDst != iGprAddend)
4535 {
4536 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4537 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4538 }
4539 else
4540# ifdef IEM_WITH_THROW_CATCH
4541 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4542# else
4543 AssertReleaseFailedStmt(off = UINT32_MAX);
4544# endif
4545
4546#else
4547# error "Port me!"
4548#endif
4549 return off;
4550}
4551
4552
4553/*********************************************************************************************************************************
4554* Unary Operations *
4555*********************************************************************************************************************************/
4556
4557/**
4558 * Emits code for two's complement negation of a 64-bit GPR.
4559 */
4560DECL_FORCE_INLINE_THROW(uint32_t)
4561iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4562{
4563#if defined(RT_ARCH_AMD64)
4564 /* neg Ev */
4565 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4566 pCodeBuf[off++] = 0xf7;
4567 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4568
4569#elif defined(RT_ARCH_ARM64)
4570 /* sub dst, xzr, dst */
4571 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4572
4573#else
4574# error "Port me"
4575#endif
4576 return off;
4577}
4578
4579
4580/**
4581 * Emits code for two's complement negation of a 64-bit GPR.
4582 */
4583DECL_INLINE_THROW(uint32_t)
4584iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4585{
4586#if defined(RT_ARCH_AMD64)
4587 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4588#elif defined(RT_ARCH_ARM64)
4589 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4590#else
4591# error "Port me"
4592#endif
4593 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4594 return off;
4595}
4596
4597
4598/**
4599 * Emits code for two's complement negation of a 32-bit GPR.
4600 * @note Bits 32 thru 63 are set to zero.
4601 */
4602DECL_FORCE_INLINE_THROW(uint32_t)
4603iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4604{
4605#if defined(RT_ARCH_AMD64)
4606 /* neg Ev */
4607 if (iGprDst >= 8)
4608 pCodeBuf[off++] = X86_OP_REX_B;
4609 pCodeBuf[off++] = 0xf7;
4610 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4611
4612#elif defined(RT_ARCH_ARM64)
4613 /* sub dst, xzr, dst */
4614 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4615
4616#else
4617# error "Port me"
4618#endif
4619 return off;
4620}
4621
4622
4623/**
4624 * Emits code for two's complement negation of a 32-bit GPR.
4625 * @note Bits 32 thru 63 are set to zero.
4626 */
4627DECL_INLINE_THROW(uint32_t)
4628iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4629{
4630#if defined(RT_ARCH_AMD64)
4631 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4632#elif defined(RT_ARCH_ARM64)
4633 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4634#else
4635# error "Port me"
4636#endif
4637 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4638 return off;
4639}
4640
4641
4642
4643/*********************************************************************************************************************************
4644* Bit Operations *
4645*********************************************************************************************************************************/
4646
4647/**
4648 * Emits code for clearing bits 16 thru 63 in the GPR.
4649 */
4650DECL_INLINE_THROW(uint32_t)
4651iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4652{
4653#if defined(RT_ARCH_AMD64)
4654 /* movzx Gv,Ew */
4655 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4656 if (iGprDst >= 8)
4657 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4658 pbCodeBuf[off++] = 0x0f;
4659 pbCodeBuf[off++] = 0xb7;
4660 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4661
4662#elif defined(RT_ARCH_ARM64)
4663 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4664# if 1
4665 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4666# else
4667 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4668 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4669# endif
4670#else
4671# error "Port me"
4672#endif
4673 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4674 return off;
4675}
4676
4677
4678/**
4679 * Emits code for AND'ing two 64-bit GPRs.
4680 *
4681 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4682 * and ARM64 hosts.
4683 */
4684DECL_FORCE_INLINE(uint32_t)
4685iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4686{
4687#if defined(RT_ARCH_AMD64)
4688 /* and Gv, Ev */
4689 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4690 pCodeBuf[off++] = 0x23;
4691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4692 RT_NOREF(fSetFlags);
4693
4694#elif defined(RT_ARCH_ARM64)
4695 if (!fSetFlags)
4696 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4697 else
4698 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4699
4700#else
4701# error "Port me"
4702#endif
4703 return off;
4704}
4705
4706
4707/**
4708 * Emits code for AND'ing two 64-bit GPRs.
4709 *
4710 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4711 * and ARM64 hosts.
4712 */
4713DECL_INLINE_THROW(uint32_t)
4714iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4715{
4716#if defined(RT_ARCH_AMD64)
4717 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4718#elif defined(RT_ARCH_ARM64)
4719 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4720#else
4721# error "Port me"
4722#endif
4723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4724 return off;
4725}
4726
4727
4728/**
4729 * Emits code for AND'ing two 32-bit GPRs.
4730 */
4731DECL_FORCE_INLINE(uint32_t)
4732iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4733{
4734#if defined(RT_ARCH_AMD64)
4735 /* and Gv, Ev */
4736 if (iGprDst >= 8 || iGprSrc >= 8)
4737 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4738 pCodeBuf[off++] = 0x23;
4739 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4740 RT_NOREF(fSetFlags);
4741
4742#elif defined(RT_ARCH_ARM64)
4743 if (!fSetFlags)
4744 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4745 else
4746 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4747
4748#else
4749# error "Port me"
4750#endif
4751 return off;
4752}
4753
4754
4755/**
4756 * Emits code for AND'ing two 32-bit GPRs.
4757 */
4758DECL_INLINE_THROW(uint32_t)
4759iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4760{
4761#if defined(RT_ARCH_AMD64)
4762 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4763#elif defined(RT_ARCH_ARM64)
4764 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4765#else
4766# error "Port me"
4767#endif
4768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4769 return off;
4770}
4771
4772
4773/**
4774 * Emits code for AND'ing a 64-bit GPR with a constant.
4775 *
4776 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4777 * and ARM64 hosts.
4778 */
4779DECL_INLINE_THROW(uint32_t)
4780iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4781{
4782#if defined(RT_ARCH_AMD64)
4783 if ((int64_t)uImm == (int8_t)uImm)
4784 {
4785 /* and Ev, imm8 */
4786 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4787 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4788 pbCodeBuf[off++] = 0x83;
4789 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4790 pbCodeBuf[off++] = (uint8_t)uImm;
4791 }
4792 else if ((int64_t)uImm == (int32_t)uImm)
4793 {
4794 /* and Ev, imm32 */
4795 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4796 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4797 pbCodeBuf[off++] = 0x81;
4798 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4799 pbCodeBuf[off++] = RT_BYTE1(uImm);
4800 pbCodeBuf[off++] = RT_BYTE2(uImm);
4801 pbCodeBuf[off++] = RT_BYTE3(uImm);
4802 pbCodeBuf[off++] = RT_BYTE4(uImm);
4803 }
4804 else
4805 {
4806 /* Use temporary register for the 64-bit immediate. */
4807 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4808 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4809 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4810 }
4811 RT_NOREF(fSetFlags);
4812
4813#elif defined(RT_ARCH_ARM64)
4814 uint32_t uImmR = 0;
4815 uint32_t uImmNandS = 0;
4816 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4817 {
4818 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4819 if (!fSetFlags)
4820 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4821 else
4822 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4823 }
4824 else
4825 {
4826 /* Use temporary register for the 64-bit immediate. */
4827 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4828 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4829 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4830 }
4831
4832#else
4833# error "Port me"
4834#endif
4835 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4836 return off;
4837}
4838
4839
4840/**
4841 * Emits code for AND'ing a 32-bit GPR with a constant.
4842 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4843 * @note For ARM64 this only supports @a uImm values that can be expressed using
4844 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4845 * make sure this is possible!
4846 */
4847DECL_FORCE_INLINE_THROW(uint32_t)
4848iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4849{
4850#if defined(RT_ARCH_AMD64)
4851 /* and Ev, imm */
4852 if (iGprDst >= 8)
4853 pCodeBuf[off++] = X86_OP_REX_B;
4854 if ((int32_t)uImm == (int8_t)uImm)
4855 {
4856 pCodeBuf[off++] = 0x83;
4857 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4858 pCodeBuf[off++] = (uint8_t)uImm;
4859 }
4860 else
4861 {
4862 pCodeBuf[off++] = 0x81;
4863 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4864 pCodeBuf[off++] = RT_BYTE1(uImm);
4865 pCodeBuf[off++] = RT_BYTE2(uImm);
4866 pCodeBuf[off++] = RT_BYTE3(uImm);
4867 pCodeBuf[off++] = RT_BYTE4(uImm);
4868 }
4869 RT_NOREF(fSetFlags);
4870
4871#elif defined(RT_ARCH_ARM64)
4872 uint32_t uImmR = 0;
4873 uint32_t uImmNandS = 0;
4874 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4875 {
4876 if (!fSetFlags)
4877 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4878 else
4879 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4880 }
4881 else
4882# ifdef IEM_WITH_THROW_CATCH
4883 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4884# else
4885 AssertReleaseFailedStmt(off = UINT32_MAX);
4886# endif
4887
4888#else
4889# error "Port me"
4890#endif
4891 return off;
4892}
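/*
 * Illustrative note, not part of the original source: the ARM64 path above only
 * accepts immediates that Armv8A64ConvertMask32ToImmRImmS() can encode as a
 * logical immediate, i.e. rotated runs of contiguous set bits such as
 * 0x000000ff, 0x0000fff0 or 0xffff0000.  A value like 0x00012345 has no such
 * encoding and must go through the non-Ex variant below, which falls back to a
 * temporary register, e.g.:
 *     off = iemNativeEmitAndGpr32ByImm(pReNative, off, iGprDst, 0x00012345);
 */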
4893
4894
4895/**
4896 * Emits code for AND'ing a 32-bit GPR with a constant.
4897 *
4898 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4899 */
4900DECL_INLINE_THROW(uint32_t)
4901iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4902{
4903#if defined(RT_ARCH_AMD64)
4904 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4905
4906#elif defined(RT_ARCH_ARM64)
4907 uint32_t uImmR = 0;
4908 uint32_t uImmNandS = 0;
4909 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4910 {
4911 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4912 if (!fSetFlags)
4913 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4914 else
4915 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4916 }
4917 else
4918 {
4919 /* Use temporary register for the 64-bit immediate. */
4920 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4921 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4922 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4923 }
4924
4925#else
4926# error "Port me"
4927#endif
4928 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4929 return off;
4930}
4931
4932
4933/**
4934 * Emits code for AND'ing a 64-bit GPR with a constant.
4935 *
4936 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
4937 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
4938 * the same.
4939 */
4940DECL_FORCE_INLINE_THROW(uint32_t)
4941iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4942 bool fSetFlags = false)
4943{
4944#if defined(RT_ARCH_AMD64)
4945 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4946 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4947 RT_NOREF(fSetFlags);
4948
4949#elif defined(RT_ARCH_ARM64)
4950 uint32_t uImmR = 0;
4951 uint32_t uImmNandS = 0;
4952 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4953 {
4954 if (!fSetFlags)
4955 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4956 else
4957 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4958 }
4959 else if (iGprDst != iGprSrc)
4960 {
4961 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4962 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4963 }
4964 else
4965# ifdef IEM_WITH_THROW_CATCH
4966 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4967# else
4968 AssertReleaseFailedStmt(off = UINT32_MAX);
4969# endif
4970
4971#else
4972# error "Port me"
4973#endif
4974 return off;
4975}
4976
4977/**
4978 * Emits code for AND'ing a 32-bit GPR with a constant.
4979 *
4980 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
4981 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
4982 * the same.
4983 *
4984 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4985 */
4986DECL_FORCE_INLINE_THROW(uint32_t)
4987iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4988 bool fSetFlags = false)
4989{
4990#if defined(RT_ARCH_AMD64)
4991 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4992 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4993 RT_NOREF(fSetFlags);
4994
4995#elif defined(RT_ARCH_ARM64)
4996 uint32_t uImmR = 0;
4997 uint32_t uImmNandS = 0;
4998 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4999 {
5000 if (!fSetFlags)
5001 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5002 else
5003 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5004 }
5005 else if (iGprDst != iGprSrc)
5006 {
5007 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5008 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5009 }
5010 else
5011# ifdef IEM_WITH_THROW_CATCH
5012 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5013# else
5014 AssertReleaseFailedStmt(off = UINT32_MAX);
5015# endif
5016
5017#else
5018# error "Port me"
5019#endif
5020 return off;
5021}
5022
5023
5024/**
5025 * Emits code for OR'ing two 64-bit GPRs.
5026 */
5027DECL_FORCE_INLINE(uint32_t)
5028iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5029{
5030#if defined(RT_ARCH_AMD64)
5031 /* or Gv, Ev */
5032 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5033 pCodeBuf[off++] = 0x0b;
5034 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5035
5036#elif defined(RT_ARCH_ARM64)
5037 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5038
5039#else
5040# error "Port me"
5041#endif
5042 return off;
5043}
5044
5045
5046/**
5047 * Emits code for OR'ing two 64-bit GPRs.
5048 */
5049DECL_INLINE_THROW(uint32_t)
5050iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5051{
5052#if defined(RT_ARCH_AMD64)
5053 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5054#elif defined(RT_ARCH_ARM64)
5055 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5056#else
5057# error "Port me"
5058#endif
5059 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5060 return off;
5061}
5062
5063
5064/**
5065 * Emits code for OR'ing two 32-bit GPRs.
5066 * @note Bits 63:32 of the destination GPR will be cleared.
5067 */
5068DECL_FORCE_INLINE(uint32_t)
5069iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5070{
5071#if defined(RT_ARCH_AMD64)
5072 /* or Gv, Ev */
5073 if (iGprDst >= 8 || iGprSrc >= 8)
5074 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5075 pCodeBuf[off++] = 0x0b;
5076 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5077
5078#elif defined(RT_ARCH_ARM64)
5079 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5080
5081#else
5082# error "Port me"
5083#endif
5084 return off;
5085}
5086
5087
5088/**
5089 * Emits code for OR'ing two 32-bit GPRs.
5090 * @note Bits 63:32 of the destination GPR will be cleared.
5091 */
5092DECL_INLINE_THROW(uint32_t)
5093iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5094{
5095#if defined(RT_ARCH_AMD64)
5096 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5097#elif defined(RT_ARCH_ARM64)
5098 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5099#else
5100# error "Port me"
5101#endif
5102 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5103 return off;
5104}
5105
5106
5107/**
5108 * Emits code for OR'ing a 64-bit GPR with a constant.
5109 */
5110DECL_INLINE_THROW(uint32_t)
5111iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5112{
5113#if defined(RT_ARCH_AMD64)
5114 if ((int64_t)uImm == (int8_t)uImm)
5115 {
5116 /* or Ev, imm8 */
5117 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5118 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5119 pbCodeBuf[off++] = 0x83;
5120 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5121 pbCodeBuf[off++] = (uint8_t)uImm;
5122 }
5123 else if ((int64_t)uImm == (int32_t)uImm)
5124 {
5125 /* or Ev, imm32 */
5126 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5127 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5128 pbCodeBuf[off++] = 0x81;
5129 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5130 pbCodeBuf[off++] = RT_BYTE1(uImm);
5131 pbCodeBuf[off++] = RT_BYTE2(uImm);
5132 pbCodeBuf[off++] = RT_BYTE3(uImm);
5133 pbCodeBuf[off++] = RT_BYTE4(uImm);
5134 }
5135 else
5136 {
5137 /* Use temporary register for the 64-bit immediate. */
5138 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5139 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5140 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5141 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5142 }
5143
5144#elif defined(RT_ARCH_ARM64)
5145 uint32_t uImmR = 0;
5146 uint32_t uImmNandS = 0;
5147 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5148 {
5149 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5150 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5151 }
5152 else
5153 {
5154 /* Use temporary register for the 64-bit immediate. */
5155 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5156 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5157 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5158 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5159 }
5160
5161#else
5162# error "Port me"
5163#endif
5164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5165 return off;
5166}
5167
5168
5169/**
5170 * Emits code for OR'ing a 32-bit GPR with a constant.
5171 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5172 * @note For ARM64 this only supports @a uImm values that can be expressed using
5173 * the two 6-bit immediates of the OR instructions. The caller must make
5174 * sure this is possible!
5175 */
5176DECL_FORCE_INLINE_THROW(uint32_t)
5177iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5178{
5179#if defined(RT_ARCH_AMD64)
5180 /* or Ev, imm */
5181 if (iGprDst >= 8)
5182 pCodeBuf[off++] = X86_OP_REX_B;
5183 if ((int32_t)uImm == (int8_t)uImm)
5184 {
5185 pCodeBuf[off++] = 0x83;
5186 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5187 pCodeBuf[off++] = (uint8_t)uImm;
5188 }
5189 else
5190 {
5191 pCodeBuf[off++] = 0x81;
5192 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5193 pCodeBuf[off++] = RT_BYTE1(uImm);
5194 pCodeBuf[off++] = RT_BYTE2(uImm);
5195 pCodeBuf[off++] = RT_BYTE3(uImm);
5196 pCodeBuf[off++] = RT_BYTE4(uImm);
5197 }
5198
5199#elif defined(RT_ARCH_ARM64)
5200 uint32_t uImmR = 0;
5201 uint32_t uImmNandS = 0;
5202 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5203 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5204 else
5205# ifdef IEM_WITH_THROW_CATCH
5206 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5207# else
5208 AssertReleaseFailedStmt(off = UINT32_MAX);
5209# endif
5210
5211#else
5212# error "Port me"
5213#endif
5214 return off;
5215}
5216
5217
5218/**
5219 * Emits code for OR'ing a 32-bit GPR with a constant.
5220 *
5221 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5222 */
5223DECL_INLINE_THROW(uint32_t)
5224iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5225{
5226#if defined(RT_ARCH_AMD64)
5227 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5228
5229#elif defined(RT_ARCH_ARM64)
5230 uint32_t uImmR = 0;
5231 uint32_t uImmNandS = 0;
5232 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5233 {
5234 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5235 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5236 }
5237 else
5238 {
5239 /* Use temporary register for the 64-bit immediate. */
5240 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5241 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5242 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5243 }
5244
5245#else
5246# error "Port me"
5247#endif
5248 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5249 return off;
5250}
5251
5252
5253
5254/**
5255 * ORs two 64-bit GPRs together, storing the result in a third register.
5256 */
5257DECL_FORCE_INLINE(uint32_t)
5258iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5259{
5260#ifdef RT_ARCH_AMD64
5261 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5262 {
5263 /** @todo consider LEA */
5264 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5265 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5266 }
5267 else
5268 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5269
5270#elif defined(RT_ARCH_ARM64)
5271 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5272
5273#else
5274# error "Port me!"
5275#endif
5276 return off;
5277}
5278
5279
5280
5281/**
5282 * ORs two 32-bit GPRs together, storing the result in a third register.
5283 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5284 */
5285DECL_FORCE_INLINE(uint32_t)
5286iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5287{
5288#ifdef RT_ARCH_AMD64
5289 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5290 {
5291 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5292 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5293 }
5294 else
5295 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5296
5297#elif defined(RT_ARCH_ARM64)
5298 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5299
5300#else
5301# error "Port me!"
5302#endif
5303 return off;
5304}
5305
5306
5307/**
5308 * Emits code for XOR'ing two 64-bit GPRs.
5309 */
5310DECL_INLINE_THROW(uint32_t)
5311iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5312{
5313#if defined(RT_ARCH_AMD64)
5314    /* xor Gv, Ev */
5315 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5316 pCodeBuf[off++] = 0x33;
5317 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5318
5319#elif defined(RT_ARCH_ARM64)
5320 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5321
5322#else
5323# error "Port me"
5324#endif
5325 return off;
5326}
5327
5328
5329/**
5330 * Emits code for XOR'ing two 64-bit GPRs.
5331 */
5332DECL_INLINE_THROW(uint32_t)
5333iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5334{
5335#if defined(RT_ARCH_AMD64)
5336 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5337#elif defined(RT_ARCH_ARM64)
5338 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5339#else
5340# error "Port me"
5341#endif
5342 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5343 return off;
5344}
5345
5346
5347/**
5348 * Emits code for XOR'ing two 32-bit GPRs.
5349 */
5350DECL_INLINE_THROW(uint32_t)
5351iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5352{
5353#if defined(RT_ARCH_AMD64)
5354    /* xor Gv, Ev */
5355 if (iGprDst >= 8 || iGprSrc >= 8)
5356 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5357 pCodeBuf[off++] = 0x33;
5358 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5359
5360#elif defined(RT_ARCH_ARM64)
5361 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5362
5363#else
5364# error "Port me"
5365#endif
5366 return off;
5367}
5368
5369
5370/**
5371 * Emits code for XOR'ing two 32-bit GPRs.
5372 */
5373DECL_INLINE_THROW(uint32_t)
5374iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5375{
5376#if defined(RT_ARCH_AMD64)
5377 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5378#elif defined(RT_ARCH_ARM64)
5379 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5380#else
5381# error "Port me"
5382#endif
5383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5384 return off;
5385}
5386
5387
5388/**
5389 * Emits code for XOR'ing a 32-bit GPR with a constant.
5390 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5391 * @note For ARM64 this only supports @a uImm values that can be expressed using
5392 * the two 6-bit immediates of the EOR instructions. The caller must make
5393 * sure this is possible!
5394 */
5395DECL_FORCE_INLINE_THROW(uint32_t)
5396iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5397{
5398#if defined(RT_ARCH_AMD64)
5399    /* xor Ev, imm */
5400 if (iGprDst >= 8)
5401 pCodeBuf[off++] = X86_OP_REX_B;
5402 if ((int32_t)uImm == (int8_t)uImm)
5403 {
5404 pCodeBuf[off++] = 0x83;
5405 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5406 pCodeBuf[off++] = (uint8_t)uImm;
5407 }
5408 else
5409 {
5410 pCodeBuf[off++] = 0x81;
5411 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5412 pCodeBuf[off++] = RT_BYTE1(uImm);
5413 pCodeBuf[off++] = RT_BYTE2(uImm);
5414 pCodeBuf[off++] = RT_BYTE3(uImm);
5415 pCodeBuf[off++] = RT_BYTE4(uImm);
5416 }
5417
5418#elif defined(RT_ARCH_ARM64)
5419 uint32_t uImmR = 0;
5420 uint32_t uImmNandS = 0;
5421 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5422 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5423 else
5424# ifdef IEM_WITH_THROW_CATCH
5425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5426# else
5427 AssertReleaseFailedStmt(off = UINT32_MAX);
5428# endif
5429
5430#else
5431# error "Port me"
5432#endif
5433 return off;
5434}
5435
5436
5437/*********************************************************************************************************************************
5438* Shifting *
5439*********************************************************************************************************************************/
5440
5441/**
5442 * Emits code for shifting a GPR a fixed number of bits to the left.
5443 */
5444DECL_FORCE_INLINE(uint32_t)
5445iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5446{
5447 Assert(cShift > 0 && cShift < 64);
5448
5449#if defined(RT_ARCH_AMD64)
5450 /* shl dst, cShift */
5451 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5452 if (cShift != 1)
5453 {
5454 pCodeBuf[off++] = 0xc1;
5455 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5456 pCodeBuf[off++] = cShift;
5457 }
5458 else
5459 {
5460 pCodeBuf[off++] = 0xd1;
5461 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5462 }
5463
5464#elif defined(RT_ARCH_ARM64)
5465 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5466
5467#else
5468# error "Port me"
5469#endif
5470 return off;
5471}
5472
5473
5474/**
5475 * Emits code for shifting a GPR a fixed number of bits to the left.
5476 */
5477DECL_INLINE_THROW(uint32_t)
5478iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5479{
5480#if defined(RT_ARCH_AMD64)
5481 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5482#elif defined(RT_ARCH_ARM64)
5483 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5484#else
5485# error "Port me"
5486#endif
5487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5488 return off;
5489}
5490
5491
5492/**
5493 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5494 */
5495DECL_FORCE_INLINE(uint32_t)
5496iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5497{
5498 Assert(cShift > 0 && cShift < 32);
5499
5500#if defined(RT_ARCH_AMD64)
5501 /* shl dst, cShift */
5502 if (iGprDst >= 8)
5503 pCodeBuf[off++] = X86_OP_REX_B;
5504 if (cShift != 1)
5505 {
5506 pCodeBuf[off++] = 0xc1;
5507 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5508 pCodeBuf[off++] = cShift;
5509 }
5510 else
5511 {
5512 pCodeBuf[off++] = 0xd1;
5513 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5514 }
5515
5516#elif defined(RT_ARCH_ARM64)
5517 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5518
5519#else
5520# error "Port me"
5521#endif
5522 return off;
5523}
5524
5525
5526/**
5527 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5528 */
5529DECL_INLINE_THROW(uint32_t)
5530iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5531{
5532#if defined(RT_ARCH_AMD64)
5533 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5534#elif defined(RT_ARCH_ARM64)
5535 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5536#else
5537# error "Port me"
5538#endif
5539 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5540 return off;
5541}
5542
5543
5544/**
5545 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5546 */
5547DECL_FORCE_INLINE(uint32_t)
5548iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5549{
5550 Assert(cShift > 0 && cShift < 64);
5551
5552#if defined(RT_ARCH_AMD64)
5553 /* shr dst, cShift */
5554 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5555 if (cShift != 1)
5556 {
5557 pCodeBuf[off++] = 0xc1;
5558 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5559 pCodeBuf[off++] = cShift;
5560 }
5561 else
5562 {
5563 pCodeBuf[off++] = 0xd1;
5564 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5565 }
5566
5567#elif defined(RT_ARCH_ARM64)
5568 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5569
5570#else
5571# error "Port me"
5572#endif
5573 return off;
5574}
5575
5576
5577/**
5578 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5579 */
5580DECL_INLINE_THROW(uint32_t)
5581iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5582{
5583#if defined(RT_ARCH_AMD64)
5584 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5585#elif defined(RT_ARCH_ARM64)
5586 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5587#else
5588# error "Port me"
5589#endif
5590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5591 return off;
5592}
5593
5594
5595/**
5596 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5597 * right.
5598 */
5599DECL_FORCE_INLINE(uint32_t)
5600iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5601{
5602 Assert(cShift > 0 && cShift < 32);
5603
5604#if defined(RT_ARCH_AMD64)
5605 /* shr dst, cShift */
5606 if (iGprDst >= 8)
5607 pCodeBuf[off++] = X86_OP_REX_B;
5608 if (cShift != 1)
5609 {
5610 pCodeBuf[off++] = 0xc1;
5611 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5612 pCodeBuf[off++] = cShift;
5613 }
5614 else
5615 {
5616 pCodeBuf[off++] = 0xd1;
5617 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5618 }
5619
5620#elif defined(RT_ARCH_ARM64)
5621 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5622
5623#else
5624# error "Port me"
5625#endif
5626 return off;
5627}
5628
5629
5630/**
5631 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5632 * right.
5633 */
5634DECL_INLINE_THROW(uint32_t)
5635iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5636{
5637#if defined(RT_ARCH_AMD64)
5638 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5639#elif defined(RT_ARCH_ARM64)
5640 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5641#else
5642# error "Port me"
5643#endif
5644 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5645 return off;
5646}
5647
5648
5649/**
5650 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5651 * right and assigning it to a different GPR.
5652 */
5653DECL_INLINE_THROW(uint32_t)
5654iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5655{
5656 Assert(cShift > 0); Assert(cShift < 32);
5657#if defined(RT_ARCH_AMD64)
5658 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5659 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5660
5661#elif defined(RT_ARCH_ARM64)
5662 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5663
5664#else
5665# error "Port me"
5666#endif
5667 return off;
5668}
5669
5670
5671/**
5672 * Emits code for rotating a GPR a fixed number of bits to the left.
5673 */
5674DECL_FORCE_INLINE(uint32_t)
5675iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5676{
5677 Assert(cShift > 0 && cShift < 64);
5678
5679#if defined(RT_ARCH_AMD64)
5680 /* rol dst, cShift */
5681 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5682 if (cShift != 1)
5683 {
5684 pCodeBuf[off++] = 0xc1;
5685 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5686 pCodeBuf[off++] = cShift;
5687 }
5688 else
5689 {
5690 pCodeBuf[off++] = 0xd1;
5691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5692 }
5693
5694#elif defined(RT_ARCH_ARM64)
5695 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5696
5697#else
5698# error "Port me"
5699#endif
5700 return off;
5701}
5702
5703
5704#if defined(RT_ARCH_AMD64)
5705/**
5706 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5707 */
5708DECL_FORCE_INLINE(uint32_t)
5709iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5710{
5711 Assert(cShift > 0 && cShift < 32);
5712
5713 /* rcl dst, cShift */
5714 if (iGprDst >= 8)
5715 pCodeBuf[off++] = X86_OP_REX_B;
5716 if (cShift != 1)
5717 {
5718 pCodeBuf[off++] = 0xc1;
5719 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5720 pCodeBuf[off++] = cShift;
5721 }
5722 else
5723 {
5724 pCodeBuf[off++] = 0xd1;
5725 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5726 }
5727
5728 return off;
5729}
5730#endif /* RT_ARCH_AMD64 */
5731
5732
5733
5734/**
5735 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5736 * @note Bits 63:32 of the destination GPR will be cleared.
5737 */
5738DECL_FORCE_INLINE(uint32_t)
5739iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5740{
5741#if defined(RT_ARCH_AMD64)
5742 /*
5743 * There is no bswap r16 on x86 (the encoding exists but does not work).
5744 * So just use a rol (gcc -O2 is doing that).
5745 *
5746 * rol r16, 0x8
5747 */
5748 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5749 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5750 if (iGpr >= 8)
5751 pbCodeBuf[off++] = X86_OP_REX_B;
5752 pbCodeBuf[off++] = 0xc1;
5753 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5754 pbCodeBuf[off++] = 0x08;
5755#elif defined(RT_ARCH_ARM64)
5756 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5757
5758 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5759#else
5760# error "Port me"
5761#endif
5762
5763 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5764 return off;
5765}
5766
5767
5768/**
5769 * Emits code for reversing the byte order in a 32-bit GPR.
5770 * @note Bits 63:32 of the destination GPR will be cleared.
5771 */
5772DECL_FORCE_INLINE(uint32_t)
5773iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5774{
5775#if defined(RT_ARCH_AMD64)
5776 /* bswap r32 */
5777 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5778
5779 if (iGpr >= 8)
5780 pbCodeBuf[off++] = X86_OP_REX_B;
5781 pbCodeBuf[off++] = 0x0f;
5782 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5783#elif defined(RT_ARCH_ARM64)
5784 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5785
5786 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5787#else
5788# error "Port me"
5789#endif
5790
5791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5792 return off;
5793}
5794
5795
5796/**
5797 * Emits code for reversing the byte order in a 64-bit GPR.
5798 */
5799DECL_FORCE_INLINE(uint32_t)
5800iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5801{
5802#if defined(RT_ARCH_AMD64)
5803 /* bswap r64 */
5804 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5805
5806 if (iGpr >= 8)
5807 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5808 else
5809 pbCodeBuf[off++] = X86_OP_REX_W;
5810 pbCodeBuf[off++] = 0x0f;
5811 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5812#elif defined(RT_ARCH_ARM64)
5813 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5814
5815 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5816#else
5817# error "Port me"
5818#endif
5819
5820 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5821 return off;
5822}
5823
5824
5825/*********************************************************************************************************************************
5826* Compare and Testing *
5827*********************************************************************************************************************************/
5828
5829
5830#ifdef RT_ARCH_ARM64
5831/**
5832 * Emits an ARM64 compare instruction.
5833 */
5834DECL_INLINE_THROW(uint32_t)
5835iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5836 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5837{
5838 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5839 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5840 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5841 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5842 return off;
5843}
5844#endif
5845
5846
5847/**
5848 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5849 * with conditional instructions.
5850 */
5851DECL_FORCE_INLINE(uint32_t)
5852iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5853{
5854#ifdef RT_ARCH_AMD64
5855 /* cmp Gv, Ev */
5856 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5857 pCodeBuf[off++] = 0x3b;
5858 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5859
5860#elif defined(RT_ARCH_ARM64)
5861 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
5862
5863#else
5864# error "Port me!"
5865#endif
5866 return off;
5867}
5868
5869
5870/**
5871 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5872 * with conditional instructions.
5873 */
5874DECL_INLINE_THROW(uint32_t)
5875iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5876{
5877#ifdef RT_ARCH_AMD64
5878 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5879#elif defined(RT_ARCH_ARM64)
5880 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5881#else
5882# error "Port me!"
5883#endif
5884 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5885 return off;
5886}
5887
5888
5889/**
5890 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5891 * with conditional instructions.
5892 */
5893DECL_FORCE_INLINE(uint32_t)
5894iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5895{
5896#ifdef RT_ARCH_AMD64
5897 /* cmp Gv, Ev */
5898 if (iGprLeft >= 8 || iGprRight >= 8)
5899 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5900 pCodeBuf[off++] = 0x3b;
5901 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5902
5903#elif defined(RT_ARCH_ARM64)
5904 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
5905
5906#else
5907# error "Port me!"
5908#endif
5909 return off;
5910}
5911
5912
5913/**
5914 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5915 * with conditional instructions.
5916 */
5917DECL_INLINE_THROW(uint32_t)
5918iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5919{
5920#ifdef RT_ARCH_AMD64
5921 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5922#elif defined(RT_ARCH_ARM64)
5923 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5924#else
5925# error "Port me!"
5926#endif
5927 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5928 return off;
5929}
5930
5931
5932/**
5933 * Emits a compare of a 64-bit GPR with a constant value, setting status
5934 * flags/whatever for use with conditional instructions.
5935 */
5936DECL_INLINE_THROW(uint32_t)
5937iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
5938{
5939#ifdef RT_ARCH_AMD64
5940    if (uImm <= UINT32_C(0x7f))
5941 {
5942 /* cmp Ev, Ib */
5943 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5944 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5945 pbCodeBuf[off++] = 0x83;
5946 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5947 pbCodeBuf[off++] = (uint8_t)uImm;
5948 }
5949 else if ((int64_t)uImm == (int32_t)uImm)
5950 {
5951 /* cmp Ev, imm */
5952 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5953 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5954 pbCodeBuf[off++] = 0x81;
5955 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5956 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5957 pbCodeBuf[off++] = RT_BYTE1(uImm);
5958 pbCodeBuf[off++] = RT_BYTE2(uImm);
5959 pbCodeBuf[off++] = RT_BYTE3(uImm);
5960 pbCodeBuf[off++] = RT_BYTE4(uImm);
5961 }
5962 else
5963 {
5964 /* Use temporary register for the immediate. */
5965 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5966 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5967 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5968 }
5969
5970#elif defined(RT_ARCH_ARM64)
5971    /** @todo guess there are cleverer things we can do here... */
5972 if (uImm < _4K)
5973 {
5974 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5975 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5976 true /*64Bit*/, true /*fSetFlags*/);
5977 }
5978 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5979 {
5980 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5981 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5982 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5983 }
5984 else
5985 {
5986 /* Use temporary register for the immediate. */
5987 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5988 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5989 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5990 }
5991
5992#else
5993# error "Port me!"
5994#endif
5995
5996 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5997 return off;
5998}
5999
6000
6001/**
6002 * Emits a compare of a 32-bit GPR with a constant value, setting status
6003 * flags/whatever for use with conditional instructions.
6004 *
6005 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6006 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6007 * bits all zero). Will release assert or throw exception if the caller
6008 * violates this restriction.
6009 */
6010DECL_FORCE_INLINE_THROW(uint32_t)
6011iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6012{
6013#ifdef RT_ARCH_AMD64
6014 if (iGprLeft >= 8)
6015 pCodeBuf[off++] = X86_OP_REX_B;
6016 if (uImm <= UINT32_C(0x7f))
6017 {
6018 /* cmp Ev, Ib */
6019 pCodeBuf[off++] = 0x83;
6020 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6021 pCodeBuf[off++] = (uint8_t)uImm;
6022 }
6023 else
6024 {
6025 /* cmp Ev, imm */
6026 pCodeBuf[off++] = 0x81;
6027 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6028 pCodeBuf[off++] = RT_BYTE1(uImm);
6029 pCodeBuf[off++] = RT_BYTE2(uImm);
6030 pCodeBuf[off++] = RT_BYTE3(uImm);
6031 pCodeBuf[off++] = RT_BYTE4(uImm);
6032 }
6033
6034#elif defined(RT_ARCH_ARM64)
6035    /** @todo guess there are cleverer things we can do here... */
6036 if (uImm < _4K)
6037 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6038 false /*64Bit*/, true /*fSetFlags*/);
6039 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6040        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6041 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6042 else
6043# ifdef IEM_WITH_THROW_CATCH
6044 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6045# else
6046 AssertReleaseFailedStmt(off = UINT32_MAX);
6047# endif
6048
6049#else
6050# error "Port me!"
6051#endif
6052 return off;
6053}
6054
6055
6056/**
6057 * Emits a compare of a 32-bit GPR with a constant value, setting status
6058 * flags/whatever for use with conditional instructions.
6059 */
6060DECL_INLINE_THROW(uint32_t)
6061iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6062{
6063#ifdef RT_ARCH_AMD64
6064 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6065
6066#elif defined(RT_ARCH_ARM64)
6067    /** @todo guess there are cleverer things we can do here... */
6068 if (uImm < _4K)
6069 {
6070 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6071 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6072 false /*64Bit*/, true /*fSetFlags*/);
6073 }
6074 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6075 {
6076 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6077        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6078 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6079 }
6080 else
6081 {
6082 /* Use temporary register for the immediate. */
6083 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6084 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6085 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6086 }
6087
6088#else
6089# error "Port me!"
6090#endif
6091
6092 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6093 return off;
6094}
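/*
 * Illustrative note, not part of the original source: on ARM64 only immediates
 * in 0x000..0xfff, or multiples of 0x1000 up to 0xfff000, are handled by the
 * single SUBS/CMP-with-immediate instruction above; anything else goes through
 * a temporary register.  A sketch with a hypothetical host register index:
 *     off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegValue, 0x2000);  // one instruction
 *     off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegValue, 0x12345); // temporary register fallback
 */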
6095
6096
6097/**
6098 * Emits a compare of a 16-bit GPR with a constant value, setting status
6099 * flags/whatever for use with conditional instructions.
6100 *
6101 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
6102 *       16-bit value from @a iGprLeft.
6103 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6104 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6105 * bits all zero). Will release assert or throw exception if the caller
6106 * violates this restriction.
6107 */
6108DECL_FORCE_INLINE_THROW(uint32_t)
6109iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6110 uint8_t idxTmpReg = UINT8_MAX)
6111{
6112#ifdef RT_ARCH_AMD64
6113 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6114 if (iGprLeft >= 8)
6115 pCodeBuf[off++] = X86_OP_REX_B;
6116 if (uImm <= UINT32_C(0x7f))
6117 {
6118 /* cmp Ev, Ib */
6119 pCodeBuf[off++] = 0x83;
6120 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6121 pCodeBuf[off++] = (uint8_t)uImm;
6122 }
6123 else
6124 {
6125 /* cmp Ev, imm */
6126 pCodeBuf[off++] = 0x81;
6127 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6128 pCodeBuf[off++] = RT_BYTE1(uImm);
6129 pCodeBuf[off++] = RT_BYTE2(uImm);
6130 }
6131 RT_NOREF(idxTmpReg);
6132
6133#elif defined(RT_ARCH_ARM64)
6134# ifdef IEM_WITH_THROW_CATCH
6135 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6136# else
6137 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6138# endif
6139 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6140 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6141 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6142
6143#else
6144# error "Port me!"
6145#endif
6146 return off;
6147}
6148
6149
6150/**
6151 * Emits a compare of a 16-bit GPR with a constant value, setting status
6152 * flags/whatever for use with conditional instructions.
6153 *
6154 * @note ARM64: Helper register is required (idxTmpReg).
6155 */
6156DECL_INLINE_THROW(uint32_t)
6157iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6158 uint8_t idxTmpReg = UINT8_MAX)
6159{
6160#ifdef RT_ARCH_AMD64
6161 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6162#elif defined(RT_ARCH_ARM64)
6163 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6164#else
6165# error "Port me!"
6166#endif
6167 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6168 return off;
6169}
6170
6171
6172
6173/*********************************************************************************************************************************
6174* Branching *
6175*********************************************************************************************************************************/
6176
6177/**
6178 * Emits a JMP rel32 / B imm19 to the given label.
6179 */
6180DECL_FORCE_INLINE_THROW(uint32_t)
6181iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6182{
6183 Assert(idxLabel < pReNative->cLabels);
6184
6185#ifdef RT_ARCH_AMD64
6186 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6187 {
6188 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6189 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6190 {
6191 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6192 pCodeBuf[off++] = (uint8_t)offRel;
6193 }
6194 else
6195 {
6196 offRel -= 3;
6197 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6198 pCodeBuf[off++] = RT_BYTE1(offRel);
6199 pCodeBuf[off++] = RT_BYTE2(offRel);
6200 pCodeBuf[off++] = RT_BYTE3(offRel);
6201 pCodeBuf[off++] = RT_BYTE4(offRel);
6202 }
6203 }
6204 else
6205 {
6206 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6207 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6208 pCodeBuf[off++] = 0xfe;
6209 pCodeBuf[off++] = 0xff;
6210 pCodeBuf[off++] = 0xff;
6211 pCodeBuf[off++] = 0xff;
6212 }
6213 pCodeBuf[off++] = 0xcc; /* int3 poison */
6214
6215#elif defined(RT_ARCH_ARM64)
6216 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6217 pCodeBuf[off++] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6218 else
6219 {
6220 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6221 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6222 }
6223
6224#else
6225# error "Port me!"
6226#endif
6227 return off;
6228}
6229
6230
6231/**
6232 * Emits a JMP rel32 / B imm19 to the given label.
6233 */
6234DECL_INLINE_THROW(uint32_t)
6235iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6236{
6237#ifdef RT_ARCH_AMD64
6238 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6239#elif defined(RT_ARCH_ARM64)
6240 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6241#else
6242# error "Port me!"
6243#endif
6244 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6245 return off;
6246}
6247
6248
6249/**
6250 * Emits a JMP rel32 / B imm19 to a new undefined label.
6251 */
6252DECL_INLINE_THROW(uint32_t)
6253iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6254{
6255 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6256 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6257}
6258
6259/** Condition type. */
6260#ifdef RT_ARCH_AMD64
6261typedef enum IEMNATIVEINSTRCOND : uint8_t
6262{
6263 kIemNativeInstrCond_o = 0,
6264 kIemNativeInstrCond_no,
6265 kIemNativeInstrCond_c,
6266 kIemNativeInstrCond_nc,
6267 kIemNativeInstrCond_e,
6268 kIemNativeInstrCond_ne,
6269 kIemNativeInstrCond_be,
6270 kIemNativeInstrCond_nbe,
6271 kIemNativeInstrCond_s,
6272 kIemNativeInstrCond_ns,
6273 kIemNativeInstrCond_p,
6274 kIemNativeInstrCond_np,
6275 kIemNativeInstrCond_l,
6276 kIemNativeInstrCond_nl,
6277 kIemNativeInstrCond_le,
6278 kIemNativeInstrCond_nle
6279} IEMNATIVEINSTRCOND;
6280#elif defined(RT_ARCH_ARM64)
6281typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6282# define kIemNativeInstrCond_o todo_conditional_codes
6283# define kIemNativeInstrCond_no todo_conditional_codes
6284# define kIemNativeInstrCond_c todo_conditional_codes
6285# define kIemNativeInstrCond_nc todo_conditional_codes
6286# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6287# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6288# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6289# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6290# define kIemNativeInstrCond_s todo_conditional_codes
6291# define kIemNativeInstrCond_ns todo_conditional_codes
6292# define kIemNativeInstrCond_p todo_conditional_codes
6293# define kIemNativeInstrCond_np todo_conditional_codes
6294# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6295# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6296# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6297# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6298#else
6299# error "Port me!"
6300#endif
6301
6302
6303/**
6304 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6305 */
6306DECL_FORCE_INLINE_THROW(uint32_t)
6307iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6308 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6309{
6310 Assert(idxLabel < pReNative->cLabels);
6311
6312 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6313#ifdef RT_ARCH_AMD64
6314 if (offLabel >= off)
6315 {
6316 /* jcc rel32 */
6317 pCodeBuf[off++] = 0x0f;
6318 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6319 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6320 pCodeBuf[off++] = 0x00;
6321 pCodeBuf[off++] = 0x00;
6322 pCodeBuf[off++] = 0x00;
6323 pCodeBuf[off++] = 0x00;
6324 }
6325 else
6326 {
6327 int32_t offDisp = offLabel - (off + 2);
6328 if ((int8_t)offDisp == offDisp)
6329 {
6330 /* jcc rel8 */
6331 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6332 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6333 }
6334 else
6335 {
6336 /* jcc rel32 */
6337 offDisp -= 4;
6338 pCodeBuf[off++] = 0x0f;
6339 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6340 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6341 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6342 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6343 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6344 }
6345 }
6346
6347#elif defined(RT_ARCH_ARM64)
6348 if (offLabel >= off)
6349 {
6350 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6351 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6352 }
6353 else
6354 {
6355 Assert(off - offLabel <= 0x3ffffU);
6356 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6357 }
6358
6359#else
6360# error "Port me!"
6361#endif
6362 return off;
6363}
6364
6365
6366/**
6367 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6368 */
6369DECL_INLINE_THROW(uint32_t)
6370iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6371{
6372#ifdef RT_ARCH_AMD64
6373 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6374#elif defined(RT_ARCH_ARM64)
6375 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6376#else
6377# error "Port me!"
6378#endif
6379 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6380 return off;
6381}
6382
6383
6384/**
6385 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6386 */
6387DECL_INLINE_THROW(uint32_t)
6388iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6389 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6390{
6391 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6392 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6393}
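
/* Editorial usage sketch (illustration only, not part of the emitter API): how the
   label machinery combines with the conditional jump emitters above.  The label type
   and the pReNative/off context are placeholders assumed for the example. */
//  uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, 0);
//  /* Branch when the preceding test/cmp set ZF; thanks to the mapping above,
//     kIemNativeInstrCond_e resolves to kArmv8InstrCond_Eq on ARM64 as well. */
//  off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);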
6394
6395
6396/**
6397 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6398 */
6399DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6400{
6401#ifdef RT_ARCH_AMD64
6402 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6403#elif defined(RT_ARCH_ARM64)
6404 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6405#else
6406# error "Port me!"
6407#endif
6408}
6409
6410/**
6411 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6412 */
6413DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6414 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6415{
6416#ifdef RT_ARCH_AMD64
6417 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6418#elif defined(RT_ARCH_ARM64)
6419 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6420#else
6421# error "Port me!"
6422#endif
6423}
6424
6425
6426/**
6427 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6428 */
6429DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6430{
6431#ifdef RT_ARCH_AMD64
6432 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6433#elif defined(RT_ARCH_ARM64)
6434 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6435#else
6436# error "Port me!"
6437#endif
6438}
6439
6440/**
6441 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6442 */
6443DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6444 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6445{
6446#ifdef RT_ARCH_AMD64
6447 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6448#elif defined(RT_ARCH_ARM64)
6449 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6450#else
6451# error "Port me!"
6452#endif
6453}
6454
6455
6456/**
6457 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6458 */
6459DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6460{
6461#ifdef RT_ARCH_AMD64
6462 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6463#elif defined(RT_ARCH_ARM64)
6464 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6465#else
6466# error "Port me!"
6467#endif
6468}
6469
6470/**
6471 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6472 */
6473DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6474 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6475{
6476#ifdef RT_ARCH_AMD64
6477 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6478#elif defined(RT_ARCH_ARM64)
6479 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6480#else
6481# error "Port me!"
6482#endif
6483}
6484
6485
6486/**
6487 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6488 */
6489DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6490{
6491#ifdef RT_ARCH_AMD64
6492 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6493#elif defined(RT_ARCH_ARM64)
6494 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6495#else
6496# error "Port me!"
6497#endif
6498}
6499
6500/**
6501 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6502 */
6503DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6504 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6505{
6506#ifdef RT_ARCH_AMD64
6507 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6508#elif defined(RT_ARCH_ARM64)
6509 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6510#else
6511# error "Port me!"
6512#endif
6513}
6514
6515
6516/**
6517 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6518 */
6519DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6520{
6521#ifdef RT_ARCH_AMD64
6522 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6523#elif defined(RT_ARCH_ARM64)
6524 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6525#else
6526# error "Port me!"
6527#endif
6528}
6529
6530/**
6531 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6532 */
6533DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6534 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6535{
6536#ifdef RT_ARCH_AMD64
6537 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6538#elif defined(RT_ARCH_ARM64)
6539 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6540#else
6541# error "Port me!"
6542#endif
6543}
6544
6545
6546/**
6547 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6548 *
6549 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6550 *
6551 * Only use hardcoded jumps forward when emitting for exactly one
6552 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6553 * the right target address on all platforms!
6554 *
6555 * Please also note that on x86 it is necessary to pass off + 256 or higher
6556 * for @a offTarget if one believes the intervening code is more than 127
6557 * bytes long.
6558 */
6559DECL_FORCE_INLINE(uint32_t)
6560iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6561{
6562#ifdef RT_ARCH_AMD64
6563 /* jcc rel8 / rel32 */
6564 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6565 if (offDisp < 128 && offDisp >= -128)
6566 {
6567 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6568 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6569 }
6570 else
6571 {
6572 offDisp -= 4;
6573 pCodeBuf[off++] = 0x0f;
6574 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6575 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6576 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6577 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6578 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6579 }
6580
6581#elif defined(RT_ARCH_ARM64)
6582 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6583
6584#else
6585# error "Port me!"
6586#endif
6587 return off;
6588}
6589
6590
6591/**
6592 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6593 *
6594 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6595 *
6596 * Only use hardcoded jumps forward when emitting for exactly one
6597 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6598 * the right target address on all platforms!
6599 *
6600 * Please also note that on x86 it is necessary to pass off + 256 or higher
6601 * for @a offTarget if one believes the intervening code is more than 127
6602 * bytes long.
6603 */
6604DECL_INLINE_THROW(uint32_t)
6605iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6606{
6607#ifdef RT_ARCH_AMD64
6608 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6609#elif defined(RT_ARCH_ARM64)
6610 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6611#else
6612# error "Port me!"
6613#endif
6614 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6615 return off;
6616}
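
/* Editorial usage sketch (illustration only, not part of the emitter API): the
   fixed-displacement forms are normally emitted forwards against a worst-case target
   and corrected afterwards with iemNativeFixupFixedJump(), which keeps the note above
   about passing off + 256 satisfied on x86 while remaining portable.  The context
   variables are placeholders. */
//  uint32_t const offFixup = off;
//  off = iemNativeEmitJzToFixed(pReNative, off, off + 256 /* worst case, forces rel32 on x86 */);
//  /* ... emit the code being jumped over ... */
//  iemNativeFixupFixedJump(pReNative, offFixup, off);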
6617
6618
6619/**
6620 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6621 *
6622 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6623 */
6624DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6625{
6626#ifdef RT_ARCH_AMD64
6627 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6628#elif defined(RT_ARCH_ARM64)
6629 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6630#else
6631# error "Port me!"
6632#endif
6633}
6634
6635
6636/**
6637 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6638 *
6639 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6640 */
6641DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6642{
6643#ifdef RT_ARCH_AMD64
6644 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6645#elif defined(RT_ARCH_ARM64)
6646 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6647#else
6648# error "Port me!"
6649#endif
6650}
6651
6652
6653/**
6654 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6655 *
6656 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6657 */
6658DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6659{
6660#ifdef RT_ARCH_AMD64
6661 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6662#elif defined(RT_ARCH_ARM64)
6663 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6664#else
6665# error "Port me!"
6666#endif
6667}
6668
6669
6670/**
6671 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6672 *
6673 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6674 */
6675DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6676{
6677#ifdef RT_ARCH_AMD64
6678 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6679#elif defined(RT_ARCH_ARM64)
6680 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6681#else
6682# error "Port me!"
6683#endif
6684}
6685
6686
6687/**
6688 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6689 *
6690 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6691 */
6692DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6693{
6694#ifdef RT_ARCH_AMD64
6695 /* jmp rel8 or rel32 */
6696 int32_t offDisp = offTarget - (off + 2);
6697 if (offDisp < 128 && offDisp >= -128)
6698 {
6699 pCodeBuf[off++] = 0xeb;
6700 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6701 }
6702 else
6703 {
6704 offDisp -= 3;
6705 pCodeBuf[off++] = 0xe9;
6706 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6707 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6708 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6709 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6710 }
6711
6712#elif defined(RT_ARCH_ARM64)
6713 pCodeBuf[off++] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6714
6715#else
6716# error "Port me!"
6717#endif
6718 return off;
6719}
6720
6721
6722/**
6723 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6724 *
6725 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6726 */
6727DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6728{
6729#ifdef RT_ARCH_AMD64
6730 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6731#elif defined(RT_ARCH_ARM64)
6732 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6733#else
6734# error "Port me!"
6735#endif
6736 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6737 return off;
6738}
6739
6740
6741/**
6742 * Fixes up a conditional jump to a fixed label.
6743 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6744 * iemNativeEmitJzToFixed, ...
6745 */
6746DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6747{
6748#ifdef RT_ARCH_AMD64
6749 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6750 uint8_t const bOpcode = pbCodeBuf[offFixup];
6751 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6752 {
6753 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6754 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6755 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6756 }
6757 else
6758 {
6759 if (bOpcode != 0x0f)
6760 Assert(bOpcode == 0xe9);
6761 else
6762 {
6763 offFixup += 1;
6764 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
6765 }
6766 uint32_t const offRel32 = offTarget - (offFixup + 5);
6767 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6768 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6769 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6770 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6771 }
6772
6773#elif defined(RT_ARCH_ARM64)
6774 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6775 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6776 {
6777 /* B.COND + BC.COND */
6778 int32_t const offDisp = offTarget - offFixup;
6779 Assert(offDisp >= -262144 && offDisp < 262144);
6780 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6781 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6782 }
6783 else
6784 {
6785 /* B imm26 */
6786 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6787 int32_t const offDisp = offTarget - offFixup;
6788 Assert(offDisp >= -33554432 && offDisp < 33554432);
6789 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6790 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6791 }
6792
6793#else
6794# error "Port me!"
6795#endif
6796}
6797
6798
6799#ifdef RT_ARCH_AMD64
6800/**
6801 * For doing bt on a register.
6802 */
6803DECL_INLINE_THROW(uint32_t)
6804iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6805{
6806 Assert(iBitNo < 64);
6807 /* bt Ev, imm8 */
6808 if (iBitNo >= 32)
6809 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6810 else if (iGprSrc >= 8)
6811 pCodeBuf[off++] = X86_OP_REX_B;
6812 pCodeBuf[off++] = 0x0f;
6813 pCodeBuf[off++] = 0xba;
6814 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6815 pCodeBuf[off++] = iBitNo;
6816 return off;
6817}
6818#endif /* RT_ARCH_AMD64 */
6819
6820
6821/**
6822 * Internal helper, don't call directly.
6823 */
6824DECL_INLINE_THROW(uint32_t)
6825iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6826 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6827{
6828 Assert(iBitNo < 64);
6829#ifdef RT_ARCH_AMD64
6830 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6831 if (iBitNo < 8)
6832 {
6833 /* test Eb, imm8 */
6834 if (iGprSrc >= 4)
6835 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6836 pbCodeBuf[off++] = 0xf6;
6837 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6838 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6839 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6840 }
6841 else
6842 {
6843 /* bt Ev, imm8 */
6844 if (iBitNo >= 32)
6845 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6846 else if (iGprSrc >= 8)
6847 pbCodeBuf[off++] = X86_OP_REX_B;
6848 pbCodeBuf[off++] = 0x0f;
6849 pbCodeBuf[off++] = 0xba;
6850 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6851 pbCodeBuf[off++] = iBitNo;
6852 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6853 }
6854
6855#elif defined(RT_ARCH_ARM64)
6856 /* Use the TBNZ instruction here. */
6857 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6858 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
6859 {
6860 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
6861 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
6862 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
6863 //if (offLabel == UINT32_MAX)
6864 {
6865 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6866 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6867 }
6868 //else
6869 //{
6870 // RT_BREAKPOINT();
6871 // Assert(off - offLabel <= 0x1fffU);
6872 // pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
6873 //
6874 //}
6875 }
6876 else
6877 {
6878 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
6879 pu32CodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
6880 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6881 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
6882 }
6883
6884#else
6885# error "Port me!"
6886#endif
6887 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6888 return off;
6889}
6890
6891
6892/**
6893 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
6894 * @a iGprSrc.
6895 *
6896 * @note On ARM64 the range is only +/-8191 instructions.
6897 */
6898DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6899 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6900{
6901 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
6902}
6903
6904
6905/**
6906 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
6907 * _set_ in @a iGprSrc.
6908 *
6909 * @note On ARM64 the range is only +/-8191 instructions.
6910 */
6911DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6912 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6913{
6914 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
6915}
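
/* Editorial usage sketch (illustration only, not part of the emitter API): branching
   on a single flag bit, e.g. the interrupt flag in a register holding the guest
   EFLAGS.  On ARM64 this becomes TBZ/TBNZ with the +/-8191 instruction range noted
   above, so the label should be nearby.  idxEflReg and idxLabel are placeholders. */
//  off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg,
//                                                    X86_EFL_IF_BIT, idxLabel);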
6916
6917
6918/**
6919 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
6920 * flags accordingly.
6921 */
6922DECL_INLINE_THROW(uint32_t)
6923iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
6924{
6925 Assert(fBits != 0);
6926#ifdef RT_ARCH_AMD64
6927
6928 if (fBits >= UINT32_MAX)
6929 {
6930 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6931
6932 /* test Ev,Gv */
6933 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6934 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
6935 pbCodeBuf[off++] = 0x85;
6936 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
6937
6938 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6939 }
6940 else if (fBits <= UINT32_MAX)
6941 {
6942 /* test Eb, imm8 or test Ev, imm32 */
6943 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6944 if (fBits <= UINT8_MAX)
6945 {
6946 if (iGprSrc >= 4)
6947 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6948 pbCodeBuf[off++] = 0xf6;
6949 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6950 pbCodeBuf[off++] = (uint8_t)fBits;
6951 }
6952 else
6953 {
6954 if (iGprSrc >= 8)
6955 pbCodeBuf[off++] = X86_OP_REX_B;
6956 pbCodeBuf[off++] = 0xf7;
6957 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6958 pbCodeBuf[off++] = RT_BYTE1(fBits);
6959 pbCodeBuf[off++] = RT_BYTE2(fBits);
6960 pbCodeBuf[off++] = RT_BYTE3(fBits);
6961 pbCodeBuf[off++] = RT_BYTE4(fBits);
6962 }
6963 }
6964 /** @todo implement me. */
6965 else
6966 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
6967
6968#elif defined(RT_ARCH_ARM64)
6969 uint32_t uImmR = 0;
6970 uint32_t uImmNandS = 0;
6971 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
6972 {
6973 /* ands xzr, iGprSrc, #fBits */
6974 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6975 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
6976 }
6977 else
6978 {
6979 /* ands xzr, iGprSrc, iTmpReg */
6980 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6981 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6982 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
6983 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6984 }
6985
6986#else
6987# error "Port me!"
6988#endif
6989 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6990 return off;
6991}
6992
6993
6994/**
6995 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
6996 * @a iGprSrc, setting CPU flags accordingly.
6997 *
6998 * @note For ARM64 this only supports @a fBits values that can be expressed
6999 * using the two 6-bit immediates of the ANDS instruction. The caller
7000 * must make sure this is possible!
7001 */
7002DECL_FORCE_INLINE_THROW(uint32_t)
7003iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
7004{
7005 Assert(fBits != 0);
7006
7007#ifdef RT_ARCH_AMD64
7008 if (fBits <= UINT8_MAX)
7009 {
7010 /* test Eb, imm8 */
7011 if (iGprSrc >= 4)
7012 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7013 pCodeBuf[off++] = 0xf6;
7014 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7015 pCodeBuf[off++] = (uint8_t)fBits;
7016 }
7017 else
7018 {
7019 /* test Ev, imm32 */
7020 if (iGprSrc >= 8)
7021 pCodeBuf[off++] = X86_OP_REX_B;
7022 pCodeBuf[off++] = 0xf7;
7023 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7024 pCodeBuf[off++] = RT_BYTE1(fBits);
7025 pCodeBuf[off++] = RT_BYTE2(fBits);
7026 pCodeBuf[off++] = RT_BYTE3(fBits);
7027 pCodeBuf[off++] = RT_BYTE4(fBits);
7028 }
7029
7030#elif defined(RT_ARCH_ARM64)
7031 /* ands xzr, src, #fBits */
7032 uint32_t uImmR = 0;
7033 uint32_t uImmNandS = 0;
7034 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7035 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7036 else
7037# ifdef IEM_WITH_THROW_CATCH
7038 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7039# else
7040 AssertReleaseFailedStmt(off = UINT32_MAX);
7041# endif
7042
7043#else
7044# error "Port me!"
7045#endif
7046 return off;
7047}
7048
7049
7050
7051/**
7052 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7053 * @a iGprSrc, setting CPU flags accordingly.
7054 *
7055 * @note For ARM64 this only supports @a fBits values that can be expressed
7056 * using the two 6-bit immediates of the ANDS instruction. The caller
7057 * must make sure this is possible!
7058 */
7059DECL_FORCE_INLINE_THROW(uint32_t)
7060iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7061{
7062 Assert(fBits != 0);
7063
7064#ifdef RT_ARCH_AMD64
7065 /* test Eb, imm8 */
7066 if (iGprSrc >= 4)
7067 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7068 pCodeBuf[off++] = 0xf6;
7069 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7070 pCodeBuf[off++] = fBits;
7071
7072#elif defined(RT_ARCH_ARM64)
7073 /* ands xzr, src, #fBits */
7074 uint32_t uImmR = 0;
7075 uint32_t uImmNandS = 0;
7076 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7077 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7078 else
7079# ifdef IEM_WITH_THROW_CATCH
7080 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7081# else
7082 AssertReleaseFailedStmt(off = UINT32_MAX);
7083# endif
7084
7085#else
7086# error "Port me!"
7087#endif
7088 return off;
7089}
7090
7091
7092/**
7093 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7094 * @a iGprSrc, setting CPU flags accordingly.
7095 */
7096DECL_INLINE_THROW(uint32_t)
7097iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7098{
7099 Assert(fBits != 0);
7100
7101#ifdef RT_ARCH_AMD64
7102 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7103
7104#elif defined(RT_ARCH_ARM64)
7105 /* ands xzr, src, [tmp|#imm] */
7106 uint32_t uImmR = 0;
7107 uint32_t uImmNandS = 0;
7108 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7109 {
7110 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7111 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7112 }
7113 else
7114 {
7115 /* Use temporary register for the 64-bit immediate. */
7116 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7117 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7118 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7119 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7120 }
7121
7122#else
7123# error "Port me!"
7124#endif
7125 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7126 return off;
7127}
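
/* Editorial note with a sketch (not part of the emitter API): on ARM64 the ...Ex
   variants above assert if @a fBits cannot be encoded as a logical immediate
   (essentially a repeated/rotated run of contiguous ones, e.g. 0x0f or 0xc0), while
   this non-Ex variant falls back to a temporary register.  Prefer the non-Ex form
   when the mask is not known to be encodable.  idxReg is a placeholder. */
//  off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxReg, 0xc0); /* works for any mask */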
7128
7129
7130/**
7131 * Emits a jump to @a idxLabel on the condition that _any_ of the bits in @a fBits
7132 * are set in @a iGprSrc.
7133 */
7134DECL_INLINE_THROW(uint32_t)
7135iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7136 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7137{
7138 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7139
7140 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7141 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7142
7143 return off;
7144}
7145
7146
7147/**
7148 * Emits a jump to @a idxLabel on the condition that _none_ of the bits in @a fBits
7149 * are set in @a iGprSrc.
7150 */
7151DECL_INLINE_THROW(uint32_t)
7152iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7153 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7154{
7155 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7156
7157 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7158 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7159
7160 return off;
7161}
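
/* Editorial usage sketch (illustration only, not part of the emitter API): note the
   assertions above - @a fBits must be non-zero and must not be a single bit; for
   single-bit tests the TestBitInGprAndJmpToLabel* emitters earlier in this file are
   the intended (and on ARM64 cheaper, TBZ/TBNZ based) route.  The mask and register
   values are placeholders. */
//  /* Jump if either of two hypothetical status bits is set: */
//  off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxReg,
//                                                           RT_BIT_64(3) | RT_BIT_64(5), idxLabel);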
7162
7163
7164/**
7165 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7166 *
7167 * The operand size is given by @a f64Bit.
7168 */
7169DECL_FORCE_INLINE_THROW(uint32_t)
7170iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7171 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7172{
7173 Assert(idxLabel < pReNative->cLabels);
7174
7175#ifdef RT_ARCH_AMD64
7176 /* test reg32,reg32 / test reg64,reg64 */
7177 if (f64Bit)
7178 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7179 else if (iGprSrc >= 8)
7180 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7181 pCodeBuf[off++] = 0x85;
7182 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7183
7184 /* jnz idxLabel */
7185 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7186 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7187
7188#elif defined(RT_ARCH_ARM64)
7189 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7190 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7191 iGprSrc, f64Bit);
7192 else
7193 {
7194 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7195 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7196 }
7197
7198#else
7199# error "Port me!"
7200#endif
7201 return off;
7202}
7203
7204
7205/**
7206 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7207 *
7208 * The operand size is given by @a f64Bit.
7209 */
7210DECL_FORCE_INLINE_THROW(uint32_t)
7211iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7212 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7213{
7214#ifdef RT_ARCH_AMD64
7215 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7216 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7217#elif defined(RT_ARCH_ARM64)
7218 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7219 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7220#else
7221# error "Port me!"
7222#endif
7223 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7224 return off;
7225}
7226
7227
7228/* if (Gpr1 == 0) Jmp idxLabel; */
7229
7230/**
7231 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7232 *
7233 * The operand size is given by @a f64Bit.
7234 */
7235DECL_FORCE_INLINE_THROW(uint32_t)
7236iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7237 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7238{
7239 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7240 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7241}
7242
7243
7244/**
7245 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7246 *
7247 * The operand size is given by @a f64Bit.
7248 */
7249DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7250 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7251{
7252 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7253}
7254
7255
7256/**
7257 * Emits code that jumps to a new label if @a iGprSrc is zero.
7258 *
7259 * The operand size is given by @a f64Bit.
7260 */
7261DECL_INLINE_THROW(uint32_t)
7262iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7263 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7264{
7265 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7266 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7267}
7268
7269
7270/* if (Gpr1 != 0) Jmp idxLabel; */
7271
7272/**
7273 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7274 *
7275 * The operand size is given by @a f64Bit.
7276 */
7277DECL_FORCE_INLINE_THROW(uint32_t)
7278iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7279 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7280{
7281 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7282 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7283}
7284
7285
7286/**
7287 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7288 *
7289 * The operand size is given by @a f64Bit.
7290 */
7291DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7292 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7293{
7294 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7295}
7296
7297
7298/**
7299 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7300 *
7301 * The operand size is given by @a f64Bit.
7302 */
7303DECL_INLINE_THROW(uint32_t)
7304iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7305 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7306{
7307 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7308 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7309}
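
/* Editorial usage sketch (illustration only, not part of the emitter API): a plain
   zero check on a 64-bit register; on ARM64 this is a single CBZ/CBNZ, on AMD64 a
   TEST followed by Jcc.  enmLabelType and idxPtrReg are placeholders. */
//  off = iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(pReNative, off, idxPtrReg,
//                                                     true /*f64Bit*/, enmLabelType);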
7310
7311
7312/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7313
7314/**
7315 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7316 * differs.
7317 */
7318DECL_INLINE_THROW(uint32_t)
7319iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7320 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7321{
7322 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7323 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7324 return off;
7325}
7326
7327
7328/**
7329 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differs.
7330 */
7331DECL_INLINE_THROW(uint32_t)
7332iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7333 uint8_t iGprLeft, uint8_t iGprRight,
7334 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7335{
7336 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7337 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7338}
7339
7340
7341/* if (Gpr != Imm) Jmp idxLabel; */
7342
7343/**
7344 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7345 */
7346DECL_INLINE_THROW(uint32_t)
7347iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7348 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7349{
7350 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7351 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7352 return off;
7353}
7354
7355
7356/**
7357 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7358 */
7359DECL_INLINE_THROW(uint32_t)
7360iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7361 uint8_t iGprSrc, uint64_t uImm,
7362 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7363{
7364 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7365 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7366}
7367
7368
7369/**
7370 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7371 * @a uImm.
7372 */
7373DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7374 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7375{
7376 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7377 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7378 return off;
7379}
7380
7381
7382/**
7383 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7384 * @a uImm.
7385 */
7386DECL_INLINE_THROW(uint32_t)
7387iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7388 uint8_t iGprSrc, uint32_t uImm,
7389 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7390{
7391 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7392 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7393}
7394
7395
7396/**
7397 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7398 * @a uImm.
7399 */
7400DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7401 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7402{
7403 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7404 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7405 return off;
7406}
7407
7408
7409/**
7410 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7411 * @a uImm.
7412 */
7413DECL_INLINE_THROW(uint32_t)
7414iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7415 uint8_t iGprSrc, uint16_t uImm,
7416 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7417{
7418 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7419 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7420}
7421
7422
7423/* if (Gpr == Imm) Jmp idxLabel; */
7424
7425/**
7426 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7427 */
7428DECL_INLINE_THROW(uint32_t)
7429iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7430 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7431{
7432 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7433 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7434 return off;
7435}
7436
7437
7438/**
7439 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7440 */
7441DECL_INLINE_THROW(uint32_t)
7442iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7443 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7444{
7445 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7446 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7447}
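
/* Editorial note with a sketch (not part of the emitter API): the Gpr/Gpr32/Gpr16
   flavours of these compare-and-jump helpers differ in how much of the register is
   compared (64, 32 or 16 bits), so the choice is semantic rather than an
   optimisation.  idxReg and idxLabel are placeholders. */
//  /* if ((uint32_t)reg == 42) goto idxLabel */
//  off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxReg, 42, idxLabel);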
7448
7449
7450/**
7451 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7452 */
7453DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7454 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7455{
7456 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7457 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7458 return off;
7459}
7460
7461
7462/**
7463 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7464 */
7465DECL_INLINE_THROW(uint32_t)
7466iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7467 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7468{
7469 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7470 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7471}
7472
7473
7474/**
7475 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7476 *
7477 * @note ARM64: Helper register is required (idxTmpReg).
7478 */
7479DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7480 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7481 uint8_t idxTmpReg = UINT8_MAX)
7482{
7483 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7484 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7485 return off;
7486}
7487
7488
7489/**
7490 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7491 *
7492 * @note ARM64: Helper register is required (idxTmpReg).
7493 */
7494DECL_INLINE_THROW(uint32_t)
7495iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7496 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7497 uint8_t idxTmpReg = UINT8_MAX)
7498{
7499 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7500 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7501}
7502
7503
7504/*********************************************************************************************************************************
7505* Calls. *
7506*********************************************************************************************************************************/
7507
7508/**
7509 * Emits a call to a 64-bit address.
7510 */
7511DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7512{
7513#ifdef RT_ARCH_AMD64
7514 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7515
7516 /* call rax */
7517 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7518 pbCodeBuf[off++] = 0xff;
7519 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7520
7521#elif defined(RT_ARCH_ARM64)
7522 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7523
7524 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7525 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7526
7527#else
7528# error "port me"
7529#endif
7530 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7531 return off;
7532}
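
/* Editorial usage sketch (illustration only, not part of the emitter API): calling a
   C helper.  The arguments must already sit in the native calling convention
   registers at this point (see the argument loading helpers below); the helper name
   is a placeholder. */
//  off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemSomeHelperFunction);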
7533
7534
7535/**
7536 * Emits code to load a stack variable into an argument GPR.
7537 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7538 */
7539DECL_FORCE_INLINE_THROW(uint32_t)
7540iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7541 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7542 bool fSpilledVarsInVolatileRegs = false)
7543{
7544 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7545 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7546 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7547
7548 uint8_t const idxRegVar = pVar->idxReg;
7549 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7550 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7551 || !fSpilledVarsInVolatileRegs ))
7552 {
7553 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7554 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7555 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7556 if (!offAddend)
7557 {
7558 if (idxRegArg != idxRegVar)
7559 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7560 }
7561 else
7562 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7563 }
7564 else
7565 {
7566 uint8_t const idxStackSlot = pVar->idxStackSlot;
7567 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7568 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7569 if (offAddend)
7570 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7571 }
7572 return off;
7573}
7574
7575
7576/**
7577 * Emits code to load a stack or immediate variable value into an argument GPR,
7578 * optionally with an addend.
7579 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7580 */
7581DECL_FORCE_INLINE_THROW(uint32_t)
7582iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7583 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7584 bool fSpilledVarsInVolatileRegs = false)
7585{
7586 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7587 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7588 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7589 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7590 else
7591 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7592 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7593 return off;
7594}
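
/* Editorial usage sketch (illustration only, not part of the emitter API): a typical
   helper-call setup loads each argument from its IEM variable and then issues the
   call.  IEMNATIVE_CALL_ARG1_GREG, idxVarArg1 and pfnCImpl are placeholders standing
   in for the real calling-convention macro and call arguments. */
//  off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarArg1);
//  off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);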
7595
7596
7597/**
7598 * Emits code to load the variable address into an argument GPR.
7599 *
7600 * This only works for uninitialized and stack variables.
7601 */
7602DECL_FORCE_INLINE_THROW(uint32_t)
7603iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7604 bool fFlushShadows)
7605{
7606 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7607 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7608 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7609 || pVar->enmKind == kIemNativeVarKind_Stack,
7610 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7611 AssertStmt(!pVar->fSimdReg,
7612 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7613
7614 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7615 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7616
7617 uint8_t const idxRegVar = pVar->idxReg;
7618 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7619 {
7620 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7621 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7622 Assert(pVar->idxReg == UINT8_MAX);
7623 }
7624 Assert( pVar->idxStackSlot != UINT8_MAX
7625 && pVar->idxReg == UINT8_MAX);
7626
7627 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7628}
7629
7630
7631#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7632/**
7633 * Emits code to load the variable address into an argument GPR.
7634 *
7635 * This is a special variant intended for SIMD variables only and only called
7636 * by the TLB miss path in the memory fetch/store code because there we pass
7637 * the value by reference and need both the register and stack depending on which
7638 * path is taken (TLB hit vs. miss).
7639 */
7640DECL_FORCE_INLINE_THROW(uint32_t)
7641iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7642 bool fSyncRegWithStack = true)
7643{
7644 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7645 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7646 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7647 || pVar->enmKind == kIemNativeVarKind_Stack,
7648 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7649 AssertStmt(pVar->fSimdReg,
7650 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7651 Assert( pVar->idxStackSlot != UINT8_MAX
7652 && pVar->idxReg != UINT8_MAX);
7653
7654 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7655 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7656
7657 uint8_t const idxRegVar = pVar->idxReg;
7658 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7659 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7660
7661 if (fSyncRegWithStack)
7662 {
7663 if (pVar->cbVar == sizeof(RTUINT128U))
7664 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
7665 else
7666 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
7667 }
7668
7669 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7670}
7671
7672
7673/**
7674 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
7675 *
7676 * This is a special helper only called by the TLB miss path in the memory
7677 * fetch/store code: the value is passed by reference there, so after a TLB
7678 * miss, where the value ends up on the stack, the stack copy has to be synced
7679 * back into the assigned host register.
7680 */
7681DECL_FORCE_INLINE_THROW(uint32_t)
7682iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
7683{
7684 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7685 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7686 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7687 || pVar->enmKind == kIemNativeVarKind_Stack,
7688 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7689 AssertStmt(pVar->fSimdReg,
7690 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7691 Assert( pVar->idxStackSlot != UINT8_MAX
7692 && pVar->idxReg != UINT8_MAX);
7693
7694 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7695 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7696
7697 uint8_t const idxRegVar = pVar->idxReg;
7698 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7699 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7700
7701 if (pVar->cbVar == sizeof(RTUINT128U))
7702 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
7703 else
7704 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
7705
7706 return off;
7707}
7708
7709
7710/**
7711 * Emits a gprdst = ~gprsrc store.
7712 */
7713DECL_FORCE_INLINE_THROW(uint32_t)
7714iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7715{
7716#ifdef RT_ARCH_AMD64
7717 if (iGprDst != iGprSrc)
7718 {
7719 /* mov gprdst, gprsrc. */
7720 if (f64Bit)
7721 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7722 else
7723 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7724 }
7725
7726 /* not gprdst */
7727 if (f64Bit || iGprDst >= 8)
7728 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7729 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7730 pCodeBuf[off++] = 0xf7;
7731 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7732#elif defined(RT_ARCH_ARM64)
7733 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit); /* orn dst, xzr, src - the mvn alias, i.e. dst = ~src */
7734#else
7735# error "port me"
7736#endif
7737 return off;
7738}
7739
7740
7741/**
7742 * Emits a gprdst = ~gprsrc store.
7743 */
7744DECL_INLINE_THROW(uint32_t)
7745iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7746{
7747#ifdef RT_ARCH_AMD64
7748 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7749#elif defined(RT_ARCH_ARM64)
7750 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7751#else
7752# error "port me"
7753#endif
7754 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7755 return off;
7756}
7757
7758
7759/**
7760 * Emits a 128-bit vector register store to a VCpu value.
7761 */
7762DECL_FORCE_INLINE_THROW(uint32_t)
7763iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7764{
7765#ifdef RT_ARCH_AMD64
7766 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7767 pCodeBuf[off++] = 0x66;
7768 if (iVecReg >= 8)
7769 pCodeBuf[off++] = X86_OP_REX_R;
7770 pCodeBuf[off++] = 0x0f;
7771 pCodeBuf[off++] = 0x7f;
7772 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7773#elif defined(RT_ARCH_ARM64)
7774 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7775
7776#else
7777# error "port me"
7778#endif
7779 return off;
7780}
7781
7782
7783/**
7784 * Emits a 128-bit vector register store to a VCpu value.
7785 */
7786DECL_INLINE_THROW(uint32_t)
7787iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7788{
7789#ifdef RT_ARCH_AMD64
7790 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7791#elif defined(RT_ARCH_ARM64)
7792 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7793#else
7794# error "port me"
7795#endif
7796 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7797 return off;
7798}
7799
7800
7801/**
7802 * Emits a high 128-bit vector register store to a VCpu value.
7803 */
7804DECL_FORCE_INLINE_THROW(uint32_t)
7805iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7806{
7807#ifdef RT_ARCH_AMD64
7808 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7809 pCodeBuf[off++] = X86_OP_VEX3;
7810 if (iVecReg >= 8)
7811 pCodeBuf[off++] = 0x63;
7812 else
7813 pCodeBuf[off++] = 0xe3;
7814 pCodeBuf[off++] = 0x7d;
7815 pCodeBuf[off++] = 0x39;
7816 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7817 pCodeBuf[off++] = 0x01; /* Immediate */
7818#elif defined(RT_ARCH_ARM64)
7819 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7820#else
7821# error "port me"
7822#endif
7823 return off;
7824}
7825
7826
7827/**
7828 * Emits a high 128-bit vector register store to a VCpu value.
7829 */
7830DECL_INLINE_THROW(uint32_t)
7831iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7832{
7833#ifdef RT_ARCH_AMD64
7834 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7835#elif defined(RT_ARCH_ARM64)
7836 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7837 Assert(!(iVecReg & 0x1));
7838 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7839#else
7840# error "port me"
7841#endif
7842 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7843 return off;
7844}
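
/* Editorial usage sketch (illustration only, not part of the emitter API): storing a
   full 256-bit value by combining the low and high 128-bit emitters.  On AMD64 the
   high half ASSUMES AVX2, on ARM64 it ASSUMES an adjacent even/odd register pair
   (see above).  The CPUMCTX offsets are placeholders. */
//  off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, iVecReg, offVCpuLo);
//  off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, iVecReg, offVCpuHi);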
7845
7846
7847/**
7848 * Emits a 128-bit vector register load of a VCpu value.
7849 */
7850DECL_FORCE_INLINE_THROW(uint32_t)
7851iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7852{
7853#ifdef RT_ARCH_AMD64
7854 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
7855 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7856 if (iVecReg >= 8)
7857 pCodeBuf[off++] = X86_OP_REX_R;
7858 pCodeBuf[off++] = 0x0f;
7859 pCodeBuf[off++] = 0x6f;
7860 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7861#elif defined(RT_ARCH_ARM64)
7862 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7863
7864#else
7865# error "port me"
7866#endif
7867 return off;
7868}
7869
7870
7871/**
7872 * Emits a 128-bit vector register load of a VCpu value.
7873 */
7874DECL_INLINE_THROW(uint32_t)
7875iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7876{
7877#ifdef RT_ARCH_AMD64
7878 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7879#elif defined(RT_ARCH_ARM64)
7880 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7881#else
7882# error "port me"
7883#endif
7884 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7885 return off;
7886}
7887
7888
7889/**
7890 * Emits a high 128-bit vector register load of a VCpu value.
7891 */
7892DECL_FORCE_INLINE_THROW(uint32_t)
7893iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7894{
7895#ifdef RT_ARCH_AMD64
7896 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
7897 pCodeBuf[off++] = X86_OP_VEX3;
7898 if (iVecReg >= 8)
7899 pCodeBuf[off++] = 0x63;
7900 else
7901 pCodeBuf[off++] = 0xe3;
7902 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
7903 pCodeBuf[off++] = 0x38;
7904 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7905 pCodeBuf[off++] = 0x01; /* Immediate */
7906#elif defined(RT_ARCH_ARM64)
7907 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7908#else
7909# error "port me"
7910#endif
7911 return off;
7912}
7913
7914
7915/**
7916 * Emits a high 128-bit vector register load of a VCpu value.
7917 */
7918DECL_INLINE_THROW(uint32_t)
7919iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7920{
7921#ifdef RT_ARCH_AMD64
7922 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7923#elif defined(RT_ARCH_ARM64)
7924 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7925 Assert(!(iVecReg & 0x1));
7926 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7927#else
7928# error "port me"
7929#endif
7930 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7931 return off;
7932}
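
/* Usage sketch (hypothetical offsets, even host register number as the ARM64
   path requires): reloading the full 256 bits mirrors the store case:
       off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128( pReNative, off, 2, offVCpuYmmLo);
       off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, 2, offVCpuYmmHi);
   On AMD64 the high-half load is a vinserti128 into ymm2; on ARM64 it goes to q3. */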
7933
7934
7935/**
7936 * Emits a vecdst = vecsrc load.
7937 */
7938DECL_FORCE_INLINE(uint32_t)
7939iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7940{
7941#ifdef RT_ARCH_AMD64
7942 /* movdqu vecdst, vecsrc */
7943 pCodeBuf[off++] = 0xf3;
7944
7945 if ((iVecRegDst | iVecRegSrc) >= 8)
7946 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
7947 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
7948 : X86_OP_REX_R;
7949 pCodeBuf[off++] = 0x0f;
7950 pCodeBuf[off++] = 0x6f;
7951 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7952
7953#elif defined(RT_ARCH_ARM64)
7954 /* mov dst, src; alias for: orr dst, src, src */
7955 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
7956
7957#else
7958# error "port me"
7959#endif
7960 return off;
7961}
7962
7963
7964/**
7965 * Emits a vecdst = vecsrc load, 128-bit.
7966 */
7967DECL_INLINE_THROW(uint32_t)
7968iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7969{
7970#ifdef RT_ARCH_AMD64
7971 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
7972#elif defined(RT_ARCH_ARM64)
7973 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
7974#else
7975# error "port me"
7976#endif
7977 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7978 return off;
7979}
7980
7981
7982/**
7983 * Emits a vecdst = vecsrc load, 256-bit.
7984 */
7985DECL_INLINE_THROW(uint32_t)
7986iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7987{
7988#ifdef RT_ARCH_AMD64
7989 /* vmovdqa ymm, ymm */
7990 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7991 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
7992 {
7993 pbCodeBuf[off++] = X86_OP_VEX3;
7994 pbCodeBuf[off++] = 0x41;
7995 pbCodeBuf[off++] = 0x7d;
7996 pbCodeBuf[off++] = 0x6f;
7997 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7998 }
7999 else
8000 {
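        /* Two-byte VEX can only carry the ModRM.reg extension (R), so when only the
           source register is >= 8 we use the store form (0x7f) and swap the ModRM
           operands so the extended register ends up in the reg field. */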
8001 pbCodeBuf[off++] = X86_OP_VEX2;
8002 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
8003 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
8004 pbCodeBuf[off++] = iVecRegSrc >= 8
8005 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
8006 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8007 }
8008#elif defined(RT_ARCH_ARM64)
8009 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8010 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
8011 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
8012 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
8013#else
8014# error "port me"
8015#endif
8016 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8017 return off;
8018}
8019
8020
8021/**
8022 * Emits a vecdst[128:255] = vecsrc[0:127] load.
8023 */
8024DECL_FORCE_INLINE(uint32_t)
8025iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8026{
8027#ifdef RT_ARCH_AMD64
8028 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
8029 pCodeBuf[off++] = X86_OP_VEX3;
8030 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8031 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8032 pCodeBuf[off++] = 0x38;
8033 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8034 pCodeBuf[off++] = 0x01; /* Immediate */
8035
8036#elif defined(RT_ARCH_ARM64)
8037 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8038 /* mov dst, src; alias for: orr dst, src, src */
8039 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
8040
8041#else
8042# error "port me"
8043#endif
8044 return off;
8045}
8046
8047
8048/**
8049 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
8050 */
8051DECL_INLINE_THROW(uint32_t)
8052iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8053{
8054#ifdef RT_ARCH_AMD64
8055 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8056#elif defined(RT_ARCH_ARM64)
8057 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8058#else
8059# error "port me"
8060#endif
8061 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8062 return off;
8063}
8064
8065
8066/**
8067 * Emits a gprdst = vecsrc[x] load, 64-bit.
8068 */
8069DECL_FORCE_INLINE(uint32_t)
8070iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8071{
8072#ifdef RT_ARCH_AMD64
8073 if (iQWord >= 2)
8074 {
8075 /** @todo Currently not used. */
8076 AssertReleaseFailed();
8077 }
8078 else
8079 {
8080 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
8081 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8082 pCodeBuf[off++] = X86_OP_REX_W
8083 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8084 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8085 pCodeBuf[off++] = 0x0f;
8086 pCodeBuf[off++] = 0x3a;
8087 pCodeBuf[off++] = 0x16;
8088 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8089 pCodeBuf[off++] = iQWord;
8090 }
8091#elif defined(RT_ARCH_ARM64)
8092 /* umov gprdst, vecsrc[iQWord] */
8093 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8094#else
8095# error "port me"
8096#endif
8097 return off;
8098}
8099
8100
8101/**
8102 * Emits a gprdst = vecsrc[x] load, 64-bit.
8103 */
8104DECL_INLINE_THROW(uint32_t)
8105iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8106{
8107 Assert(iQWord <= 3);
8108
8109#ifdef RT_ARCH_AMD64
8110 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
8111#elif defined(RT_ARCH_ARM64)
8112 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8113 Assert(!(iVecRegSrc & 0x1));
8114 /* Need to access the "high" 128-bit vector register. */
8115 if (iQWord >= 2)
8116 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
8117 else
8118 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
8119#else
8120# error "port me"
8121#endif
8122 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8123 return off;
8124}
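
/* Usage sketch (idxGprTmp and the vector register number are hypothetical):
   fetch bits 127:64 of a guest vector value into a host GPR:
       off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGprTmp, 4 /*iVecRegSrc*/, 1 /*iQWord*/);
   iQWord 2 and 3 select the high 128-bit half; only the ARM64 path implements
   those here, the AMD64 worker asserts (AssertReleaseFailed). */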
8125
8126
8127/**
8128 * Emits a gprdst = vecsrc[x] load, 32-bit.
8129 */
8130DECL_FORCE_INLINE(uint32_t)
8131iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8132{
8133#ifdef RT_ARCH_AMD64
8134 if (iDWord >= 4)
8135 {
8136 /** @todo Currently not used. */
8137 AssertReleaseFailed();
8138 }
8139 else
8140 {
8141 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
8142 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8143 if (iGprDst >= 8 || iVecRegSrc >= 8)
8144 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8145 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8146 pCodeBuf[off++] = 0x0f;
8147 pCodeBuf[off++] = 0x3a;
8148 pCodeBuf[off++] = 0x16;
8149 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8150 pCodeBuf[off++] = iDWord;
8151 }
8152#elif defined(RT_ARCH_ARM64)
8153 /* umov gprdst, vecsrc[iDWord] */
8154 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
8155#else
8156# error "port me"
8157#endif
8158 return off;
8159}
8160
8161
8162/**
8163 * Emits a gprdst = vecsrc[x] load, 32-bit.
8164 */
8165DECL_INLINE_THROW(uint32_t)
8166iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8167{
8168 Assert(iDWord <= 7);
8169
8170#ifdef RT_ARCH_AMD64
8171 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
8172#elif defined(RT_ARCH_ARM64)
8173 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8174 Assert(!(iVecRegSrc & 0x1));
8175 /* Need to access the "high" 128-bit vector register. */
8176 if (iDWord >= 4)
8177 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
8178 else
8179 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
8180#else
8181# error "port me"
8182#endif
8183 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8184 return off;
8185}
8186
8187
8188/**
8189 * Emits a gprdst = vecsrc[x] load, 16-bit.
8190 */
8191DECL_FORCE_INLINE(uint32_t)
8192iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8193{
8194#ifdef RT_ARCH_AMD64
8195 if (iWord >= 8)
8196 {
8197 /** @todo Currently not used. */
8198 AssertReleaseFailed();
8199 }
8200 else
8201 {
8202 /* pextrw gpr, vecsrc, #iWord */
8203 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8204 if (iGprDst >= 8 || iVecRegSrc >= 8)
8205 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
8206 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
8207 pCodeBuf[off++] = 0x0f;
8208 pCodeBuf[off++] = 0xc5;
8209 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
8210 pCodeBuf[off++] = iWord;
8211 }
8212#elif defined(RT_ARCH_ARM64)
8213 /* umov gprdst, vecsrc[iWord] */
8214 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
8215#else
8216# error "port me"
8217#endif
8218 return off;
8219}
8220
8221
8222/**
8223 * Emits a gprdst = vecsrc[x] load, 16-bit.
8224 */
8225DECL_INLINE_THROW(uint32_t)
8226iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8227{
8228 Assert(iWord <= 15);
8229
8230#ifdef RT_ARCH_AMD64
8231 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
8232#elif defined(RT_ARCH_ARM64)
8233 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8234 Assert(!(iVecRegSrc & 0x1));
8235 /* Need to access the "high" 128-bit vector register. */
8236 if (iWord >= 8)
8237 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
8238 else
8239 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
8240#else
8241# error "port me"
8242#endif
8243 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8244 return off;
8245}
8246
8247
8248/**
8249 * Emits a gprdst = vecsrc[x] load, 8-bit.
8250 */
8251DECL_FORCE_INLINE(uint32_t)
8252iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8253{
8254#ifdef RT_ARCH_AMD64
8255 if (iByte >= 16)
8256 {
8257 /** @todo Currently not used. */
8258 AssertReleaseFailed();
8259 }
8260 else
8261 {
8262 /* pextrb gpr, vecsrc, #iByte */
8263 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8264 if (iGprDst >= 8 || iVecRegSrc >= 8)
8265 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8266 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8267 pCodeBuf[off++] = 0x0f;
8268 pCodeBuf[off++] = 0x3a;
8269 pCodeBuf[off++] = 0x14;
8270 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8271 pCodeBuf[off++] = iByte;
8272 }
8273#elif defined(RT_ARCH_ARM64)
8274 /* umov gprdst, vecsrc[iByte] */
8275 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
8276#else
8277# error "port me"
8278#endif
8279 return off;
8280}
8281
8282
8283/**
8284 * Emits a gprdst = vecsrc[x] load, 8-bit.
8285 */
8286DECL_INLINE_THROW(uint32_t)
8287iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8288{
8289 Assert(iByte <= 31);
8290
8291#ifdef RT_ARCH_AMD64
8292 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
8293#elif defined(RT_ARCH_ARM64)
8294 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8295 Assert(!(iVecRegSrc & 0x1));
8296 /* Need to access the "high" 128-bit vector register. */
8297 if (iByte >= 16)
8298 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
8299 else
8300 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
8301#else
8302# error "port me"
8303#endif
8304 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8305 return off;
8306}
8307
8308
8309/**
8310 * Emits a vecdst[x] = gprsrc store, 64-bit.
8311 */
8312DECL_FORCE_INLINE(uint32_t)
8313iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8314{
8315#ifdef RT_ARCH_AMD64
8316 /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
8317 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8318 pCodeBuf[off++] = X86_OP_REX_W
8319 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8320 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8321 pCodeBuf[off++] = 0x0f;
8322 pCodeBuf[off++] = 0x3a;
8323 pCodeBuf[off++] = 0x22;
8324 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8325 pCodeBuf[off++] = iQWord;
8326#elif defined(RT_ARCH_ARM64)
8327 /* ins vecdst[iQWord], gpr */
8328 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8329#else
8330# error "port me"
8331#endif
8332 return off;
8333}
8334
8335
8336/**
8337 * Emits a vecdst[x] = gprsrc store, 64-bit.
8338 */
8339DECL_INLINE_THROW(uint32_t)
8340iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8341{
8342 Assert(iQWord <= 1);
8343
8344#ifdef RT_ARCH_AMD64
8345 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
8346#elif defined(RT_ARCH_ARM64)
8347 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
8348#else
8349# error "port me"
8350#endif
8351 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8352 return off;
8353}
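
/* Usage sketch (hypothetical register numbers): write a GPR into the low qword
   of a host vector register without touching the other lanes:
       off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, 4 /*iVecRegDst*/, idxGprValue, 0 /*iQWord*/);
   The assertion above limits iQWord to 0 or 1, i.e. the low 128 bits only. */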
8354
8355
8356/**
8357 * Emits a vecdst[x] = gprsrc store, 32-bit.
8358 */
8359DECL_FORCE_INLINE(uint32_t)
8360iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8361{
8362#ifdef RT_ARCH_AMD64
8363 /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
8364 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8365 if (iVecRegDst >= 8 || iGprSrc >= 8)
8366 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8367 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8368 pCodeBuf[off++] = 0x0f;
8369 pCodeBuf[off++] = 0x3a;
8370 pCodeBuf[off++] = 0x22;
8371 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8372 pCodeBuf[off++] = iDWord;
8373#elif defined(RT_ARCH_ARM64)
8374 /* ins vecdst[iDWord], gpr */
8375 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
8376#else
8377# error "port me"
8378#endif
8379 return off;
8380}
8381
8382
8383/**
8384 * Emits a vecdst[x] = gprsrc store, 32-bit.
8385 */
8386DECL_INLINE_THROW(uint32_t)
8387iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8388{
8389 Assert(iDWord <= 3);
8390
8391#ifdef RT_ARCH_AMD64
8392 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord);
8393#elif defined(RT_ARCH_ARM64)
8394 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
8395#else
8396# error "port me"
8397#endif
8398 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8399 return off;
8400}
8401
8402
8403/**
8404 * Emits a vecdst.au32[iDWord] = 0 store.
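 * @note The AMD64 variant clobbers IEMNATIVE_REG_FIXED_TMP0 (xor + pinsrd).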
8405 */
8406DECL_FORCE_INLINE(uint32_t)
8407iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8408{
8409 Assert(iDWord <= 7);
8410
8411#ifdef RT_ARCH_AMD64
8412 /*
8413 * xor tmp0, tmp0
8414 * pinsrd xmm, tmp0, iDword
8415 */
8416 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
8417 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8418 pCodeBuf[off++] = 0x33;
8419 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
8420 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(&pCodeBuf[off], off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
8421#elif defined(RT_ARCH_ARM64)
8422 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8423 Assert(!(iVecReg & 0x1));
8424 /* ins vecreg[iDWord], wzr */
8425 if (iDWord >= 4)
8426 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
8427 else
8428 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
8429#else
8430# error "port me"
8431#endif
8432 return off;
8433}
8434
8435
8436/**
8437 * Emits a vecdst.au32[iDWord] = 0 store.
8438 */
8439DECL_INLINE_THROW(uint32_t)
8440iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8441{
8442
8443#ifdef RT_ARCH_AMD64
8444 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
8445#elif defined(RT_ARCH_ARM64)
8446 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
8447#else
8448# error "port me"
8449#endif
8450 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8451 return off;
8452}
8453
8454
8455/**
8456 * Emits a vecdst[0:127] = 0 store.
8457 */
8458DECL_FORCE_INLINE(uint32_t)
8459iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8460{
8461#ifdef RT_ARCH_AMD64
8462 /* pxor xmm, xmm */
8463 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8464 if (iVecReg >= 8)
8465 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
8466 pCodeBuf[off++] = 0x0f;
8467 pCodeBuf[off++] = 0xef;
8468 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8469#elif defined(RT_ARCH_ARM64)
8470 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8471 Assert(!(iVecReg & 0x1));
8472 /* eor vecreg, vecreg, vecreg */
8473 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8474#else
8475# error "port me"
8476#endif
8477 return off;
8478}
8479
8480
8481/**
8482 * Emits a vecdst[0:127] = 0 store.
8483 */
8484DECL_INLINE_THROW(uint32_t)
8485iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8486{
8487#ifdef RT_ARCH_AMD64
8488 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8489#elif defined(RT_ARCH_ARM64)
8490 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8491#else
8492# error "port me"
8493#endif
8494 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8495 return off;
8496}
8497
8498
8499/**
8500 * Emits a vecdst[128:255] = 0 store.
8501 */
8502DECL_FORCE_INLINE(uint32_t)
8503iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8504{
8505#ifdef RT_ARCH_AMD64
8506 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
8507 if (iVecReg < 8)
8508 {
8509 pCodeBuf[off++] = X86_OP_VEX2;
8510 pCodeBuf[off++] = 0xf9;
8511 }
8512 else
8513 {
8514 pCodeBuf[off++] = X86_OP_VEX3;
8515 pCodeBuf[off++] = 0x41;
8516 pCodeBuf[off++] = 0x79;
8517 }
8518 pCodeBuf[off++] = 0x6f;
8519 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8520#elif defined(RT_ARCH_ARM64)
8521 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8522 Assert(!(iVecReg & 0x1));
8523 /* eor vecreg, vecreg, vecreg */
8524 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8525#else
8526# error "port me"
8527#endif
8528 return off;
8529}
8530
8531
8532/**
8533 * Emits a vecdst[128:255] = 0 store.
8534 */
8535DECL_INLINE_THROW(uint32_t)
8536iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8537{
8538#ifdef RT_ARCH_AMD64
8539 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
8540#elif defined(RT_ARCH_ARM64)
8541 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8542#else
8543# error "port me"
8544#endif
8545 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8546 return off;
8547}
8548
8549
8550/**
8551 * Emits a vecdst[0:255] = 0 store.
8552 */
8553DECL_FORCE_INLINE(uint32_t)
8554iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8555{
8556#ifdef RT_ARCH_AMD64
8557 /* vpxor ymm, ymm, ymm */
8558 if (iVecReg < 8)
8559 {
8560 pCodeBuf[off++] = X86_OP_VEX2;
8561 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8562 }
8563 else
8564 {
8565 pCodeBuf[off++] = X86_OP_VEX3;
8566 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
8567 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8568 }
8569 pCodeBuf[off++] = 0xef;
8570 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8571#elif defined(RT_ARCH_ARM64)
8572 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8573 Assert(!(iVecReg & 0x1));
8574 /* eor vecreg, vecreg, vecreg */
8575 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8576 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
8577#else
8578# error "port me"
8579#endif
8580 return off;
8581}
8582
8583
8584/**
8585 * Emits a vecdst[0:255] = 0 store.
8586 */
8587DECL_INLINE_THROW(uint32_t)
8588iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8589{
8590#ifdef RT_ARCH_AMD64
8591 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8592#elif defined(RT_ARCH_ARM64)
8593 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
8594#else
8595# error "port me"
8596#endif
8597 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8598 return off;
8599}
8600
8601
8602/**
8603 * Emits a vecdst = gprsrc broadcast, 8-bit.
8604 */
8605DECL_FORCE_INLINE(uint32_t)
8606iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8607{
8608#ifdef RT_ARCH_AMD64
8609 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE 4.1) */
8610 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8611 if (iVecRegDst >= 8 || iGprSrc >= 8)
8612 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8613 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8614 pCodeBuf[off++] = 0x0f;
8615 pCodeBuf[off++] = 0x3a;
8616 pCodeBuf[off++] = 0x20;
8617 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8618 pCodeBuf[off++] = 0x00;
8619
8620 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
8621 pCodeBuf[off++] = X86_OP_VEX3;
8622 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8623 | 0x02 /* opcode map. */
8624 | ( iVecRegDst >= 8
8625 ? 0
8626 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8627 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8628 pCodeBuf[off++] = 0x78;
8629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8630#elif defined(RT_ARCH_ARM64)
8631 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8632 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8633
8634 /* dup vecdst, gpr */
8635 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
8636 if (f256Bit)
8637 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
8638#else
8639# error "port me"
8640#endif
8641 return off;
8642}
8643
8644
8645/**
8646 * Emits a vecdst = gprsrc broadcast, 8-bit.
8647 */
8648DECL_INLINE_THROW(uint32_t)
8649iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8650{
8651#ifdef RT_ARCH_AMD64
8652 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8653#elif defined(RT_ARCH_ARM64)
8654 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8655#else
8656# error "port me"
8657#endif
8658 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8659 return off;
8660}
8661
8662
8663/**
8664 * Emits a vecdst = gprsrc broadcast, 16-bit.
8665 */
8666DECL_FORCE_INLINE(uint32_t)
8667iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8668{
8669#ifdef RT_ARCH_AMD64
8670 /* pinsrw vecdst, gpr, #0 */
8671 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8672 if (iVecRegDst >= 8 || iGprSrc >= 8)
8673 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8674 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8675 pCodeBuf[off++] = 0x0f;
8676 pCodeBuf[off++] = 0xc4;
8677 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8678 pCodeBuf[off++] = 0x00;
8679
8680 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
8681 pCodeBuf[off++] = X86_OP_VEX3;
8682 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8683 | 0x02 /* opcode map. */
8684 | ( iVecRegDst >= 8
8685 ? 0
8686 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8687 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8688 pCodeBuf[off++] = 0x79;
8689 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8690#elif defined(RT_ARCH_ARM64)
8691 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8692 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8693
8694 /* dup vecdst, gpr */
8695 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
8696 if (f256Bit)
8697 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
8698#else
8699# error "port me"
8700#endif
8701 return off;
8702}
8703
8704
8705/**
8706 * Emits a vecdst = gprsrc broadcast, 16-bit.
8707 */
8708DECL_INLINE_THROW(uint32_t)
8709iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8710{
8711#ifdef RT_ARCH_AMD64
8712 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8713#elif defined(RT_ARCH_ARM64)
8714 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8715#else
8716# error "port me"
8717#endif
8718 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8719 return off;
8720}
8721
8722
8723/**
8724 * Emits a vecdst = gprsrc broadcast, 32-bit.
8725 */
8726DECL_FORCE_INLINE(uint32_t)
8727iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8728{
8729#ifdef RT_ARCH_AMD64
8730 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
8731 * vbroadcast needs a memory operand or another xmm register to work... */
8732
8733 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
8734 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8735 if (iVecRegDst >= 8 || iGprSrc >= 8)
8736 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8737 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8738 pCodeBuf[off++] = 0x0f;
8739 pCodeBuf[off++] = 0x3a;
8740 pCodeBuf[off++] = 0x22;
8741 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8742 pCodeBuf[off++] = 0x00;
8743
8744 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
8745 pCodeBuf[off++] = X86_OP_VEX3;
8746 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8747 | 0x02 /* opcode map. */
8748 | ( iVecRegDst >= 8
8749 ? 0
8750 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8751 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8752 pCodeBuf[off++] = 0x58;
8753 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8754#elif defined(RT_ARCH_ARM64)
8755 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8756 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8757
8758 /* dup vecdst, gpr */
8759 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
8760 if (f256Bit)
8761 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
8762#else
8763# error "port me"
8764#endif
8765 return off;
8766}
8767
8768
8769/**
8770 * Emits a vecdst = gprsrc broadcast, 32-bit.
8771 */
8772DECL_INLINE_THROW(uint32_t)
8773iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8774{
8775#ifdef RT_ARCH_AMD64
8776 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8777#elif defined(RT_ARCH_ARM64)
8778 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8779#else
8780# error "port me"
8781#endif
8782 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8783 return off;
8784}
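
/* Usage sketch (hypothetical register numbers): splat a 32-bit GPR value across
   all eight dwords of a 256-bit destination (even host register required on ARM64):
       off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, 2 /*iVecRegDst*/, idxGprValue, true /*f256Bit*/);
   With f256Bit == false the ARM64 path leaves the odd companion register alone,
   while the AMD64 sequence still ends with a VEX.128 vpbroadcastd which, per the
   architecture, zeroes bits 255:128. */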
8785
8786
8787/**
8788 * Emits a vecdst = gprsrc broadcast, 64-bit.
8789 */
8790DECL_FORCE_INLINE(uint32_t)
8791iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8792{
8793#ifdef RT_ARCH_AMD64
8794 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
8795 * vbroadcast needs a memory operand or another xmm register to work... */
8796
8797 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
8798 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8799 pCodeBuf[off++] = X86_OP_REX_W
8800 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8801 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8802 pCodeBuf[off++] = 0x0f;
8803 pCodeBuf[off++] = 0x3a;
8804 pCodeBuf[off++] = 0x22;
8805 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8806 pCodeBuf[off++] = 0x00;
8807
8808 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
8809 pCodeBuf[off++] = X86_OP_VEX3;
8810 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8811 | 0x02 /* opcode map. */
8812 | ( iVecRegDst >= 8
8813 ? 0
8814 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8815 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8816 pCodeBuf[off++] = 0x59;
8817 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8818#elif defined(RT_ARCH_ARM64)
8819 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8820 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8821
8822 /* dup vecdst, gpr */
8823 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
8824 if (f256Bit)
8825 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
8826#else
8827# error "port me"
8828#endif
8829 return off;
8830}
8831
8832
8833/**
8834 * Emits a vecdst = gprsrc broadcast, 64-bit.
8835 */
8836DECL_INLINE_THROW(uint32_t)
8837iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8838{
8839#ifdef RT_ARCH_AMD64
8840 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
8841#elif defined(RT_ARCH_ARM64)
8842 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8843#else
8844# error "port me"
8845#endif
8846 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8847 return off;
8848}
8849
8850
8851/**
8852 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
8853 */
8854DECL_FORCE_INLINE(uint32_t)
8855iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8856{
8857#ifdef RT_ARCH_AMD64
8858 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
8859
8860 /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
8861 pCodeBuf[off++] = X86_OP_VEX3;
8862 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8863 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8864 pCodeBuf[off++] = 0x38;
8865 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8866 pCodeBuf[off++] = 0x01; /* Immediate */
8867#elif defined(RT_ARCH_ARM64)
8868 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8869 Assert(!(iVecRegDst & 0x1));
8870
8871 /* mov dst, src; alias for: orr dst, src, src */
8872 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
8873 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
8874#else
8875# error "port me"
8876#endif
8877 return off;
8878}
8879
8880
8881/**
8882 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
8883 */
8884DECL_INLINE_THROW(uint32_t)
8885iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8886{
8887#ifdef RT_ARCH_AMD64
8888 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
8889#elif defined(RT_ARCH_ARM64)
8890 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
8891#else
8892# error "port me"
8893#endif
8894 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8895 return off;
8896}
8897
8898#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8899
8900/** @} */
8901
8902#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
8903