/* $Id: IEMN8veRecompilerEmit.h 103849 2024-03-14 12:04:43Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline    Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = 0xd503201f;

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
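
/*
 * Illustrative usage sketch (not from the original file): drop a marker with
 * a recognizable payload in front of an emitter sequence so it can be spotted
 * when disassembling the translation block; 0xc0de0001 below is an arbitrary
 * tag value.  On AMD64 a non-zero uInfo ends up as the disp32 of the
 * 'nop [disp32]' instruction, on ARM64 it is discarded.
 *
 *     off = iemNativeEmitMarker(pReNative, off, UINT32_C(0xc0de0001));
 */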


/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
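
/*
 * Worked encoding example (illustrative): with iGprReg=RAX and iGprBase=RDI
 * the three paths above produce:
 *     offDisp == 0      -> modrm=07h                  [rdi]
 *     offDisp == 10h    -> modrm=47h, disp8=10h       [rdi+10h]
 *     offDisp == 1000h  -> modrm=87h, disp32          [rdi+1000h]
 * An RSP/R12 base additionally gets the 24h SIB byte, and an RBP/R13 base
 * always takes a displacement form, since mod=0 with rm=5 means RIP-relative
 * addressing on AMD64.
 */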
#endif /* RT_ARCH_AMD64 */

/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *     - AMD64: 10 instruction bytes.
 *     - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29).  So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
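    /*
     * Worked example (illustrative): uImm64=0x0000f00d00001234 has two zero
     * half-words, so the movz path below is taken and emits
     *     movz gpr, #0x1234             ; bits 15:0
     *     movk gpr, #0xf00d, lsl #32    ; bits 47:32
     * while uImm64=0xffffffffffff1234 is mostly ones and takes the movn path,
     * folding everything into a single 'movn gpr, #0xedcb' (~0x1234).
     */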
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = (uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

    /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
     *        clang 12.x does that, only to use the 'x' version for the
     *        addressing in the following ldr. */

#else
# error "port me"
#endif
    return off;
}
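
/*
 * Usage sketch for the Ex variant (illustrative): the caller batches the
 * iemNativeInstrBufEnsure() call for several emits, e.g. a constant load
 * followed by a breakpoint; 10 + 1 units covers the worst case on both
 * hosts (bytes on AMD64, instruction words on ARM64):
 *
 *     PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10 + 1);
 *     off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
 *     off = iemNativeEmitBrkEx(pCodeBuf, off, 0x1234);
 *     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 */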


/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *     - AMD64: 6 instruction bytes.
 *     - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32,                    0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16,             1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32,                   0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}
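
/*
 * Illustrative examples for the ARM64 cases above (each a single instruction
 * except the last):
 *     0x00001234 -> movz gpr, #0x1234
 *     0x12340000 -> movz gpr, #0x1234, lsl #16
 *     0x1234ffff -> movn gpr, #0xedcb, lsl #16
 *     0xffff1234 -> movn gpr, #0xedcb
 *     0x12345678 -> movz gpr, #0x5678 + movk gpr, #0x1234, lsl #16
 */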


/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * we try to use those if we can, otherwise we have to use a temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}
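
/*
 * Illustrative example of the three strategies above for an 8-byte load
 * (cbData=8, so the scaled unsigned immediate reaches 0..32760):
 *     small offVCpu            -> ldr reg, [<pVCpu>, #offVCpu]
 *     offVCpu near cpum.GstCtx -> ldr reg, [<pCpumCtx>, #(offVCpu - offGstCtx)]
 *     anything else            -> mov tmp, #offVCpu + ldr reg, [<pVCpu>, tmp]
 * where <pVCpu> and <pCpumCtx> stand for IEMNATIVE_REG_FIXED_PVMCPU and
 * IEMNATIVE_REG_FIXED_PCPUMCTX (the register assignments live in
 * IEMN8veRecompiler.h).
 */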

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * we try to use those if we can, otherwise we have to use a temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}
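
/*
 * Typical usage sketch (illustrative): offVCpu is normally formed with
 * RT_UOFFSETOF, e.g. fetching the guest RIP into a temporary register:
 *
 *     off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0,
 *                                           RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
 */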


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}



/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: @a idxTmp1 is always required!  @a idxTmp2 is needed when the
 *       offset cannot be encoded as an immediate.  The @a offVCpu immediate
 *       range is 0..8190 bytes from VMCPU and the same from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load effective address to a GPR of a VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                         offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}
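
/*
 * Usage sketch (illustrative): these helpers let the compiler check the
 * pointer type when forming the offset for the counter emitters below, e.g.
 *
 *     off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
 *               iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomeCounter));
 *
 * (StatSomeCounter is a placeholder member name for the example.)
 */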


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
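
/*
 * Note on the mask handling above (illustrative): the AArch64 logical-immediate
 * encoding only covers patterns that are a rotated, replicated run of ones.
 * Masks like 0x000000f0 or 0x0000fff0 therefore encode directly into the orr
 * instruction, whereas something like 0x00012345 cannot and falls back to
 * iemNativeRegAllocTmpImm() plus the register form of orr.
 */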


/**
 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* and dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_W | X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_W | X86_OP_REX_R;
    else
        pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst, src;   alias for: orr dst, xzr, src */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);

#else
# error "port me"
#endif
    return off;
}
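
/*
 * REX byte examples for the branches above (illustrative):
 *     mov rax, rcx -> 48 8b c1    (plain REX.W, no extended registers)
 *     mov rax, r9  -> 49 8b c1    (REX.W+B, extended source)
 *     mov r8,  rcx -> 4c 8b c1    (REX.W+R, extended destination)
 *     mov r8,  r9  -> 4d 8b c1    (REX.W+R+B)
 */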
1387
1388
1389/**
1390 * Emits a gprdst = gprsrc load.
1391 */
1392DECL_INLINE_THROW(uint32_t)
1393iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1394{
1395#ifdef RT_ARCH_AMD64
1396 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1397#elif defined(RT_ARCH_ARM64)
1398 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1399#else
1400# error "port me"
1401#endif
1402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1403 return off;
1404}
1405
1406
1407/**
1408 * Emits a gprdst = gprsrc[31:0] load.
1409 * @note Bits 63 thru 32 are cleared.
1410 */
1411DECL_FORCE_INLINE(uint32_t)
1412iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1413{
1414#ifdef RT_ARCH_AMD64
1415 /* mov gprdst, gprsrc */
1416 if ((iGprDst | iGprSrc) >= 8)
1417 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1418 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1419 : X86_OP_REX_R;
1420 pCodeBuf[off++] = 0x8b;
1421 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1422
1423#elif defined(RT_ARCH_ARM64)
1424 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1425 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1426
1427#else
1428# error "port me"
1429#endif
1430 return off;
1431}
1432
1433
1434/**
1435 * Emits a gprdst = gprsrc[31:0] load.
1436 * @note Bits 63 thru 32 are cleared.
1437 */
1438DECL_INLINE_THROW(uint32_t)
1439iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1440{
1441#ifdef RT_ARCH_AMD64
1442 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1443#elif defined(RT_ARCH_ARM64)
1444 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1445#else
1446# error "port me"
1447#endif
1448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1449 return off;
1450}
1451
1452
1453/**
1454 * Emits a gprdst = gprsrc[15:0] load.
1455 * @note Bits 63 thru 15 are cleared.
1456 */
1457DECL_INLINE_THROW(uint32_t)
1458iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1459{
1460#ifdef RT_ARCH_AMD64
1461 /* movzx Gv,Ew */
1462 if ((iGprDst | iGprSrc) >= 8)
1463 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1464 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1465 : X86_OP_REX_R;
1466 pCodeBuf[off++] = 0x0f;
1467 pCodeBuf[off++] = 0xb7;
1468 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1469
1470#elif defined(RT_ARCH_ARM64)
1471 /* and gprdst, gprsrc, #0xffff */
1472# if 1
1473 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1474 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1475# else
1476 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1477 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1478# endif
1479
1480#else
1481# error "port me"
1482#endif
1483 return off;
1484}
1485
1486
1487/**
1488 * Emits a gprdst = gprsrc[15:0] load.
1489 * @note Bits 63 thru 15 are cleared.
1490 */
1491DECL_INLINE_THROW(uint32_t)
1492iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1493{
1494#ifdef RT_ARCH_AMD64
1495 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1496#elif defined(RT_ARCH_ARM64)
1497 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1498#else
1499# error "port me"
1500#endif
1501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1502 return off;
1503}
1504
1505
1506/**
1507 * Emits a gprdst = gprsrc[7:0] load.
1508 * @note Bits 63 thru 8 are cleared.
1509 */
1510DECL_FORCE_INLINE(uint32_t)
1511iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1512{
1513#ifdef RT_ARCH_AMD64
1514 /* movzx Gv,Eb */
1515 if (iGprDst >= 8 || iGprSrc >= 8)
1516 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1517 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1518 : X86_OP_REX_R;
1519 else if (iGprSrc >= 4)
1520 pCodeBuf[off++] = X86_OP_REX;
1521 pCodeBuf[off++] = 0x0f;
1522 pCodeBuf[off++] = 0xb6;
1523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1524
1525#elif defined(RT_ARCH_ARM64)
1526 /* and gprdst, gprsrc, #0xff */
1527 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1528 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1529
1530#else
1531# error "port me"
1532#endif
1533 return off;
1534}
1535
1536
1537/**
1538 * Emits a gprdst = gprsrc[7:0] load.
1539 * @note Bits 63 thru 8 are cleared.
1540 */
1541DECL_INLINE_THROW(uint32_t)
1542iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1543{
1544#ifdef RT_ARCH_AMD64
1545 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1546#elif defined(RT_ARCH_ARM64)
1547 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1548#else
1549# error "port me"
1550#endif
1551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1552 return off;
1553}
1554
1555
1556/**
1557 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1558 * @note Bits 63 thru 8 are cleared.
1559 */
1560DECL_INLINE_THROW(uint32_t)
1561iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1562{
1563#ifdef RT_ARCH_AMD64
1564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1565
1566 /* movzx Gv,Ew */
1567 if ((iGprDst | iGprSrc) >= 8)
1568 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1569 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1570 : X86_OP_REX_R;
1571 pbCodeBuf[off++] = 0x0f;
1572 pbCodeBuf[off++] = 0xb7;
1573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1574
1575 /* shr Ev,8 */
1576 if (iGprDst >= 8)
1577 pbCodeBuf[off++] = X86_OP_REX_B;
1578 pbCodeBuf[off++] = 0xc1;
1579 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1580 pbCodeBuf[off++] = 8;
1581
1582#elif defined(RT_ARCH_ARM64)
1583 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1585 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1586
1587#else
1588# error "port me"
1589#endif
1590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1591 return off;
1592}
1593
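/*
 * Illustrative sketch, not part of the upstream API: fetching a high-byte
 * register value (e.g. AH) when the guest RAX shadow happens to live in host
 * register 3. ARM64 does this in a single ubfx; AMD64 needs the movzx + shr
 * pair emitted above. Register choices are arbitrary.
 */
#if 0 /* example only */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleFetchAh(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* ARM64: ubfx w5, w3, #8, #8; AMD64: movzx ebp, bx + shr ebp, 8. */
    return iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, 5 /*iGprDst*/, 3 /*iGprSrc*/);
}
#endif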
1594
1595/**
1596 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1597 */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1600{
1601#ifdef RT_ARCH_AMD64
1602 /* movsxd r64, r/m32 */
1603 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1604 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1605 pbCodeBuf[off++] = 0x63;
1606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1607
1608#elif defined(RT_ARCH_ARM64)
1609 /* sxtw dst, src */
1610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1611 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1612
1613#else
1614# error "port me"
1615#endif
1616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1617 return off;
1618}
1619
1620
1621/**
1622 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1623 */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1626{
1627#ifdef RT_ARCH_AMD64
1628 /* movsx r64, r/m16 */
1629 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1630 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1631 pbCodeBuf[off++] = 0x0f;
1632 pbCodeBuf[off++] = 0xbf;
1633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1634
1635#elif defined(RT_ARCH_ARM64)
1636 /* sxth dst, src */
1637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1638 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1639
1640#else
1641# error "port me"
1642#endif
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644 return off;
1645}
1646
1647
1648/**
1649 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1650 */
1651DECL_INLINE_THROW(uint32_t)
1652iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1653{
1654#ifdef RT_ARCH_AMD64
1655 /* movsx r32, r/m16 */
1656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1657 if (iGprDst >= 8 || iGprSrc >= 8)
1658 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1659 pbCodeBuf[off++] = 0x0f;
1660 pbCodeBuf[off++] = 0xbf;
1661 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 /* sxth dst32, src */
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1666 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1667
1668#else
1669# error "port me"
1670#endif
1671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1672 return off;
1673}
1674
1675
1676/**
1677 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1678 */
1679DECL_INLINE_THROW(uint32_t)
1680iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1681{
1682#ifdef RT_ARCH_AMD64
1683 /* movsx r64, r/m8 */
1684 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1685 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1686 pbCodeBuf[off++] = 0x0f;
1687 pbCodeBuf[off++] = 0xbe;
1688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1689
1690#elif defined(RT_ARCH_ARM64)
1691 /* sxtb dst, src */
1692 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1693 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1694
1695#else
1696# error "port me"
1697#endif
1698 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1699 return off;
1700}
1701
1702
1703/**
1704 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1705 * @note Bits 63 thru 32 are cleared.
1706 */
1707DECL_INLINE_THROW(uint32_t)
1708iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1709{
1710#ifdef RT_ARCH_AMD64
1711 /* movsx r32, r/m8 */
1712 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1713 if (iGprDst >= 8 || iGprSrc >= 8)
1714 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1715 else if (iGprSrc >= 4)
1716 pbCodeBuf[off++] = X86_OP_REX;
1717 pbCodeBuf[off++] = 0x0f;
1718 pbCodeBuf[off++] = 0xbe;
1719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 /* sxtb dst32, src32 */
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1724 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1725
1726#else
1727# error "port me"
1728#endif
1729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1730 return off;
1731}
1732
1733
1734/**
1735 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1736 * @note Bits 63 thru 16 are cleared.
1737 */
1738DECL_INLINE_THROW(uint32_t)
1739iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1740{
1741#ifdef RT_ARCH_AMD64
1742 /* movsx r16, r/m8 */
1743 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1744 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1745 if (iGprDst >= 8 || iGprSrc >= 8)
1746 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1747 else if (iGprSrc >= 4)
1748 pbCodeBuf[off++] = X86_OP_REX;
1749 pbCodeBuf[off++] = 0x0f;
1750 pbCodeBuf[off++] = 0xbe;
1751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1752
1753 /* movzx r32, r/m16 */
1754 if (iGprDst >= 8)
1755 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1756 pbCodeBuf[off++] = 0x0f;
1757 pbCodeBuf[off++] = 0xb7;
1758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1759
1760#elif defined(RT_ARCH_ARM64)
1761 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1763 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1764 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1765 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1766
1767#else
1768# error "port me"
1769#endif
1770 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1771 return off;
1772}
1773
1774
1775/**
1776 * Emits a gprdst = gprsrc + addend load.
1777 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1778 */
1779#ifdef RT_ARCH_AMD64
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1782 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1783{
1784 Assert(iAddend != 0);
1785
1786 /* lea gprdst, [gprsrc + iAddend] */
1787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1788 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1789 pbCodeBuf[off++] = 0x8d;
1790 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1792 return off;
1793}
1794
1795#elif defined(RT_ARCH_ARM64)
1796DECL_INLINE_THROW(uint32_t)
1797iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1798 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1799{
1800 if ((uint32_t)iAddend < 4096)
1801 {
1802 /* add dst, src, uimm12 */
1803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1805 }
1806 else if ((uint32_t)-iAddend < 4096)
1807 {
1808 /* sub dst, src, uimm12 */
1809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1811 }
1812 else
1813 {
1814 Assert(iGprSrc != iGprDst);
1815 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1818 }
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820 return off;
1821}
1822#else
1823# error "port me"
1824#endif
1825
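/*
 * Illustrative sketch, not part of the upstream API, showing how the three
 * ARM64 addend cases above play out: 16 and -32 fit the add/sub uimm12
 * encodings, while 0x12345 forces the load-immediate + register-add fallback
 * (which is why that path asserts iGprSrc != iGprDst).
 */
#if 0 /* example only */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleAddends(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, 0, 1, 16);  /* add x0, x1, #16 */
    off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, 0, 1, -32); /* sub x0, x1, #32 */
    /* mov x0, #0x12345; add x0, x1, x0 */
    return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, 0, 1, 0x12345);
}
#endif
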
1826/**
1827 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1828 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1829 */
1830#ifdef RT_ARCH_AMD64
1831DECL_INLINE_THROW(uint32_t)
1832iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1833 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1834#else
1835DECL_INLINE_THROW(uint32_t)
1836iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1837 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1838#endif
1839{
1840 if (iAddend != 0)
1841 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1842 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1843}
1844
1845
1846/**
1847 * Emits a gprdst = gprsrc32 + addend load.
1848 * @note Bits 63 thru 32 are cleared.
1849 */
1850DECL_INLINE_THROW(uint32_t)
1851iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1852 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1853{
1854 Assert(iAddend != 0);
1855
1856#ifdef RT_ARCH_AMD64
1857 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1858 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1860 if ((iGprDst | iGprSrc) >= 8)
1861 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1862 pbCodeBuf[off++] = 0x8d;
1863 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1864
1865#elif defined(RT_ARCH_ARM64)
1866 if ((uint32_t)iAddend < 4096)
1867 {
1868 /* add dst, src, uimm12 */
1869 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1870 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1871 }
1872 else if ((uint32_t)-iAddend < 4096)
1873 {
1874 /* sub dst, src, uimm12 */
1875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1876 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1877 }
1878 else
1879 {
1880 Assert(iGprSrc != iGprDst);
1881 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1884 }
1885
1886#else
1887# error "port me"
1888#endif
1889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1890 return off;
1891}
1892
1893
1894/**
1895 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1896 */
1897DECL_INLINE_THROW(uint32_t)
1898iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1899 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1900{
1901 if (iAddend != 0)
1902 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1903 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1904}
1905
1906
1907/**
1908 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1909 * destination.
1910 */
1911DECL_FORCE_INLINE(uint32_t)
1912iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1913{
1914#ifdef RT_ARCH_AMD64
1915 /* mov reg16, r/m16 */
1916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1917 if (idxDst >= 8 || idxSrc >= 8)
1918 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1919 pCodeBuf[off++] = 0x8b;
1920 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1921
1922#elif defined(RT_ARCH_ARM64)
1923 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1924 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1925
1926#else
1927# error "Port me!"
1928#endif
1929 return off;
1930}
1931
1932
1933/**
1934 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1935 * destination.
1936 */
1937DECL_INLINE_THROW(uint32_t)
1938iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1939{
1940#ifdef RT_ARCH_AMD64
1941 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1942#elif defined(RT_ARCH_ARM64)
1943 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1944#else
1945# error "Port me!"
1946#endif
1947 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1948 return off;
1949}
1950
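/*
 * Illustrative sketch, not part of the upstream API: committing a 16-bit
 * result the way a "mov ax, ..." style write requires, i.e. without touching
 * bits 63:16 of the destination. Assumes (arbitrarily) that the guest RAX
 * shadow is host register 3 and the result sits in host register 5.
 */
#if 0 /* example only */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleWriteAx(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* AMD64: 66-prefixed 16-bit mov; ARM64: bfi w3, w5, #0, #16. */
    return iemNativeEmitGprMergeInGpr16(pReNative, off, 3 /*idxDst*/, 5 /*idxSrc*/);
}
#endif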
1951
1952#ifdef RT_ARCH_AMD64
1953/**
1954 * Common bit of iemNativeEmitLoadGprByBp and friends.
1955 */
1956DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1957 PIEMRECOMPILERSTATE pReNativeAssert)
1958{
1959 if (offDisp < 128 && offDisp >= -128)
1960 {
1961 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1962 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1963 }
1964 else
1965 {
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
1967 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
1968 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
1969 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
1970 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
1971 }
1972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
1973 return off;
1974}
1975#elif defined(RT_ARCH_ARM64)
1976/**
1977 * Common bit of iemNativeEmitLoadGprByBp and friends.
1978 */
1979DECL_FORCE_INLINE_THROW(uint32_t)
1980iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
1981 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
1982{
1983 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
1984 {
1985 /* str w/ unsigned imm12 (scaled) */
1986 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
1988 }
1989 else if (offDisp >= -256 && offDisp <= 256)
1990 {
1991 /* stur w/ signed imm9 (unscaled) */
1992 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1993 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
1994 }
1995 else
1996 {
1997 /* Use temporary indexing register. */
1998 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
1999 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2001 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2002 }
2003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2004 return off;
2005}
2006#endif
2007
2008
2009/**
2010 * Emits a 64-bit GPR load instruction with a BP relative source address.
2011 */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2014{
2015#ifdef RT_ARCH_AMD64
2016 /* mov gprdst, qword [rbp + offDisp] */
2017 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2018 if (iGprDst < 8)
2019 pbCodeBuf[off++] = X86_OP_REX_W;
2020 else
2021 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2022 pbCodeBuf[off++] = 0x8b;
2023 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2024
2025#elif defined(RT_ARCH_ARM64)
2026 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2027
2028#else
2029# error "port me"
2030#endif
2031}
2032
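/*
 * Illustrative sketch, not part of the upstream API: BP-relative loads pick
 * the shortest encoding for the displacement. Assuming BP is rbp/x29, -8
 * yields 48 8b 45 f8 (mov rax, [rbp-8]) on AMD64 and an ldur on ARM64, while
 * 0x1230 needs the disp32 form on AMD64 and the scaled uimm12 form on ARM64.
 */
#if 0 /* example only */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleLoadByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeEmitLoadGprByBp(pReNative, off, 0 /*rax/x0*/, -8);
    return iemNativeEmitLoadGprByBp(pReNative, off, 0 /*rax/x0*/, 0x1230);
}
#endif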
2033
2034/**
2035 * Emits a 32-bit GPR load instruction with a BP relative source address.
2036 * @note Bits 63 thru 32 of the GPR will be cleared.
2037 */
2038DECL_INLINE_THROW(uint32_t)
2039iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2040{
2041#ifdef RT_ARCH_AMD64
2042 /* mov gprdst, dword [rbp + offDisp] */
2043 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2044 if (iGprDst >= 8)
2045 pbCodeBuf[off++] = X86_OP_REX_R;
2046 pbCodeBuf[off++] = 0x8b;
2047 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2048
2049#elif defined(RT_ARCH_ARM64)
2050 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2051
2052#else
2053# error "port me"
2054#endif
2055}
2056
2057
2058/**
2059 * Emits a 16-bit GPR load instruction with a BP relative source address.
2060 * @note Bits 63 thru 16 of the GPR will be cleared.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2064{
2065#ifdef RT_ARCH_AMD64
2066 /* movzx gprdst, word [rbp + offDisp] */
2067 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2068 if (iGprDst >= 8)
2069 pbCodeBuf[off++] = X86_OP_REX_R;
2070 pbCodeBuf[off++] = 0x0f;
2071 pbCodeBuf[off++] = 0xb7;
2072 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2073
2074#elif defined(RT_ARCH_ARM64)
2075 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2076
2077#else
2078# error "port me"
2079#endif
2080}
2081
2082
2083/**
2084 * Emits an 8-bit GPR load instruction with a BP relative source address.
2085 * @note Bits 63 thru 8 of the GPR will be cleared.
2086 */
2087DECL_INLINE_THROW(uint32_t)
2088iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2089{
2090#ifdef RT_ARCH_AMD64
2091 /* movzx gprdst, byte [rbp + offDisp] */
2092 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2093 if (iGprDst >= 8)
2094 pbCodeBuf[off++] = X86_OP_REX_R;
2095 pbCodeBuf[off++] = 0x0f;
2096 pbCodeBuf[off++] = 0xb6;
2097 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2098
2099#elif defined(RT_ARCH_ARM64)
2100 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2101
2102#else
2103# error "port me"
2104#endif
2105}
2106
2107
2108/**
2109 * Emits a load effective address to a GPR with a BP relative source address.
2110 */
2111DECL_INLINE_THROW(uint32_t)
2112iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2113{
2114#ifdef RT_ARCH_AMD64
2115 /* lea gprdst, [rbp + offDisp] */
2116 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2117 if (iGprDst < 8)
2118 pbCodeBuf[off++] = X86_OP_REX_W;
2119 else
2120 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2121 pbCodeBuf[off++] = 0x8d;
2122 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2123
2124#elif defined(RT_ARCH_ARM64)
2125 if ((uint32_t)offDisp < (unsigned)_4K)
2126 {
2127 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2128 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2129 }
2130 else if ((uint32_t)-offDisp < (unsigned)_4K)
2131 {
2132 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2133 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2134 }
2135 else
2136 {
2137 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2138 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2139 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2140 if (offDisp >= 0)
2141 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2142 else
2143 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2144 }
2145
2146#else
2147# error "port me"
2148#endif
2149
2150 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2151 return off;
2152}
2153
2154
2155/**
2156 * Emits a 64-bit GPR store with a BP relative destination address.
2157 *
2158 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2159 */
2160DECL_INLINE_THROW(uint32_t)
2161iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2162{
2163#ifdef RT_ARCH_AMD64
2164 /* mov qword [rbp + offDisp], gprdst */
2165 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2166 if (iGprSrc < 8)
2167 pbCodeBuf[off++] = X86_OP_REX_W;
2168 else
2169 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2170 pbCodeBuf[off++] = 0x89;
2171 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2172
2173#elif defined(RT_ARCH_ARM64)
2174 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2175 {
2176 /* str w/ unsigned imm12 (scaled) */
2177 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2178 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2179 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2180 }
2181 else if (offDisp >= -256 && offDisp <= 256)
2182 {
2183 /* stur w/ signed imm9 (unscaled) */
2184 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2185 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2186 }
2187 else if ((uint32_t)-offDisp < (unsigned)_4K)
2188 {
2189 /* Use temporary indexing register w/ sub uimm12. */
2190 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2191 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2192 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2193 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2194 }
2195 else
2196 {
2197 /* Use temporary indexing register. */
2198 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2199 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2200 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2201 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2202 }
2203 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2204 return off;
2205
2206#else
2207# error "Port me!"
2208#endif
2209}
2210
2211
2212/**
2213 * Emits a 64-bit immediate store with a BP relative destination address.
2214 *
2215 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2216 */
2217DECL_INLINE_THROW(uint32_t)
2218iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2219{
2220#ifdef RT_ARCH_AMD64
2221 if ((int64_t)uImm64 == (int32_t)uImm64)
2222 {
2223 /* mov qword [rbp + offDisp], imm32 - sign extended */
2224 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2225 pbCodeBuf[off++] = X86_OP_REX_W;
2226 pbCodeBuf[off++] = 0xc7;
2227 if (offDisp < 128 && offDisp >= -128)
2228 {
2229 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2230 pbCodeBuf[off++] = (uint8_t)offDisp;
2231 }
2232 else
2233 {
2234 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2235 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2236 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2237 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2238 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2239 }
2240 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2241 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2242 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2243 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2244 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2245 return off;
2246 }
2247#endif
2248
2249 /* Load tmp0, imm64; Store tmp to bp+disp. */
2250 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2251 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2252}
2253
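/*
 * Illustrative sketch, not part of the upstream API: the AMD64 fast path
 * above applies whenever the constant survives sign-extension from 32 bits;
 * otherwise (and always on ARM64) the value goes through
 * IEMNATIVE_REG_FIXED_TMP0. The stack offset is an arbitrary example value.
 */
#if 0 /* example only */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleStoreImmByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Fits int32: single "mov qword [rbp-16], 0x11223344" on AMD64. */
    off = iemNativeEmitStoreImm64ByBp(pReNative, off, -16, UINT64_C(0x11223344));
    /* Does not fit: load IEMNATIVE_REG_FIXED_TMP0, then store it. */
    return iemNativeEmitStoreImm64ByBp(pReNative, off, -16, UINT64_C(0x8877665544332211));
}
#endif
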
2254#if defined(RT_ARCH_ARM64)
2255
2256/**
2257 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2258 *
2259 * @note Odd and large @a offDisp values require a temporary, unless it's a
2260 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2261 * caller does not heed this.
2262 *
2263 * @note DON'T try this with prefetch.
2264 */
2265DECL_FORCE_INLINE_THROW(uint32_t)
2266iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2267 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2268{
2269 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2270 {
2271 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2272 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2273 }
2274 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2275 && iGprReg != iGprBase)
2276 || iGprTmp != UINT8_MAX)
2277 {
2278 /* The offset is too large, so we must load it into a register and use
2279 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2280 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
2281 if (iGprTmp == UINT8_MAX)
2282 iGprTmp = iGprReg;
2283 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2284 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2285 }
2286 else
2287# ifdef IEM_WITH_THROW_CATCH
2288 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2289# else
2290 AssertReleaseFailedStmt(off = UINT32_MAX);
2291# endif
2292 return off;
2293}
2294
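/*
 * Illustrative sketch, not part of the upstream API: a dword load with
 * offDisp = 0x28 hits the scaled uimm12 form, while the misaligned 0x29 must
 * go through a register - legal here without iGprTmp because the destination
 * (x2) differs from the base (x3) and can double as the offset register.
 */
# if 0 /* example only */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitExampleGprByGprLd(PIEMNATIVEINSTR pCodeBuf, uint32_t off)
{
    /* ldr x2, [x3, #0x28] */
    off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, 2, 3, 0x28,
                                      kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
    /* mov x2, #0x29; ldr x2, [x3, x2] */
    return iemNativeEmitGprByGprLdStEx(pCodeBuf, off, 2, 3, 0x29,
                                       kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
}
# endif
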
2295/**
2296 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2297 */
2298DECL_FORCE_INLINE_THROW(uint32_t)
2299iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2300 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2301{
2302 /*
2303 * There are a couple of ldr variants that take an immediate offset, so
2304 * try to use those if we can; otherwise we have to use the temporary
2305 * register to help with the addressing.
2306 */
2307 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2308 {
2309 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2310 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2311 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2312 }
2313 else
2314 {
2315 /* The offset is too large, so we must load it into a register and use
2316 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2317 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
2318 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2319
2320 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2321 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2322
2323 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2324 }
2325 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2326 return off;
2327}
2328
2329#endif /* RT_ARCH_ARM64 */
2330
2331/**
2332 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2333 *
2334 * @note ARM64: Misaligned @a offDisp values and values not in the
2335 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp) if
2336 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2337 * does not heed this.
2338 */
2339DECL_FORCE_INLINE_THROW(uint32_t)
2340iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2341 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2342{
2343#ifdef RT_ARCH_AMD64
2344 /* mov reg64, mem64 */
2345 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2346 pCodeBuf[off++] = 0x8b;
2347 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2348 RT_NOREF(iGprTmp);
2349
2350#elif defined(RT_ARCH_ARM64)
2351 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2352 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2353
2354#else
2355# error "port me"
2356#endif
2357 return off;
2358}
2359
2360
2361/**
2362 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2363 */
2364DECL_INLINE_THROW(uint32_t)
2365iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2366{
2367#ifdef RT_ARCH_AMD64
2368 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2369 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2370
2371#elif defined(RT_ARCH_ARM64)
2372 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2373
2374#else
2375# error "port me"
2376#endif
2377 return off;
2378}
2379
2380
2381/**
2382 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2383 *
2384 * @note ARM64: Misaligned @a offDisp values and values not in the
2385 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp)
2386 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2387 * caller does not heed this.
2388 *
2389 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2390 */
2391DECL_FORCE_INLINE_THROW(uint32_t)
2392iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2393 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2394{
2395#ifdef RT_ARCH_AMD64
2396 /* mov reg32, mem32 */
2397 if (iGprDst >= 8 || iGprBase >= 8)
2398 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2399 pCodeBuf[off++] = 0x8b;
2400 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2401 RT_NOREF(iGprTmp);
2402
2403#elif defined(RT_ARCH_ARM64)
2404 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2405 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2406
2407#else
2408# error "port me"
2409#endif
2410 return off;
2411}
2412
2413
2414/**
2415 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2416 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2417 */
2418DECL_INLINE_THROW(uint32_t)
2419iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2420{
2421#ifdef RT_ARCH_AMD64
2422 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2423 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2424
2425#elif defined(RT_ARCH_ARM64)
2426 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2427
2428#else
2429# error "port me"
2430#endif
2431 return off;
2432}
2433
2434
2435/**
2436 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2437 * sign-extending the value to 64 bits.
2438 *
2439 * @note ARM64: Misaligned @a offDisp values and values not in the
2440 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp)
2441 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2442 * caller does not heed this.
2443 */
2444DECL_FORCE_INLINE_THROW(uint32_t)
2445iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2446 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2447{
2448#ifdef RT_ARCH_AMD64
2449 /* movsxd reg64, mem32 */
2450 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2451 pCodeBuf[off++] = 0x63;
2452 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2453 RT_NOREF(iGprTmp);
2454
2455#elif defined(RT_ARCH_ARM64)
2456 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2457 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2458
2459#else
2460# error "port me"
2461#endif
2462 return off;
2463}
2464
2465
2466/**
2467 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2468 *
2469 * @note ARM64: Misaligned @a offDisp values and values not in the
2470 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2471 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2472 * caller does not heed this.
2473 *
2474 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2475 */
2476DECL_FORCE_INLINE_THROW(uint32_t)
2477iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2478 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2479{
2480#ifdef RT_ARCH_AMD64
2481 /* movzx reg32, mem16 */
2482 if (iGprDst >= 8 || iGprBase >= 8)
2483 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2484 pCodeBuf[off++] = 0x0f;
2485 pCodeBuf[off++] = 0xb7;
2486 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2487 RT_NOREF(iGprTmp);
2488
2489#elif defined(RT_ARCH_ARM64)
2490 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2491 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2492
2493#else
2494# error "port me"
2495#endif
2496 return off;
2497}
2498
2499
2500/**
2501 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2502 * sign-extending the value to 64 bits.
2503 *
2504 * @note ARM64: Misaligned @a offDisp values and values not in the
2505 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2506 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2507 * caller does not heed this.
2508 */
2509DECL_FORCE_INLINE_THROW(uint32_t)
2510iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2511 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2512{
2513#ifdef RT_ARCH_AMD64
2514 /* movsx reg64, mem16 */
2515 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2516 pCodeBuf[off++] = 0x0f;
2517 pCodeBuf[off++] = 0xbf;
2518 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2519 RT_NOREF(iGprTmp);
2520
2521#elif defined(RT_ARCH_ARM64)
2522 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2523 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2524
2525#else
2526# error "port me"
2527#endif
2528 return off;
2529}
2530
2531
2532/**
2533 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2534 * sign-extending the value to 32 bits.
2535 *
2536 * @note ARM64: Misaligned @a offDisp values and values not in the
2537 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp)
2538 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2539 * caller does not heed this.
2540 *
2541 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2542 */
2543DECL_FORCE_INLINE_THROW(uint32_t)
2544iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2545 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2546{
2547#ifdef RT_ARCH_AMD64
2548 /* movsx reg32, mem16 */
2549 if (iGprDst >= 8 || iGprBase >= 8)
2550 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2551 pCodeBuf[off++] = 0x0f;
2552 pCodeBuf[off++] = 0xbf;
2553 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2554 RT_NOREF(iGprTmp);
2555
2556#elif defined(RT_ARCH_ARM64)
2557 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2558 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2559
2560#else
2561# error "port me"
2562#endif
2563 return off;
2564}
2565
2566
2567/**
2568 * Emits a 8-bit GPR load via a GPR base address with a displacement.
2569 *
2570 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2571 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2572 * same. Will assert / throw if caller does not heed this.
2573 *
2574 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2575 */
2576DECL_FORCE_INLINE_THROW(uint32_t)
2577iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2578 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2579{
2580#ifdef RT_ARCH_AMD64
2581 /* movzx reg32, mem8 */
2582 if (iGprDst >= 8 || iGprBase >= 8)
2583 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2584 pCodeBuf[off++] = 0x0f;
2585 pCodeBuf[off++] = 0xb6;
2586 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2587 RT_NOREF(iGprTmp);
2588
2589#elif defined(RT_ARCH_ARM64)
2590 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2591 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2592
2593#else
2594# error "port me"
2595#endif
2596 return off;
2597}
2598
2599
2600/**
2601 * Emits a 8-bit GPR load via a GPR base address with a displacement,
2602 * sign-extending the value to 64 bits.
2603 *
2604 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2605 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2606 * same. Will assert / throw if caller does not heed this.
2607 */
2608DECL_FORCE_INLINE_THROW(uint32_t)
2609iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2610 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2611{
2612#ifdef RT_ARCH_AMD64
2613 /* movsx reg64, mem8 */
2614 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2615 pCodeBuf[off++] = 0x0f;
2616 pCodeBuf[off++] = 0xbe;
2617 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2618 RT_NOREF(iGprTmp);
2619
2620#elif defined(RT_ARCH_ARM64)
2621 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2622 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2623
2624#else
2625# error "port me"
2626#endif
2627 return off;
2628}
2629
2630
2631/**
2632 * Emits a 8-bit GPR load via a GPR base address with a displacement,
2633 * sign-extending the value to 32 bits.
2634 *
2635 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2636 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2637 * same. Will assert / throw if caller does not heed this.
2638 *
2639 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2640 */
2641DECL_FORCE_INLINE_THROW(uint32_t)
2642iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2643 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2644{
2645#ifdef RT_ARCH_AMD64
2646 /* movsx reg32, mem8 */
2647 if (iGprDst >= 8 || iGprBase >= 8)
2648 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2649 pCodeBuf[off++] = 0x0f;
2650 pCodeBuf[off++] = 0xbe;
2651 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2652 RT_NOREF(iGprTmp);
2653
2654#elif defined(RT_ARCH_ARM64)
2655 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2656 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2657
2658#else
2659# error "port me"
2660#endif
2661 return off;
2662}
2663
2664
2665/**
2666 * Emits a 8-bit GPR load via a GPR base address with a displacement,
2667 * sign-extending the value to 16 bits.
2668 *
2669 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2670 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2671 * same. Will assert / throw if caller does not heed this.
2672 *
2673 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2674 */
2675DECL_FORCE_INLINE_THROW(uint32_t)
2676iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2677 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2678{
2679#ifdef RT_ARCH_AMD64
2680 /* movsx reg32, mem8 */
2681 if (iGprDst >= 8 || iGprBase >= 8)
2682 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2683 pCodeBuf[off++] = 0x0f;
2684 pCodeBuf[off++] = 0xbe;
2685 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2686# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2687 /* and reg32, 0xffff */
2688 if (iGprDst >= 8)
2689 pCodeBuf[off++] = X86_OP_REX_B;
2690 pCodeBuf[off++] = 0x81;
2691 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2692 pCodeBuf[off++] = 0xff;
2693 pCodeBuf[off++] = 0xff;
2694 pCodeBuf[off++] = 0;
2695 pCodeBuf[off++] = 0;
2696# else
2697 /* movzx reg32, reg16 */
2698 if (iGprDst >= 8)
2699 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2700 pCodeBuf[off++] = 0x0f;
2701 pCodeBuf[off++] = 0xb7;
2702 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2703# endif
2704 RT_NOREF(iGprTmp);
2705
2706#elif defined(RT_ARCH_ARM64)
2707 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2708 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2709 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2710 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2711
2712#else
2713# error "port me"
2714#endif
2715 return off;
2716}
2717
2718
2719/**
2720 * Emits a 64-bit GPR store via a GPR base address with a displacement.
2721 *
2722 * @note ARM64: Misaligned @a offDisp values and values not in the
2723 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp) if
2724 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2725 * does not heed this.
2726 */
2727DECL_FORCE_INLINE_THROW(uint32_t)
2728iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2729 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2730{
2731#ifdef RT_ARCH_AMD64
2732 /* mov mem64, reg64 */
2733 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2734 pCodeBuf[off++] = 0x89;
2735 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2736 RT_NOREF(iGprTmp);
2737
2738#elif defined(RT_ARCH_ARM64)
2739 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2740 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
2741
2742#else
2743# error "port me"
2744#endif
2745 return off;
2746}
2747
2748
2749/**
2750 * Emits a 32-bit GPR store via a GPR base address with a displacement.
2751 *
2752 * @note ARM64: Misaligned @a offDisp values and values not in the
2753 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp) if
2754 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2755 * does not heed this.
2756 */
2757DECL_FORCE_INLINE_THROW(uint32_t)
2758iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2759 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2760{
2761#ifdef RT_ARCH_AMD64
2762 /* mov mem32, reg32 */
2763 if (iGprSrc >= 8 || iGprBase >= 8)
2764 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2765 pCodeBuf[off++] = 0x89;
2766 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2767 RT_NOREF(iGprTmp);
2768
2769#elif defined(RT_ARCH_ARM64)
2770 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2771 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
2772
2773#else
2774# error "port me"
2775#endif
2776 return off;
2777}
2778
2779
2780/**
2781 * Emits a 16-bit GPR store via a GPR base address with a displacement.
2782 *
2783 * @note ARM64: Misaligned @a offDisp values and values not in the
2784 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp) if
2785 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2786 * does not heed this.
2787 */
2788DECL_FORCE_INLINE_THROW(uint32_t)
2789iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2790 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2791{
2792#ifdef RT_ARCH_AMD64
2793 /* mov mem16, reg16 */
2794 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2795 if (iGprSrc >= 8 || iGprBase >= 8)
2796 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2797 pCodeBuf[off++] = 0x89;
2798 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2799 RT_NOREF(iGprTmp);
2800
2801#elif defined(RT_ARCH_ARM64)
2802 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2803 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
2804
2805#else
2806# error "port me"
2807#endif
2808 return off;
2809}
2810
2811
2812/**
2813 * Emits a 8-bit GPR store via a GPR base address with a displacement.
2814 *
2815 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2816 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2817 * same. Will assert / throw if caller does not heed this.
2818 */
2819DECL_FORCE_INLINE_THROW(uint32_t)
2820iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
2821 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2822{
2823#ifdef RT_ARCH_AMD64
2824 /* mov mem8, reg8 */
2825 if (iGprSrc >= 8 || iGprBase >= 8)
2826 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2827 else if (iGprSrc >= 4)
2828 pCodeBuf[off++] = X86_OP_REX;
2829 pCodeBuf[off++] = 0x88;
2830 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
2831 RT_NOREF(iGprTmp);
2832
2833#elif defined(RT_ARCH_ARM64)
2834 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
2835 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
2836
2837#else
2838# error "port me"
2839#endif
2840 return off;
2841}
2842
2843
2844/**
2845 * Emits a 64-bit immediate store via a GPR base address with a displacement.
2846 *
2847 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0);
2848 * on AMD64 it depends on the immediate value.
2849 *
2850 * @note ARM64: Misaligned @a offDisp values and values not in the
2851 * 0x0...0x7ff8 range will require a temporary register (@a iGprTmp) if
2852 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2853 * does not heed this.
2854 */
2855DECL_FORCE_INLINE_THROW(uint32_t)
2856iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
2857 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2858{
2859#ifdef RT_ARCH_AMD64
2860 if ((int32_t)uImm == (int64_t)uImm)
2861 {
2862 /* mov mem64, imm32 (sign-extended) */
2863 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2864 pCodeBuf[off++] = 0xc7;
2865 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2866 pCodeBuf[off++] = RT_BYTE1(uImm);
2867 pCodeBuf[off++] = RT_BYTE2(uImm);
2868 pCodeBuf[off++] = RT_BYTE3(uImm);
2869 pCodeBuf[off++] = RT_BYTE4(uImm);
2870 }
2871 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
2872 {
2873 /* require temporary register. */
2874 if (iGprImmTmp == UINT8_MAX)
2875 iGprImmTmp = iGprTmp;
2876 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2877 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
2878 }
2879 else
2880# ifdef IEM_WITH_THROW_CATCH
2881 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2882# else
2883 AssertReleaseFailedStmt(off = UINT32_MAX);
2884# endif
2885
2886#elif defined(RT_ARCH_ARM64)
2887 if (uImm == 0)
2888 iGprImmTmp = ARMV8_A64_REG_XZR;
2889 else
2890 {
2891 Assert(iGprImmTmp < 31);
2892 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2893 }
2894 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
2895
2896#else
2897# error "port me"
2898#endif
2899 return off;
2900}
2901
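/*
 * Illustrative sketch, not part of the upstream API: zero needs no scratch
 * register on either host (imm32 form on AMD64, XZR store on ARM64), whereas
 * a full 64-bit constant needs @a iGprImmTmp on both. Register and offset
 * values below are arbitrary.
 */
#if 0 /* example only */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitExampleStoreImmByGpr(PIEMNATIVEINSTR pCodeBuf, uint32_t off)
{
    off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, 0, 3 /*iGprBase*/);
    return iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x8877665544332211),
                                          3 /*iGprBase*/, 4 /*iGprImmTmp*/, 16 /*offDisp*/);
}
#endif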
2902
2903/**
2904 * Emits a 32-bit immediate store via a GPR base address with a displacement.
2905 *
2906 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2907 *
2908 * @note ARM64: Misaligned @a offDisp values and values not in the
2909 * 0x0...0x3ffc range will require a temporary register (@a iGprTmp) if
2910 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2911 * does not heed this.
2912 */
2913DECL_FORCE_INLINE_THROW(uint32_t)
2914iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
2915 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2916{
2917#ifdef RT_ARCH_AMD64
2918 /* mov mem32, imm32 */
2919 if (iGprBase >= 8)
2920 pCodeBuf[off++] = X86_OP_REX_B;
2921 pCodeBuf[off++] = 0xc7;
2922 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2923 pCodeBuf[off++] = RT_BYTE1(uImm);
2924 pCodeBuf[off++] = RT_BYTE2(uImm);
2925 pCodeBuf[off++] = RT_BYTE3(uImm);
2926 pCodeBuf[off++] = RT_BYTE4(uImm);
2927 RT_NOREF(iGprImmTmp, iGprTmp);
2928
2929#elif defined(RT_ARCH_ARM64)
2930
2931 if (uImm == 0)
2932 iGprImmTmp = ARMV8_A64_REG_XZR;
2933 else
2934 {
2935 Assert(iGprImmTmp < 31);
2936 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
2937 }
2938 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
2939 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
2940
2941#else
2942# error "port me"
2943#endif
2944 return off;
2945}
2946
2947
2948/**
2949 * Emits a 16-bit immediate store via a GPR base address with a displacement.
2950 *
2951 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2952 *
2953 * @note ARM64: Misaligned @a offDisp values and values not in the
2954 * 0x0...0x1ffe range will require a temporary register (@a iGprTmp) if
2955 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2956 * does not heed this.
2957 */
2958DECL_FORCE_INLINE_THROW(uint32_t)
2959iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
2960 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2961{
2962#ifdef RT_ARCH_AMD64
2963 /* mov mem16, imm16 */
2964 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2965 if (iGprBase >= 8)
2966 pCodeBuf[off++] = X86_OP_REX_B;
2967 pCodeBuf[off++] = 0xc7;
2968 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
2969 pCodeBuf[off++] = RT_BYTE1(uImm);
2970 pCodeBuf[off++] = RT_BYTE2(uImm);
2971 RT_NOREF(iGprImmTmp, iGprTmp);
2972
2973#elif defined(RT_ARCH_ARM64)
2974 if (uImm == 0)
2975 iGprImmTmp = ARMV8_A64_REG_XZR;
2976 else
2977 {
2978 Assert(iGprImmTmp < 31);
2979 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
2980 }
2981 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
2982 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
2983
2984#else
2985# error "port me"
2986#endif
2987 return off;
2988}
2989
2990
2991/**
2992 * Emits an 8-bit immediate store via a GPR base address with a displacement.
2993 *
2994 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
2995 *
2996 * @note ARM64: @a offDisp values not in the 0x0...0xfff range will require a
2997 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2998 * same. Will assert / throw if caller does not heed this.
2999 */
3000DECL_FORCE_INLINE_THROW(uint32_t)
3001iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3002 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3003{
3004#ifdef RT_ARCH_AMD64
3005 /* mov mem8, imm8 */
3006 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3007 if (iGprBase >= 8)
3008 pCodeBuf[off++] = X86_OP_REX_B;
3009 pCodeBuf[off++] = 0xc6;
3010 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3011 pCodeBuf[off++] = uImm;
3012 RT_NOREF(iGprImmTmp, iGprTmp);
3013
3014#elif defined(RT_ARCH_ARM64)
3015 if (uImm == 0)
3016 iGprImmTmp = ARMV8_A64_REG_XZR;
3017 else
3018 {
3019 Assert(iGprImmTmp < 31);
3020 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3021 }
3022 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3023 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3024
3025#else
3026# error "port me"
3027#endif
3028 return off;
3029}
3030
3031
3032
3033/*********************************************************************************************************************************
3034* Subtraction and Additions *
3035*********************************************************************************************************************************/
3036
3037/**
3038 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3039 * @note The AMD64 version sets flags.
3040 */
3041DECL_INLINE_THROW(uint32_t)
3042iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3043{
3044#if defined(RT_ARCH_AMD64)
3045 /* sub Gv,Ev */
3046 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3047 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3048 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3049 pbCodeBuf[off++] = 0x2b;
3050 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3051
3052#elif defined(RT_ARCH_ARM64)
3053 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3054 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3055
3056#else
3057# error "Port me"
3058#endif
3059 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3060 return off;
3061}
3062
3063
3064/**
3065 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3066 * @note The AMD64 version sets flags.
3067 */
3068DECL_FORCE_INLINE(uint32_t)
3069iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3070{
3071#if defined(RT_ARCH_AMD64)
3072 /* sub Gv,Ev */
3073 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3074 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3075 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3076 pCodeBuf[off++] = 0x2b;
3077 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3078
3079#elif defined(RT_ARCH_ARM64)
3080 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3081
3082#else
3083# error "Port me"
3084#endif
3085 return off;
3086}
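/* Editorial note: no explicit zero-extension is needed here; writing a
 * 32-bit register implicitly clears bits 63:32 on x86-64, and ARM64
 * W-register writes do the same. */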
3087
3088
3089/**
3090 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3091 * @note The AMD64 version sets flags.
3092 */
3093DECL_INLINE_THROW(uint32_t)
3094iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3095{
3096#if defined(RT_ARCH_AMD64)
3097 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3098#elif defined(RT_ARCH_ARM64)
3099 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3100#else
3101# error "Port me"
3102#endif
3103 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3104 return off;
3105}
3106
3107
3108/**
3109 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3110 *
3111 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3112 *
3113 * @note Larger constants will require a temporary register. Failing to specify
3114 * one when needed will trigger fatal assertion / throw.
3115 */
3116DECL_FORCE_INLINE_THROW(uint32_t)
3117iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3118 uint8_t iGprTmp = UINT8_MAX)
3119{
3120#ifdef RT_ARCH_AMD64
3121 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3122 if (iSubtrahend == 1)
3123 {
3124 /* dec r/m64 */
3125 pCodeBuf[off++] = 0xff;
3126 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3127 }
3128 else if (iSubtrahend == -1)
3129 {
3130 /* inc r/m64 */
3131 pCodeBuf[off++] = 0xff;
3132 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3133 }
3134 else if ((int8_t)iSubtrahend == iSubtrahend)
3135 {
3136 /* sub r/m64, imm8 */
3137 pCodeBuf[off++] = 0x83;
3138 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3139 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3140 }
3141 else if ((int32_t)iSubtrahend == iSubtrahend)
3142 {
3143 /* sub r/m64, imm32 */
3144 pCodeBuf[off++] = 0x81;
3145 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3146 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3147 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3148 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3149 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3150 }
3151 else if (iGprTmp != UINT8_MAX)
3152 {
3153 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3154 /* sub r/m64, r64 */
3155 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3156 pCodeBuf[off++] = 0x29;
3157 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3158 }
3159 else
3160# ifdef IEM_WITH_THROW_CATCH
3161 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3162# else
3163 AssertReleaseFailedStmt(off = UINT32_MAX);
3164# endif
3165
3166#elif defined(RT_ARCH_ARM64)
3167 uint64_t const uAbsSubtrahend = (uint64_t)RT_ABS(iSubtrahend); /* 64-bit, so huge subtrahends don't truncate into the fast paths */
3168 if (uAbsSubtrahend < 4096)
3169 {
3170 if (iSubtrahend >= 0)
3171 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsSubtrahend);
3172 else
3173 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsSubtrahend);
3174 }
3175 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3176 {
3177 if (iSubtrahend >= 0)
3178 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)(uAbsSubtrahend >> 12),
3179 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3180 else
3181 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)(uAbsSubtrahend >> 12),
3182 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3183 }
3184 else if (iGprTmp != UINT8_MAX)
3185 {
3186 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3187 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3188 }
3189 else
3190# ifdef IEM_WITH_THROW_CATCH
3191 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3192# else
3193 AssertReleaseFailedStmt(off = UINT32_MAX);
3194# endif
3195
3196#else
3197# error "Port me"
3198#endif
3199 return off;
3200}
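/* Editorial note on the AMD64 instruction selection above, shown for
 * iGprDst=RCX (illustrative):
 *     iSubtrahend == 1     ->  48 ff c9            dec rcx
 *     iSubtrahend == -1    ->  48 ff c1            inc rcx
 *     fits in int8_t       ->  48 83 e9 ib         sub rcx, imm8
 *     fits in int32_t      ->  48 81 e9 id         sub rcx, imm32
 *     otherwise            ->  mov iGprTmp, imm64 + sub rcx, iGprTmp,
 * where the 'off - 1' trick lets the immediate load overwrite the REX
 * prefix that was already emitted for the single-instruction forms. */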
3201
3202
3203/**
3204 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3205 *
3206 * @note Larger constants will require a temporary register. Failing to specify
3207 * one when needed will trigger fatal assertion / throw.
3208 */
3209DECL_INLINE_THROW(uint32_t)
3210iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3211 uint8_t iGprTmp = UINT8_MAX)
3212
3213{
3214#ifdef RT_ARCH_AMD64
3215 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3216#elif defined(RT_ARCH_ARM64)
3217 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3218#else
3219# error "Port me"
3220#endif
3221 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3222 return off;
3223}
3224
3225
3226/**
3227 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3228 *
3229 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3230 *
3231 * @note ARM64: Larger constants will require a temporary register. Failing to
3232 * specify one when needed will trigger fatal assertion / throw.
3233 */
3234DECL_FORCE_INLINE_THROW(uint32_t)
3235iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3236 uint8_t iGprTmp = UINT8_MAX)
3237{
3238#ifdef RT_ARCH_AMD64
3239 if (iGprDst >= 8)
3240 pCodeBuf[off++] = X86_OP_REX_B;
3241 if (iSubtrahend == 1)
3242 {
3243 /* dec r/m32 */
3244 pCodeBuf[off++] = 0xff;
3245 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3246 }
3247 else if (iSubtrahend == -1)
3248 {
3249 /* inc r/m32 */
3250 pCodeBuf[off++] = 0xff;
3251 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3252 }
3253 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3254 {
3255 /* sub r/m32, imm8 */
3256 pCodeBuf[off++] = 0x83;
3257 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3258 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3259 }
3260 else
3261 {
3262 /* sub r/m32, imm32 */
3263 pCodeBuf[off++] = 0x81;
3264 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3265 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3266 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3267 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3268 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3269 }
3270 RT_NOREF(iGprTmp);
3271
3272#elif defined(RT_ARCH_ARM64)
3273 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3274 if (uAbsSubtrahend < 4096)
3275 {
3276 if (iSubtrahend >= 0)
3277 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3278 else
3279 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3280 }
3281 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3282 {
3283 if (iSubtrahend >= 0)
3284 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3285 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3286 else
3287 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3288 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3289 }
3290 else if (iGprTmp != UINT8_MAX)
3291 {
3292 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3293 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3294 }
3295 else
3296# ifdef IEM_WITH_THROW_CATCH
3297 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3298# else
3299 AssertReleaseFailedStmt(off = UINT32_MAX);
3300# endif
3301
3302#else
3303# error "Port me"
3304#endif
3305 return off;
3306}
3307
3308
3309/**
3310 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3311 *
3312 * @note ARM64: Larger constants will require a temporary register. Failing to
3313 * specify one when needed will trigger fatal assertion / throw.
3314 */
3315DECL_INLINE_THROW(uint32_t)
3316iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3317 uint8_t iGprTmp = UINT8_MAX)
3318
3319{
3320#ifdef RT_ARCH_AMD64
3321 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3322#elif defined(RT_ARCH_ARM64)
3323 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3324#else
3325# error "Port me"
3326#endif
3327 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3328 return off;
3329}
3330
3331
3332/**
3333 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3334 *
3335 * This will optimize using DEC/INC/whatever, and the ARM64 version will not
3336 * set flags, so it is not suitable as a base for conditional jumps.
3337 *
3338 * @note AMD64: Will only update the lower 16 bits of the register.
3339 * @note ARM64: Will update the entire register.
3340 * @note ARM64: Larger constants will require a temporary register. Failing to
3341 * specify one when needed will trigger fatal assertion / throw.
3342 */
3343DECL_FORCE_INLINE_THROW(uint32_t)
3344iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3345 uint8_t iGprTmp = UINT8_MAX)
3346{
3347#ifdef RT_ARCH_AMD64
3348 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3349 if (iGprDst >= 8)
3350 pCodeBuf[off++] = X86_OP_REX_B;
3351 if (iSubtrahend == 1)
3352 {
3353 /* dec r/m16 */
3354 pCodeBuf[off++] = 0xff;
3355 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3356 }
3357 else if (iSubtrahend == -1)
3358 {
3359 /* inc r/m16 */
3360 pCodeBuf[off++] = 0xff;
3361 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3362 }
3363 else if ((int8_t)iSubtrahend == iSubtrahend)
3364 {
3365 /* sub r/m16, imm8 */
3366 pCodeBuf[off++] = 0x83;
3367 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3368 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3369 }
3370 else
3371 {
3372 /* sub r/m16, imm16 */
3373 pCodeBuf[off++] = 0x81;
3374 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3375 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3376 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3377 }
3378 RT_NOREF(iGprTmp);
3379
3380#elif defined(RT_ARCH_ARM64)
3381 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3382 if (uAbsSubtrahend < 4096)
3383 {
3384 if (iSubtrahend >= 0)
3385 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3386 else
3387 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3388 }
3389 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3390 {
3391 if (iSubtrahend >= 0)
3392 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3393 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3394 else
3395 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3396 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3397 }
3398 else if (iGprTmp != UINT8_MAX)
3399 {
3400 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3401 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3402 }
3403 else
3404# ifdef IEM_WITH_THROW_CATCH
3405 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3406# else
3407 AssertReleaseFailedStmt(off = UINT32_MAX);
3408# endif
3409 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3410
3411#else
3412# error "Port me"
3413#endif
3414 return off;
3415}
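/* Editorial note: the trailing Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0)
 * is a 32-bit AND with a bitmask immediate of imms=15/immr=0, i.e. 0x0000ffff,
 * so whichever ADD/SUB form was emitted above, the result is truncated back
 * to 16 bits (the UXTH-like effect the doc comment promises). */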
3416
3417
3418/**
3419 * Emits adding a 64-bit GPR to another, storing the result in the first.
3420 * @note The AMD64 version sets flags.
3421 */
3422DECL_FORCE_INLINE(uint32_t)
3423iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3424{
3425#if defined(RT_ARCH_AMD64)
3426 /* add Gv,Ev */
3427 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3428 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3429 pCodeBuf[off++] = 0x03;
3430 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3431
3432#elif defined(RT_ARCH_ARM64)
3433 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3434
3435#else
3436# error "Port me"
3437#endif
3438 return off;
3439}
3440
3441
3442/**
3443 * Emits adding a 64-bit GPR to another, storing the result in the first.
3444 * @note The AMD64 version sets flags.
3445 */
3446DECL_INLINE_THROW(uint32_t)
3447iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3448{
3449#if defined(RT_ARCH_AMD64)
3450 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3451#elif defined(RT_ARCH_ARM64)
3452 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3453#else
3454# error "Port me"
3455#endif
3456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3457 return off;
3458}
3459
3460
3461/**
3462 * Emits adding a 32-bit GPR to another, storing the result in the first.
3463 * @note The AMD64 version sets flags; bits 32 thru 63 are zeroed on both hosts.
3464 */
3465DECL_FORCE_INLINE(uint32_t)
3466iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3467{
3468#if defined(RT_ARCH_AMD64)
3469 /* add Gv,Ev */
3470 if (iGprDst >= 8 || iGprAddend >= 8)
3471 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3472 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3473 pCodeBuf[off++] = 0x03;
3474 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3475
3476#elif defined(RT_ARCH_ARM64)
3477 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3478
3479#else
3480# error "Port me"
3481#endif
3482 return off;
3483}
3484
3485
3486/**
3487 * Emits adding a 32-bit GPR to another, storing the result in the first.
3488 * @note The AMD64 version sets flags; bits 32 thru 63 are zeroed on both hosts.
3489 */
3490DECL_INLINE_THROW(uint32_t)
3491iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3492{
3493#if defined(RT_ARCH_AMD64)
3494 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3495#elif defined(RT_ARCH_ARM64)
3496 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3497#else
3498# error "Port me"
3499#endif
3500 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3501 return off;
3502}
3503
3504
3505/**
3506 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3507 */
3508DECL_INLINE_THROW(uint32_t)
3509iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3510{
3511#if defined(RT_ARCH_AMD64)
3512 /* add or inc */
3513 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3514 if (iImm8 != 1)
3515 {
3516 pCodeBuf[off++] = 0x83;
3517 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3518 pCodeBuf[off++] = (uint8_t)iImm8;
3519 }
3520 else
3521 {
3522 pCodeBuf[off++] = 0xff;
3523 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3524 }
3525
3526#elif defined(RT_ARCH_ARM64)
3527 if (iImm8 >= 0)
3528 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
3529 else
3530 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
3531
3532#else
3533# error "Port me"
3534#endif
3535 return off;
3536}
3537
3538
3539/**
3540 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3541 */
3542DECL_INLINE_THROW(uint32_t)
3543iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3544{
3545#if defined(RT_ARCH_AMD64)
3546 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3547#elif defined(RT_ARCH_ARM64)
3548 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3549#else
3550# error "Port me"
3551#endif
3552 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3553 return off;
3554}
3555
3556
3557/**
3558 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3559 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3560 */
3561DECL_FORCE_INLINE(uint32_t)
3562iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3563{
3564#if defined(RT_ARCH_AMD64)
3565 /* add or inc */
3566 if (iGprDst >= 8)
3567 pCodeBuf[off++] = X86_OP_REX_B;
3568 if (iImm8 != 1)
3569 {
3570 pCodeBuf[off++] = 0x83;
3571 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3572 pCodeBuf[off++] = (uint8_t)iImm8;
3573 }
3574 else
3575 {
3576 pCodeBuf[off++] = 0xff;
3577 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3578 }
3579
3580#elif defined(RT_ARCH_ARM64)
3581 if (iImm8 >= 0)
3582 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
3583 else
3584 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
3585
3586#else
3587# error "Port me"
3588#endif
3589 return off;
3590}
3591
3592
3593/**
3594 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
3595 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3596 */
3597DECL_INLINE_THROW(uint32_t)
3598iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3599{
3600#if defined(RT_ARCH_AMD64)
3601 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
3602#elif defined(RT_ARCH_ARM64)
3603 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
3604#else
3605# error "Port me"
3606#endif
3607 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3608 return off;
3609}
3610
3611
3612/**
3613 * Emits a 64-bit GPR additions with a 64-bit signed addend.
3614 *
3615 * @note Will assert / throw if @a iGprTmp is not specified when needed.
3616 */
3617DECL_FORCE_INLINE_THROW(uint32_t)
3618iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
3619{
3620#if defined(RT_ARCH_AMD64)
3621 if ((int8_t)iAddend == iAddend)
3622 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3623
3624 if ((int32_t)iAddend == iAddend)
3625 {
3626 /* add grp, imm32 */
3627 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3628 pCodeBuf[off++] = 0x81;
3629 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3630 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3631 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3632 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3633 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3634 }
3635 else if (iGprTmp != UINT8_MAX)
3636 {
3637 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3638
3639 /* add dst, tmpreg */
3640 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3641 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
3642 pCodeBuf[off++] = 0x03;
3643 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
3644 }
3645 else
3646# ifdef IEM_WITH_THROW_CATCH
3647 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3648# else
3649 AssertReleaseFailedStmt(off = UINT32_MAX);
3650# endif
3651
3652#elif defined(RT_ARCH_ARM64)
3653 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
3654 if (uAbsAddend < 4096)
3655 {
3656 if (iAddend >= 0)
3657 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3658 else
3659 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
3660 }
3661 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3662 {
3663 if (iAddend >= 0)
3664 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3665 true /*f64Bit*/, true /*fShift12*/);
3666 else
3667 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
3668 true /*f64Bit*/, true /*fShift12*/);
3669 }
3670 else if (iGprTmp != UINT8_MAX)
3671 {
3672 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
3673 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
3674 }
3675 else
3676# ifdef IEM_WITH_THROW_CATCH
3677 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3678# else
3679 AssertReleaseFailedStmt(off = UINT32_MAX);
3680# endif
3681
3682#else
3683# error "Port me"
3684#endif
3685 return off;
3686}
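/* Editorial summary of the immediate forms used above (ARM64):
 *     |iAddend| < 0x1000                  -> one ADD/SUB #uimm12
 *     |iAddend| = N << 12, N < 0x1000     -> one ADD/SUB #uimm12, LSL #12
 *     anything else, iGprTmp provided     -> load into iGprTmp + ADD reg,reg
 * The AMD64 side instead prefers inc (for +1), then the imm8 and imm32
 * forms, and needs the temporary only for true 64-bit constants. */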
3687
3688
3689/**
3690 * Emits a 64-bit GPR addition with a 64-bit signed addend.
3691 */
3692DECL_INLINE_THROW(uint32_t)
3693iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
3694{
3695#if defined(RT_ARCH_AMD64)
3696 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3697 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
3698
3699 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
3700 {
3701 /* add grp, imm32 */
3702 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3703 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3704 pbCodeBuf[off++] = 0x81;
3705 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3706 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3707 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3708 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3709 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3710 }
3711 else
3712 {
3713 /* Best to use a temporary register to deal with this in the simplest way: */
3714 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3715
3716 /* add dst, tmpreg */
3717 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3718 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3719 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
3720 pbCodeBuf[off++] = 0x03;
3721 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
3722
3723 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3724 }
3725
3726#elif defined(RT_ARCH_ARM64)
3727 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
3728 {
3729 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3730 if (iAddend >= 0)
3731 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
3732 else
3733 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
3734 }
3735 else
3736 {
3737 /* Use temporary register for the immediate. */
3738 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
3739
3740 /* add gprdst, gprdst, tmpreg */
3741 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3742 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
3743
3744 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3745 }
3746
3747#else
3748# error "Port me"
3749#endif
3750 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3751 return off;
3752}
3753
3754
3755/**
3756 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
3757 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3758 * @note For ARM64 the iAddend value must be in the range 0x000..0xfff,
3759 * or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
3760 * the lower 12 bits always zero). The negative ranges are also allowed,
3761 * making it behave like a subtraction. Will assert / throw if the
3762 * constant does not conform.
3763 */
3764DECL_FORCE_INLINE_THROW(uint32_t)
3765iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
3766{
3767#if defined(RT_ARCH_AMD64)
3768 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
3769 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
3770
3771 /* add grp, imm32 */
3772 if (iGprDst >= 8)
3773 pCodeBuf[off++] = X86_OP_REX_B;
3774 pCodeBuf[off++] = 0x81;
3775 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3776 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
3777 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
3778 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
3779 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
3780
3781#elif defined(RT_ARCH_ARM64)
3782 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
3783 if (uAbsAddend <= 0xfff)
3784 {
3785 if (iAddend >= 0)
3786 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3787 else
3788 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3789 }
3790 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3791 {
3792 if (iAddend >= 0)
3793 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3794 false /*f64Bit*/, true /*fShift12*/);
3795 else
3796 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
3797 false /*f64Bit*/, true /*fShift12*/);
3798 }
3799 else
3800# ifdef IEM_WITH_THROW_CATCH
3801 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3802# else
3803 AssertReleaseFailedStmt(off = UINT32_MAX);
3804# endif
3805
3806#else
3807# error "Port me"
3808#endif
3809 return off;
3810}
3811
3812
3813/**
3814 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
3815 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
3816 */
3817DECL_INLINE_THROW(uint32_t)
3818iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
3819{
3820#if defined(RT_ARCH_AMD64)
3821 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
3822
3823#elif defined(RT_ARCH_ARM64)
3824 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
3825 {
3826 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3827 if (iAddend >= 0)
3828 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
3829 else
3830 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
3831 }
3832 else
3833 {
3834 /* Use temporary register for the immediate. */
3835 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
3836
3837 /* add gprdst, gprdst, tmpreg */
3838 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3839 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
3840
3841 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
3842 }
3843
3844#else
3845# error "Port me"
3846#endif
3847 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3848 return off;
3849}
3850
3851
3852/**
3853 * Emits a 16-bit GPR add with a signed immediate addend.
3854 *
3855 * This will optimize using INC/DEC/whatever, and the ARM64 version will not
3856 * set flags, so it is not suitable as a base for conditional jumps.
3857 *
3858 * @note AMD64: Will only update the lower 16 bits of the register.
3859 * @note ARM64: Will update the entire register.
3860 * @note ARM64: Larger constants will require a temporary register. Failing to
3861 * specify one when needed will trigger fatal assertion / throw.
3862 * @sa iemNativeEmitSubGpr16ImmEx
3863 */
3864DECL_FORCE_INLINE_THROW(uint32_t)
3865iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
3866 uint8_t iGprTmp = UINT8_MAX)
3867{
3868#ifdef RT_ARCH_AMD64
3869 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3870 if (iGprDst >= 8)
3871 pCodeBuf[off++] = X86_OP_REX_B;
3872 if (iAddend == 1)
3873 {
3874 /* inc r/m16 */
3875 pCodeBuf[off++] = 0xff;
3876 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3877 }
3878 else if (iAddend == -1)
3879 {
3880 /* dec r/m16 */
3881 pCodeBuf[off++] = 0xff;
3882 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3883 }
3884 else if ((int8_t)iAddend == iAddend)
3885 {
3886 /* add r/m16, imm8 */
3887 pCodeBuf[off++] = 0x83;
3888 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3889 pCodeBuf[off++] = (uint8_t)iAddend;
3890 }
3891 else
3892 {
3893 /* add r/m16, imm16 */
3894 pCodeBuf[off++] = 0x81;
3895 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3896 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
3897 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
3898 }
3899 RT_NOREF(iGprTmp);
3900
3901#elif defined(RT_ARCH_ARM64)
3902 uint32_t uAbsAddend = RT_ABS(iAddend);
3903 if (uAbsAddend < 4096)
3904 {
3905 if (iAddend >= 0)
3906 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3907 else
3908 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
3909 }
3910 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
3911 {
3912 if (iAddend >= 0)
3913 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
3914 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3915 else
3916 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
3917 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3918 }
3919 else if (iGprTmp != UINT8_MAX)
3920 {
3921 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
3922 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3923 }
3924 else
3925# ifdef IEM_WITH_THROW_CATCH
3926 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3927# else
3928 AssertReleaseFailedStmt(off = UINT32_MAX);
3929# endif
3930 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3931
3932#else
3933# error "Port me"
3934#endif
3935 return off;
3936}
3937
3938
3939
3940/**
3941 * Adds two 64-bit GPRs together, storing the result in a third register.
3942 */
3943DECL_FORCE_INLINE(uint32_t)
3944iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
3945{
3946#ifdef RT_ARCH_AMD64
3947 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
3948 {
3949 /** @todo consider LEA */
3950 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
3951 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
3952 }
3953 else
3954 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
3955
3956#elif defined(RT_ARCH_ARM64)
3957 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
3958
3959#else
3960# error "Port me!"
3961#endif
3962 return off;
3963}
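/* Editorial sketch for the LEA @todo above (an assumption, not the current
 * implementation): the three-register case could become one flag-preserving
 * instruction, e.g. lea rax, [rcx + rdx] = 48 8d 04 11 (REX.W 8D /r + SIB),
 * at the cost of handling the usual SIB special cases (RSP as index,
 * RBP/R13 as base) in the emitter. */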
3964
3965
3966
3967/**
3968 * Adds two 32-bit GPRs together, storing the result in a third register.
3969 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
3970 */
3971DECL_FORCE_INLINE(uint32_t)
3972iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
3973{
3974#ifdef RT_ARCH_AMD64
3975 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
3976 {
3977 /** @todo consider LEA */
3978 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
3979 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
3980 }
3981 else
3982 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
3983
3984#elif defined(RT_ARCH_ARM64)
3985 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
3986
3987#else
3988# error "Port me!"
3989#endif
3990 return off;
3991}
3992
3993
3994/**
3995 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
3996 * third register.
3997 *
3998 * @note The ARM64 version does not work for non-trivial constants if the
3999 * two registers are the same. Will assert / throw exception.
4000 */
4001DECL_FORCE_INLINE_THROW(uint32_t)
4002iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4003{
4004#ifdef RT_ARCH_AMD64
4005 /** @todo consider LEA */
4006 if ((int8_t)iImmAddend == iImmAddend)
4007 {
4008 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4009 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4010 }
4011 else
4012 {
4013 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4014 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4015 }
4016
4017#elif defined(RT_ARCH_ARM64)
4018 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4019 if (uAbsImmAddend < 4096)
4020 {
4021 if (iImmAddend >= 0)
4022 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4023 else
4024 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4025 }
4026 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4027 {
4028 if (iImmAddend >= 0)
4029 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4030 else
4031 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4032 }
4033 else if (iGprDst != iGprAddend)
4034 {
4035 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4036 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4037 }
4038 else
4039# ifdef IEM_WITH_THROW_CATCH
4040 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4041# else
4042 AssertReleaseFailedStmt(off = UINT32_MAX);
4043# endif
4044
4045#else
4046# error "Port me!"
4047#endif
4048 return off;
4049}
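/* Editorial note: the AMD64 fallback above also loads the constant into
 * iGprDst before adding iGprAddend, so for addends outside the int8 range
 * it requires iGprDst != iGprAddend just like the ARM64 path; the same
 * holds for the 32-bit variant below. */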
4050
4051
4052/**
4053 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4054 * third register.
4055 *
4056 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4057 *
4058 * @note The ARM64 version does not work for non-trivial constants if the
4059 * two registers are the same. Will assert / throw exception.
4060 */
4061DECL_FORCE_INLINE_THROW(uint32_t)
4062iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4063{
4064#ifdef RT_ARCH_AMD64
4065 /** @todo consider LEA */
4066 if ((int8_t)iImmAddend == iImmAddend)
4067 {
4068 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4069 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4070 }
4071 else
4072 {
4073 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4074 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4075 }
4076
4077#elif defined(RT_ARCH_ARM64)
4078 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4079 if (uAbsImmAddend < 4096)
4080 {
4081 if (iImmAddend >= 0)
4082 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4083 else
4084 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4085 }
4086 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4087 {
4088 if (iImmAddend >= 0)
4089 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4090 else
4091 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4092 }
4093 else if (iGprDst != iGprAddend)
4094 {
4095 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4096 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4097 }
4098 else
4099# ifdef IEM_WITH_THROW_CATCH
4100 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4101# else
4102 AssertReleaseFailedStmt(off = UINT32_MAX);
4103# endif
4104
4105#else
4106# error "Port me!"
4107#endif
4108 return off;
4109}
4110
4111
4112/*********************************************************************************************************************************
4113* Unary Operations *
4114*********************************************************************************************************************************/
4115
4116/**
4117 * Emits code for two's complement negation of a 64-bit GPR.
4118 */
4119DECL_FORCE_INLINE_THROW(uint32_t)
4120iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4121{
4122#if defined(RT_ARCH_AMD64)
4123 /* neg Ev */
4124 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4125 pCodeBuf[off++] = 0xf7;
4126 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4127
4128#elif defined(RT_ARCH_ARM64)
4129 /* sub dst, xzr, dst */
4130 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4131
4132#else
4133# error "Port me"
4134#endif
4135 return off;
4136}
4137
4138
4139/**
4140 * Emits code for two's complement negation of a 64-bit GPR.
4141 */
4142DECL_INLINE_THROW(uint32_t)
4143iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4144{
4145#if defined(RT_ARCH_AMD64)
4146 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4147#elif defined(RT_ARCH_ARM64)
4148 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4149#else
4150# error "Port me"
4151#endif
4152 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4153 return off;
4154}
4155
4156
4157/**
4158 * Emits code for two's complement negation of a 32-bit GPR.
4159 * @note Bits 32 thru 63 are set to zero.
4160 */
4161DECL_FORCE_INLINE_THROW(uint32_t)
4162iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4163{
4164#if defined(RT_ARCH_AMD64)
4165 /* neg Ev */
4166 if (iGprDst >= 8)
4167 pCodeBuf[off++] = X86_OP_REX_B;
4168 pCodeBuf[off++] = 0xf7;
4169 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4170
4171#elif defined(RT_ARCH_ARM64)
4172 /* sub dst, xzr, dst */
4173 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4174
4175#else
4176# error "Port me"
4177#endif
4178 return off;
4179}
4180
4181
4182/**
4183 * Emits code for two's complement negation of a 32-bit GPR.
4184 * @note Bits 32 thru 63 are set to zero.
4185 */
4186DECL_INLINE_THROW(uint32_t)
4187iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4188{
4189#if defined(RT_ARCH_AMD64)
4190 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4191#elif defined(RT_ARCH_ARM64)
4192 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4193#else
4194# error "Port me"
4195#endif
4196 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4197 return off;
4198}
4199
4200
4201
4202/*********************************************************************************************************************************
4203* Bit Operations *
4204*********************************************************************************************************************************/
4205
4206/**
4207 * Emits code for clearing bits 16 thru 63 in the GPR.
4208 */
4209DECL_INLINE_THROW(uint32_t)
4210iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4211{
4212#if defined(RT_ARCH_AMD64)
4213 /* movzx Gv,Ew */
4214 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4215 if (iGprDst >= 8)
4216 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4217 pbCodeBuf[off++] = 0x0f;
4218 pbCodeBuf[off++] = 0xb7;
4219 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4220
4221#elif defined(RT_ARCH_ARM64)
4222 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4223# if 1
4224 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4225# else
4226 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4227 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4228# endif
4229#else
4230# error "Port me"
4231#endif
4232 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4233 return off;
4234}
4235
4236
4237/**
4238 * Emits code for AND'ing two 64-bit GPRs.
4239 *
4240 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4241 * and ARM64 hosts.
4242 */
4243DECL_FORCE_INLINE(uint32_t)
4244iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4245{
4246#if defined(RT_ARCH_AMD64)
4247 /* and Gv, Ev */
4248 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4249 pCodeBuf[off++] = 0x23;
4250 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4251 RT_NOREF(fSetFlags);
4252
4253#elif defined(RT_ARCH_ARM64)
4254 if (!fSetFlags)
4255 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4256 else
4257 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4258
4259#else
4260# error "Port me"
4261#endif
4262 return off;
4263}
4264
4265
4266/**
4267 * Emits code for AND'ing two 64-bit GPRs.
4268 *
4269 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4270 * and ARM64 hosts.
4271 */
4272DECL_INLINE_THROW(uint32_t)
4273iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4274{
4275#if defined(RT_ARCH_AMD64)
4276 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4277#elif defined(RT_ARCH_ARM64)
4278 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4279#else
4280# error "Port me"
4281#endif
4282 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4283 return off;
4284}
4285
4286
4287/**
4288 * Emits code for AND'ing two 32-bit GPRs.
4289 */
4290DECL_FORCE_INLINE(uint32_t)
4291iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4292{
4293#if defined(RT_ARCH_AMD64)
4294 /* and Gv, Ev */
4295 if (iGprDst >= 8 || iGprSrc >= 8)
4296 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4297 pCodeBuf[off++] = 0x23;
4298 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4299 RT_NOREF(fSetFlags);
4300
4301#elif defined(RT_ARCH_ARM64)
4302 if (!fSetFlags)
4303 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4304 else
4305 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4306
4307#else
4308# error "Port me"
4309#endif
4310 return off;
4311}
4312
4313
4314/**
4315 * Emits code for AND'ing two 32-bit GPRs.
4316 */
4317DECL_INLINE_THROW(uint32_t)
4318iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4319{
4320#if defined(RT_ARCH_AMD64)
4321 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4322#elif defined(RT_ARCH_ARM64)
4323 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4324#else
4325# error "Port me"
4326#endif
4327 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4328 return off;
4329}
4330
4331
4332/**
4333 * Emits code for AND'ing a 64-bit GPR with a constant.
4334 *
4335 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4336 * and ARM64 hosts.
4337 */
4338DECL_INLINE_THROW(uint32_t)
4339iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4340{
4341#if defined(RT_ARCH_AMD64)
4342 if ((int64_t)uImm == (int8_t)uImm)
4343 {
4344 /* and Ev, imm8 */
4345 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4346 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4347 pbCodeBuf[off++] = 0x83;
4348 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4349 pbCodeBuf[off++] = (uint8_t)uImm;
4350 }
4351 else if ((int64_t)uImm == (int32_t)uImm)
4352 {
4353 /* and Ev, imm32 */
4354 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4355 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4356 pbCodeBuf[off++] = 0x81;
4357 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4358 pbCodeBuf[off++] = RT_BYTE1(uImm);
4359 pbCodeBuf[off++] = RT_BYTE2(uImm);
4360 pbCodeBuf[off++] = RT_BYTE3(uImm);
4361 pbCodeBuf[off++] = RT_BYTE4(uImm);
4362 }
4363 else
4364 {
4365 /* Use temporary register for the 64-bit immediate. */
4366 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4367 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4368 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4369 }
4370 RT_NOREF(fSetFlags);
4371
4372#elif defined(RT_ARCH_ARM64)
4373 uint32_t uImmR = 0;
4374 uint32_t uImmNandS = 0;
4375 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4376 {
4377 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4378 if (!fSetFlags)
4379 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4380 else
4381 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4382 }
4383 else
4384 {
4385 /* Use temporary register for the 64-bit immediate. */
4386 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4387 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4388 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4389 }
4390
4391#else
4392# error "Port me"
4393#endif
4394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4395 return off;
4396}
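/* Editorial note: Armv8A64ConvertMask64ToImmRImmS() succeeds only for ARM64
 * "bitmask immediates": a 2/4/8/16/32/64-bit element containing a single
 * contiguous run of ones, rotated, and replicated across the register.
 * E.g. 0x00ff00ff00ff00ff and 0xffff0000ffff0000 encode directly, while an
 * irregular value such as 0x1234 does not and takes the temporary-register
 * path above. */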
4397
4398
4399/**
4400 * Emits code for AND'ing a 32-bit GPR with a constant.
4401 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4402 * @note For ARM64 this only supports @a uImm values that can be expressed using
4403 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4404 * make sure this is possible!
4405 */
4406DECL_FORCE_INLINE_THROW(uint32_t)
4407iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4408{
4409#if defined(RT_ARCH_AMD64)
4410 /* and Ev, imm */
4411 if (iGprDst >= 8)
4412 pCodeBuf[off++] = X86_OP_REX_B;
4413 if ((int32_t)uImm == (int8_t)uImm)
4414 {
4415 pCodeBuf[off++] = 0x83;
4416 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4417 pCodeBuf[off++] = (uint8_t)uImm;
4418 }
4419 else
4420 {
4421 pCodeBuf[off++] = 0x81;
4422 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4423 pCodeBuf[off++] = RT_BYTE1(uImm);
4424 pCodeBuf[off++] = RT_BYTE2(uImm);
4425 pCodeBuf[off++] = RT_BYTE3(uImm);
4426 pCodeBuf[off++] = RT_BYTE4(uImm);
4427 }
4428 RT_NOREF(fSetFlags);
4429
4430#elif defined(RT_ARCH_ARM64)
4431 uint32_t uImmR = 0;
4432 uint32_t uImmNandS = 0;
4433 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4434 {
4435 if (!fSetFlags)
4436 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4437 else
4438 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4439 }
4440 else
4441# ifdef IEM_WITH_THROW_CATCH
4442 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4443# else
4444 AssertReleaseFailedStmt(off = UINT32_MAX);
4445# endif
4446
4447#else
4448# error "Port me"
4449#endif
4450 return off;
4451}
4452
4453
4454/**
4455 * Emits code for AND'ing a 32-bit GPR with a constant.
4456 *
4457 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4458 */
4459DECL_INLINE_THROW(uint32_t)
4460iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4461{
4462#if defined(RT_ARCH_AMD64)
4463 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4464
4465#elif defined(RT_ARCH_ARM64)
4466 uint32_t uImmR = 0;
4467 uint32_t uImmNandS = 0;
4468 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4469 {
4470 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4471 if (!fSetFlags)
4472 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4473 else
4474 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4475 }
4476 else
4477 {
4478 /* Use temporary register for the 64-bit immediate. */
4479 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4480 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4481 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4482 }
4483
4484#else
4485# error "Port me"
4486#endif
4487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4488 return off;
4489}
4490
4491
4492/**
4493 * Emits code for AND'ing a 64-bit GPR with a constant.
4494 *
4495 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
4496 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
4497 * the same.
4498 */
4499DECL_FORCE_INLINE_THROW(uint32_t)
4500iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4501 bool fSetFlags = false)
4502{
4503#if defined(RT_ARCH_AMD64)
4504 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4505 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4506 RT_NOREF(fSetFlags);
4507
4508#elif defined(RT_ARCH_ARM64)
4509 uint32_t uImmR = 0;
4510 uint32_t uImmNandS = 0;
4511 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4512 {
4513 if (!fSetFlags)
4514 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4515 else
4516 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4517 }
4518 else if (iGprDst != iGprSrc)
4519 {
4520 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4521 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4522 }
4523 else
4524# ifdef IEM_WITH_THROW_CATCH
4525 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4526# else
4527 AssertReleaseFailedStmt(off = UINT32_MAX);
4528# endif
4529
4530#else
4531# error "Port me"
4532#endif
4533 return off;
4534}
4535
4536/**
4537 * Emits code for AND'ing a 32-bit GPR with a constant.
4538 *
4539 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
4540 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
4541 * the same.
4542 *
4543 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4544 */
4545DECL_FORCE_INLINE_THROW(uint32_t)
4546iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
4547 bool fSetFlags = false)
4548{
4549#if defined(RT_ARCH_AMD64)
4550 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4551 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
4552 RT_NOREF(fSetFlags);
4553
4554#elif defined(RT_ARCH_ARM64)
4555 uint32_t uImmR = 0;
4556 uint32_t uImmNandS = 0;
4557 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4558 {
4559 if (!fSetFlags)
4560 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4561 else
4562 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
4563 }
4564 else if (iGprDst != iGprSrc)
4565 {
4566 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
4567 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4568 }
4569 else
4570# ifdef IEM_WITH_THROW_CATCH
4571 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4572# else
4573 AssertReleaseFailedStmt(off = UINT32_MAX);
4574# endif
4575
4576#else
4577# error "Port me"
4578#endif
4579 return off;
4580}
4581
4582
4583/**
4584 * Emits code for OR'ing two 64-bit GPRs.
4585 */
4586DECL_FORCE_INLINE(uint32_t)
4587iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4588{
4589#if defined(RT_ARCH_AMD64)
4590 /* or Gv, Ev */
4591 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4592 pCodeBuf[off++] = 0x0b;
4593 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4594
4595#elif defined(RT_ARCH_ARM64)
4596 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
4597
4598#else
4599# error "Port me"
4600#endif
4601 return off;
4602}
4603
4604
4605/**
4606 * Emits code for OR'ing two 64-bit GPRs.
4607 */
4608DECL_INLINE_THROW(uint32_t)
4609iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4610{
4611#if defined(RT_ARCH_AMD64)
4612 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4613#elif defined(RT_ARCH_ARM64)
4614 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4615#else
4616# error "Port me"
4617#endif
4618 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4619 return off;
4620}
4621
4622
4623/**
4624 * Emits code for OR'ing two 32-bit GPRs.
4625 * @note Bits 63:32 of the destination GPR will be cleared.
4626 */
4627DECL_FORCE_INLINE(uint32_t)
4628iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4629{
4630#if defined(RT_ARCH_AMD64)
4631 /* or Gv, Ev */
4632 if (iGprDst >= 8 || iGprSrc >= 8)
4633 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4634 pCodeBuf[off++] = 0x0b;
4635 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4636
4637#elif defined(RT_ARCH_ARM64)
4638 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4639
4640#else
4641# error "Port me"
4642#endif
4643 return off;
4644}
4645
4646
4647/**
4648 * Emits code for OR'ing two 32-bit GPRs.
4649 * @note Bits 63:32 of the destination GPR will be cleared.
4650 */
4651DECL_INLINE_THROW(uint32_t)
4652iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4653{
4654#if defined(RT_ARCH_AMD64)
4655 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4656#elif defined(RT_ARCH_ARM64)
4657 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4658#else
4659# error "Port me"
4660#endif
4661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4662 return off;
4663}
4664
4665
4666/**
4667 * Emits code for OR'ing a 64-bit GPR with a constant.
4668 */
4669DECL_INLINE_THROW(uint32_t)
4670iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
4671{
4672#if defined(RT_ARCH_AMD64)
4673 if ((int64_t)uImm == (int8_t)uImm)
4674 {
4675 /* or Ev, imm8 */
4676 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4677 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4678 pbCodeBuf[off++] = 0x83;
4679 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4680 pbCodeBuf[off++] = (uint8_t)uImm;
4681 }
4682 else if ((int64_t)uImm == (int32_t)uImm)
4683 {
4684 /* or Ev, imm32 */
4685 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4686 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4687 pbCodeBuf[off++] = 0x81;
4688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4689 pbCodeBuf[off++] = RT_BYTE1(uImm);
4690 pbCodeBuf[off++] = RT_BYTE2(uImm);
4691 pbCodeBuf[off++] = RT_BYTE3(uImm);
4692 pbCodeBuf[off++] = RT_BYTE4(uImm);
4693 }
4694 else
4695 {
4696 /* Use temporary register for the 64-bit immediate. */
4697 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4698 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
4699 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4700 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4701 }
4702
4703#elif defined(RT_ARCH_ARM64)
4704 uint32_t uImmR = 0;
4705 uint32_t uImmNandS = 0;
4706 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4707 {
4708 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4709 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
4710 }
4711 else
4712 {
4713 /* Use temporary register for the 64-bit immediate. */
4714 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4715 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
4716 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4717 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4718 }
4719
4720#else
4721# error "Port me"
4722#endif
4723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4724 return off;
4725}
4726
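/*
 * Illustrative only: which 64-bit OR immediates avoid the temporary register
 * on ARM64.  A repeating rotated-run bit pattern encodes as a logical
 * immediate; an arbitrary constant does not and takes the
 * iemNativeRegAllocTmpImm() path above.
 */
#if 0
uint32_t uImmNandS = 0, uImmR = 0;
bool const fDirect = Armv8A64ConvertMask64ToImmRImmS(UINT64_C(0x00ff00ff00ff00ff), &uImmNandS, &uImmR); /* true - single ORR */
bool const fViaTmp = Armv8A64ConvertMask64ToImmRImmS(UINT64_C(0x123456789abcdef0), &uImmNandS, &uImmR); /* false - temp reg */
#endif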
4727
4728/**
4729 * Emits code for OR'ing a 32-bit GPR with a constant.
4730 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4731 * @note For ARM64 this only supports @a uImm values that can be expressed using
4732 * the two 6-bit immediates of the OR instructions. The caller must make
4733 * sure this is possible!
4734 */
4735DECL_FORCE_INLINE_THROW(uint32_t)
4736iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4737{
4738#if defined(RT_ARCH_AMD64)
4739 /* or Ev, imm */
4740 if (iGprDst >= 8)
4741 pCodeBuf[off++] = X86_OP_REX_B;
4742 if ((int32_t)uImm == (int8_t)uImm)
4743 {
4744 pCodeBuf[off++] = 0x83;
4745 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4746 pCodeBuf[off++] = (uint8_t)uImm;
4747 }
4748 else
4749 {
4750 pCodeBuf[off++] = 0x81;
4751 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4752 pCodeBuf[off++] = RT_BYTE1(uImm);
4753 pCodeBuf[off++] = RT_BYTE2(uImm);
4754 pCodeBuf[off++] = RT_BYTE3(uImm);
4755 pCodeBuf[off++] = RT_BYTE4(uImm);
4756 }
4757
4758#elif defined(RT_ARCH_ARM64)
4759 uint32_t uImmR = 0;
4760 uint32_t uImmNandS = 0;
4761 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4762 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4763 else
4764# ifdef IEM_WITH_THROW_CATCH
4765 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4766# else
4767 AssertReleaseFailedStmt(off = UINT32_MAX);
4768# endif
4769
4770#else
4771# error "Port me"
4772#endif
4773 return off;
4774}
4775
4776
4777/**
4778 * Emits code for OR'ing a 32-bit GPR with a constant.
4779 *
4780 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4781 */
4782DECL_INLINE_THROW(uint32_t)
4783iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4784{
4785#if defined(RT_ARCH_AMD64)
4786 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
4787
4788#elif defined(RT_ARCH_ARM64)
4789 uint32_t uImmR = 0;
4790 uint32_t uImmNandS = 0;
4791 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4792 {
4793 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4794 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4795 }
4796 else
4797 {
4798        /* Use temporary register for the 32-bit immediate. */
4799 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4800 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
4801 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4802 }
4803
4804#else
4805# error "Port me"
4806#endif
4807 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4808 return off;
4809}
4810
4811
4812/**
4813 * Emits code for XOR'ing two 64-bit GPRs.
4814 */
4815DECL_INLINE_THROW(uint32_t)
4816iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4817{
4818#if defined(RT_ARCH_AMD64)
4819    /* xor Gv, Ev */
4820 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4821 pCodeBuf[off++] = 0x33;
4822 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4823
4824#elif defined(RT_ARCH_ARM64)
4825 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
4826
4827#else
4828# error "Port me"
4829#endif
4830 return off;
4831}
4832
4833
4834/**
4835 * Emits code for XOR'ing two 64-bit GPRs.
4836 */
4837DECL_INLINE_THROW(uint32_t)
4838iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4839{
4840#if defined(RT_ARCH_AMD64)
4841 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4842#elif defined(RT_ARCH_ARM64)
4843 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4844#else
4845# error "Port me"
4846#endif
4847 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4848 return off;
4849}
4850
4851
4852/**
4853 * Emits code for XOR'ing two 32-bit GPRs.
4854 */
4855DECL_INLINE_THROW(uint32_t)
4856iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4857{
4858#if defined(RT_ARCH_AMD64)
4859    /* xor Gv, Ev */
4860 if (iGprDst >= 8 || iGprSrc >= 8)
4861 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4862 pCodeBuf[off++] = 0x33;
4863 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4864
4865#elif defined(RT_ARCH_ARM64)
4866 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4867
4868#else
4869# error "Port me"
4870#endif
4871 return off;
4872}
4873
4874
4875/**
4876 * Emits code for XOR'ing two 32-bit GPRs.
4877 */
4878DECL_INLINE_THROW(uint32_t)
4879iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
4880{
4881#if defined(RT_ARCH_AMD64)
4882 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
4883#elif defined(RT_ARCH_ARM64)
4884 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
4885#else
4886# error "Port me"
4887#endif
4888 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4889 return off;
4890}
4891
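/*
 * A classic use of the 32-bit XOR variant is zeroing a register: XOR'ing a
 * GPR with itself clears it, and the 32-bit form is the shortest AMD64
 * encoding (the CPU zero-extends into bits 63:32 anyway).  Minimal sketch,
 * hypothetical helper name:
 */
#if 0
static uint32_t iemNativeExampleZeroGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
    return iemNativeEmitXorGpr32ByGpr32(pReNative, off, iGpr, iGpr); /* iGpr = 0 */
}
#endif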
4892
4893/**
4894 * Emits code for XOR'ing a 32-bit GPR with a constant.
4895 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4896 * @note For ARM64 this only supports @a uImm values that can be expressed using
4897 * the two 6-bit immediates of the EOR instructions. The caller must make
4898 * sure this is possible!
4899 */
4900DECL_FORCE_INLINE_THROW(uint32_t)
4901iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
4902{
4903#if defined(RT_ARCH_AMD64)
4904    /* xor Ev, imm */
4905 if (iGprDst >= 8)
4906 pCodeBuf[off++] = X86_OP_REX_B;
4907 if ((int32_t)uImm == (int8_t)uImm)
4908 {
4909 pCodeBuf[off++] = 0x83;
4910 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
4911 pCodeBuf[off++] = (uint8_t)uImm;
4912 }
4913 else
4914 {
4915 pCodeBuf[off++] = 0x81;
4916 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
4917 pCodeBuf[off++] = RT_BYTE1(uImm);
4918 pCodeBuf[off++] = RT_BYTE2(uImm);
4919 pCodeBuf[off++] = RT_BYTE3(uImm);
4920 pCodeBuf[off++] = RT_BYTE4(uImm);
4921 }
4922
4923#elif defined(RT_ARCH_ARM64)
4924 uint32_t uImmR = 0;
4925 uint32_t uImmNandS = 0;
4926 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4927 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4928 else
4929# ifdef IEM_WITH_THROW_CATCH
4930 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4931# else
4932 AssertReleaseFailedStmt(off = UINT32_MAX);
4933# endif
4934
4935#else
4936# error "Port me"
4937#endif
4938 return off;
4939}
4940
4941
4942/*********************************************************************************************************************************
4943* Shifting *
4944*********************************************************************************************************************************/
4945
4946/**
4947 * Emits code for shifting a GPR a fixed number of bits to the left.
4948 */
4949DECL_FORCE_INLINE(uint32_t)
4950iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
4951{
4952 Assert(cShift > 0 && cShift < 64);
4953
4954#if defined(RT_ARCH_AMD64)
4955 /* shl dst, cShift */
4956 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4957 if (cShift != 1)
4958 {
4959 pCodeBuf[off++] = 0xc1;
4960 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4961 pCodeBuf[off++] = cShift;
4962 }
4963 else
4964 {
4965 pCodeBuf[off++] = 0xd1;
4966 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4967 }
4968
4969#elif defined(RT_ARCH_ARM64)
4970 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
4971
4972#else
4973# error "Port me"
4974#endif
4975 return off;
4976}
4977
4978
4979/**
4980 * Emits code for shifting a GPR a fixed number of bits to the left.
4981 */
4982DECL_INLINE_THROW(uint32_t)
4983iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
4984{
4985#if defined(RT_ARCH_AMD64)
4986 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
4987#elif defined(RT_ARCH_ARM64)
4988 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
4989#else
4990# error "Port me"
4991#endif
4992 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4993 return off;
4994}
4995
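/*
 * Shifting left by a constant is the usual way to scale an index, e.g.
 * turning a table index into a byte offset for 8-byte entries.  Minimal
 * sketch (hypothetical helper name):
 */
#if 0
static uint32_t iemNativeExampleScaleIndexU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprIdx)
{
    return iemNativeEmitShiftGprLeft(pReNative, off, iGprIdx, 3); /* iGprIdx *= sizeof(uint64_t) */
}
#endif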
4996
4997/**
4998 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
4999 */
5000DECL_FORCE_INLINE(uint32_t)
5001iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5002{
5003 Assert(cShift > 0 && cShift < 32);
5004
5005#if defined(RT_ARCH_AMD64)
5006 /* shl dst, cShift */
5007 if (iGprDst >= 8)
5008 pCodeBuf[off++] = X86_OP_REX_B;
5009 if (cShift != 1)
5010 {
5011 pCodeBuf[off++] = 0xc1;
5012 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5013 pCodeBuf[off++] = cShift;
5014 }
5015 else
5016 {
5017 pCodeBuf[off++] = 0xd1;
5018 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5019 }
5020
5021#elif defined(RT_ARCH_ARM64)
5022    pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5023
5024#else
5025# error "Port me"
5026#endif
5027 return off;
5028}
5029
5030
5031/**
5032 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5033 */
5034DECL_INLINE_THROW(uint32_t)
5035iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5036{
5037#if defined(RT_ARCH_AMD64)
5038 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5039#elif defined(RT_ARCH_ARM64)
5040 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5041#else
5042# error "Port me"
5043#endif
5044 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5045 return off;
5046}
5047
5048
5049/**
5050 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5051 */
5052DECL_FORCE_INLINE(uint32_t)
5053iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5054{
5055 Assert(cShift > 0 && cShift < 64);
5056
5057#if defined(RT_ARCH_AMD64)
5058 /* shr dst, cShift */
5059 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5060 if (cShift != 1)
5061 {
5062 pCodeBuf[off++] = 0xc1;
5063 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5064 pCodeBuf[off++] = cShift;
5065 }
5066 else
5067 {
5068 pCodeBuf[off++] = 0xd1;
5069 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5070 }
5071
5072#elif defined(RT_ARCH_ARM64)
5073 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5074
5075#else
5076# error "Port me"
5077#endif
5078 return off;
5079}
5080
5081
5082/**
5083 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5084 */
5085DECL_INLINE_THROW(uint32_t)
5086iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5087{
5088#if defined(RT_ARCH_AMD64)
5089 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5090#elif defined(RT_ARCH_ARM64)
5091 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5092#else
5093# error "Port me"
5094#endif
5095 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5096 return off;
5097}
5098
5099
5100/**
5101 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5102 * right.
5103 */
5104DECL_FORCE_INLINE(uint32_t)
5105iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5106{
5107 Assert(cShift > 0 && cShift < 32);
5108
5109#if defined(RT_ARCH_AMD64)
5110 /* shr dst, cShift */
5111 if (iGprDst >= 8)
5112 pCodeBuf[off++] = X86_OP_REX_B;
5113 if (cShift != 1)
5114 {
5115 pCodeBuf[off++] = 0xc1;
5116 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5117 pCodeBuf[off++] = cShift;
5118 }
5119 else
5120 {
5121 pCodeBuf[off++] = 0xd1;
5122 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5123 }
5124
5125#elif defined(RT_ARCH_ARM64)
5126    pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5127
5128#else
5129# error "Port me"
5130#endif
5131 return off;
5132}
5133
5134
5135/**
5136 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5137 * right.
5138 */
5139DECL_INLINE_THROW(uint32_t)
5140iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5141{
5142#if defined(RT_ARCH_AMD64)
5143 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5144#elif defined(RT_ARCH_ARM64)
5145 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5146#else
5147# error "Port me"
5148#endif
5149 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5150 return off;
5151}
5152
5153
5154/**
5155 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5156 * right and assigning it to a different GPR.
5157 */
5158DECL_INLINE_THROW(uint32_t)
5159iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5160{
5161 Assert(cShift > 0); Assert(cShift < 32);
5162#if defined(RT_ARCH_AMD64)
5163 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5164 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5165
5166#elif defined(RT_ARCH_ARM64)
5167    pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*f64Bit*/);
5168
5169#else
5170# error "Port me"
5171#endif
5172 return off;
5173}
5174
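/*
 * The Ex flavour writes into a caller supplied buffer, so the caller must
 * reserve worst-case space first and flush afterwards.  Minimal sketch
 * (hypothetical helper name), assuming a worst case of 7 bytes on AMD64
 * (mov + shr) and one instruction word on ARM64:
 */
#if 0
static uint32_t iemNativeExampleExtractBits31To12(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                  uint8_t iGprDst, uint8_t iGprSrc)
{
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    off = iemNativeEmitGpr32EqGprShiftRightImmEx(pCodeBuf, off, iGprDst, iGprSrc, 12 /*cShift*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
#endif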
5175
5176/**
5177 * Emits code for rotating a GPR a fixed number of bits to the left.
5178 */
5179DECL_FORCE_INLINE(uint32_t)
5180iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5181{
5182 Assert(cShift > 0 && cShift < 64);
5183
5184#if defined(RT_ARCH_AMD64)
5185 /* rol dst, cShift */
5186 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5187 if (cShift != 1)
5188 {
5189 pCodeBuf[off++] = 0xc1;
5190 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5191 pCodeBuf[off++] = cShift;
5192 }
5193 else
5194 {
5195 pCodeBuf[off++] = 0xd1;
5196 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5197 }
5198
5199#elif defined(RT_ARCH_ARM64)
5200 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5201
5202#else
5203# error "Port me"
5204#endif
5205 return off;
5206}
5207
5208
5209/**
5210 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5211 * @note On ARM64 bits 63:32 of the destination GPR are cleared; on AMD64 the ROL workaround leaves bits 63:16 untouched.
5212 */
5213DECL_FORCE_INLINE(uint32_t)
5214iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5215{
5216#if defined(RT_ARCH_AMD64)
5217 /*
5218 * There is no bswap r16 on x86 (the encoding exists but does not work).
5219 * So just use a rol (gcc -O2 is doing that).
5220 *
5221 * rol r16, 0x8
5222 */
5223 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5224 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5225 if (iGpr >= 8)
5226 pbCodeBuf[off++] = X86_OP_REX_B;
5227 pbCodeBuf[off++] = 0xc1;
5228 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5229 pbCodeBuf[off++] = 0x08;
5230#elif defined(RT_ARCH_ARM64)
5231 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5232
5233 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5234#else
5235# error "Port me"
5236#endif
5237
5238 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5239 return off;
5240}
5241
5242
5243/**
5244 * Emits code for reversing the byte order in a 32-bit GPR.
5245 * @note Bits 63:32 of the destination GPR will be cleared.
5246 */
5247DECL_FORCE_INLINE(uint32_t)
5248iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5249{
5250#if defined(RT_ARCH_AMD64)
5251 /* bswap r32 */
5252 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5253
5254 if (iGpr >= 8)
5255 pbCodeBuf[off++] = X86_OP_REX_B;
5256 pbCodeBuf[off++] = 0x0f;
5257 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5258#elif defined(RT_ARCH_ARM64)
5259 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5260
5261 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5262#else
5263# error "Port me"
5264#endif
5265
5266 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5267 return off;
5268}
5269
5270
5271/**
5272 * Emits code for reversing the byte order in a 64-bit GPR.
5273 */
5274DECL_FORCE_INLINE(uint32_t)
5275iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5276{
5277#if defined(RT_ARCH_AMD64)
5278 /* bswap r64 */
5279 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5280
5281 if (iGpr >= 8)
5282 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5283 else
5284 pbCodeBuf[off++] = X86_OP_REX_W;
5285 pbCodeBuf[off++] = 0x0f;
5286 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5287#elif defined(RT_ARCH_ARM64)
5288 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5289
5290 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5291#else
5292# error "Port me"
5293#endif
5294
5295 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5296 return off;
5297}
5298
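/*
 * Minimal sketch (hypothetical helper name): the bswap emitters are the
 * building blocks for byte order conversion, e.g. when recompiling a guest
 * BSWAP or a big-endian value that must end up in host (little-endian)
 * layout:
 */
#if 0
static uint32_t iemNativeExampleToHostOrder32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
    /* ... the 32-bit big-endian value has already been loaded into iGpr ... */
    return iemNativeEmitBswapGpr32(pReNative, off, iGpr); /* now in host byte order */
}
#endif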
5299
5300/*********************************************************************************************************************************
5301* Compare and Testing *
5302*********************************************************************************************************************************/
5303
5304
5305#ifdef RT_ARCH_ARM64
5306/**
5307 * Emits an ARM64 compare instruction.
5308 */
5309DECL_INLINE_THROW(uint32_t)
5310iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5311 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
5312{
5313 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5314 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5315 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5316 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5317 return off;
5318}
5319#endif
5320
5321
5322/**
5323 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5324 * with conditional instructions.
5325 */
5326DECL_FORCE_INLINE(uint32_t)
5327iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5328{
5329#ifdef RT_ARCH_AMD64
5330 /* cmp Gv, Ev */
5331 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5332 pCodeBuf[off++] = 0x3b;
5333 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5334
5335#elif defined(RT_ARCH_ARM64)
5336 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
5337
5338#else
5339# error "Port me!"
5340#endif
5341 return off;
5342}
5343
5344
5345/**
5346 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
5347 * with conditional instructions.
5348 */
5349DECL_INLINE_THROW(uint32_t)
5350iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5351{
5352#ifdef RT_ARCH_AMD64
5353 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5354#elif defined(RT_ARCH_ARM64)
5355 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5356#else
5357# error "Port me!"
5358#endif
5359 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5360 return off;
5361}
5362
5363
5364/**
5365 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5366 * with conditional instructions.
5367 */
5368DECL_FORCE_INLINE(uint32_t)
5369iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5370{
5371#ifdef RT_ARCH_AMD64
5372 /* cmp Gv, Ev */
5373 if (iGprLeft >= 8 || iGprRight >= 8)
5374 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
5375 pCodeBuf[off++] = 0x3b;
5376 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
5377
5378#elif defined(RT_ARCH_ARM64)
5379 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
5380
5381#else
5382# error "Port me!"
5383#endif
5384 return off;
5385}
5386
5387
5388/**
5389 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
5390 * with conditional instructions.
5391 */
5392DECL_INLINE_THROW(uint32_t)
5393iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
5394{
5395#ifdef RT_ARCH_AMD64
5396 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
5397#elif defined(RT_ARCH_ARM64)
5398 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
5399#else
5400# error "Port me!"
5401#endif
5402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5403 return off;
5404}
5405
5406
5407/**
5408 * Emits a compare of a 64-bit GPR with a constant value, setting status
5409 * flags/whatever for use with conditional instructions.
5410 */
5411DECL_INLINE_THROW(uint32_t)
5412iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
5413{
5414#ifdef RT_ARCH_AMD64
5415    if (uImm <= UINT32_C(0x7f)) /* the 0x83 form sign-extends the imm8 */
5416 {
5417 /* cmp Ev, Ib */
5418 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5419 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5420 pbCodeBuf[off++] = 0x83;
5421 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5422 pbCodeBuf[off++] = (uint8_t)uImm;
5423 }
5424 else if ((int64_t)uImm == (int32_t)uImm)
5425 {
5426 /* cmp Ev, imm */
5427 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5428 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
5429 pbCodeBuf[off++] = 0x81;
5430 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5431 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5432 pbCodeBuf[off++] = RT_BYTE1(uImm);
5433 pbCodeBuf[off++] = RT_BYTE2(uImm);
5434 pbCodeBuf[off++] = RT_BYTE3(uImm);
5435 pbCodeBuf[off++] = RT_BYTE4(uImm);
5436 }
5437 else
5438 {
5439 /* Use temporary register for the immediate. */
5440 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5441 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5442 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5443 }
5444
5445#elif defined(RT_ARCH_ARM64)
5446    /** @todo guess there are cleverer things we can do here... */
5447 if (uImm < _4K)
5448 {
5449 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5450 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5451 true /*64Bit*/, true /*fSetFlags*/);
5452 }
5453 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5454 {
5455 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5456 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5457 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5458 }
5459 else
5460 {
5461 /* Use temporary register for the immediate. */
5462 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5463 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
5464 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5465 }
5466
5467#else
5468# error "Port me!"
5469#endif
5470
5471 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5472 return off;
5473}
5474
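/*
 * Illustrative only (hypothetical helper name): the three ARM64 paths of the
 * emitter above, driven by the shape of the immediate.
 */
#if 0
static uint32_t iemNativeExampleCmpImmRanges(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft)
{
    off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprLeft, UINT64_C(0x0fff));  /* plain 12-bit immediate */
    off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprLeft, UINT64_C(0x7f000)); /* shifted-by-12 form */
    off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprLeft, UINT64_C(0x12345)); /* temporary register path */
    return off;
}
#endif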
5475
5476/**
5477 * Emits a compare of a 32-bit GPR with a constant value, setting status
5478 * flags/whatever for use with conditional instructions.
5479 *
5480 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5481 * shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
5482 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
5483 * violates this restriction.
5484 */
5485DECL_FORCE_INLINE_THROW(uint32_t)
5486iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5487{
5488#ifdef RT_ARCH_AMD64
5489 if (iGprLeft >= 8)
5490 pCodeBuf[off++] = X86_OP_REX_B;
5491 if (uImm <= UINT32_C(0x7f))
5492 {
5493 /* cmp Ev, Ib */
5494 pCodeBuf[off++] = 0x83;
5495 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5496 pCodeBuf[off++] = (uint8_t)uImm;
5497 }
5498 else
5499 {
5500 /* cmp Ev, imm */
5501 pCodeBuf[off++] = 0x81;
5502 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5503 pCodeBuf[off++] = RT_BYTE1(uImm);
5504 pCodeBuf[off++] = RT_BYTE2(uImm);
5505 pCodeBuf[off++] = RT_BYTE3(uImm);
5506 pCodeBuf[off++] = RT_BYTE4(uImm);
5507 }
5508
5509#elif defined(RT_ARCH_ARM64)
5510    /** @todo guess there are cleverer things we can do here... */
5511 if (uImm < _4K)
5512 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5513 false /*64Bit*/, true /*fSetFlags*/);
5514 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5515        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5516 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5517 else
5518# ifdef IEM_WITH_THROW_CATCH
5519 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5520# else
5521 AssertReleaseFailedStmt(off = UINT32_MAX);
5522# endif
5523
5524#else
5525# error "Port me!"
5526#endif
5527 return off;
5528}
5529
5530
5531/**
5532 * Emits a compare of a 32-bit GPR with a constant value, setting status
5533 * flags/whatever for use with conditional instructions.
5534 */
5535DECL_INLINE_THROW(uint32_t)
5536iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
5537{
5538#ifdef RT_ARCH_AMD64
5539 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
5540
5541#elif defined(RT_ARCH_ARM64)
5542    /** @todo guess there are cleverer things we can do here... */
5543 if (uImm < _4K)
5544 {
5545 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5546 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
5547 false /*64Bit*/, true /*fSetFlags*/);
5548 }
5549 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
5550 {
5551 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5552        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
5553 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
5554 }
5555 else
5556 {
5557 /* Use temporary register for the immediate. */
5558 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5559 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
5560 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5561 }
5562
5563#else
5564# error "Port me!"
5565#endif
5566
5567 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5568 return off;
5569}
5570
5571
5572/**
5573 * Emits a compare of a 16-bit GPR with a constant value, setting status
5574 * flags/whatever for use with conditional instructions.
5575 *
5576 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
5577 *       16-bit value from @a iGprLeft.
5578 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
5579 *       shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
5580 * bits all zero). Will release assert or throw exception if the caller
5581 * violates this restriction.
5582 */
5583DECL_FORCE_INLINE_THROW(uint32_t)
5584iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
5585 uint8_t idxTmpReg = UINT8_MAX)
5586{
5587#ifdef RT_ARCH_AMD64
5588 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5589 if (iGprLeft >= 8)
5590 pCodeBuf[off++] = X86_OP_REX_B;
5591 if (uImm <= UINT32_C(0x7f))
5592 {
5593 /* cmp Ev, Ib */
5594 pCodeBuf[off++] = 0x83;
5595 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5596 pCodeBuf[off++] = (uint8_t)uImm;
5597 }
5598 else
5599 {
5600 /* cmp Ev, imm */
5601 pCodeBuf[off++] = 0x81;
5602 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
5603 pCodeBuf[off++] = RT_BYTE1(uImm);
5604 pCodeBuf[off++] = RT_BYTE2(uImm);
5605 }
5606 RT_NOREF(idxTmpReg);
5607
5608#elif defined(RT_ARCH_ARM64)
5609# ifdef IEM_WITH_THROW_CATCH
5610 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5611# else
5612 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
5613# endif
5614 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
5615 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
5616 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
5617
5618#else
5619# error "Port me!"
5620#endif
5621 return off;
5622}
5623
5624
5625/**
5626 * Emits a compare of a 16-bit GPR with a constant value, setting status
5627 * flags/whatever for use with conditional instructions.
5628 *
5629 * @note ARM64: Helper register is required (idxTmpReg).
5630 */
5631DECL_INLINE_THROW(uint32_t)
5632iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
5633 uint8_t idxTmpReg = UINT8_MAX)
5634{
5635#ifdef RT_ARCH_AMD64
5636 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
5637#elif defined(RT_ARCH_ARM64)
5638 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
5639#else
5640# error "Port me!"
5641#endif
5642 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5643 return off;
5644}
5645
5646
5647
5648/*********************************************************************************************************************************
5649* Branching *
5650*********************************************************************************************************************************/
5651
5652/**
5653 * Emits a JMP rel32 / B imm19 to the given label.
5654 */
5655DECL_FORCE_INLINE_THROW(uint32_t)
5656iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
5657{
5658 Assert(idxLabel < pReNative->cLabels);
5659
5660#ifdef RT_ARCH_AMD64
5661 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
5662 {
5663 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
5664 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
5665 {
5666 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
5667 pCodeBuf[off++] = (uint8_t)offRel;
5668 }
5669 else
5670 {
5671 offRel -= 3;
5672 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
5673 pCodeBuf[off++] = RT_BYTE1(offRel);
5674 pCodeBuf[off++] = RT_BYTE2(offRel);
5675 pCodeBuf[off++] = RT_BYTE3(offRel);
5676 pCodeBuf[off++] = RT_BYTE4(offRel);
5677 }
5678 }
5679 else
5680 {
5681 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
5682 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
5683 pCodeBuf[off++] = 0xfe;
5684 pCodeBuf[off++] = 0xff;
5685 pCodeBuf[off++] = 0xff;
5686 pCodeBuf[off++] = 0xff;
5687 }
5688 pCodeBuf[off++] = 0xcc; /* int3 poison */
5689
5690#elif defined(RT_ARCH_ARM64)
5691 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
5692 pCodeBuf[off++] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
5693 else
5694 {
5695 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
5696 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
5697 }
5698
5699#else
5700# error "Port me!"
5701#endif
5702 return off;
5703}
5704
5705
5706/**
5707 * Emits a JMP rel32 / B imm19 to the given label.
5708 */
5709DECL_INLINE_THROW(uint32_t)
5710iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5711{
5712#ifdef RT_ARCH_AMD64
5713 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
5714#elif defined(RT_ARCH_ARM64)
5715 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
5716#else
5717# error "Port me!"
5718#endif
5719 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5720 return off;
5721}
5722
5723
5724/**
5725 * Emits a JMP rel32 / B imm19 to a new undefined label.
5726 */
5727DECL_INLINE_THROW(uint32_t)
5728iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
5729{
5730 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
5731 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
5732}
5733
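/*
 * Typical label workflow (minimal sketch, hypothetical helper name): create
 * the label with no position yet, emit forward jumps to it, then define it
 * at the current offset.  iemNativeLabelDefine() is assumed to be the
 * recompiler core helper that resolves the recorded fixups.
 */
#if 0
static uint32_t iemNativeExampleSkipBlock(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType)
{
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, 0 /*uData*/);
    off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel); /* forward jump, fixed up when defined */
    /* ... emit the code being jumped over ... */
    iemNativeLabelDefine(pReNative, idxLabel, off);          /* resolves the fixup */
    return off;
}
#endif
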
5734/** Condition type. */
5735#ifdef RT_ARCH_AMD64
5736typedef enum IEMNATIVEINSTRCOND : uint8_t
5737{
5738 kIemNativeInstrCond_o = 0,
5739 kIemNativeInstrCond_no,
5740 kIemNativeInstrCond_c,
5741 kIemNativeInstrCond_nc,
5742 kIemNativeInstrCond_e,
5743 kIemNativeInstrCond_ne,
5744 kIemNativeInstrCond_be,
5745 kIemNativeInstrCond_nbe,
5746 kIemNativeInstrCond_s,
5747 kIemNativeInstrCond_ns,
5748 kIemNativeInstrCond_p,
5749 kIemNativeInstrCond_np,
5750 kIemNativeInstrCond_l,
5751 kIemNativeInstrCond_nl,
5752 kIemNativeInstrCond_le,
5753 kIemNativeInstrCond_nle
5754} IEMNATIVEINSTRCOND;
5755#elif defined(RT_ARCH_ARM64)
5756typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
5757# define kIemNativeInstrCond_o todo_conditional_codes
5758# define kIemNativeInstrCond_no todo_conditional_codes
5759# define kIemNativeInstrCond_c todo_conditional_codes
5760# define kIemNativeInstrCond_nc todo_conditional_codes
5761# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
5762# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
5763# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
5764# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
5765# define kIemNativeInstrCond_s todo_conditional_codes
5766# define kIemNativeInstrCond_ns todo_conditional_codes
5767# define kIemNativeInstrCond_p todo_conditional_codes
5768# define kIemNativeInstrCond_np todo_conditional_codes
5769# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
5770# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
5771# define kIemNativeInstrCond_le kArmv8InstrCond_Le
5772# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
5773#else
5774# error "Port me!"
5775#endif
5776
5777
5778/**
5779 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
5780 */
5781DECL_FORCE_INLINE_THROW(uint32_t)
5782iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
5783 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
5784{
5785 Assert(idxLabel < pReNative->cLabels);
5786
5787 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
5788#ifdef RT_ARCH_AMD64
5789 if (offLabel >= off)
5790 {
5791 /* jcc rel32 */
5792 pCodeBuf[off++] = 0x0f;
5793 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
5794 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
5795 pCodeBuf[off++] = 0x00;
5796 pCodeBuf[off++] = 0x00;
5797 pCodeBuf[off++] = 0x00;
5798 pCodeBuf[off++] = 0x00;
5799 }
5800 else
5801 {
5802 int32_t offDisp = offLabel - (off + 2);
5803 if ((int8_t)offDisp == offDisp)
5804 {
5805 /* jcc rel8 */
5806 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
5807 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
5808 }
5809 else
5810 {
5811 /* jcc rel32 */
5812 offDisp -= 4;
5813 pCodeBuf[off++] = 0x0f;
5814 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
5815 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
5816 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
5817 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
5818 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
5819 }
5820 }
5821
5822#elif defined(RT_ARCH_ARM64)
5823 if (offLabel >= off)
5824 {
5825 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5826 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
5827 }
5828 else
5829 {
5830 Assert(off - offLabel <= 0x3ffffU);
5831 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
5832 }
5833
5834#else
5835# error "Port me!"
5836#endif
5837 return off;
5838}
5839
5840
5841/**
5842 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
5843 */
5844DECL_INLINE_THROW(uint32_t)
5845iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
5846{
5847#ifdef RT_ARCH_AMD64
5848 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
5849#elif defined(RT_ARCH_ARM64)
5850 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
5851#else
5852# error "Port me!"
5853#endif
5854 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5855 return off;
5856}
5857
5858
5859/**
5860 * Emits a Jcc rel32 / B.cc imm19 to a new label.
5861 */
5862DECL_INLINE_THROW(uint32_t)
5863iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5864 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
5865{
5866 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
5867 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
5868}
5869
5870
5871/**
5872 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
5873 */
5874DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5875{
5876#ifdef RT_ARCH_AMD64
5877 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
5878#elif defined(RT_ARCH_ARM64)
5879 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
5880#else
5881# error "Port me!"
5882#endif
5883}
5884
5885/**
5886 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
5887 */
5888DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5889 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
5890{
5891#ifdef RT_ARCH_AMD64
5892 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
5893#elif defined(RT_ARCH_ARM64)
5894 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
5895#else
5896# error "Port me!"
5897#endif
5898}
5899
5900
5901/**
5902 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
5903 */
5904DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5905{
5906#ifdef RT_ARCH_AMD64
5907 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
5908#elif defined(RT_ARCH_ARM64)
5909 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
5910#else
5911# error "Port me!"
5912#endif
5913}
5914
5915/**
5916 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
5917 */
5918DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5919 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
5920{
5921#ifdef RT_ARCH_AMD64
5922 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
5923#elif defined(RT_ARCH_ARM64)
5924 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
5925#else
5926# error "Port me!"
5927#endif
5928}
5929
5930
5931/**
5932 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
5933 */
5934DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5935{
5936#ifdef RT_ARCH_AMD64
5937 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
5938#elif defined(RT_ARCH_ARM64)
5939 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
5940#else
5941# error "Port me!"
5942#endif
5943}
5944
5945/**
5946 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
5947 */
5948DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5949 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
5950{
5951#ifdef RT_ARCH_AMD64
5952 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
5953#elif defined(RT_ARCH_ARM64)
5954 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
5955#else
5956# error "Port me!"
5957#endif
5958}
5959
5960
5961/**
5962 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
5963 */
5964DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5965{
5966#ifdef RT_ARCH_AMD64
5967 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
5968#elif defined(RT_ARCH_ARM64)
5969 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
5970#else
5971# error "Port me!"
5972#endif
5973}
5974
5975/**
5976 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
5977 */
5978DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5979 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
5980{
5981#ifdef RT_ARCH_AMD64
5982 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
5983#elif defined(RT_ARCH_ARM64)
5984 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
5985#else
5986# error "Port me!"
5987#endif
5988}
5989
5990
5991/**
5992 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
5993 */
5994DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
5995{
5996#ifdef RT_ARCH_AMD64
5997 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
5998#elif defined(RT_ARCH_ARM64)
5999 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6000#else
6001# error "Port me!"
6002#endif
6003}
6004
6005/**
6006 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6007 */
6008DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6009 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6010{
6011#ifdef RT_ARCH_AMD64
6012 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6013#elif defined(RT_ARCH_ARM64)
6014 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6015#else
6016# error "Port me!"
6017#endif
6018}
6019
6020
6021/**
6022 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6023 *
6024 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6025 *
6026 * Only use hardcoded jumps forward when emitting for exactly one
6027 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6028 * the right target address on all platforms!
6029 *
6030 *       Please also note that on x86 it is necessary to pass off + 256 or
6031 *       higher for @a offTarget if one believes the intervening code is
6032 *       more than 127 bytes long.
6033 */
6034DECL_FORCE_INLINE(uint32_t)
6035iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6036{
6037#ifdef RT_ARCH_AMD64
6038 /* jcc rel8 / rel32 */
6039 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6040 if (offDisp < 128 && offDisp >= -128)
6041 {
6042 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6043 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6044 }
6045 else
6046 {
6047 offDisp -= 4;
6048 pCodeBuf[off++] = 0x0f;
6049 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6050 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6051 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6052 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6053 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6054 }
6055
6056#elif defined(RT_ARCH_ARM64)
6057 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6058
6059#else
6060# error "Port me!"
6061#endif
6062 return off;
6063}
6064
6065
6066/**
6067 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6068 *
6069 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6070 *
6071 * Only use hardcoded jumps forward when emitting for exactly one
6072 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6073 * the right target address on all platforms!
6074 *
6075 *       Please also note that on x86 it is necessary to pass off + 256 or higher
6076 *       for @a offTarget if one believes the intervening code is more than 127
6077 * bytes long.
6078 */
6079DECL_INLINE_THROW(uint32_t)
6080iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6081{
6082#ifdef RT_ARCH_AMD64
6083 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6084#elif defined(RT_ARCH_ARM64)
6085 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6086#else
6087# error "Port me!"
6088#endif
6089 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6090 return off;
6091}
6092
6093
6094/**
6095 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6096 *
6097 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6098 */
6099DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6100{
6101#ifdef RT_ARCH_AMD64
6102 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6103#elif defined(RT_ARCH_ARM64)
6104 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6105#else
6106# error "Port me!"
6107#endif
6108}
6109
6110
6111/**
6112 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6113 *
6114 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6115 */
6116DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6117{
6118#ifdef RT_ARCH_AMD64
6119 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6120#elif defined(RT_ARCH_ARM64)
6121 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6122#else
6123# error "Port me!"
6124#endif
6125}
6126
6127
6128/**
6129 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6130 *
6131 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6132 */
6133DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6134{
6135#ifdef RT_ARCH_AMD64
6136 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6137#elif defined(RT_ARCH_ARM64)
6138 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6139#else
6140# error "Port me!"
6141#endif
6142}
6143
6144
6145/**
6146 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6147 *
6148 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6149 */
6150DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6151{
6152#ifdef RT_ARCH_AMD64
6153 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6154#elif defined(RT_ARCH_ARM64)
6155 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6156#else
6157# error "Port me!"
6158#endif
6159}
6160
6161
6162/**
6163 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6164 *
6165 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6166 */
6167DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6168{
6169#ifdef RT_ARCH_AMD64
6170 /* jmp rel8 or rel32 */
6171 int32_t offDisp = offTarget - (off + 2);
6172 if (offDisp < 128 && offDisp >= -128)
6173 {
6174 pCodeBuf[off++] = 0xeb;
6175 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6176 }
6177 else
6178 {
6179 offDisp -= 3;
6180 pCodeBuf[off++] = 0xe9;
6181 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6182 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6183 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6184 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6185 }
6186
6187#elif defined(RT_ARCH_ARM64)
6188 pCodeBuf[off++] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6189
6190#else
6191# error "Port me!"
6192#endif
6193 return off;
6194}
6195
6196
6197/**
6198 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6199 *
6200 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6201 */
6202DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6203{
6204#ifdef RT_ARCH_AMD64
6205 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6206#elif defined(RT_ARCH_ARM64)
6207 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6208#else
6209# error "Port me!"
6210#endif
6211 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6212 return off;
6213}
6214
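/*
 * Backward jumps have a known target, so the fixed variant can be used
 * directly, e.g. to close a loop.  Minimal sketch (hypothetical helper name):
 */
#if 0
static uint32_t iemNativeExampleCloseLoop(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offLoopStart)
{
    /* ... the loop body was emitted between offLoopStart and off ... */
    return iemNativeEmitJmpToFixed(pReNative, off, offLoopStart); /* jmp/b back to the top */
}
#endif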
6215
6216/**
6217 * Fixes up a conditional jump to a fixed label.
6218 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6219 * iemNativeEmitJzToFixed, ...
6220 */
6221DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6222{
6223#ifdef RT_ARCH_AMD64
6224 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6225 uint8_t const bOpcode = pbCodeBuf[offFixup];
6226 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6227 {
6228 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6229 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6230 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6231 }
6232 else
6233 {
6234 if (bOpcode != 0x0f)
6235 Assert(bOpcode == 0xe9);
6236 else
6237 {
6238 offFixup += 1;
6239 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) <= 0x10);
6240 }
6241 uint32_t const offRel32 = offTarget - (offFixup + 5);
6242 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6243 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6244 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6245 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6246 }
6247
6248#elif defined(RT_ARCH_ARM64)
6249 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6250 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6251 {
6252 /* B.COND + BC.COND */
6253 int32_t const offDisp = offTarget - offFixup;
6254 Assert(offDisp >= -262144 && offDisp < 262144);
6255 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6256 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6257 }
6258 else
6259 {
6260 /* B imm26 */
6261 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6262 int32_t const offDisp = offTarget - offFixup;
6263 Assert(offDisp >= -33554432 && offDisp < 33554432);
6264 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6265 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6266 }
6267
6268#else
6269# error "Port me!"
6270#endif
6271}
6272
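/*
 * The fixed variants pair with iemNativeFixupFixedJump() for forward jumps
 * whose target is not known yet.  Minimal sketch (hypothetical helper name):
 * emit the jcc against a placeholder that forces the rel32 encoding on AMD64
 * (off + 256, see the notes above), then patch it once the target is known.
 */
#if 0
static uint32_t iemNativeExampleSkipForward(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint32_t const offFixup = off;
    off = iemNativeEmitJccToFixed(pReNative, off, off + 256 /*placeholder*/, kIemNativeInstrCond_e);
    /* ... emit the code being skipped ... */
    iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
    return off;
}
#endif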
6273
6274/**
6275 * Internal helper, don't call directly.
6276 */
6277DECL_INLINE_THROW(uint32_t)
6278iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6279 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6280{
6281 Assert(iBitNo < 64);
6282#ifdef RT_ARCH_AMD64
6283 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6284 if (iBitNo < 8)
6285 {
6286 /* test Eb, imm8 */
6287 if (iGprSrc >= 4)
6288 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6289 pbCodeBuf[off++] = 0xf6;
6290 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6291 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6292 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6293 }
6294 else
6295 {
6296 /* bt Ev, imm8 */
6297 if (iBitNo >= 32)
6298 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6299 else if (iGprSrc >= 8)
6300 pbCodeBuf[off++] = X86_OP_REX_B;
6301 pbCodeBuf[off++] = 0x0f;
6302 pbCodeBuf[off++] = 0xba;
6303 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6304 pbCodeBuf[off++] = iBitNo;
6305 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6306 }
6307
6308#elif defined(RT_ARCH_ARM64)
6309 /* Use the TBNZ instruction here. */
6310 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6311 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6312 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6313
6314#else
6315# error "Port me!"
6316#endif
6317 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6318 return off;
6319}
6320
6321
6322/**
6323 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
6324 * @a iGprSrc.
6325 *
6326 * @note On ARM64 the range is only +/-8191 instructions.
6327 */
6328DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6329 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6330{
6331 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
6332}
6333
6334
6335/**
6336 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
6337 * _set_ in @a iGprSrc.
6338 *
6339 * @note On ARM64 the range is only +/-8191 instructions.
6340 */
6341DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6342 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
6343{
6344 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
6345}
6346
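/*
 * Minimal sketch (hypothetical helper name): branch when the sign bit of a
 * 64-bit register is set, using the TBNZ-backed helper above.
 */
#if 0
static uint32_t iemNativeExampleJmpIfNegative(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                              uint8_t iGprSrc, uint32_t idxLabel)
{
    return iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, iGprSrc, 63 /*iBitNo*/, idxLabel);
}
#endif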
6347
6348/**
6349 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
6350 * flags accordingly.
6351 */
6352DECL_INLINE_THROW(uint32_t)
6353iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
6354{
6355 Assert(fBits != 0);
6356#ifdef RT_ARCH_AMD64
6357
6358 if (fBits >= UINT32_MAX)
6359 {
6360 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6361
6362 /* test Ev,Gv */
6363 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6364 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
6365 pbCodeBuf[off++] = 0x85;
6366        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
6367
6368 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6369 }
6370 else if (fBits <= UINT32_MAX)
6371 {
6372 /* test Eb, imm8 or test Ev, imm32 */
6373 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6374 if (fBits <= UINT8_MAX)
6375 {
6376 if (iGprSrc >= 4)
6377 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6378 pbCodeBuf[off++] = 0xf6;
6379 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6380 pbCodeBuf[off++] = (uint8_t)fBits;
6381 }
6382 else
6383 {
6384 if (iGprSrc >= 8)
6385 pbCodeBuf[off++] = X86_OP_REX_B;
6386 pbCodeBuf[off++] = 0xf7;
6387 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6388 pbCodeBuf[off++] = RT_BYTE1(fBits);
6389 pbCodeBuf[off++] = RT_BYTE2(fBits);
6390 pbCodeBuf[off++] = RT_BYTE3(fBits);
6391 pbCodeBuf[off++] = RT_BYTE4(fBits);
6392 }
6393 }
6394 /** @todo implement me. */
6395 else
6396 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
6397
6398#elif defined(RT_ARCH_ARM64)
6399 uint32_t uImmR = 0;
6400 uint32_t uImmNandS = 0;
6401 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
6402 {
6403 /* ands xzr, iGprSrc, #fBits */
6404 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6405 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
6406 }
6407 else
6408 {
6409 /* ands xzr, iGprSrc, iTmpReg */
6410 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6411 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6412 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
6413 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6414 }
6415
6416#else
6417# error "Port me!"
6418#endif
6419 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6420 return off;
6421}
6422
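/*
 * Illustrative usage sketch (not part of the original source; idxGprReg is
 * hypothetical): set the host CPU flags according to a 64-bit mask test.  On
 * ARM64 a mask like 0xffff000000000000 is a contiguous run of set bits and
 * encodes directly as an ANDS bitmask immediate, so no temporary is needed:
 *
 *     off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxGprReg, UINT64_C(0xffff000000000000));
 */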
6423
6424/**
6425 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
6426 * @a iGprSrc, setting CPU flags accordingly.
6427 *
6428 * @note For ARM64 this only supports @a fBits values that can be expressed
6429 * using the two 6-bit immediates of the ANDS instruction. The caller
6430 * must make sure this is possible!
6431 */
6432DECL_FORCE_INLINE_THROW(uint32_t)
6433iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
6434{
6435 Assert(fBits != 0);
6436
6437#ifdef RT_ARCH_AMD64
6438 if (fBits <= UINT8_MAX)
6439 {
6440 /* test Eb, imm8 */
6441 if (iGprSrc >= 4)
6442 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6443 pCodeBuf[off++] = 0xf6;
6444 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6445 pCodeBuf[off++] = (uint8_t)fBits;
6446 }
6447 else
6448 {
6449 /* test Ev, imm32 */
6450 if (iGprSrc >= 8)
6451 pCodeBuf[off++] = X86_OP_REX_B;
6452 pCodeBuf[off++] = 0xf7;
6453 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6454 pCodeBuf[off++] = RT_BYTE1(fBits);
6455 pCodeBuf[off++] = RT_BYTE2(fBits);
6456 pCodeBuf[off++] = RT_BYTE3(fBits);
6457 pCodeBuf[off++] = RT_BYTE4(fBits);
6458 }
6459
6460#elif defined(RT_ARCH_ARM64)
6461 /* ands xzr, src, #fBits */
6462 uint32_t uImmR = 0;
6463 uint32_t uImmNandS = 0;
6464 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6465 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6466 else
6467# ifdef IEM_WITH_THROW_CATCH
6468 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6469# else
6470 AssertReleaseFailedStmt(off = UINT32_MAX);
6471# endif
6472
6473#else
6474# error "Port me!"
6475#endif
6476 return off;
6477}
6478
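/*
 * Hedged example of the ARM64 restriction noted above (not part of the
 * original source; idxGprReg is hypothetical): a mask such as 0x000000f0 is a
 * single rotated run of set bits and thus encodes as an ANDS immediate,
 * whereas an irregular mask like 0xdeadbeef does not and takes the failure
 * path:
 *
 *     off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxGprReg, UINT32_C(0x000000f0));
 */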
6479
6480
6481/**
6482 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6483 * @a iGprSrc, setting CPU flags accordingly.
6484 *
6485 * @note For ARM64 this only supports @a fBits values that can be expressed
6486 * using the two 6-bit immediates of the ANDS instruction. The caller
6487 * must make sure this is possible!
6488 */
6489DECL_FORCE_INLINE_THROW(uint32_t)
6490iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6491{
6492 Assert(fBits != 0);
6493
6494#ifdef RT_ARCH_AMD64
6495 /* test Eb, imm8 */
6496 if (iGprSrc >= 4)
6497 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6498 pCodeBuf[off++] = 0xf6;
6499 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6500 pCodeBuf[off++] = fBits;
6501
6502#elif defined(RT_ARCH_ARM64)
6503 /* ands xzr, src, #fBits */
6504 uint32_t uImmR = 0;
6505 uint32_t uImmNandS = 0;
6506 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6507 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6508 else
6509# ifdef IEM_WITH_THROW_CATCH
6510 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6511# else
6512 AssertReleaseFailedStmt(off = UINT32_MAX);
6513# endif
6514
6515#else
6516# error "Port me!"
6517#endif
6518 return off;
6519}
6520
6521
6522/**
6523 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
6524 * @a iGprSrc, setting CPU flags accordingly.
6525 */
6526DECL_INLINE_THROW(uint32_t)
6527iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
6528{
6529 Assert(fBits != 0);
6530
6531#ifdef RT_ARCH_AMD64
6532 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
6533
6534#elif defined(RT_ARCH_ARM64)
6535 /* ands xzr, src, [tmp|#imm] */
6536 uint32_t uImmR = 0;
6537 uint32_t uImmNandS = 0;
6538 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
6539 {
6540 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6541 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
6542 }
6543 else
6544 {
6545 /* Use temporary register for the 64-bit immediate. */
6546 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
6547 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6548 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
6549 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6550 }
6551
6552#else
6553# error "Port me!"
6554#endif
6555 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6556 return off;
6557}
6558
6559
6560/**
6561 * Emits a jump to @a idxLabel on the condition that _any_ of the bits in @a fBits
6562 * are set in @a iGprSrc.
6563 */
6564DECL_INLINE_THROW(uint32_t)
6565iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6566 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
6567{
6568 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
6569
6570 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
6571 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6572
6573 return off;
6574}
6575
6576
6577/**
6578 * Emits a jump to @a idxLabel on the condition that _none_ of the bits in @a fBits
6579 * are set in @a iGprSrc.
6580 */
6581DECL_INLINE_THROW(uint32_t)
6582iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6583 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
6584{
6585 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
6586
6587 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
6588 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6589
6590 return off;
6591}
6592
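/*
 * Illustrative usage sketch (not part of the original source; names are
 * hypothetical): the two emitters above simply pair the flag-setting test
 * with a jnz/jz, e.g. branching when neither of two flag bits is set:
 *
 *     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGprFlags,
 *                                                               RT_BIT_64(8) | RT_BIT_64(9), idxLabel);
 */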
6593
6594/**
6595 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6596 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as selected by @a fJmpIfNotZero.
6597 * The operand size is given by @a f64Bit.
6598 */
6599DECL_FORCE_INLINE_THROW(uint32_t)
6600iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6601 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
6602{
6603 Assert(idxLabel < pReNative->cLabels);
6604
6605#ifdef RT_ARCH_AMD64
6606 /* test reg32,reg32 / test reg64,reg64 */
6607 if (f64Bit)
6608 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
6609 else if (iGprSrc >= 8)
6610 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
6611 pCodeBuf[off++] = 0x85;
6612 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
6613
6614 /* jnz idxLabel */
6615 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
6616 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6617
6618#elif defined(RT_ARCH_ARM64)
6619 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6620 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
6621 iGprSrc, f64Bit);
6622 else
6623 {
6624 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6625 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
6626 }
6627
6628#else
6629# error "Port me!"
6630#endif
6631 return off;
6632}
6633
6634
6635/**
6636 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as selected by @a fJmpIfNotZero.
6637 *
6638 * The operand size is given by @a f64Bit.
6639 */
6640DECL_FORCE_INLINE_THROW(uint32_t)
6641iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6642 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
6643{
6644#ifdef RT_ARCH_AMD64
6645 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
6646 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
6647#elif defined(RT_ARCH_ARM64)
6648 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
6649 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
6650#else
6651# error "Port me!"
6652#endif
6653 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6654 return off;
6655}
6656
6657
6658/* if (Gpr1 == 0) Jmp idxLabel; */
6659
6660/**
6661 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6662 *
6663 * The operand size is given by @a f64Bit.
6664 */
6665DECL_FORCE_INLINE_THROW(uint32_t)
6666iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6667 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6668{
6669 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
6670 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
6671}
6672
6673
6674/**
6675 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
6676 *
6677 * The operand size is given by @a f64Bit.
6678 */
6679DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6680 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6681{
6682 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
6683}
6684
6685
6686/**
6687 * Emits code that jumps to a new label if @a iGprSrc is zero.
6688 *
6689 * The operand size is given by @a f64Bit.
6690 */
6691DECL_INLINE_THROW(uint32_t)
6692iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
6693 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6694{
6695 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6696 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
6697}
6698
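/*
 * Illustrative usage sketch (not part of the original source; the label type
 * is only an example): create a label and branch to it when the 64-bit GPR is
 * zero; for not-yet-defined labels a fixup is recorded and resolved later:
 *
 *     off = iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(pReNative, off, idxGprReg, true,
 *                                                        kIemNativeLabelType_Return);
 */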
6699
6700/* if (Gpr1 != 0) Jmp idxLabel; */
6701
6702/**
6703 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6704 *
6705 * The operand size is given by @a f64Bit.
6706 */
6707DECL_FORCE_INLINE_THROW(uint32_t)
6708iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6709 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6710{
6711 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
6712 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
6713}
6714
6715
6716/**
6717 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
6718 *
6719 * The operand size is given by @a f64Bit.
6720 */
6721DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6722 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
6723{
6724 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
6725}
6726
6727
6728/**
6729 * Emits code that jumps to a new label if @a iGprSrc is not zero.
6730 *
6731 * The operand size is given by @a f64Bit.
6732 */
6733DECL_INLINE_THROW(uint32_t)
6734iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
6735 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6736{
6737 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6738 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
6739}
6740
6741
6742/* if (Gpr1 != Gpr2) Jmp idxLabel; */
6743
6744/**
6745 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
6746 * differs.
6747 */
6748DECL_INLINE_THROW(uint32_t)
6749iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6750 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
6751{
6752 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
6753 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6754 return off;
6755}
6756
6757
6758/**
6759 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differs.
6760 */
6761DECL_INLINE_THROW(uint32_t)
6762iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6763 uint8_t iGprLeft, uint8_t iGprRight,
6764 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6765{
6766 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6767 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
6768}
6769
6770
6771/* if (Gpr != Imm) Jmp idxLabel; */
6772
6773/**
6774 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
6775 */
6776DECL_INLINE_THROW(uint32_t)
6777iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6778 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
6779{
6780 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
6781 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6782 return off;
6783}
6784
6785
6786/**
6787 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
6788 */
6789DECL_INLINE_THROW(uint32_t)
6790iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6791 uint8_t iGprSrc, uint64_t uImm,
6792 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6793{
6794 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6795 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6796}
6797
6798
6799/**
6800 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
6801 * @a uImm.
6802 */
6803DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6804 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
6805{
6806 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
6807 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6808 return off;
6809}
6810
6811
6812/**
6813 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
6814 * @a uImm.
6815 */
6816DECL_INLINE_THROW(uint32_t)
6817iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6818 uint8_t iGprSrc, uint32_t uImm,
6819 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6820{
6821 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6822 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6823}
6824
6825
6826/**
6827 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
6828 * @a uImm.
6829 */
6830DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6831 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
6832{
6833 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
6834 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
6835 return off;
6836}
6837
6838
6839/**
6840 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
6841 * @a uImm.
6842 */
6843DECL_INLINE_THROW(uint32_t)
6844iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6845 uint8_t iGprSrc, uint16_t uImm,
6846 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6847{
6848 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6849 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6850}
6851
6852
6853/* if (Gpr == Imm) Jmp idxLabel; */
6854
6855/**
6856 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
6857 */
6858DECL_INLINE_THROW(uint32_t)
6859iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6860 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
6861{
6862 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
6863 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6864 return off;
6865}
6866
6867
6868/**
6869 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
6870 */
6871DECL_INLINE_THROW(uint32_t)
6872iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
6873 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6874{
6875 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6876 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6877}
6878
6879
6880/**
6881 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
6882 */
6883DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6884 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
6885{
6886 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
6887 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6888 return off;
6889}
6890
6891
6892/**
6893 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
6894 */
6895DECL_INLINE_THROW(uint32_t)
6896iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
6897 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6898{
6899 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6900 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
6901}
6902
6903
6904/**
6905 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
6906 *
6907 * @note ARM64: Helper register is required (idxTmpReg).
6908 */
6909DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6910 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
6911 uint8_t idxTmpReg = UINT8_MAX)
6912{
6913 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
6914 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
6915 return off;
6916}
6917
6918
6919/**
6920 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
6921 *
6922 * @note ARM64: Helper register is required (idxTmpReg).
6923 */
6924DECL_INLINE_THROW(uint32_t)
6925iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
6926 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
6927 uint8_t idxTmpReg = UINT8_MAX)
6928{
6929 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6930 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
6931}
6932
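/*
 * Illustrative usage sketch (not part of the original source; values are
 * hypothetical): all of the compare-and-jump emitters above follow the same
 * cmp + jcc pattern, e.g. branching when a 32-bit register does not hold the
 * expected value:
 *
 *     off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, idxGprReg,
 *                                                            UINT32_C(0x1234), idxLabel);
 */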
6933
6934/*********************************************************************************************************************************
6935* Calls. *
6936*********************************************************************************************************************************/
6937
6938/**
6939 * Emits a call to a 64-bit address.
6940 */
6941DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
6942{
6943#ifdef RT_ARCH_AMD64
6944 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
6945
6946 /* call rax */
6947 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6948 pbCodeBuf[off++] = 0xff;
6949 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
6950
6951#elif defined(RT_ARCH_ARM64)
6952 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
6953
6954 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6955 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
6956
6957#else
6958# error "port me"
6959#endif
6960 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6961 return off;
6962}
6963
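/*
 * Illustrative usage sketch (not part of the original source; pfnHelper
 * stands in for any C helper using the host calling convention): the address
 * is materialized in xAX / TMP0 and called indirectly:
 *
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 */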
6964
6965/**
6966 * Emits code to load a stack variable into an argument GPR.
6967 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
6968 */
6969DECL_FORCE_INLINE_THROW(uint32_t)
6970iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
6971 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
6972 bool fSpilledVarsInVolatileRegs = false)
6973{
6974 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6975 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6976 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6977
6978 uint8_t const idxRegVar = pVar->idxReg;
6979 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
6980 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
6981 || !fSpilledVarsInVolatileRegs ))
6982 {
6983 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
6984 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
6985 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
6986 if (!offAddend)
6987 {
6988 if (idxRegArg != idxRegVar)
6989 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
6990 }
6991 else
6992 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
6993 }
6994 else
6995 {
6996 uint8_t const idxStackSlot = pVar->idxStackSlot;
6997 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6998 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
6999 if (offAddend)
7000 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7001 }
7002 return off;
7003}
7004
7005
7006/**
7007 * Emits code to load a stack or immediate variable value into an argument GPR,
7008 * optionally with an addend.
7009 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7010 */
7011DECL_FORCE_INLINE_THROW(uint32_t)
7012iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7013 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7014 bool fSpilledVarsInVolatileRegs = false)
7015{
7016 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7017 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7018 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7019 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7020 else
7021 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7022 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7023 return off;
7024}
7025
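/*
 * Illustrative usage sketch (not part of the original source; idxVar0 and
 * idxVar1 are hypothetical variable indexes): marshal two variables into the
 * first two argument registers ahead of an iemNativeEmitCallImm:
 *
 *     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxVar0);
 *     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVar1);
 */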
7026
7027/**
7028 * Emits code to load the variable address into an argument GPR.
7029 *
7030 * This only works for uninitialized and stack variables.
7031 */
7032DECL_FORCE_INLINE_THROW(uint32_t)
7033iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7034 bool fFlushShadows)
7035{
7036 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7037 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7038 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7039 || pVar->enmKind == kIemNativeVarKind_Stack,
7040 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7041
7042 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7043 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7044
7045 uint8_t const idxRegVar = pVar->idxReg;
7046 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7047 {
7048 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7049 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7050 Assert(pVar->idxReg == UINT8_MAX);
7051 }
7052 Assert( pVar->idxStackSlot != UINT8_MAX
7053 && pVar->idxReg == UINT8_MAX);
7054
7055 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7056}
7057
7058
7059#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7060
7061/**
7062 * Emits a gprdst = ~gprsrc store.
7063 */
7064DECL_FORCE_INLINE_THROW(uint32_t)
7065iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7066{
7067#ifdef RT_ARCH_AMD64
7068 if (iGprDst != iGprSrc)
7069 {
7070 /* mov gprdst, gprsrc. */
7071 if (f64Bit)
7072 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7073 else
7074 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7075 }
7076
7077 /* not gprdst */
7078 if (f64Bit || iGprDst >= 8)
7079 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7080 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7081 pCodeBuf[off++] = 0xf7;
7082 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7083#elif defined(RT_ARCH_ARM64)
7084 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7085#else
7086# error "port me"
7087#endif
7088 return off;
7089}
7090
7091
7092/**
7093 * Emits a gprdst = ~gprsrc store.
7094 */
7095DECL_INLINE_THROW(uint32_t)
7096iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7097{
7098#ifdef RT_ARCH_AMD64
7099 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7100#elif defined(RT_ARCH_ARM64)
7101 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7102#else
7103# error "port me"
7104#endif
7105 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7106 return off;
7107}
7108
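/*
 * Illustrative usage sketch (not part of the original source; register
 * indexes are hypothetical): complement the low 32 bits of a value by passing
 * false for f64Bit; this is a mov + not on AMD64 and a single ORN against XZR
 * on ARM64:
 *
 *     off = iemNativeEmitInvBitsGpr(pReNative, off, idxGprDst, idxGprSrc, false);
 */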
7109
7110/**
7111 * Emits a 128-bit vector register store to a VCpu value.
7112 */
7113DECL_FORCE_INLINE_THROW(uint32_t)
7114iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7115{
7116#ifdef RT_ARCH_AMD64
7117 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7118    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7119 if (iVecReg >= 8)
7120 pCodeBuf[off++] = X86_OP_REX_R;
7121 pCodeBuf[off++] = 0x0f;
7122 pCodeBuf[off++] = 0x7f;
7123 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7124#elif defined(RT_ARCH_ARM64)
7125 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7126
7127#else
7128# error "port me"
7129#endif
7130 return off;
7131}
7132
7133
7134/**
7135 * Emits a 128-bit vector register store to a VCpu value.
7136 */
7137DECL_INLINE_THROW(uint32_t)
7138iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7139{
7140#ifdef RT_ARCH_AMD64
7141 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7142#elif defined(RT_ARCH_ARM64)
7143 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7144#else
7145# error "port me"
7146#endif
7147 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7148 return off;
7149}
7150
7151
7152/**
7153 * Emits a high 128-bit vector register store to a VCpu value.
7154 */
7155DECL_FORCE_INLINE_THROW(uint32_t)
7156iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7157{
7158#ifdef RT_ARCH_AMD64
7159 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7160 pCodeBuf[off++] = X86_OP_VEX3;
7161 if (iVecReg >= 8)
7162 pCodeBuf[off++] = 0x63;
7163 else
7164 pCodeBuf[off++] = 0xe3;
7165 pCodeBuf[off++] = 0x7d;
7166 pCodeBuf[off++] = 0x39;
7167 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7168 pCodeBuf[off++] = 0x01; /* Immediate */
7169#elif defined(RT_ARCH_ARM64)
7170 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7171#else
7172# error "port me"
7173#endif
7174 return off;
7175}
7176
7177
7178/**
7179 * Emits a high 128-bit vector register store to a VCpu value.
7180 */
7181DECL_INLINE_THROW(uint32_t)
7182iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7183{
7184#ifdef RT_ARCH_AMD64
7185 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7186#elif defined(RT_ARCH_ARM64)
7187 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7188 Assert(!(iVecReg & 0x1));
7189 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7190#else
7191# error "port me"
7192#endif
7193 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7194 return off;
7195}
7196
7197
7198/**
7199 * Emits a 128-bit vector register load of a VCpu value.
7200 */
7201DECL_FORCE_INLINE_THROW(uint32_t)
7202iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7203{
7204#ifdef RT_ARCH_AMD64
7205 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
7206 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7207 if (iVecReg >= 8)
7208 pCodeBuf[off++] = X86_OP_REX_R;
7209 pCodeBuf[off++] = 0x0f;
7210 pCodeBuf[off++] = 0x6f;
7211 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7212#elif defined(RT_ARCH_ARM64)
7213 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7214
7215#else
7216# error "port me"
7217#endif
7218 return off;
7219}
7220
7221
7222/**
7223 * Emits a 128-bit vector register load of a VCpu value.
7224 */
7225DECL_INLINE_THROW(uint32_t)
7226iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7227{
7228#ifdef RT_ARCH_AMD64
7229 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7230#elif defined(RT_ARCH_ARM64)
7231 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7232#else
7233# error "port me"
7234#endif
7235 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7236 return off;
7237}
7238
7239
7240/**
7241 * Emits a high 128-bit vector register load of a VCpu value.
7242 */
7243DECL_FORCE_INLINE_THROW(uint32_t)
7244iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7245{
7246#ifdef RT_ARCH_AMD64
7247 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
7248 pCodeBuf[off++] = X86_OP_VEX3;
7249 if (iVecReg >= 8)
7250 pCodeBuf[off++] = 0x63;
7251 else
7252 pCodeBuf[off++] = 0xe3;
7253 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
7254 pCodeBuf[off++] = 0x38;
7255 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7256 pCodeBuf[off++] = 0x01; /* Immediate */
7257#elif defined(RT_ARCH_ARM64)
7258 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
7259#else
7260# error "port me"
7261#endif
7262 return off;
7263}
7264
7265
7266/**
7267 * Emits a high 128-bit vector register load of a VCpu value.
7268 */
7269DECL_INLINE_THROW(uint32_t)
7270iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7271{
7272#ifdef RT_ARCH_AMD64
7273    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7274#elif defined(RT_ARCH_ARM64)
7275 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7276 Assert(!(iVecReg & 0x1));
7277    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
7278#else
7279# error "port me"
7280#endif
7281 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7282 return off;
7283}
7284
7285
7286/**
7287 * Emits a vecdst = vecsrc load.
7288 */
7289DECL_FORCE_INLINE(uint32_t)
7290iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7291{
7292#ifdef RT_ARCH_AMD64
7293 /* movdqu vecdst, vecsrc */
7294 pCodeBuf[off++] = 0xf3;
7295
7296 if ((iVecRegDst | iVecRegSrc) >= 8)
7297 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
7298 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
7299 : X86_OP_REX_R;
7300 pCodeBuf[off++] = 0x0f;
7301 pCodeBuf[off++] = 0x6f;
7302 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7303
7304#elif defined(RT_ARCH_ARM64)
7305 /* mov dst, src; alias for: orr dst, src, src */
7306 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
7307
7308#else
7309# error "port me"
7310#endif
7311 return off;
7312}
7313
7314
7315/**
7316 * Emits a vecdst = vecsrc load, 128-bit.
7317 */
7318DECL_INLINE_THROW(uint32_t)
7319iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7320{
7321#ifdef RT_ARCH_AMD64
7322 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
7323#elif defined(RT_ARCH_ARM64)
7324 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
7325#else
7326# error "port me"
7327#endif
7328 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7329 return off;
7330}
7331
7332
7333/**
7334 * Emits a vecdst = vecsrc load, 256-bit.
7335 */
7336DECL_INLINE_THROW(uint32_t)
7337iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
7338{
7339#ifdef RT_ARCH_AMD64
7340 /* vmovdqa ymm, ymm */
7341 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7342 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
7343 {
7344 pbCodeBuf[off++] = X86_OP_VEX3;
7345 pbCodeBuf[off++] = 0x41;
7346 pbCodeBuf[off++] = 0x7d;
7347 pbCodeBuf[off++] = 0x6f;
7348 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7349 }
7350 else
7351 {
7352 pbCodeBuf[off++] = X86_OP_VEX2;
7353 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
7354 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
7355 pbCodeBuf[off++] = iVecRegSrc >= 8
7356 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
7357 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
7358 }
7359#elif defined(RT_ARCH_ARM64)
7360 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7361 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
7362 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
7363 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
7364#else
7365# error "port me"
7366#endif
7367 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7368 return off;
7369}
7370
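/*
 * Illustrative usage sketch (not part of the original source; register
 * indexes are hypothetical): copy a full 256-bit value.  On ARM64 both
 * indexes must be even, as each 256-bit value occupies two adjacent 128-bit
 * host registers:
 *
 *     off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVecDst, idxVecSrc);
 */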
7371
7372/**
7373 * Emits a gprdst = vecsrc[x] load, 64-bit.
7374 */
7375DECL_FORCE_INLINE(uint32_t)
7376iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7377{
7378#ifdef RT_ARCH_AMD64
7379 if (iQWord >= 2)
7380 {
7381 /** @todo Currently not used. */
7382 AssertReleaseFailed();
7383 }
7384 else
7385 {
7386 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
7387 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7388 pCodeBuf[off++] = X86_OP_REX_W
7389 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7390 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7391 pCodeBuf[off++] = 0x0f;
7392 pCodeBuf[off++] = 0x3a;
7393 pCodeBuf[off++] = 0x16;
7394 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7395 pCodeBuf[off++] = iQWord;
7396 }
7397#elif defined(RT_ARCH_ARM64)
7398 /* umov gprdst, vecsrc[iQWord] */
7399 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
7400#else
7401# error "port me"
7402#endif
7403 return off;
7404}
7405
7406
7407/**
7408 * Emits a gprdst = vecsrc[x] load, 64-bit.
7409 */
7410DECL_INLINE_THROW(uint32_t)
7411iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
7412{
7413 Assert(iQWord <= 3);
7414
7415#ifdef RT_ARCH_AMD64
7416 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iQWord);
7417#elif defined(RT_ARCH_ARM64)
7418 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7419 Assert(!(iVecRegSrc & 0x1));
7420 /* Need to access the "high" 128-bit vector register. */
7421 if (iQWord >= 2)
7422 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
7423 else
7424 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
7425#else
7426# error "port me"
7427#endif
7428 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7429 return off;
7430}
7431
7432
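/*
 * Illustrative usage sketch (not part of the original source; register
 * indexes are hypothetical): fetch the low quadword of a vector register into
 * a GPR, a single pextrq (SSE4.1) on AMD64 or a umov on ARM64:
 *
 *     off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGprDst, idxVecSrc, 0);
 */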
7433/**
7434 * Emits a gprdst = vecsrc[x] load, 32-bit.
7435 */
7436DECL_FORCE_INLINE(uint32_t)
7437iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7438{
7439#ifdef RT_ARCH_AMD64
7440 if (iDWord >= 4)
7441 {
7442 /** @todo Currently not used. */
7443 AssertReleaseFailed();
7444 }
7445 else
7446 {
7447 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
7448 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7449 if (iGprDst >= 8 || iVecRegSrc >= 8)
7450 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7451 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7452 pCodeBuf[off++] = 0x0f;
7453 pCodeBuf[off++] = 0x3a;
7454 pCodeBuf[off++] = 0x16;
7455 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7456 pCodeBuf[off++] = iDWord;
7457 }
7458#elif defined(RT_ARCH_ARM64)
7459 /* umov gprdst, vecsrc[iDWord] */
7460 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
7461#else
7462# error "port me"
7463#endif
7464 return off;
7465}
7466
7467
7468/**
7469 * Emits a gprdst = vecsrc[x] load, 32-bit.
7470 */
7471DECL_INLINE_THROW(uint32_t)
7472iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
7473{
7474 Assert(iDWord <= 7);
7475
7476#ifdef RT_ARCH_AMD64
7477 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
7478#elif defined(RT_ARCH_ARM64)
7479 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7480 Assert(!(iVecRegSrc & 0x1));
7481 /* Need to access the "high" 128-bit vector register. */
7482 if (iDWord >= 4)
7483 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
7484 else
7485 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
7486#else
7487# error "port me"
7488#endif
7489 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7490 return off;
7491}
7492
7493
7494/**
7495 * Emits a gprdst = vecsrc[x] load, 16-bit.
7496 */
7497DECL_FORCE_INLINE(uint32_t)
7498iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
7499{
7500#ifdef RT_ARCH_AMD64
7501 if (iWord >= 8)
7502 {
7503 /** @todo Currently not used. */
7504 AssertReleaseFailed();
7505 }
7506 else
7507 {
7508 /* pextrw gpr, vecsrc, #iWord */
7509 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7510 if (iGprDst >= 8 || iVecRegSrc >= 8)
7511 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
7512 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
7513 pCodeBuf[off++] = 0x0f;
7514 pCodeBuf[off++] = 0xc5;
7515 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
7516 pCodeBuf[off++] = iWord;
7517 }
7518#elif defined(RT_ARCH_ARM64)
7519 /* umov gprdst, vecsrc[iWord] */
7520 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
7521#else
7522# error "port me"
7523#endif
7524 return off;
7525}
7526
7527
7528/**
7529 * Emits a gprdst = vecsrc[x] load, 16-bit.
7530 */
7531DECL_INLINE_THROW(uint32_t)
7532iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
7533{
7534    Assert(iWord <= 15);
7535
7536#ifdef RT_ARCH_AMD64
7537 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
7538#elif defined(RT_ARCH_ARM64)
7539 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7540 Assert(!(iVecRegSrc & 0x1));
7541 /* Need to access the "high" 128-bit vector register. */
7542 if (iWord >= 8)
7543 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
7544 else
7545 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
7546#else
7547# error "port me"
7548#endif
7549 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7550 return off;
7551}
7552
7553
7554/**
7555 * Emits a gprdst = vecsrc[x] load, 8-bit.
7556 */
7557DECL_FORCE_INLINE(uint32_t)
7558iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
7559{
7560#ifdef RT_ARCH_AMD64
7561 if (iByte >= 16)
7562 {
7563 /** @todo Currently not used. */
7564 AssertReleaseFailed();
7565 }
7566 else
7567 {
7568 /* pextrb gpr, vecsrc, #iByte */
7569 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7570 if (iGprDst >= 8 || iVecRegSrc >= 8)
7571 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
7572 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
7573 pCodeBuf[off++] = 0x0f;
7574 pCodeBuf[off++] = 0x3a;
7575 pCodeBuf[off++] = 0x14;
7576 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
7577 pCodeBuf[off++] = iByte;
7578 }
7579#elif defined(RT_ARCH_ARM64)
7580 /* umov gprdst, vecsrc[iByte] */
7581 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
7582#else
7583# error "port me"
7584#endif
7585 return off;
7586}
7587
7588
7589/**
7590 * Emits a gprdst = vecsrc[x] load, 8-bit.
7591 */
7592DECL_INLINE_THROW(uint32_t)
7593iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
7594{
7595    Assert(iByte <= 31);
7596
7597#ifdef RT_ARCH_AMD64
7598    off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
7599#elif defined(RT_ARCH_ARM64)
7600 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7601 Assert(!(iVecRegSrc & 0x1));
7602 /* Need to access the "high" 128-bit vector register. */
7603 if (iByte >= 16)
7604 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
7605 else
7606 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
7607#else
7608# error "port me"
7609#endif
7610 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7611 return off;
7612}
7613
7614
7615/**
7616 * Emits a vecdst[x] = gprsrc store, 64-bit.
7617 */
7618DECL_FORCE_INLINE(uint32_t)
7619iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
7620{
7621#ifdef RT_ARCH_AMD64
7622 /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */
7623 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7624 pCodeBuf[off++] = X86_OP_REX_W
7625 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
7626 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7627 pCodeBuf[off++] = 0x0f;
7628 pCodeBuf[off++] = 0x3a;
7629 pCodeBuf[off++] = 0x22;
7630 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
7631 pCodeBuf[off++] = iQWord;
7632#elif defined(RT_ARCH_ARM64)
7633 /* ins vecsrc[iQWord], gpr */
7634 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
7635#else
7636# error "port me"
7637#endif
7638 return off;
7639}
7640
7641
7642/**
7643 * Emits a vecdst[x] = gprsrc store, 64-bit.
7644 */
7645DECL_INLINE_THROW(uint32_t)
7646iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
7647{
7648 Assert(iQWord <= 1);
7649
7650#ifdef RT_ARCH_AMD64
7651 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
7652#elif defined(RT_ARCH_ARM64)
7653 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
7654#else
7655# error "port me"
7656#endif
7657 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7658 return off;
7659}
7660
7661
7662/**
7663 * Emits a vecdst[x] = gprsrc store, 32-bit.
7664 */
7665DECL_FORCE_INLINE(uint32_t)
7666iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
7667{
7668#ifdef RT_ARCH_AMD64
7669 /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */
7670 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7671 if (iVecRegDst >= 8 || iGprSrc >= 8)
7672 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
7673 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7674 pCodeBuf[off++] = 0x0f;
7675 pCodeBuf[off++] = 0x3a;
7676 pCodeBuf[off++] = 0x22;
7677 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
7678 pCodeBuf[off++] = iDWord;
7679#elif defined(RT_ARCH_ARM64)
7680 /* ins vecsrc[iDWord], gpr */
7681 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
7682#else
7683# error "port me"
7684#endif
7685 return off;
7686}
7687
7688
7689/**
7690 * Emits a vecdst[x] = gprsrc store, 32-bit.
7691 */
7692DECL_INLINE_THROW(uint32_t)
7693iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
7694{
7695 Assert(iDWord <= 3);
7696
7697#ifdef RT_ARCH_AMD64
7698 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord);
7699#elif defined(RT_ARCH_ARM64)
7700 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
7701#else
7702# error "port me"
7703#endif
7704 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7705 return off;
7706}
7707
7708
7709/**
7710 * Emits a vecdst.au32[iDWord] = 0 store.
7711 */
7712DECL_FORCE_INLINE(uint32_t)
7713iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
7714{
7715 Assert(iDWord <= 7);
7716
7717#ifdef RT_ARCH_AMD64
7718 /*
7719 * xor tmp0, tmp0
7720 * pinsrd xmm, tmp0, iDword
7721 */
7722 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
7723 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7724 pCodeBuf[off++] = 0x33;
7725 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
7726    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
7727#elif defined(RT_ARCH_ARM64)
7728 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7729 Assert(!(iVecReg & 0x1));
7730 /* ins vecsrc[iDWord], wzr */
7731 if (iDWord >= 4)
7732 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
7733 else
7734 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
7735#else
7736# error "port me"
7737#endif
7738 return off;
7739}
7740
7741
7742/**
7743 * Emits a vecdst.au32[iDWord] = 0 store.
7744 */
7745DECL_INLINE_THROW(uint32_t)
7746iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
7747{
7748
7749#ifdef RT_ARCH_AMD64
7750 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
7751#elif defined(RT_ARCH_ARM64)
7752 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
7753#else
7754# error "port me"
7755#endif
7756 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7757 return off;
7758}
7759
7760
7761/**
7762 * Emits a vecdst[0:127] = 0 store.
7763 */
7764DECL_FORCE_INLINE(uint32_t)
7765iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
7766{
7767#ifdef RT_ARCH_AMD64
7768 /* pxor xmm, xmm */
7769 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7770 if (iVecReg >= 8)
7771 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
7772 pCodeBuf[off++] = 0x0f;
7773 pCodeBuf[off++] = 0xef;
7774 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
7775#elif defined(RT_ARCH_ARM64)
7776 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7777 Assert(!(iVecReg & 0x1));
7778 /* eor vecreg, vecreg, vecreg */
7779 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
7780#else
7781# error "port me"
7782#endif
7783 return off;
7784}
7785
7786
7787/**
7788 * Emits a vecdst[0:127] = 0 store.
7789 */
7790DECL_INLINE_THROW(uint32_t)
7791iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
7792{
7793#ifdef RT_ARCH_AMD64
7794 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
7795#elif defined(RT_ARCH_ARM64)
7796 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
7797#else
7798# error "port me"
7799#endif
7800 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7801 return off;
7802}
7803
7804
7805/**
7806 * Emits a vecdst[128:255] = 0 store.
7807 */
7808DECL_FORCE_INLINE(uint32_t)
7809iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
7810{
7811#ifdef RT_ARCH_AMD64
7812 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
7813 if (iVecReg < 8)
7814 {
7815 pCodeBuf[off++] = X86_OP_VEX2;
7816 pCodeBuf[off++] = 0xf9;
7817 }
7818 else
7819 {
7820 pCodeBuf[off++] = X86_OP_VEX3;
7821 pCodeBuf[off++] = 0x41;
7822 pCodeBuf[off++] = 0x79;
7823 }
7824 pCodeBuf[off++] = 0x6f;
7825 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
7826#elif defined(RT_ARCH_ARM64)
7827 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7828 Assert(!(iVecReg & 0x1));
7829 /* eor vecreg, vecreg, vecreg */
7830 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
7831#else
7832# error "port me"
7833#endif
7834 return off;
7835}
7836
7837
7838/**
7839 * Emits a vecdst[128:255] = 0 store.
7840 */
7841DECL_INLINE_THROW(uint32_t)
7842iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
7843{
7844#ifdef RT_ARCH_AMD64
7845 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
7846#elif defined(RT_ARCH_ARM64)
7847 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
7848#else
7849# error "port me"
7850#endif
7851 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7852 return off;
7853}
7854
7855
7856/**
7857 * Emits a vecdst[0:255] = 0 store.
7858 */
7859DECL_FORCE_INLINE(uint32_t)
7860iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
7861{
7862#ifdef RT_ARCH_AMD64
7863 /* vpxor ymm, ymm, ymm */
7864 if (iVecReg < 8)
7865 {
7866 pCodeBuf[off++] = X86_OP_VEX2;
7867 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
7868 }
7869 else
7870 {
7871 pCodeBuf[off++] = X86_OP_VEX3;
7872 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
7873 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
7874 }
7875 pCodeBuf[off++] = 0xef;
7876 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
7877#elif defined(RT_ARCH_ARM64)
7878 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7879 Assert(!(iVecReg & 0x1));
7880 /* eor vecreg, vecreg, vecreg */
7881 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
7882 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
7883#else
7884# error "port me"
7885#endif
7886 return off;
7887}
7888
7889
7890/**
7891 * Emits a vecdst[0:255] = 0 store.
7892 */
7893DECL_INLINE_THROW(uint32_t)
7894iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
7895{
7896#ifdef RT_ARCH_AMD64
7897 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
7898#elif defined(RT_ARCH_ARM64)
7899 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
7900#else
7901# error "port me"
7902#endif
7903 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7904 return off;
7905}
7906
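/*
 * Illustrative usage sketch (not part of the original source; the register
 * index is hypothetical): clear all 256 bits before composing a result, one
 * vpxor on AMD64 and two EORs on ARM64:
 *
 *     off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxVecReg);
 */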
7907
7908/**
7909 * Emits a vecdst = gprsrc broadcast, 16-bit.
7910 */
7911DECL_FORCE_INLINE(uint32_t)
7912iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
7913{
7914#ifdef RT_ARCH_AMD64
7915 /* pinsrw vecdst, gpr, #0 */
7916 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7917 if (iVecRegDst >= 8 || iGprSrc >= 8)
7918 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
7919 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7920 pCodeBuf[off++] = 0x0f;
7921 pCodeBuf[off++] = 0xc4;
7922 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
7923 pCodeBuf[off++] = 0x00;
7924
7925    /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
7926 pCodeBuf[off++] = X86_OP_VEX3;
7927 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
7928 | 0x02 /* opcode map. */
7929 | ( iVecRegDst >= 8
7930 ? 0
7931 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
7932 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
7933 pCodeBuf[off++] = 0x79;
7934 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
7935#elif defined(RT_ARCH_ARM64)
7936 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7937 Assert(!(iVecRegDst & 0x1) || !f256Bit);
7938
7939 /* dup vecdst, gpr */
7940 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
7941 if (f256Bit)
7942 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
7943#else
7944# error "port me"
7945#endif
7946 return off;
7947}
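
/*
 * For reference (register numbers made up): iVecRegDst=3, iGprSrc=1 and
 * f256Bit=false disassembles roughly as
 *
 * @code
 *     pinsrw       xmm3, ecx, 0
 *     vpbroadcastw xmm3, xmm3
 * @endcode
 *
 * on AMD64, and as a single "dup v3.8h, w1" on ARM64.
 */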
7948
7949
7950/**
7951 * Emits a vecdst = gprsrc broadcast, 16-bit.
7952 */
7953DECL_INLINE_THROW(uint32_t)
7954iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
7955{
7956#ifdef RT_ARCH_AMD64
7957 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
7958#elif defined(RT_ARCH_ARM64)
7959 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
7960#else
7961# error "port me"
7962#endif
7963 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7964 return off;
7965}
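
/*
 * Combination sketch (an assumption about how a caller might use this, not
 * code lifted from an existing emitter): a 16-bit broadcast into a 128-bit
 * register with VLMAX zero extension of bits 128 thru 255:
 *
 * @code
 *     off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, iVecRegDst, iGprSrc, false);
 *     off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, iVecRegDst);
 * @endcode
 */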
7966
7967
7968/**
7969 * Emits a vecdst = gprsrc broadcast, 32-bit.
7970 */
7971DECL_FORCE_INLINE(uint32_t)
7972iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
7973{
7974#ifdef RT_ARCH_AMD64
7975 /** @todo If anyone has a better idea on how to do this more efficiently, I'm all ears;
7976 * vpbroadcast needs a memory operand or another xmm register to work... */
7977
7978 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
7979 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7980 if (iVecRegDst >= 8 || iGprSrc >= 8)
7981 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
7982 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7983 pCodeBuf[off++] = 0x0f;
7984 pCodeBuf[off++] = 0x3a;
7985 pCodeBuf[off++] = 0x22;
7986 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
7987 pCodeBuf[off++] = 0x00;
7988
7989 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
7990 pCodeBuf[off++] = X86_OP_VEX3;
7991 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
7992 | 0x02 /* opcode map. */
7993 | ( iVecRegDst >= 8
7994 ? 0
7995 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
7996 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
7997 pCodeBuf[off++] = 0x58;
7998 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
7999#elif defined(RT_ARCH_ARM64)
8000 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8001 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8002
8003 /* dup vecdst, gpr */
8004 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
8005 if (f256Bit)
8006 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
8007#else
8008# error "port me"
8009#endif
8010 return off;
8011}
8012
8013
8014/**
8015 * Emits a vecdst = gprsrc broadcast, 32-bit.
8016 */
8017DECL_INLINE_THROW(uint32_t)
8018iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8019{
8020#ifdef RT_ARCH_AMD64
8021 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
8022#elif defined(RT_ARCH_ARM64)
8023 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8024#else
8025# error "port me"
8026#endif
8027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8028 return off;
8029}
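
/*
 * For reference (register numbers made up, iVecRegDst chosen even to satisfy
 * the ARM64 adjacent-pair assertion): iVecRegDst=2, iGprSrc=3 and
 * f256Bit=true disassembles roughly as
 *
 * @code
 *     pinsrd       xmm2, ebx, 0
 *     vpbroadcastd ymm2, xmm2
 * @endcode
 *
 * on AMD64, and as "dup v2.4s, w3" / "dup v3.4s, w3" for the register pair
 * backing the 256-bit value on ARM64.
 */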
8030
8031
8032/**
8033 * Emits a vecdst = gprsrc broadcast, 64-bit.
8034 */
8035DECL_FORCE_INLINE(uint32_t)
8036iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8037{
8038#ifdef RT_ARCH_AMD64
8039 /** @todo If anyone has a better idea on how to do this more efficiently, I'm all ears;
8040 * vpbroadcast needs a memory operand or another xmm register to work... */
8041
8042 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
8043 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8044 pCodeBuf[off++] = X86_OP_REX_W
8045 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8046 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8047 pCodeBuf[off++] = 0x0f;
8048 pCodeBuf[off++] = 0x3a;
8049 pCodeBuf[off++] = 0x22;
8050 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8051 pCodeBuf[off++] = 0x00;
8052
8053 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
8054 pCodeBuf[off++] = X86_OP_VEX3;
8055 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
8056 | 0x02 /* opcode map. */
8057 | ( iVecRegDst >= 8
8058 ? 0
8059 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
8060 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
8061 pCodeBuf[off++] = 0x59;
8062 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
8063#elif defined(RT_ARCH_ARM64)
8064 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8065 Assert(!(iVecRegDst & 0x1) || !f256Bit);
8066
8067 /* dup vecdst, gpr */
8068 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
8069 if (f256Bit)
8070 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
8071#else
8072# error "port me"
8073#endif
8074 return off;
8075}
8076
8077
8078/**
8079 * Emits a vecdst = gprsrc broadcast, 64-bit.
8080 */
8081DECL_INLINE_THROW(uint32_t)
8082iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
8083{
8084#ifdef RT_ARCH_AMD64
8085 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
8086#elif defined(RT_ARCH_ARM64)
8087 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
8088#else
8089# error "port me"
8090#endif
8091 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8092 return off;
8093}
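
/*
 * For reference (register numbers made up): iVecRegDst=4, iGprSrc=0 and
 * f256Bit=true disassembles roughly as
 *
 * @code
 *     pinsrq       xmm4, rax, 0
 *     vpbroadcastq ymm4, xmm4
 * @endcode
 *
 * on AMD64 (the REX.W prefix is what makes pinsrq take the full 64-bit GPR),
 * and as "dup v4.2d, x0" / "dup v5.2d, x0" on ARM64.
 */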
8094
8095#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8096
8097/** @} */
8098
8099#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
8100