VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h@104279

Last change on this file since 104279 was 104279, checked in by vboxsync, 8 months ago

VMM/IEM: Implement a native emitter for the pand,andps,andpd instructions, bugref:10652

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 93.5 KB
1/* $Id: IEMAllN8veEmit-x86.h 104279 2024-04-10 14:22:22Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, x86 Target - Code Emitters.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
29#define VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34
35#ifdef RT_ARCH_AMD64
36
37/**
38 * Emits a ModR/M instruction with one opcode byte and only register operands.
39 */
40DECL_FORCE_INLINE(uint32_t)
41iemNativeEmitAmd64OneByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOther,
42 uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
43{
44 Assert(idxRegReg < 16); Assert(idxRegRm < 16);
45 switch (cOpBits)
46 {
47 case 16:
48 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
49 RT_FALL_THRU();
50 case 32:
51 if (idxRegReg >= 8 || idxRegRm >= 8)
52 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
53 pCodeBuf[off++] = bOpcodeOther;
54 break;
55
56 default: AssertFailed(); RT_FALL_THRU();
57 case 64:
58 pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
59 pCodeBuf[off++] = bOpcodeOther;
60 break;
61
62 case 8:
63 if (idxRegReg >= 8 || idxRegRm >= 8)
64 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
65 else if (idxRegReg >= 4 || idxRegRm >= 4)
66 pCodeBuf[off++] = X86_OP_REX;
67 pCodeBuf[off++] = bOpcode8;
68 break;
69 }
70 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
71 return off;
72}
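/* Example: with bOpcodeOther=0x23 (AND r32, r/m32), cOpBits=32, idxRegReg=0 (eax)
   and idxRegRm=1 (ecx) this emits "23 c1", i.e. 'and eax, ecx'; with idxRegRm=9
   (r9d) a REX.B prefix is emitted first, giving "41 23 c1" = 'and eax, r9d'. */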
73
74
75/**
76 * Emits a ModR/M instruction with two opcode bytes and only register operands.
77 */
78DECL_FORCE_INLINE(uint32_t)
79iemNativeEmitAmd64TwoByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
80 uint8_t bOpcode0, uint8_t bOpcode8, uint8_t bOpcodeOther,
81 uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
82{
83 Assert(idxRegReg < 16); Assert(idxRegRm < 16);
84 switch (cOpBits)
85 {
86 case 16:
87 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
88 RT_FALL_THRU();
89 case 32:
90 if (idxRegReg >= 8 || idxRegRm >= 8)
91 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
92 pCodeBuf[off++] = bOpcode0;
93 pCodeBuf[off++] = bOpcodeOther;
94 break;
95
96 default: AssertFailed(); RT_FALL_THRU();
97 case 64:
98 pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
99 pCodeBuf[off++] = bOpcode0;
100 pCodeBuf[off++] = bOpcodeOther;
101 break;
102
103 case 8:
104 if (idxRegReg >= 8 || idxRegRm >= 8)
105 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
106 else if (idxRegReg >= 4 || idxRegRm >= 4)
107 pCodeBuf[off++] = X86_OP_REX;
108 pCodeBuf[off++] = bOpcode0;
109 pCodeBuf[off++] = bOpcode8;
110 break;
111 }
112 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
113 return off;
114}
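/* Example: the ADC emitters below pass bOpcode0=0x0f, bOpcodeOther=0xba and
   idxRegReg=4 to this helper, which together with the trailing immediate byte
   yields the "bt r/m32, imm8" encoding (0F BA /4 ib) used to seed the host carry
   flag from the guest EFLAGS register. */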
115
116
117/**
118 * Emits one of three opcodes with an immediate.
119 *
120 * These are expected to be of the /idxRegReg (opcode extension) form.
121 */
122DECL_FORCE_INLINE(uint32_t)
123iemNativeEmitAmd64OneByteModRmInstrRIEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOtherImm8,
124 uint8_t bOpcodeOther, uint8_t cOpBits, uint8_t cImmBits, uint8_t idxRegReg,
125 uint8_t idxRegRm, uint64_t uImmOp)
126{
127 Assert(idxRegReg < 8); Assert(idxRegRm < 16);
128 if ( cImmBits == 8
129 || (uImmOp <= (uint64_t)0x7f && bOpcodeOtherImm8 != 0xcc))
130 {
131 switch (cOpBits)
132 {
133 case 16:
134 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
135 RT_FALL_THRU();
136 case 32:
137 if (idxRegRm >= 8)
138 pCodeBuf[off++] = X86_OP_REX_B;
139 pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
140 break;
141
142 default: AssertFailed(); RT_FALL_THRU();
143 case 64:
144 pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
145 pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
146 break;
147
148 case 8:
149 if (idxRegRm >= 8)
150 pCodeBuf[off++] = X86_OP_REX_B;
151 else if (idxRegRm >= 4)
152 pCodeBuf[off++] = X86_OP_REX;
153 pCodeBuf[off++] = bOpcode8; Assert(bOpcode8 != 0xcc);
154 break;
155 }
156 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
157 pCodeBuf[off++] = (uint8_t)uImmOp;
158 }
159 else
160 {
161 switch (cOpBits)
162 {
163 case 32:
164 if (idxRegRm >= 8)
165 pCodeBuf[off++] = X86_OP_REX_B;
166 break;
167
168 default: AssertFailed(); RT_FALL_THRU();
169 case 64:
170 pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
171 break;
172
173 case 16:
174 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
175 if (idxRegRm >= 8)
176 pCodeBuf[off++] = X86_OP_REX_B;
177 pCodeBuf[off++] = bOpcodeOther;
178 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
179 pCodeBuf[off++] = RT_BYTE1(uImmOp);
180 pCodeBuf[off++] = RT_BYTE2(uImmOp);
181 Assert(cImmBits == 16);
182 return off;
183 }
184 pCodeBuf[off++] = bOpcodeOther;
185 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
186 pCodeBuf[off++] = RT_BYTE1(uImmOp);
187 pCodeBuf[off++] = RT_BYTE2(uImmOp);
188 pCodeBuf[off++] = RT_BYTE3(uImmOp);
189 pCodeBuf[off++] = RT_BYTE4(uImmOp);
190 Assert(cImmBits == 32);
191 }
192 return off;
193}
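/* Example: with the group-1 opcode triple (bOpcode8=0x80, bOpcodeOtherImm8=0x83,
   bOpcodeOther=0x81) used by the callers below, 'and ecx, 0x70' fits the
   sign-extended imm8 path and encodes as "83 e1 70", whereas 'and ecx, 0x12345'
   takes the imm32 path and encodes as "81 e1 45 23 01 00". */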
194
195#endif /* RT_ARCH_AMD64 */
196
197/**
198 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGICAL.
199 *
200 * It takes liveness information into account.
201 */
202DECL_INLINE_THROW(uint32_t)
203iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl
204#ifndef RT_ARCH_AMD64
205 , uint8_t cOpBits, uint8_t idxRegResult, bool fNativeFlags = false
206#endif
207 )
208{
209#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
210 /*
211 * See if we can skip this wholesale.
212 */
213 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
214 if (IEMLIVENESS_STATE_ARE_STATUS_EFL_TO_BE_CLOBBERED(pLivenessEntry))
215 {
216 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflSkippedLogical);
217# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
218 off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, X86_EFL_STATUS_BITS, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
219# endif
220 }
221 else
222#endif
223 {
224#ifdef RT_ARCH_AMD64
225 /*
226 * Collect flags and merge them with eflags.
227 */
228 /** @todo we could alternatively use SAHF here when host rax is free, since
229 * OF is cleared. */
230 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
231 /* pushf - do this before any reg allocations as they may emit instructions too. */
232 pCodeBuf[off++] = 0x9c;
233
234 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
235 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
236 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
237 /* pop tmp */
238 if (idxTmpReg >= 8)
239 pCodeBuf[off++] = X86_OP_REX_B;
240 pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
241 /* and tmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF */
242 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF);
243 /* Clear the status bits in EFLs. */
244 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
245 /* OR in the flags we collected. */
246 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
247 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
248 iemNativeRegFreeTmp(pReNative, idxTmpReg);
249
250#elif defined(RT_ARCH_ARM64)
251 /*
252 * Calculate flags.
253 */
254 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
255 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
256 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
257
258 /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be encoded as an AND immediate, so use idxTmpReg for the constant. */
259 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, ~X86_EFL_STATUS_BITS);
260 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxTmpReg);
261
262 /* N,Z -> SF,ZF */
263 if (cOpBits < 32)
264 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegResult, cOpBits > 8); /* sets NZ */
265 else if (!fNativeFlags)
266 pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, idxRegResult, idxRegResult, cOpBits > 32 /*f64Bit*/);
267 pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
268 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 30);
269 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
270 AssertCompile(X86_EFL_ZF_BIT + 1 == X86_EFL_SF_BIT);
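        /* After the LSR by 30 the host N and Z flags sit in bits 1:0 of idxTmpReg, so
           the single 2-bit BFI above writes them straight into SF (bit 7) and ZF
           (bit 6); the AssertCompile guards that the two EFLAGS bits stay adjacent. */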
271
272 /* Calculate 8-bit parity of the result. */
273 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
274 4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
275 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
276 2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
277 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
278 1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
279 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
280 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
281 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
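        /* The three EOR+LSR steps above fold the low 8 bits of the result down to a
           single XOR in bit 0 (1 = odd number of set bits); the EorImm with mask 1
           then inverts it to match X86_EFL_PF semantics (PF set on even parity)
           before the BFI copies it into the PF position. */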
282
283 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
284 iemNativeRegFreeTmp(pReNative, idxTmpReg);
285#else
286# error "port me"
287#endif
288 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
289
290# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
291 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
292# endif
293 }
294 return off;
295}
296
297
298/**
299 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
300 *
301 * It takes liveness information into account.
302 */
303DECL_FORCE_INLINE_THROW(uint32_t)
304iemNativeEmitEFlagsForArithmetic(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl, uint8_t idxRegEflIn
305#ifndef RT_ARCH_AMD64
306 , uint8_t cOpBits, uint8_t idxRegResult, uint8_t idxRegDstIn, uint8_t idxRegSrc
307 , bool fInvertCarry, uint64_t uImmSrc
308#endif
309 )
310{
311#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
312 /*
313 * See if we can skip this wholesale.
314 */
315 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
316 if (IEMLIVENESS_STATE_ARE_STATUS_EFL_TO_BE_CLOBBERED(pLivenessEntry))
317 {
318 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflSkippedArithmetic);
319# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
320 off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, X86_EFL_STATUS_BITS, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
321# endif
322 }
323 else
324#endif
325 {
326#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
327 uint32_t fSkipped = 0;
328#endif
329#ifdef RT_ARCH_AMD64
330 /*
331 * Collect flags and merge them with eflags.
332 */
333 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
334 /* pushf - do this before any reg allocations as they may emit instructions too. */
335 pCodeBuf[off++] = 0x9c;
336
337 uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
338 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
339 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
340 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
341 /* pop tmp */
342 if (idxTmpReg >= 8)
343 pCodeBuf[off++] = X86_OP_REX_B;
344 pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
345 /* Isolate the flags we want. */
346 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_STATUS_BITS);
347 /* Clear the status bits in EFLs. */
348 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
349 /* OR in the flags we collected. */
350 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
351 if (idxRegEflIn != idxRegEfl)
352 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
353 iemNativeRegFreeTmp(pReNative, idxTmpReg);
354
355#elif defined(RT_ARCH_ARM64)
356 /*
357 * Calculate flags.
358 */
359 uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
360 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
361 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
362 uint8_t const idxTmpReg2 = cOpBits >= 32 ? UINT8_MAX : iemNativeRegAllocTmp(pReNative, &off);
363 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
364
365 /* Invert CF (stored inverted on ARM) and load the flags into the temporary register. */
366 if (fInvertCarry)
367 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
368 pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
369
370 if (cOpBits >= 32)
371 {
372 /* V -> OF */
373 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 28);
374 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1, false /*f64Bit*/);
375
376 /* C -> CF */
377 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 1);
378 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_CF_BIT, 1, false /*f64Bit*/);
379 }
380
381 /* N,Z -> SF,ZF */
382 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits >= 32 ? 1 : 30);
383 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
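        /* Note: the NZCV snapshot is shifted right in steps (28, then +1, then +1,
           i.e. 30 in total) so each BFI above can pick the next flag(s) out of bit 0
           without re-reading the system register; for sub-32-bit ops the V/C part is
           skipped and a single shift by 30 is used for N and Z. */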
384
385 /* For ADC and SBB we have to calculate overflow and carry ourselves. */
386 if (cOpBits < 32)
387 {
388 /* Since the carry flag is the zeroth EFLAGS bit, we just use BFXIL to copy it over. */
389 AssertCompile(X86_EFL_CF_BIT == 0);
390 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxRegEfl, idxRegResult, cOpBits, 1, false /*f64Bit*/);
391
392 /* The overflow flag is more work as we have to compare the signed bits for
393 both inputs and the result. See IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
394
395 Formula: ~(a_uDst ^ a_uSrcOf) & (a_uResult ^ a_uDst)
396 With a_uSrcOf as a_uSrc for additions and ~a_uSrc for subtractions.
397
398 It is a bit simpler when the right (source) side is constant:
399 adc: S D R -> OF sbb: S D R -> OF
400 0 0 0 -> 0 \ 0 0 0 -> 0 \
401 0 0 1 -> 1 \ 0 0 1 -> 0 \
402 0 1 0 -> 0 / and not(D), R 0 1 0 -> 1 / and D, not(R)
403 0 1 1 -> 0 / 0 1 1 -> 0 /
404 1 0 0 -> 0 \ 1 0 0 -> 0 \
405 1 0 1 -> 0 \ and D, not(R) 1 0 1 -> 1 \ and not(D), R
406 1 1 0 -> 1 / 1 1 0 -> 0 /
407 1 1 1 -> 0 / 1 1 1 -> 0 / */
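        /* Put differently: OF is set exactly when a_uDst and a_uSrcOf have the same
           sign bit while a_uResult's sign bit differs from it. */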
408 if (idxRegSrc != UINT8_MAX)
409 {
410 if (fInvertCarry) /* sbb: ~((a_uDst) ^ ~(a_uSrcOf)) -> (a_uDst) ^ (a_uSrcOf); HACK ALERT: fInvertCarry == sbb */
411 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false);
412 else /* adc: ~((a_uDst) ^ (a_uSrcOf)) -> (a_uDst) ^ ~(a_uSrcOf) */
413 pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegSrc, false);
414 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg2, idxRegDstIn, idxRegResult, false); /* (a_uDst) ^ (a_uResult) */
415 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpReg, idxTmpReg, idxTmpReg2, false /*f64Bit*/);
416 }
417 else if (uImmSrc & RT_BIT_32(cOpBits - 1))
418 {
419 if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
420 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
421 else
422 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
423 }
424 else
425 {
426 if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
427 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
428 else
429 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
430 }
431 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits - 1, false /*f64Bit*/);
432 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1);
433 iemNativeRegFreeTmp(pReNative, idxTmpReg2);
434 }
435
436 /* Calculate 8-bit parity of the result. */
437 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
438 4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
439 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
440 2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
441 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
442 1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
443 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
444 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
445 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
446
447 /* Calculate auxiliary carry/borrow. This is related to 8-bit BCD.
448 General formula: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
449 S D R
450 0 0 0 -> 0; \
451 0 0 1 -> 1; \ regular
452 0 1 0 -> 1; / xor R, D
453 0 1 1 -> 0; /
454 1 0 0 -> 1; \
455 1 0 1 -> 0; \ invert one of the two
456 1 1 0 -> 0; / xor not(R), D
457 1 1 1 -> 1; /
458 a_uSrc[bit 4]=0: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
459 a_uSrc[bit 4]=1: ((uint32_t)~(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
460 */
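        /* I.e. AF is simply bit 4 of (a_uDst ^ a_uSrc ^ a_uResult); when the source is
           an immediate, its bit 4 is folded in below by picking EOR (bit clear) vs.
           EON (bit set). */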
461
462 if (idxRegSrc != UINT8_MAX)
463 {
464 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false /*f64Bit*/);
465 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxRegResult, false /*f64Bit*/);
466 }
467 else if (uImmSrc & X86_EFL_AF)
468 pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
469 else
470 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
471 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, X86_EFL_AF_BIT, false /*f64Bit*/);
472 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_AF_BIT, 1, false /*f64Bit*/);
473
474 if (idxRegEflIn != idxRegEfl)
475 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
476 iemNativeRegFreeTmp(pReNative, idxTmpReg);
477
478#else
479# error "port me"
480#endif
481 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
482
483#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
484 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, fSkipped, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
485#endif
486 }
487 return off;
488
489}
490
491
492/**
493 * The AND instruction will clear OF, CF and AF (the latter is undefined) and
494 * set the other flags according to the result.
495 */
496DECL_INLINE_THROW(uint32_t)
497iemNativeEmit_and_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
498 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
499{
500 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
501 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
502#ifdef RT_ARCH_AMD64
503 /* On AMD64 we just use the correctly sized AND instruction to harvest the EFLAGS. */
504 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
505 0x22, 0x23, cOpBits, idxRegDst, idxRegSrc);
506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
507 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
508
509 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
510
511#elif defined(RT_ARCH_ARM64)
512 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones. */
513 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
514 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
515 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
516 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
517
518 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, true /*fNativeFlags*/);
519#else
520# error "Port me"
521#endif
522 iemNativeVarRegisterRelease(pReNative, idxVarDst);
523 return off;
524}
525
526
527/**
528 * The AND instruction with immediate value as right operand.
529 */
530DECL_INLINE_THROW(uint32_t)
531iemNativeEmit_and_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
532 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
533{
534 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
535#ifdef RT_ARCH_AMD64
536 /* On AMD64 we just use the correctly sized AND instruction to harvest the EFLAGS. */
537 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
538 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 4, idxRegDst, uImmOp);
539 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
540
541 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
542
543#elif defined(RT_ARCH_ARM64)
544 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones, and of
545 course the immediate variant when possible to save a register load. */
546 uint32_t uImmSizeLen, uImmRotations;
547 if ( cOpBits > 32
548 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
549 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
550 {
551 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
552 if (cOpBits >= 32)
553 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
554 else
555 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
556 }
557 else
558 {
559 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
560 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
561 if (cOpBits >= 32)
562 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
563 else
564 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
565 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
566 }
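    /* For reference: masks like 0x000000ff or 0xfffffff0 are expressible as ARM64
       logical immediates (a rotated run of contiguous ones, possibly replicated) and
       take the branch above, while a value like 0x12345 is not and falls back to
       loading the immediate into a temporary register. */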
567 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
568
569 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, cOpBits >= 32 /*fNativeFlags*/);
570 RT_NOREF_PV(cImmBits);
571
572#else
573# error "Port me"
574#endif
575 iemNativeVarRegisterRelease(pReNative, idxVarDst);
576 return off;
577}
578
579
580/**
581 * The TEST instruction will clear OF, CF and AF (the latter is undefined) and
582 * set the other flags according to the result.
583 */
584DECL_INLINE_THROW(uint32_t)
585iemNativeEmit_test_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
586 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
587{
588 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
589 uint8_t const idxRegSrc = idxVarSrc == idxVarDst ? idxRegDst /* special case of 'test samereg,samereg' */
590 : iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
591#ifdef RT_ARCH_AMD64
592 /* On AMD64 we just use the correctly sized TEST instruction to harvest the EFLAGS. */
593 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
594 0x84, 0x85, cOpBits, idxRegSrc, idxRegDst);
595 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
596
597#elif defined(RT_ARCH_ARM64)
598 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones. We also
599 need to keep the result in order to calculate the flags. */
600 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
601 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
602 if (cOpBits >= 32)
603 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
604 else
605 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
607
608#else
609# error "Port me"
610#endif
611 if (idxVarSrc != idxVarDst)
612 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
613 iemNativeVarRegisterRelease(pReNative, idxVarDst);
614
615#ifdef RT_ARCH_AMD64
616 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
617#else
618 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
619 iemNativeRegFreeTmp(pReNative, idxRegResult);
620#endif
621 return off;
622}
623
624
625/**
626 * The TEST instruction with immediate value as right operand.
627 */
628DECL_INLINE_THROW(uint32_t)
629iemNativeEmit_test_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
630 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
631{
632 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
633#ifdef RT_ARCH_AMD64
634 /* On AMD64 we just use the correctly sized TEST instruction to harvest the EFLAGS. */
635 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
636 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0xf6, 0xcc, 0xf7, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
637 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
638 iemNativeVarRegisterRelease(pReNative, idxVarDst);
639
640 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
641
642#elif defined(RT_ARCH_ARM64)
643 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones, and of
644 course the immediate variant when possible to save a register load.
645 We also need to keep the result in order to calculate the flags. */
646 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
647 uint32_t uImmSizeLen, uImmRotations;
648 if ( cOpBits > 32
649 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
650 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
651 {
652 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
653 if (cOpBits >= 32)
654 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
655 else
656 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
657 }
658 else
659 {
660 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
661 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
662 if (cOpBits >= 32)
663 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
664 else
665 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
666 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
667 }
668 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
669 iemNativeVarRegisterRelease(pReNative, idxVarDst);
670
671 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
672
673 iemNativeRegFreeTmp(pReNative, idxRegResult);
674 RT_NOREF_PV(cImmBits);
675
676#else
677# error "Port me"
678#endif
679 return off;
680}
681
682
683/**
684 * The OR instruction will clear OF, CF and AF (the latter is undefined) and
685 * set the other flags according to the result.
686 */
687DECL_INLINE_THROW(uint32_t)
688iemNativeEmit_or_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
689 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
690{
691 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
692 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
693#ifdef RT_ARCH_AMD64
694 /* On AMD64 we just use the correctly sized OR instruction to harvest the EFLAGS. */
695 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
696 0x0a, 0x0b, cOpBits, idxRegDst, idxRegSrc);
697 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
698 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
699
700 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
701
702#elif defined(RT_ARCH_ARM64)
703 /* On ARM64 we use 32-bit OR for the 8-bit and 16-bit ones. */
704 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
705 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
706 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
707 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
708
709 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
710
711#else
712# error "Port me"
713#endif
714 iemNativeVarRegisterRelease(pReNative, idxVarDst);
715 return off;
716}
717
718
719/**
720 * The OR instruction with immediate value as right operand.
721 */
722DECL_INLINE_THROW(uint32_t)
723iemNativeEmit_or_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
724 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
725{
726 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
727#ifdef RT_ARCH_AMD64
728 /* On AMD64 we just use the correctly sized OR instruction to harvest the EFLAGS. */
729 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
730 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 1, idxRegDst, uImmOp);
731 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
732
733 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
734
735#elif defined(RT_ARCH_ARM64)
736 /* On ARM64 we use 32-bit OR for the 8-bit and 16-bit ones, and of
737 course the immediate variant when possible to save a register load. */
738 uint32_t uImmSizeLen, uImmRotations;
739 if ( cOpBits > 32
740 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
741 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
742 {
743 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
744 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
745 }
746 else
747 {
748 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
749 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
750 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
751 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
752 }
753 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
754
755 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
756 RT_NOREF_PV(cImmBits);
757
758#else
759# error "Port me"
760#endif
761 iemNativeVarRegisterRelease(pReNative, idxVarDst);
762 return off;
763}
764
765
766/**
767 * The XOR instruction will clear OF, CF and AF (the latter is undefined) and
768 * set the other flags according to the result.
769 */
770DECL_INLINE_THROW(uint32_t)
771iemNativeEmit_xor_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
772 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
773{
774 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
775 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
776#ifdef RT_ARCH_AMD64
777 /* On AMD64 we just use the correctly sized XOR instruction to harvest the EFLAGS. */
778 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
779 0x32, 0x33, cOpBits, idxRegDst, idxRegSrc);
780 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
781 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
782
783 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
784
785#elif defined(RT_ARCH_ARM64)
786 /* On ARM64 we use 32-bit EOR (XOR) for the 8-bit and 16-bit ones. */
787 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
788 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
789 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
790 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
791
792 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
793
794#else
795# error "Port me"
796#endif
797 iemNativeVarRegisterRelease(pReNative, idxVarDst);
798 return off;
799}
800
801
802/**
803 * The XOR instruction with immediate value as right operand.
804 */
805DECL_INLINE_THROW(uint32_t)
806iemNativeEmit_xor_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
807 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
808{
809 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
810#ifdef RT_ARCH_AMD64
811 /* On AMD64 we just use the correctly sized XOR instruction to harvest the EFLAGS. */
812 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
813 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 6, idxRegDst, uImmOp);
814 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
815
816 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
817
818#elif defined(RT_ARCH_ARM64)
819 /* On ARM64 we use 32-bit EOR (XOR) for the 8-bit and 16-bit ones, and of
820 course the immediate variant when possible to save a register load. */
821 uint32_t uImmSizeLen, uImmRotations;
822 if ( cOpBits > 32
823 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
824 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
825 {
826 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
827 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
828 }
829 else
830 {
831 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
832 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
833 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
834 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
835 }
836 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
837
838 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
839 RT_NOREF_PV(cImmBits);
840
841#else
842# error "Port me"
843#endif
844 iemNativeVarRegisterRelease(pReNative, idxVarDst);
845 return off;
846}
847
848
849
850/*********************************************************************************************************************************
851* ADD, ADC, SUB, SBB, CMP *
852*********************************************************************************************************************************/
853
854/**
855 * The ADD instruction will set all status flags.
856 */
857DECL_INLINE_THROW(uint32_t)
858iemNativeEmit_add_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
859 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
860{
861 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
862 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
863
864#ifdef RT_ARCH_AMD64
865 /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
866 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
867 0x02, 0x03, cOpBits, idxRegDst, idxRegSrc);
868 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
869
870 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
871 iemNativeVarRegisterRelease(pReNative, idxVarDst);
872
873 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
874
875#elif defined(RT_ARCH_ARM64)
876 /* On ARM64 we'll need the two input operands as well as the result in order
877 to calculate the right flags, even if we use ADDS and translate NZCV into
878 OF, CF, ZF and SF. */
879 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
880 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
881 if (cOpBits >= 32)
882 {
883 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
884 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
885 }
886 else
887 {
888 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
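            /* (Working at the top of the 32-bit register makes the narrow operation's
               carry-out and signed overflow show up in PSTATE.C/V and its sign bit in
               PSTATE.N; the saved input and the result are shifted back down afterwards
               for the AF/PF calculation.) */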
889 uint32_t const cShift = 32 - cOpBits;
890 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
891 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
892 true /*fSetFlags*/, cShift);
893 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
894 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
895 cOpBits = 32;
896 }
897 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
898
899 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
900 idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);
901
902 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
903 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
904 iemNativeVarRegisterRelease(pReNative, idxVarDst);
905
906#else
907# error "port me"
908#endif
909 return off;
910}
911
912
913/**
914 * The ADD instruction with immediate value as right operand.
915 */
916DECL_INLINE_THROW(uint32_t)
917iemNativeEmit_add_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
918 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
919{
920 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
921
922#ifdef RT_ARCH_AMD64
923 /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
924 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
925 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
926 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
927
928 iemNativeVarRegisterRelease(pReNative, idxVarDst);
929
930 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
931
932#elif defined(RT_ARCH_ARM64)
933 /* On ARM64 we'll need the two input operands as well as the result in order
934 to calculate the right flags, even if we use ADDS and translate NZCV into
935 OF, CF, ZF and SF. */
936 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
937 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
938 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
939 if (cOpBits >= 32)
940 {
941 if (uImmOp <= 0xfffU)
942 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
943 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
944 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
945 true /*fSetFlags*/, true /*fShift12*/);
946 else
947 {
948 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
949 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
950 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
951 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
952 }
953 }
954 else
955 {
956 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
957 uint32_t const cShift = 32 - cOpBits;
958 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp << cShift);
959 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
960 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegTmpImm, idxRegDstIn, false /*f64Bit*/, true /*fSetFlags*/, cShift);
961 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
962 cOpBits = 32;
963 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
964 }
965 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
966
967 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
968 idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);
969
970 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
971 iemNativeVarRegisterRelease(pReNative, idxVarDst);
972 RT_NOREF(cImmBits);
973
974#else
975# error "port me"
976#endif
977 return off;
978}
979
980
981/**
982 * The ADC instruction takes CF as input and will set all status flags.
983 */
984DECL_INLINE_THROW(uint32_t)
985iemNativeEmit_adc_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
986 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
987{
988 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
989 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
990 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
991
992#ifdef RT_ARCH_AMD64
993 /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
994 with matching size to get the correct flags. */
995 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
996
997 /* Use the BT instruction to set CF according to idxRegEfl. */
998 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
999 pCodeBuf[off++] = X86_EFL_CF_BIT;
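    /* Note: BT only defines CF (loaded here from bit 0, the CF bit, of the guest
       EFLAGS value in idxRegEfl); the other status flags are don't-care at this
       point since the ADC below rewrites all of them. */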
1000
1001 off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x12, 0x13, cOpBits, idxRegDst, idxRegSrc);
1002 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1003
1004 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1005 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1006
1007 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1008
1009#elif defined(RT_ARCH_ARM64)
1010 /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl and
1011 then ADCS for the calculation. We need all inputs and result for the two
1012 flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
1013 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1014 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1015
1016 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
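    /* The RMIF rotates the EFLAGS value right by 63, i.e. left by one, so that
       X86_EFL_CF (bit 0) lands in bit 1 of the rotated value; with the mask set to
       just C, only PSTATE.C is updated, loading the guest CF into the host carry. */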
1017 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1018 if (cOpBits >= 32)
1019 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
1020 else
1021 {
1022 /* Since we're also adding in the carry flag here, shifting operands up
1023 doesn't work. So, we have to calculate carry & overflow manually. */
1024 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
1025 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1026 }
1027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1028
1029 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1030 idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);
1031
1032 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1033 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1034 if (cOpBits < 32)
1035 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1036 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1037
1038#else
1039# error "port me"
1040#endif
1041 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1042 return off;
1043}
1044
1045
1046/**
1047 * The ADC instruction with immediate value as right operand.
1048 */
1049DECL_INLINE_THROW(uint32_t)
1050iemNativeEmit_adc_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1051 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1052{
1053 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1054 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1055
1056#ifdef RT_ARCH_AMD64
1057 /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
1058 with matching size to get the correct flags. */
1059 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1060
1061 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1062 pCodeBuf[off++] = X86_EFL_CF_BIT;
1063
1064 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 2, idxRegDst, uImmOp);
1065 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1066
1067 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1068
1069 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1070
1071#elif defined(RT_ARCH_ARM64)
1072 /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl
1073 and then ADCS for the calculation. We need all inputs and result for
1074 the two flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
1075 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1076 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1077 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1078
1079 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1080 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1081 if (cOpBits >= 32)
1082 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
1083 else
1084 {
1085 /* Since we're also adding in the carry flag here, shifting operands up
1086 doesn't work. So, we have to calculate carry & overflow manually. */
1087 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
1088 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1089 }
1090 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1091
1092 iemNativeRegFreeTmp(pReNative, idxRegImm);
1093
1094 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1095 idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);
1096
1097 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1098 if (cOpBits < 32)
1099 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1100 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1101 RT_NOREF(cImmBits);
1102
1103#else
1104# error "port me"
1105#endif
1106 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1107 return off;
1108}
1109
1110
1111/**
1112 * The SUB instruction will set all status flags.
1113 */
1114DECL_INLINE_THROW(uint32_t)
1115iemNativeEmit_sub_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1116 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1117{
1118 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1119 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1120
1121#ifdef RT_ARCH_AMD64
1122 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS values. */
1123 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1124 0x2a, 0x2b, cOpBits, idxRegDst, idxRegSrc);
1125 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1126
1127 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1128 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1129
1130 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1131
1132#elif defined(RT_ARCH_ARM64)
1133 /* On ARM64 we'll need the two input operands as well as the result in order
1134 to calculate the right flags, even if we use SUBS and translate NZCV into
1135 OF, CF, ZF and SF. */
1136 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1137 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1138 if (cOpBits >= 32)
1139 {
1140 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1141 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1142 }
1143 else
1144 {
1145 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1146 uint32_t const cShift = 32 - cOpBits;
1147 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1148 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
1149 true /*fSetFlags*/, cShift);
1150 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1151 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1152 cOpBits = 32;
1153 }
1154 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1155
1156 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1157 idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);
1158
1159 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1160 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1161 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1162
1163#else
1164# error "port me"
1165#endif
1166 return off;
1167}
1168
1169
1170/**
1171 * The SUB instruction with immediate value as right operand.
1172 */
1173DECL_INLINE_THROW(uint32_t)
1174iemNativeEmit_sub_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1175 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1176{
1177 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1178
1179#ifdef RT_ARCH_AMD64
1180 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS values. */
1181 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1182 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 5, idxRegDst, uImmOp);
1183 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1184
1185 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1186
1187 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1188
1189#elif defined(RT_ARCH_ARM64)
1190 /* On ARM64 we'll need the two input operands as well as the result in order
1191 to calculate the right flags, even if we use SUBS and translate NZCV into
1192 OF, CF, ZF and SF. */
1193 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1194 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1195 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1196 if (cOpBits >= 32)
1197 {
1198 if (uImmOp <= 0xfffU)
1199 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1200 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1201 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
1202 true /*fSetFlags*/, true /*fShift12*/);
1203 else
1204 {
1205 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1206 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1207 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1208 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1209 }
1210 }
1211 else
1212 {
1213 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1214 uint32_t const cShift = 32 - cOpBits;
1215 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1216 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1217 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1218 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
1219 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1220 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1221 cOpBits = 32;
1222 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1223 }
1224 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1225
1226 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1227 idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1228
1229 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1230 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1231 RT_NOREF(cImmBits);
1232
1233#else
1234# error "port me"
1235#endif
1236 return off;
1237}
1238
1239
1240/**
1241 * The CMP instruction will set all status flags, but modifies no registers.
1242 */
1243DECL_INLINE_THROW(uint32_t)
1244iemNativeEmit_cmp_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1245 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1246{
1247 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1248 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1249
1250#ifdef RT_ARCH_AMD64
1251 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS values. */
1252 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1253 0x3a, 0x3b, cOpBits, idxRegDst, idxRegSrc);
1254 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1255
1256 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1257 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1258
1259 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1260
1261#elif defined(RT_ARCH_ARM64)
1262 /* On ARM64 we'll need the actual result as well as both input operands in order
1263 to calculate the right flags, even if we use SUBS and translate NZCV into
1264 OF, CF, ZF and SF. */
1265 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1266 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1267 if (cOpBits >= 32)
1268 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1269 else
1270 {
1271 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1272 uint32_t const cShift = 32 - cOpBits;
1273 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegResult, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1274 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegSrc, false /*f64Bit*/,
1275 true /*fSetFlags*/, cShift);
1276 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
1277 cOpBits = 32;
1278 }
1279 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1280
1281 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1282 idxRegDst, idxRegSrc, true /*fInvertCarry*/, 0);
1283
1284 iemNativeRegFreeTmp(pReNative, idxRegResult);
1285 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1286 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1287
1288#else
1289# error "port me"
1290#endif
1291 return off;
1292}
1293
1294
1295/**
1296 * The CMP instruction with immediate value as right operand.
1297 */
1298DECL_INLINE_THROW(uint32_t)
1299iemNativeEmit_cmp_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1300 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1301{
1302 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1303
1304#ifdef RT_ARCH_AMD64
1305 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS.SF value. */
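/* 0x80 (8-bit operand), 0x83 (sign-extended imm8) and 0x81 (imm16/32) with /7 are
   the CMP r/m,imm encoding family the helper below picks from. */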
1306 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1307 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 7, idxRegDst, uImmOp);
1308 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1309
1310 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1311
1312 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1313
1314#elif defined(RT_ARCH_ARM64)
1315 /* On ARM64 we'll need the actual result as well as both input operands in order
1316 to calculate the right flags, even though we use SUBS and translate NZCV into
1317 OF, CF, ZF and SF. */
1318 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1319 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1320 if (cOpBits >= 32)
1321 {
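/* ARM64 SUBS only encodes a 12-bit unsigned immediate, optionally shifted left by
   12 bits, so anything else has to go via a temporary register. */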
1322 if (uImmOp <= 0xfffU)
1323 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1324 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1325 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
1326 true /*fSetFlags*/, true /*fShift12*/);
1327 else
1328 {
1329 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1330 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1331 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1332 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1333 }
1334 }
1335 else
1336 {
1337 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1338 uint32_t const cShift = 32 - cOpBits;
1339 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1340 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1341 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegResult, idxRegDst, cShift, false /*f64Bit*/);
1342 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
1343 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
1344 cOpBits = 32;
1345 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1346 }
1347 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1348
1349 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1350 idxRegDst, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1351
1352 iemNativeRegFreeTmp(pReNative, idxRegResult);
1353 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1354 RT_NOREF(cImmBits);
1355
1356#else
1357# error "port me"
1358#endif
1359 return off;
1360}
1361
1362
1363/**
1364 * The SBB instruction takes CF as input and will set all status flags.
1365 */
1366DECL_INLINE_THROW(uint32_t)
1367iemNativeEmit_sbb_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1368 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1369{
1370 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1371 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1372 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1373
1374#ifdef RT_ARCH_AMD64
1375 /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
1376 with matching size to get the correct flags. */
1377 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1378
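/* bt idxRegEfl, X86_EFL_CF_BIT: copies the guest CF into the host carry flag so
   the SBB below picks it up. */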
1379 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1380 pCodeBuf[off++] = X86_EFL_CF_BIT;
1381
1382 off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x1a, 0x1b, cOpBits, idxRegDst, idxRegSrc);
1383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1384
1385 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1386 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1387
1388 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1389
1390#elif defined(RT_ARCH_ARM64)
1391 /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
1392 idxRegEfl and then SBCS for the calculation. We need all inputs and
1393 result for the two flags (AF,PF) that can't be directly derived from
1394 PSTATE.NZCV. */
1395 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1396 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1397
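/* RMIF rotates idxRegEfl so the x86 CF bit lands in PSTATE.C; CFINV then inverts
   it because the ARM carry is the complement of the x86 borrow (SBC subtracts !C),
   hence also the fInvertCarry argument for the flag calculation below. */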
1398 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1399 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
1400 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1401 if (cOpBits >= 32)
1402 pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
1403 else
1404 {
1405 /* Since the carry flag (borrow) also enters the calculation here, shifting the
1406 operands up doesn't work. So, we have to calculate carry & overflow manually. */
1407 pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
1408 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1409 }
1410 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1411
1412 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1413 idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);
1414
1415 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1416 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1417 if (cOpBits < 32)
1418 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1419 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1420
1421#else
1422# error "port me"
1423#endif
1424 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1425 return off;
1426}
1427
1428
1429/**
1430 * The SBB instruction with immediate value as right operand.
1431 */
1432DECL_INLINE_THROW(uint32_t)
1433iemNativeEmit_sbb_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1434 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1435{
1436 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1437 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1438
1439#ifdef RT_ARCH_AMD64
1440 /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
1441 with matching size to get the correct flags. */
1442 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1443
1444 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1445 pCodeBuf[off++] = X86_EFL_CF_BIT;
1446
1447 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 3, idxRegDst, uImmOp);
1448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1449
1450 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1451
1452 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1453
1454#elif defined(RT_ARCH_ARM64)
1455 /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
1456 idxRegEfl and then SBCS for the calculation. We need all inputs and
1457 result for the two flags (AF,PF) that can't be directly derived from
1458 PSTATE.NZCV. */
1459 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1460 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1461 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1462
1463 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1464 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
1465 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1466 if (cOpBits >= 32)
1467 pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
1468 else
1469 {
1470 /* Since the carry flag (borrow) also enters the calculation here, shifting the
1471 operands up doesn't work. So, we have to calculate carry & overflow manually. */
1472 pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
1473 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1474 }
1475 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1476
1477 iemNativeRegFreeTmp(pReNative, idxRegImm);
1478
1479 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1480 idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1481
1482 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1483 if (cOpBits < 32)
1484 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1485 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1486 RT_NOREF(cImmBits);
1487
1488#else
1489# error "port me"
1490#endif
1491 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1492 return off;
1493}
1494
1495
1496DECL_INLINE_THROW(uint32_t)
1497iemNativeEmit_imul_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1498 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1499{
1500 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1501 AssertFailed();
1502 return iemNativeEmitBrk(pReNative, off, 0x666);
1503}
1504
1505
1506DECL_INLINE_THROW(uint32_t)
1507iemNativeEmit_popcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1508 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1509{
1510 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1511 AssertFailed();
1512 return iemNativeEmitBrk(pReNative, off, 0x666);
1513}
1514
1515
1516DECL_INLINE_THROW(uint32_t)
1517iemNativeEmit_tzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1518 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1519{
1520 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1521 AssertFailed();
1522 return iemNativeEmitBrk(pReNative, off, 0x666);
1523}
1524
1525
1526DECL_INLINE_THROW(uint32_t)
1527iemNativeEmit_lzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1528 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1529{
1530 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1531 AssertFailed();
1532 return iemNativeEmitBrk(pReNative, off, 0x666);
1533}
1534
1535
1536
1537/*********************************************************************************************************************************
1538* Shifting and Rotating. *
1539*********************************************************************************************************************************/
1540
1541
1542typedef enum
1543{
1544 kIemNativeEmitEFlagsForShiftType_Left,
1545 kIemNativeEmitEFlagsForShiftType_Right,
1546 kIemNativeEmitEFlagsForShiftType_SignedRight
1547} IEMNATIVEEMITEFLAGSFORSHIFTTYPE;
1548
1549/**
1550 * This is used by SHL, SHR and SAR emulation.
1551 *
1552 * It takes liveness stuff into account.
1553 */
1554DECL_INLINE_THROW(uint32_t)
1555iemNativeEmitEFlagsForShift(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegEfl, uint8_t idxRegResult,
1556 uint8_t idxRegSrc, uint8_t idxRegCount, uint8_t cOpBits, IEMNATIVEEMITEFLAGSFORSHIFTTYPE enmType,
1557 uint8_t idxRegTmp)
1558{
1559RT_NOREF(pReNative, off, idxRegEfl, idxRegResult, idxRegSrc, idxRegCount, cOpBits, enmType);
1560#if 0 //def IEMNATIVE_WITH_EFLAGS_SKIPPING
1561 /*
1562 * See if we can skip this wholesale.
1563 */
1564 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
1565 if (IEMLIVENESS_STATE_ARE_STATUS_EFL_TO_BE_CLOBBERED(pLivenessEntry))
1566 {
1567 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflSkippedLogical);
1568# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
1569 off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, X86_EFL_STATUS_BITS, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
1570# endif
1571 }
1572 else
1573#endif
1574 {
1575 /*
1576 * The differences between the Intel and AMD flags for SHL are:
1577 * - Intel always clears AF while AMD always sets it.
1578 * - Intel sets OF for the first shift, while AMD for the last shift.
1579 *
1580 */
1581
1582#ifdef RT_ARCH_AMD64
1583 /*
1584 * We capture the host flags from the shift and do the additional OF and AF calculations as needed.
1585 */
1586 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1587 /** @todo kIemNativeEmitEFlagsForShiftType_SignedRight: we could alternatively
1588 * use SAHF here when host rax is free, since OF is cleared. */
1589 /* pushf */
1590 pCodeBuf[off++] = 0x9c;
1591 /* pop tmp */
1592 if (idxRegTmp >= 8)
1593 pCodeBuf[off++] = X86_OP_REX_B;
1594 pCodeBuf[off++] = 0x58 + (idxRegTmp & 7);
1595 /* Clear the status bits in EFLs. */
1596 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
1597 uint8_t const idxTargetCpuEflFlavour = pReNative->pVCpu->iem.s.aidxTargetCpuEflFlavour[1];
1598 if (idxTargetCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
1599 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegTmp, X86_EFL_STATUS_BITS);
1600 else
1601 {
1602 /* and tmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_CF */
1603 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegTmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_CF);
1604 if (idxTargetCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD)
1605 off = iemNativeEmitOrGpr32ByImmEx(pCodeBuf, off, idxRegTmp, X86_EFL_AF);
1606 /* OR in the flags we collected. */
1607 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxRegTmp);
1608
1609 /* Calculate OF */
1610 if (idxTargetCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD)
1611 {
1612 /* AMD last bit shifted: fEfl |= ((uResult >> (cOpBits - 1)) ^ fCarry) << X86_EFL_OF_BIT; */
1613 /* bt idxRegResult, (cOpBits - 1) => CF=result-sign-bit */
1614 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b /*ud2*/, 0xba,
1615 RT_MAX(cOpBits, 16), 4, idxRegResult);
1616 pCodeBuf[off++] = cOpBits - 1;
1617 /* setc idxRegTmp */
1618 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x92, 0x0b /*ud2*/, 8, 0, idxRegTmp);
1619 /* xor idxRegTmp, idxRegEfl */
1620 off = iemNativeEmitXorGpr32ByGpr32Ex(pCodeBuf, off, idxRegTmp, idxRegEfl);
1621 /* and idxRegTmp, 1 */
1622 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegTmp, 1);
1623 /* shl idxRegTmp, X86_EFL_OF_BIT */
1624 off = iemNativeEmitShiftGpr32LeftEx(pCodeBuf, off, idxRegTmp, X86_EFL_OF_BIT);
1625 }
1626 else
1627 {
1628 /* Intel first bit shifted: fEfl |= X86_EFL_GET_OF_ ## cOpBits(uDst ^ (uDst << 1)); */
1629 if (cOpBits <= 32)
1630 {
1631 /* mov idxRegTmp, idxRegSrc */
1632 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegTmp, idxRegSrc);
1633 /* shl idxRegTmp, 1 */
1634 off = iemNativeEmitShiftGpr32LeftEx(pCodeBuf, off, idxRegTmp, 1);
1635 /* xor idxRegTmp, idxRegSrc */
1636 off = iemNativeEmitXorGprByGprEx(pCodeBuf, off, idxRegTmp, idxRegSrc);
1637 /* shr idxRegTmp, cOpBits - X86_EFL_OF_BIT - 1 or shl idxRegTmp, X86_EFL_OF_BIT - cOpBits + 1 */
1638 if (cOpBits >= X86_EFL_OF_BIT)
1639 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, idxRegTmp, cOpBits - X86_EFL_OF_BIT - 1);
1640 else
1641 off = iemNativeEmitShiftGpr32LeftEx(pCodeBuf, off, idxRegTmp, X86_EFL_OF_BIT - cOpBits + 1);
1642 }
1643 else
1644 {
1645 /* Same as above but with 64-bit GPRs. */
1646 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegTmp, idxRegSrc);
1647 off = iemNativeEmitShiftGprLeftEx(pCodeBuf, off, idxRegTmp, 1);
1648 off = iemNativeEmitXorGprByGprEx(pCodeBuf, off, idxRegTmp, idxRegSrc);
1649 off = iemNativeEmitShiftGprRightEx(pCodeBuf, off, idxRegTmp, cOpBits - X86_EFL_OF_BIT - 1);
1650 }
1651 /* and idxRegTmp, X86_EFL_OF */
1652 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegTmp, X86_EFL_OF);
1653 }
1654 }
1655 /* Or in the collected flag(s) */
1656 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxRegTmp);
1657
1658#elif defined(RT_ARCH_ARM64)
1659 /*
1660 * Calculate flags.
1661 */
1662 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
1663
1664 /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be AND immediate, so use idxRegTmp for constant. */
1665 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxRegTmp, ~X86_EFL_STATUS_BITS);
1666 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxRegTmp);
1667
1668 /* N,Z -> SF,ZF */
1669 if (cOpBits < 32)
1670 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegResult, cOpBits > 8); /* sets NZ */
1671 else
1672 pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, idxRegResult, idxRegResult, cOpBits > 32 /*f64Bit*/);
1673 pCodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
1674 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, 30);
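/* After the shift Z sits in bit 0 and N in bit 1, so a single BFI writes both ZF
   and SF (adjacent EFLAGS bits, see the AssertCompile below). */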
1675 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
1676 AssertCompile(X86_EFL_ZF_BIT + 1 == X86_EFL_SF_BIT);
1677
1678 /* Calculate 8-bit parity of the result. */
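/* Folding trick: tmp = res ^ (res >> 4); tmp ^= tmp >> 2; tmp ^= tmp >> 1; bit 0
   then holds the XOR of the eight low result bits, which is inverted (EOR with 1)
   because x86 PF is set for an even number of set bits. */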
1679 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegResult, idxRegResult, false /*f64Bit*/,
1680 4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
1681 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegTmp, idxRegTmp, false /*f64Bit*/,
1682 2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
1683 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegTmp, idxRegTmp, false /*f64Bit*/,
1684 1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
1685 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
1686 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxRegTmp, idxRegTmp, 0, 0, false /*f64Bit*/);
1687 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
1688
1689 /* Calculate carry - the last bit shifted out of the input value. */
1690 if (enmType == kIemNativeEmitEFlagsForShiftType_Left)
1691 {
1692 /* CF = (idxRegSrc >> (cOpBits - idxRegCount)) & 1 */
1693 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegTmp, cOpBits);
1694 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegTmp, idxRegCount, false /*f64Bit*/, cOpBits < 32 /*fSetFlags*/);
1695 if (cOpBits < 32)
1696 pCodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Cc, 3); /* 16 or 8 bit: CF is clear if all shifted out */
1697 pCodeBuf[off++] = Armv8A64MkInstrLsrv(idxRegTmp, idxRegSrc, idxRegTmp, cOpBits > 32);
1698 }
1699 else
1700 {
1701 /* CF = (idxRegSrc >> (idxRegCount - 1)) & 1 */
1702 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegTmp, idxRegCount, 1, false /*f64Bit*/);
1703 pCodeBuf[off++] = Armv8A64MkInstrLsrv(idxRegTmp, idxRegSrc, idxRegTmp, cOpBits > 32);
1704 }
1705 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_CF_BIT, 1, false /*f64Bit*/);
1706
1707 uint8_t const idxTargetCpuEflFlavour = pReNative->pVCpu->iem.s.aidxTargetCpuEflFlavour[0];
1708 if (idxTargetCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_AMD)
1709 {
1710 /* Intel: OF = first bit shifted: fEfl |= X86_EFL_GET_OF_ ## cOpBits(uDst ^ (uDst << 1)); */
1711 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegSrc, idxRegSrc, cOpBits > 32, 1 /*left shift count*/);
1712 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, cOpBits - 1, cOpBits > 32);
1713 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_OF_BIT, 1, false /*f64Bit*/);
1714 }
1715 else
1716 {
1717 /* AMD: OF = last bit shifted: fEfl |= ((uResult >> (cOpBits - 1)) ^ fCarry) << X86_EFL_OF_BIT; */
1718 AssertCompile(X86_EFL_CF_BIT == 0);
1719 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegEfl, idxRegResult, cOpBits > 32, /* ASSUMES CF calculated! */
1720 cOpBits - 1, kArmv8A64InstrShift_Lsr);
1721 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_OF_BIT, 1, false /*f64Bit*/);
1722
1723 /* AMD unconditionally sets AF. */
1724 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 32 - X86_EFL_AF_BIT) == X86_EFL_AF);
1725 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxRegEfl, idxRegEfl, 0, 32 - X86_EFL_AF_BIT, false /*f64Bit*/);
1726 }
1727#else
1728# error "port me"
1729#endif
1730 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1731
1732# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
1733 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
1734# endif
1735 }
1736 return off;
1737}
1738
1739
1740DECL_INLINE_THROW(uint32_t)
1741iemNativeEmit_shl_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1742 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1743{
1744 /* Note! Since we're doing some branching here, we need to allocate all
1745 registers we need before the jump or we may end up with invalid
1746 register state if the branch is taken. */
1747 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off); /* Do this first in hope we'll get EAX. */
1748 uint8_t const idxRegCount = iemNativeVarRegisterAcquire(pReNative, idxVarCount, &off, true /*fInitialized*/); /* modified on arm64 */
1749 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1750 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1751
1752#ifdef RT_ARCH_AMD64
1753 /* Make sure IEM_MC_NATIVE_AMD64_HOST_REG_FOR_LOCAL was used. */
1754 AssertStmt(idxRegCount == X86_GREG_xCX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_UNEXPECTED_VAR_REGISTER));
1755
1756 /* We only need a copy of the input value if the target CPU differs from the host CPU. */
1757 uint8_t const idxRegDstIn = pReNative->pVCpu->iem.s.aidxTargetCpuEflFlavour[1] == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
1758 ? UINT8_MAX : iemNativeRegAllocTmp(pReNative, &off);
1759 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4+2+3+4);
1760
1761 /* Check if it's NOP before we do anything. */
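/* A count of zero (after the 0x1f/0x3f masking) must leave both the destination
   and EFLAGS untouched, so we jump straight over the shift and the flag calc. */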
1762 off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, idxRegCount, cOpBits <= 32 ? 0x1f : 0x3f);
1763 uint32_t const offFixup = off;
1764 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit should be enough */, kIemNativeInstrCond_z);
1765
1766 if (idxRegDstIn != UINT8_MAX)
1767 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1768 off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0xd2, 0xd3, cOpBits, 4, idxRegDst);
1769
1770#elif defined(RT_ARCH_ARM64)
1771 /* We always need a copy of the input value (except when we can skip the EFLAGS calcs). */
1772 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1773 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
1774
1775 /* Check if it's NOP before we do anything. We MODIFY idxRegCount here! */
1776 Assert(Armv8A64ConvertImmRImmS2Mask32(4, 0) == 0x1f);
1777 Assert(Armv8A64ConvertImmRImmS2Mask32(5, 0) == 0x3f);
1778 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegCount, idxRegCount, cOpBits > 32 ? 5 : 4, 0, false /*f64Bit*/);
1779 uint32_t const offFixup = off;
1780 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off, kArmv8InstrCond_Eq);
1781
1782 pCodeBuf[off++] = Armv8A64MkInstrMov(idxRegDstIn, idxRegDst);
1783 pCodeBuf[off++] = Armv8A64MkInstrLslv(idxRegDst, idxRegDst, idxRegCount, cOpBits > 32 /*f64Bit*/);
1784 if (cOpBits < 32)
1785 {
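/* Zero the bits shifted beyond the operand width so idxRegDst holds a properly
   truncated 8/16-bit result. */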
1786 Assert(Armv8A64ConvertImmRImmS2Mask32(7, 0) == 0xff);
1787 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1788 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegDst, idxRegDst, cOpBits - 1, 0, false /*f64Bit*/);
1789 }
1790
1791#else
1792# error "port me"
1793#endif
1794
1795 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1796 off = iemNativeEmitEFlagsForShift(pReNative, off, idxRegEfl, idxRegDst, idxRegDstIn, idxRegCount,
1797 cOpBits, kIemNativeEmitEFlagsForShiftType_Left, idxRegTmp);
1798
1799 /* fixup the jump */
1800 iemNativeFixupFixedJump(pReNative, offFixup, off);
1801
1802#ifdef RT_ARCH_AMD64
1803 if (idxRegDstIn != UINT8_MAX)
1804#endif
1805 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1806 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1807 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1808 iemNativeVarRegisterRelease(pReNative, idxVarCount);
1809 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1810 return off;
1811}
1812
1813
1814DECL_INLINE_THROW(uint32_t)
1815iemNativeEmit_shr_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1816 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1817{
1818 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1819 AssertFailed();
1820 return iemNativeEmitBrk(pReNative, off, 0x666);
1821}
1822
1823
1824DECL_INLINE_THROW(uint32_t)
1825iemNativeEmit_sar_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1826 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1827{
1828 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1829 AssertFailed();
1830 return iemNativeEmitBrk(pReNative, off, 0x666);
1831}
1832
1833
1834DECL_INLINE_THROW(uint32_t)
1835iemNativeEmit_rol_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1836 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1837{
1838 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1839 AssertFailed();
1840 return iemNativeEmitBrk(pReNative, off, 0x666);
1841}
1842
1843
1844DECL_INLINE_THROW(uint32_t)
1845iemNativeEmit_ror_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1846 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1847{
1848 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1849 AssertFailed();
1850 return iemNativeEmitBrk(pReNative, off, 0x666);
1851}
1852
1853
1854DECL_INLINE_THROW(uint32_t)
1855iemNativeEmit_rcl_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1856 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1857{
1858 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1859 AssertFailed();
1860 return iemNativeEmitBrk(pReNative, off, 0x666);
1861}
1862
1863
1864DECL_INLINE_THROW(uint32_t)
1865iemNativeEmit_rcr_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1866 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1867{
1868 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1869 AssertFailed();
1870 return iemNativeEmitBrk(pReNative, off, 0x666);
1871}
1872
1873
1874
1875#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1876/*********************************************************************************************************************************
1877* SIMD emitters. *
1878*********************************************************************************************************************************/
1879
1880/**
1881 * Common emitter for the PXOR, XORPS, XORPD instructions - guest register / guest register variant.
1882 */
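/* Note: PXOR, XORPS and XORPD all produce the same 128-bit XOR result; they only
   differ in encoding/execution domain, so a single emitter covers all three. */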
1883DECL_INLINE_THROW(uint32_t)
1884iemNativeEmit_pxor_rr_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1885 uint8_t const idxSimdGstRegDst, uint8_t const idxSimdGstRegSrc)
1886{
1887 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
1888 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
1889 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegSrc),
1890 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
1891
1892#ifdef RT_ARCH_AMD64
1893 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1894
1895 /* pxor xmm, xmm */
1896 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1897 if (idxSimdRegDst >= 8 || idxSimdRegSrc >= 8)
1898 pCodeBuf[off++] = (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
1899 | (idxSimdRegDst >= 8 ? X86_OP_REX_R : 0);
1900 pCodeBuf[off++] = 0x0f;
1901 pCodeBuf[off++] = 0xef;
1902 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdRegDst & 7, idxSimdRegSrc & 7);
1903
1904#elif defined(RT_ARCH_ARM64)
1905 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1906
1907 pCodeBuf[off++] = Armv8A64MkVecInstrEor(idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc);
1908#else
1909# error "port me"
1910#endif
1911
1912 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
1913 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
1914
1915 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1916 return off;
1917}
1918
1919
1920/**
1921 * Common emitter for the PXOR, XORPS, XORPD instructions - guest register / recompiler variable variant.
1922 */
1923DECL_INLINE_THROW(uint32_t)
1924iemNativeEmit_pxor_rv_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1925 uint8_t const idxSimdGstRegDst, uint8_t const idxVarSrc)
1926{
1927 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
1928 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarSrc, sizeof(RTUINT128U));
1929
1930 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
1931 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
1932 uint8_t const idxSimdRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1933
1934
1935#ifdef RT_ARCH_AMD64
1936 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1937
1938 /* pxor xmm, xmm */
1939 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1940 if (idxSimdRegDst >= 8 || idxSimdRegSrc >= 8)
1941 pCodeBuf[off++] = (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
1942 | (idxSimdRegDst >= 8 ? X86_OP_REX_R : 0);
1943 pCodeBuf[off++] = 0x0f;
1944 pCodeBuf[off++] = 0xef;
1945 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdRegDst & 7, idxSimdRegSrc & 7);
1946
1947#elif defined(RT_ARCH_ARM64)
1948 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1949
1950 pCodeBuf[off++] = Armv8A64MkVecInstrEor(idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc);
1951#else
1952# error "port me"
1953#endif
1954
1955 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
1956 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1957
1958 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1959 return off;
1960}
1961
1962
1963/**
1964 * Common emitter for the PAND, ANDPS, ANDPD instructions - guest register / guest register variant.
1965 */
1966DECL_INLINE_THROW(uint32_t)
1967iemNativeEmit_pand_rr_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1968 uint8_t const idxSimdGstRegDst, uint8_t const idxSimdGstRegSrc)
1969{
1970 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
1971 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
1972 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegSrc),
1973 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
1974
1975#ifdef RT_ARCH_AMD64
1976 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1977
1978 /* pand xmm, xmm */
1979 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1980 if (idxSimdRegDst >= 8 || idxSimdRegSrc >= 8)
1981 pCodeBuf[off++] = (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
1982 | (idxSimdRegDst >= 8 ? X86_OP_REX_R : 0);
1983 pCodeBuf[off++] = 0x0f;
1984 pCodeBuf[off++] = 0xdb;
1985 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdRegDst & 7, idxSimdRegSrc & 7);
1986
1987#elif defined(RT_ARCH_ARM64)
1988 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1989
1990 pCodeBuf[off++] = Armv8A64MkVecInstrAnd(idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc);
1991#else
1992# error "port me"
1993#endif
1994
1995 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
1996 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
1997
1998 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1999 return off;
2000}
2001
2002
2003/**
2004 * Common emitter for the PAND, ANDPS, ANDPD instructions - guest register / recompiler variable variant.
2005 */
2006DECL_INLINE_THROW(uint32_t)
2007iemNativeEmit_pand_rv_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2008 uint8_t const idxSimdGstRegDst, uint8_t const idxVarSrc)
2009{
2010 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2011 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarSrc, sizeof(RTUINT128U));
2012
2013 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
2014 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
2015 uint8_t const idxSimdRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
2016
2017
2018#ifdef RT_ARCH_AMD64
2019 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
2020
2021 /* pand xmm, xmm */
2022 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2023 if (idxSimdRegDst >= 8 || idxSimdRegSrc >= 8)
2024 pCodeBuf[off++] = (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
2025 | (idxSimdRegDst >= 8 ? X86_OP_REX_R : 0);
2026 pCodeBuf[off++] = 0x0f;
2027 pCodeBuf[off++] = 0xdb;
2028 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdRegDst & 7, idxSimdRegSrc & 7);
2029
2030#elif defined(RT_ARCH_ARM64)
2031 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2032
2033 pCodeBuf[off++] = Armv8A64MkVecInstrAnd(idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc);
2034#else
2035# error "port me"
2036#endif
2037
2038 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
2039 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2040
2041 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2042 return off;
2043}
2044#endif
2045
2046#endif /* !VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h */