VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h@103886

Last change on this file since 103886 was 103828, checked in by vboxsync, 13 months ago

VMM/IEM: Implemented simple (wholesale) status flag update skipping for arithmetic operations with the native emitter. bugref:10375

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 68.7 KB
/* $Id: IEMAllN8veEmit-x86.h 103828 2024-03-13 14:01:20Z vboxsync $ */
/** @file
 * IEM - Native Recompiler, x86 Target - Code Emitters.
 */

/*
 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
#define VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif


#ifdef RT_ARCH_AMD64

/**
 * Emits a ModR/M instruction with one opcode byte and only register operands.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64OneByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOther,
                                        uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
{
    Assert(idxRegReg < 16); Assert(idxRegRm < 16);
    switch (cOpBits)
    {
        case 16:
            pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
            RT_FALL_THRU();
        case 32:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcodeOther;
            break;

        default: AssertFailed(); RT_FALL_THRU();
        case 64:
            pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcodeOther;
            break;

        case 8:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            else if (idxRegReg >= 4 || idxRegRm >= 4)
                pCodeBuf[off++] = X86_OP_REX;
            pCodeBuf[off++] = bOpcode8;
            break;
    }
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
    return off;
}
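
/* Worked example (editorial, not in the original source): calling this with
   bOpcodeOther=0x23 (AND r32, r/m32), cOpBits=32, idxRegReg=0 (eax) and
   idxRegRm=1 (ecx) emits the two bytes 23 C1, i.e. 'and eax, ecx'; no REX
   prefix is needed since both register indexes are below 8. */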


/**
 * Emits a ModR/M instruction with two opcode bytes and only register operands.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64TwoByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
                                        uint8_t bOpcode0, uint8_t bOpcode8, uint8_t bOpcodeOther,
                                        uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
{
    Assert(idxRegReg < 16); Assert(idxRegRm < 16);
    switch (cOpBits)
    {
        case 16:
            pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
            RT_FALL_THRU();
        case 32:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcodeOther;
            break;

        default: AssertFailed(); RT_FALL_THRU();
        case 64:
            pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcodeOther;
            break;

        case 8:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            else if (idxRegReg >= 4 || idxRegRm >= 4)
                pCodeBuf[off++] = X86_OP_REX;
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcode8;
            break;
    }
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
    return off;
}
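
/* Worked example (editorial, not in the original source): the ADC/SBB emitters
   below use this with bOpcode0=0x0f, bOpcodeOther=0xba and idxRegReg=4 to
   encode BT r/m32, imm8.  For idxRegRm=2 (edx) that produces 0F BA E2,
   followed by the caller-supplied immediate byte. */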


/**
 * Emits one of three opcodes with an immediate.
 *
 * These are expected to be of the /idxRegReg form, i.e. ModR/M.reg holds the
 * opcode extension rather than a register.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64OneByteModRmInstrRIEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOtherImm8,
                                        uint8_t bOpcodeOther, uint8_t cOpBits, uint8_t cImmBits, uint8_t idxRegReg,
                                        uint8_t idxRegRm, uint64_t uImmOp)
{
    Assert(idxRegReg < 8); Assert(idxRegRm < 16);
    if (   cImmBits == 8
        || (uImmOp <= (uint64_t)0x7f && bOpcodeOtherImm8 != 0xcc))
    {
        switch (cOpBits)
        {
            case 16:
                pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
                RT_FALL_THRU();
            case 32:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
                break;

            default: AssertFailed(); RT_FALL_THRU();
            case 64:
                pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
                pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
                break;

            case 8:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                else if (idxRegRm >= 4)
                    pCodeBuf[off++] = X86_OP_REX;
                pCodeBuf[off++] = bOpcode8; Assert(bOpcode8 != 0xcc);
                break;
        }
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
        pCodeBuf[off++] = (uint8_t)uImmOp;
    }
    else
    {
        switch (cOpBits)
        {
            case 32:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                break;

            default: AssertFailed(); RT_FALL_THRU();
            case 64:
                pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
                break;

            case 16:
                pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                pCodeBuf[off++] = bOpcodeOther;
                pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
                pCodeBuf[off++] = RT_BYTE1(uImmOp);
                pCodeBuf[off++] = RT_BYTE2(uImmOp);
                Assert(cImmBits == 16);
                return off;
        }
        pCodeBuf[off++] = bOpcodeOther;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
        pCodeBuf[off++] = RT_BYTE1(uImmOp);
        pCodeBuf[off++] = RT_BYTE2(uImmOp);
        pCodeBuf[off++] = RT_BYTE3(uImmOp);
        pCodeBuf[off++] = RT_BYTE4(uImmOp);
        Assert(cImmBits == 32);
    }
    return off;
}
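
/* Worked example (editorial, not in the original source): with bOpcode8=0x80,
   bOpcodeOtherImm8=0x83, bOpcodeOther=0x81, cOpBits=32, cImmBits=32,
   idxRegReg=4 (the AND opcode extension), idxRegRm=2 and uImmOp=0x7f, the
   sign-extendable imm8 path is taken and the bytes 83 E2 7F are emitted,
   i.e. 'and edx, 0x7f'.  Passing 0xcc as bOpcodeOtherImm8, as the TEST
   emitter below does, marks the instruction as having no imm8 form and
   forces the full-width immediate encoding. */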

#endif /* RT_ARCH_AMD64 */

/**
 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGICAL.
 *
 * It takes liveness stuff into account.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl
#ifndef RT_ARCH_AMD64
                              , uint8_t cOpBits, uint8_t idxRegResult, bool fNativeFlags = false
#endif
                              )
{
#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
    /** @todo */
#endif
    {
#ifdef RT_ARCH_AMD64
        /*
         * Collect flags and merge them with eflags.
         */
        /** @todo we could alternatively use SAHF here when host rax is free,
         *        since OF is cleared. */
        PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        /* pushf - do this before any reg allocations as they may emit instructions too. */
        pCodeBuf[off++] = 0x9c;

        uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
        /* pop tmp */
        if (idxTmpReg >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
        /* and tmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF);
        /* Clear the status bits in EFLs. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
        /* OR in the flags we collected. */
        off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
        iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);

#elif defined(RT_ARCH_ARM64)
        /*
         * Calculate flags.
         */
        uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);

        /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be an AND immediate, so use idxTmpReg for the constant. */
        off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, ~X86_EFL_STATUS_BITS);
        off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxTmpReg);

        /* N,Z -> SF,ZF */
        if (cOpBits < 32)
            pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegResult, cOpBits > 8); /* sets NZ */
        else if (!fNativeFlags)
            pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, idxRegResult, idxRegResult, cOpBits > 32 /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 30);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
        AssertCompile(X86_EFL_ZF_BIT + 1 == X86_EFL_SF_BIT);
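        /* Editorial note (not in the original source): the LSR by 30 moves
           NZCV.Z (bit 30) to bit 0 and NZCV.N (bit 31) to bit 1, so the single
           two-bit BFI above lands them in EFLAGS.ZF and EFLAGS.SF at once;
           the AssertCompile guards that ZF and SF really are adjacent. */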

        /* Calculate 8-bit parity of the result. */
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
                                             4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
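        /* Worked example (editorial, not in the original source): for an 8-bit
           result 0x53 (0101 0011b, four bits set) the folding EORs give
           0x53^0x05=0x56, 0x56^0x15=0x43 and 0x43^0x21=0x62, leaving the XOR
           of all eight bits in bit 0 (here 0).  The final EOR with immediate 1
           inverts it, yielding PF=1 for an even number of set bits, matching
           x86 parity semantics. */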

        iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);
#else
# error "port me"
#endif
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
        off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
# endif
    }
    return off;
}


/**
 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
 *
 * It takes liveness stuff into account.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitEFlagsForArithmetic(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl, uint8_t idxRegEflIn
#ifndef RT_ARCH_AMD64
                                 , uint8_t cOpBits, uint8_t idxRegResult, uint8_t idxRegDstIn, uint8_t idxRegSrc
                                 , bool fInvertCarry, uint64_t uImmSrc
#endif
                                 )
{
#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
    /*
     * See if we can skip this wholesale.
     */
    PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
    if (IEMLIVENESS_STATE_ARE_STATUS_EFL_TO_BE_CLOBBERED(pLivenessEntry))
    {
        STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflArithmeticSkipped);
# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
        off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, X86_EFL_STATUS_BITS, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
# endif
    }
    else
#endif
    {
#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
        uint32_t fSkipped = 0;
#endif
#ifdef RT_ARCH_AMD64
        /*
         * Collect flags and merge them with eflags.
         */
        PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        /* pushf - do this before any reg allocations as they may emit instructions too. */
        pCodeBuf[off++] = 0x9c;

        uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
                                : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
        /* pop tmp */
        if (idxTmpReg >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
        /* Isolate the flags we want. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_STATUS_BITS);
        /* Clear the status bits in EFLs. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
        /* OR in the flags we collected. */
        off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
        if (idxRegEflIn != idxRegEfl)
            iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);

#elif defined(RT_ARCH_ARM64)
        /*
         * Calculate flags.
         */
        uint8_t const idxRegEfl  = idxRegEflIn != UINT8_MAX ? idxRegEflIn
                                 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg  = iemNativeRegAllocTmp(pReNative, &off);
        uint8_t const idxTmpReg2 = cOpBits >= 32 ? UINT8_MAX : iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);

        /* Invert CF (it is stored inverted on ARM for subtractions) and load the flags into the temporary register. */
        if (fInvertCarry)
            pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
        pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */

        if (cOpBits >= 32)
        {
            /* V -> OF */
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 28);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1, false /*f64Bit*/);

            /* C -> CF */
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 1);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_CF_BIT, 1, false /*f64Bit*/);
        }

        /* N,Z -> SF,ZF */
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits >= 32 ? 1 : 30);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);

        /* For ADC and SBB we have to calculate overflow and carry ourselves. */
        if (cOpBits < 32)
        {
            /* Since the carry flag is the zeroth EFLAGS bit, we just use BFXIL to copy it over. */
            AssertCompile(X86_EFL_CF_BIT == 0);
            pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxRegEfl, idxRegResult, cOpBits, 1, false /*f64Bit*/);

            /* The overflow flag is more work as we have to compare the signed bits for
               both inputs and the result. See IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.

               Formula: ~(a_uDst ^ a_uSrcOf) & (a_uResult ^ a_uDst)
               With a_uSrcOf as a_uSrc for additions and ~a_uSrc for subtractions.

               It is a bit simpler when the right (source) side is constant:
                 adc: S D R -> OF                      sbb: S D R -> OF
                      0 0 0 ->  0 \                         0 0 0 ->  0 \
                      0 0 1 ->  1  \                        0 0 1 ->  0  \
                      0 1 0 ->  0  / and not(D), R          0 1 0 ->  1  / and D, not(R)
                      0 1 1 ->  0 /                         0 1 1 ->  0 /
                      1 0 0 ->  0 \                         1 0 0 ->  0 \
                      1 0 1 ->  0  \ and D, not(R)          1 0 1 ->  1  \ and not(D), R
                      1 1 0 ->  1  /                        1 1 0 ->  0  /
                      1 1 1 ->  0 /                         1 1 1 ->  0 /   */
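            /* Worked example (editorial, not in the original source; 8-bit adc):
               uDst=0x50, uSrcOf=0x50, uResult=0xA1 (0x50 + 0x50 + CF=1).
               ~(0x50 ^ 0x50) & (0xA1 ^ 0x50) = 0xFF & 0xF1 = 0xF1; bit 7 is
               set, so OF=1: two positive inputs yielded a negative result. */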
            if (idxRegSrc != UINT8_MAX)
            {
                if (fInvertCarry) /* sbb: ~((a_uDst) ^ ~(a_uSrcOf)) -> (a_uDst) ^ (a_uSrcOf); HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false);
                else              /* adc: ~((a_uDst) ^ (a_uSrcOf)) -> (a_uDst) ^ ~(a_uSrcOf) */
                    pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegSrc, false);
                pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg2, idxRegDstIn, idxRegResult, false); /* (a_uDst) ^ (a_uResult) */
                pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpReg, idxTmpReg, idxTmpReg2, false /*f64Bit*/);
            }
            else if (uImmSrc & RT_BIT_32(cOpBits - 1))
            {
                if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
                else
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
            }
            else
            {
                if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
                else
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
            }
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits - 1, false /*f64Bit*/);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1);
            iemNativeRegFreeTmp(pReNative, idxTmpReg2);
        }

        /* Calculate 8-bit parity of the result. */
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
                                             4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);

        /* Calculate auxiliary carry/borrow.  This is related to 8-bit BCD.
           General formula: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
               S D R
               0 0 0 -> 0;  \
               0 0 1 -> 1;   \  regular
               0 1 0 -> 1;   /    xor R, D
               0 1 1 -> 0;  /
               1 0 0 -> 1;  \
               1 0 1 -> 0;   \  invert one of the two
               1 1 0 -> 0;   /    xor not(R), D
               1 1 1 -> 1;  /
           a_uSrc[bit 4]=0: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
           a_uSrc[bit 4]=1: ((uint32_t)~(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
         */
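        /* Worked example (editorial, not in the original source; 8-bit):
           0x0F + 0x01 = 0x10, and (0x10 ^ 0x01 ^ 0x0F) = 0x1E has bit 4
           (X86_EFL_AF) set: the addition carried out of the low nibble,
           which is exactly what AF tracks. */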

        if (idxRegSrc != UINT8_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false /*f64Bit*/);
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxRegResult, false /*f64Bit*/);
        }
        else if (uImmSrc & X86_EFL_AF)
            pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, X86_EFL_AF_BIT, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_AF_BIT, 1, false /*f64Bit*/);

        if (idxRegEflIn != idxRegEfl)
            iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);

#else
# error "port me"
#endif
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
        off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, fSkipped, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
#endif
    }
    return off;

}


/**
 * The AND instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_and_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized AND instruction to harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x22, 0x23, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, true /*fNativeFlags*/);
#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The AND instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_and_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized AND instruction to harvest the EFLAGS. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 4, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones, and of course
       the immediate variant when possible, to save a register load. */
    uint32_t uImmSizeLen, uImmRotations;
    if (   cOpBits > 32
        ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
        : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        if (cOpBits >= 32)
            pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
    }
    else
    {
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        if (cOpBits >= 32)
            pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, cOpBits >= 32 /*fNativeFlags*/);
    RT_NOREF_PV(cImmBits);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The TEST instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_test_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = idxVarSrc == idxVarDst ? idxRegDst /* special case of 'test samereg,samereg' */
                            : iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized TEST instruction to harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x84, 0x85, cOpBits, idxRegSrc, idxRegDst);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones.  We also need
       to keep the result in order to calculate the flags. */
    uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    else
        pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#else
# error "Port me"
#endif
    if (idxVarSrc != idxVarDst)
        iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
#else
    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
    iemNativeRegFreeTmp(pReNative, idxRegResult);
#endif
    return off;
}


/**
 * The TEST instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_test_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized TEST instruction to harvest the EFLAGS. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0xf6, 0xcc, 0xf7, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones, and of course
       the immediate variant when possible, to save a register load.  We also
       need to keep the result in order to calculate the flags. */
    uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    uint32_t uImmSizeLen, uImmRotations;
    if (   cOpBits > 32
        ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
        : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        if (cOpBits >= 32)
            pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
    }
    else
    {
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        if (cOpBits >= 32)
            pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);

    iemNativeRegFreeTmp(pReNative, idxRegResult);
    RT_NOREF_PV(cImmBits);

#else
# error "Port me"
#endif
    return off;
}


/**
 * The OR instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_or_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                         uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized OR instruction to harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x0a, 0x0b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit OR for the 8-bit and 16-bit ones. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The OR instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_or_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                         uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized OR instruction to harvest the EFLAGS. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 1, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit OR for the 8-bit and 16-bit ones, and of course
       the immediate variant when possible, to save a register load. */
    uint32_t uImmSizeLen, uImmRotations;
    if (   cOpBits > 32
        ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
        : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
    }
    else
    {
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
    RT_NOREF_PV(cImmBits);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The XOR instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_xor_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized XOR instruction to harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x32, 0x33, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit EOR for the 8-bit and 16-bit ones. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The XOR instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_xor_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized XOR instruction to harvest the EFLAGS. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 6, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit EOR for the 8-bit and 16-bit ones, and of course
       the immediate variant when possible, to save a register load. */
    uint32_t uImmSizeLen, uImmRotations;
    if (   cOpBits > 32
        ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
        : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
    }
    else
    {
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
    RT_NOREF_PV(cImmBits);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The ADD instruction will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_add_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x02, 0x03, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in
       order to calculate the right flags, even though we use ADDS and
       translate NZCV into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    if (cOpBits >= 32)
    {
        off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
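        /* Editorial note (not in the original source): with both operands
           placed in the top cOpBits bits of a 32-bit register, the 32-bit
           carry and signed overflow coincide with the cOpBits-wide ones, so
           ADDS reports correct C and V for 8/16-bit operations; the inputs
           and the result are shifted back down afterwards. */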
        uint32_t const cShift = 32 - cOpBits;
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
                                                true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    return off;
}


/**
 * The ADD instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_add_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in
       order to calculate the right flags, even though we use ADDS and
       translate NZCV into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
    {
        if (uImmOp <= 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
        else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
                                                       true /*fSetFlags*/, true /*fShift12*/);
        else
        {
            uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
            pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
            pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
            iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
        }
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp << cShift);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegTmpImm, idxRegDstIn, false /*f64Bit*/, true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    return off;
}


/**
 * The ADC instruction takes CF as input and will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_adc_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);

    /* Use the BT instruction to set CF according to idxRegEfl. */
    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x12, 0x13, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl and
       then ADCS for the calculation.  We need all inputs and the result for the
       two flags (AF, PF) that can't be directly derived from PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
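    /* Editorial note (not in the original source): RMIF rotates the source
       right by (X86_EFL_CF_BIT - 1) & 63 = 63, i.e. left by one, moving
       EFLAGS.CF (bit 0) up to bit 1, and the mask RT_BIT_32(1) then inserts
       that bit into PSTATE.C (NZCV nibble order: N=3, Z=2, C=1, V=0). */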
1001 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1002 if (cOpBits >= 32)
1003 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
1004 else
1005 {
1006 /* Since we're also adding in the carry flag here, shifting operands up
1007 doesn't work. So, we have to calculate carry & overflow manually. */
1008 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
1009 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1010 }
1011 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1012
1013 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1014 idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);
1015
1016 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1017 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1018 if (cOpBits < 32)
1019 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1020 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1021
1022#else
1023# error "port me"
1024#endif
1025 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1026 return off;
1027}
1028
1029
1030/**
1031 * The ADC instruction with immediate value as right operand.
1032 */
1033DECL_INLINE_THROW(uint32_t)
1034iemNativeEmit_adc_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1035 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1036{
1037 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1038 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1039
1040#ifdef RT_ARCH_AMD64
1041 /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
1042 with matching size to get the correct flags. */
1043 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1044
1045 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1046 pCodeBuf[off++] = X86_EFL_CF_BIT;
1047
1048 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 2, idxRegDst, uImmOp);
1049 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1050
1051 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1052
1053 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1054
1055#elif defined(RT_ARCH_ARM64)
1056 /* On ARM64 we use the RMIF instructions to load PSTATE.CF from idxRegEfl
1057 and then ADCS for the calculation. We need all inputs and result for
1058 the two flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
1059 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1060 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1061 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1062
1063 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1064 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1065 if (cOpBits >= 32)
1066 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
1067 else
1068 {
1069 /* Since we're also adding in the carry flag here, shifting operands up
1070 doesn't work. So, we have to calculate carry & overflow manually. */
1071 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
1072 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1073 }
1074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1075
1076 iemNativeRegFreeTmp(pReNative, idxRegImm);
1077
1078 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1079 idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);
1080
1081 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1082 if (cOpBits < 32)
1083 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1084 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1085 RT_NOREF(cImmBits);
1086
1087#else
1088# error "port me"
1089#endif
1090 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1091 return off;
1092}
1093
1094
1095/**
1096 * The SUB instruction will set all status flags.
1097 */
1098DECL_INLINE_THROW(uint32_t)
1099iemNativeEmit_sub_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1100 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1101{
1102 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1103 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1104
1105#ifdef RT_ARCH_AMD64
1106 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS.SF value. */
1107 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1108 0x2a, 0x2b, cOpBits, idxRegDst, idxRegSrc);
1109 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1110
1111 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1112 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1113
1114 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1115
1116#elif defined(RT_ARCH_ARM64)
1117 /* On ARM64 we'll need the two input operands as well as the result in order
1118 to calculate the right flags, even if we use SUBS and translates NZCV into
1119 OF, CF, ZF and SF. */
1120 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1121 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1122 if (cOpBits >= 32)
1123 {
1124 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1125 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1126 }
1127 else
1128 {
1129 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1130 uint32_t const cShift = 32 - cOpBits;
1131 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1132 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
1133 true /*fSetFlags*/, cShift);
1134 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1135 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1136 cOpBits = 32;
1137 }
1138 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1139
1140 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1141 idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);
1142
1143 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1144 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1145 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1146
1147#else
1148# error "port me"
1149#endif
1150 return off;
1151}
1152
1153
1154/**
1155 * The SUB instruction with immediate value as right operand.
1156 */
1157DECL_INLINE_THROW(uint32_t)
1158iemNativeEmit_sub_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1159 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1160{
1161 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1162
1163#ifdef RT_ARCH_AMD64
1164 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS.SF value. */
1165 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1166 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 5, idxRegDst, uImmOp);
1167 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1168
1169 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1170
1171 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1172
1173#elif defined(RT_ARCH_ARM64)
1174 /* On ARM64 we'll need the two input operands as well as the result in order
1175 to calculate the right flags, even if we use SUBS and translates NZCV into
1176 OF, CF, ZF and SF. */
1177 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1178 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1179 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1180 if (cOpBits >= 32)
1181 {
1182 if (uImmOp <= 0xfffU)
1183 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1184 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1185 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
1186 true /*fSetFlags*/, true /*fShift12*/);
1187 else
1188 {
1189 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1190 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1191 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1192 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1193 }
1194 }
1195 else
1196 {
1197 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1198 uint32_t const cShift = 32 - cOpBits;
1199 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1200 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1201 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1202 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
1203 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1204 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1205 cOpBits = 32;
1206 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1207 }
1208 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1209
1210 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1211 idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1212
1213 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1214 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1215 RT_NOREF(cImmBits);
1216
1217#else
1218# error "port me"
1219#endif
1220 return off;
1221}
1222
1223
1224/**
1225 * The CMP instruction will set all status flags, but modifies no registers.
1226 */
1227DECL_INLINE_THROW(uint32_t)
1228iemNativeEmit_cmp_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1229 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1230{
1231 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1232 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1233
1234#ifdef RT_ARCH_AMD64
1235 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS.SF value. */
1236 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1237 0x3a, 0x3b, cOpBits, idxRegDst, idxRegSrc);
1238 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1239
1240 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1241 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1242
1243 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1244
1245#elif defined(RT_ARCH_ARM64)
1246 /* On ARM64 we'll need the actual result as well as both input operands in order
1247 to calculate the right flags, even if we use SUBS and translates NZCV into
1248 OF, CF, ZF and SF. */
1249 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1250 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1251 if (cOpBits >= 32)
1252 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1253 else
1254 {
1255 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1256 uint32_t const cShift = 32 - cOpBits;
1257 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegResult, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1258 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegSrc, false /*f64Bit*/,
1259 true /*fSetFlags*/, cShift);
1260 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
1261 cOpBits = 32;
1262 }
1263 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1264
1265 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1266 idxRegDst, idxRegSrc, true /*fInvertCarry*/, 0);
1267
1268 iemNativeRegFreeTmp(pReNative, idxRegResult);
1269 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1270 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1271
1272#else
1273# error "port me"
1274#endif
1275 return off;
1276}
1277
1278
1279/**
1280 * The CMP instruction with immediate value as right operand.
1281 */
1282DECL_INLINE_THROW(uint32_t)
1283iemNativeEmit_cmp_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1284 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1285{
1286 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1287
1288#ifdef RT_ARCH_AMD64
1289 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS.SF value. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 7, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the actual result as well as both input operands in order
       to calculate the right flags, even though we use SUBS and translate NZCV into
       OF, CF, ZF and SF. */
    uint8_t const   idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR pCodeBuf     = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (cOpBits >= 32)
    {
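        /* ARM64 SUB (immediate) encodes only a 12-bit immediate, optionally shifted
           left by 12; anything else must first be loaded into a temporary register. */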
        if (uImmOp <= 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
        else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
                                                       true /*fSetFlags*/, true /*fShift12*/);
        else
        {
            uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
            pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
            pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
            iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
        }
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift       = 32 - cOpBits;
        uint8_t const  idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegResult, idxRegDst, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
        cOpBits = 32;
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
                                           idxRegDst, UINT8_MAX, true /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegResult);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    return off;
}


/**
 * The SBB instruction takes CF as input and will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sbb_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
       with matching size to get the correct flags. */
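    /* BT r/m32, imm8 (0F BA /4 ib) copies the selected bit, i.e. X86_EFL_CF_BIT of
       the guest EFLAGS value, into the host CF without modifying the operand. */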
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);

    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x1a, 0x1b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
       idxRegEfl and then SBCS for the calculation.  We need all inputs and
       result for the two flags (AF,PF) that can't be directly derived from
       PSTATE.NZCV. */
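    /* RMIF rotates idxRegEfl right by 63 (i.e. left by one), moving the x86 CF from
       bit 0 into bit 1, which the mask RT_BIT_32(1) selects as PSTATE.C.  CFINV then
       inverts it, since AArch64 represents a subtraction borrow as C=0. */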
    uint8_t const         idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 5);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also adding in the carry flag here, shifting operands up
           doesn't work. So, we have to calculate carry & overflow manually. */
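        /* SETF8/SETF16 fix up N and Z for the narrow result; the x86 CF and OF are
           computed later by iemNativeEmitEFlagsForArithmetic from the saved inputs. */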
        pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
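    /* For 8- and 16-bit operands the SBC above produced a full 32-bit result, so
       mask it back down to the operand width before returning the register. */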
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}


/**
 * The SBB instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sbb_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);

    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 3, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
       idxRegEfl and then SBCS for the calculation.  We need all inputs and
       result for the two flags (AF,PF) that can't be directly derived from
       PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    uint8_t const idxRegImm   = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
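    /* SBCS has no immediate form, so the immediate operand must be materialized
       in a temporary register for the subtraction below. */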
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also adding in the carry flag here, shifting operands up
           doesn't work. So, we have to calculate carry & overflow manually. */
        pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeRegFreeTmp(pReNative, idxRegImm);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}

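/*
 * The following emitters have not been implemented natively yet.  They hit
 * AssertFailed() and emit a breakpoint (marker 0x666) so any unexpected use
 * is caught immediately at runtime.
 */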
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_imul_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_popcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                             uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_tzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                            uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_lzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                            uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


#endif /* !VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h */