VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h@103764

Last change on this file since 103764 was 103744, checked in by vboxsync, 14 months ago

VMM/IEM: Implemented iemNativeEmit_adc_r_i_efl and enabled it for both hosts. bugref:10376

/* $Id: IEMAllN8veEmit-x86.h 103744 2024-03-09 02:52:46Z vboxsync $ */
/** @file
 * IEM - Native Recompiler, x86 Target - Code Emitters.
 */

/*
 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
#define VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif


#ifdef RT_ARCH_AMD64

/**
 * Emits a ModR/M instruction with one opcode byte and only register operands.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64OneByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOther,
                                        uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
{
    Assert(idxRegReg < 16); Assert(idxRegRm < 16);
    switch (cOpBits)
    {
        case 16:
            pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
            RT_FALL_THRU();
        case 32:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcodeOther;
            break;

        default: AssertFailed(); RT_FALL_THRU();
        case 64:
            pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcodeOther;
            break;

        case 8:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            else if (idxRegReg >= 4 || idxRegRm >= 4)
                pCodeBuf[off++] = X86_OP_REX;
            pCodeBuf[off++] = bOpcode8;
            break;
    }
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
    return off;
}
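
/* Worked example (illustrative, assuming the standard x86-64 encodings): for AND,
   bOpcode8=0x22 / bOpcodeOther=0x23, the emitter produces e.g.
       cOpBits=16, reg=0 (ax),  rm=1 (cx)  -> 66 23 C1   (and ax, cx)
       cOpBits=64, reg=8 (r8),  rm=1 (rcx) -> 4C 23 C1   (and r8, rcx)
       cOpBits=8,  reg=6 (sil), rm=5 (bpl) -> 40 22 F5   (and sil, bpl; the bare REX
                                                          selects the new low byte regs)
   Mod is always 3 (register direct), so no SIB or displacement bytes follow. */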


/**
 * Emits a ModR/M instruction with two opcode bytes and only register operands.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64TwoByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
                                        uint8_t bOpcode0, uint8_t bOpcode8, uint8_t bOpcodeOther,
                                        uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
{
    Assert(idxRegReg < 16); Assert(idxRegRm < 16);
    switch (cOpBits)
    {
        case 16:
            pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
            RT_FALL_THRU();
        case 32:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcodeOther;
            break;

        default: AssertFailed(); RT_FALL_THRU();
        case 64:
            pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcodeOther;
            break;

        case 8:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            else if (idxRegReg >= 4 || idxRegRm >= 4)
                pCodeBuf[off++] = X86_OP_REX;
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcode8;
            break;
    }
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
    return off;
}


/**
 * Emits one of three opcodes with an immediate.
 *
 * These are expected to use the /idxRegReg form, i.e. the ModR/M reg field
 * holds an opcode extension rather than a register.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64OneByteModRmInstrRIEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOtherImm8,
                                        uint8_t bOpcodeOther, uint8_t cOpBits, uint8_t cImmBits, uint8_t idxRegReg,
                                        uint8_t idxRegRm, uint64_t uImmOp)
{
    Assert(idxRegReg < 8); Assert(idxRegRm < 16);
    if (cImmBits == 8 || uImmOp <= (uint64_t)0x7f)
    {
        switch (cOpBits)
        {
            case 16:
                pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
                RT_FALL_THRU();
            case 32:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                pCodeBuf[off++] = bOpcodeOtherImm8;
                break;

            default: AssertFailed(); RT_FALL_THRU();
            case 64:
                pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
                pCodeBuf[off++] = bOpcodeOtherImm8;
                break;

            case 8:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                else if (idxRegRm >= 4)
                    pCodeBuf[off++] = X86_OP_REX;
                pCodeBuf[off++] = bOpcode8;
                break;
        }
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
        pCodeBuf[off++] = (uint8_t)uImmOp;
    }
    else
    {
        switch (cOpBits)
        {
            case 32:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                break;

            default: AssertFailed(); RT_FALL_THRU();
            case 64:
                pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
                break;

            case 16:
                pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                pCodeBuf[off++] = bOpcodeOther;
                pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
                pCodeBuf[off++] = RT_BYTE1(uImmOp);
                pCodeBuf[off++] = RT_BYTE2(uImmOp);
                Assert(cImmBits == 16);
                return off;
        }
        pCodeBuf[off++] = bOpcodeOther;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
        pCodeBuf[off++] = RT_BYTE1(uImmOp);
        pCodeBuf[off++] = RT_BYTE2(uImmOp);
        pCodeBuf[off++] = RT_BYTE3(uImmOp);
        pCodeBuf[off++] = RT_BYTE4(uImmOp);
        Assert(cImmBits == 32);
    }
    return off;
}
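
/* Worked example (illustrative): with bOpcode8=0x80, bOpcodeOtherImm8=0x83,
   bOpcodeOther=0x81 and idxRegReg=0 (the /0 = ADD opcode extension):
       cOpBits=32, rm=0 (eax), uImmOp=1     -> 83 C0 01            (add eax, 1)
       cOpBits=32, rm=0 (eax), uImmOp=0x100 -> 81 C0 00 01 00 00   (add eax, 0x100)
   Immediates that fit in a sign-extended byte take the short imm8 form, the rest
   the full imm16/imm32 form. */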

#endif /* RT_ARCH_AMD64 */

/**
 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGICAL.
 *
 * It takes liveness stuff into account.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl,
                              uint8_t cOpBits, uint8_t idxRegResult)
{
#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    if (1) /** @todo check if all bits are clobbered. */
#endif
    {
#ifdef RT_ARCH_AMD64
        /*
         * Collect flags and merge them with eflags.
         */
        /** @todo we could alternatively use SAHF here when host rax is free,
         *        since OF is cleared. */
        PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        /* pushf - do this before any reg allocations as they may emit instructions too. */
        pCodeBuf[off++] = 0x9c;

        uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
        /* pop tmp */
        if (idxTmpReg >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
        /* and tmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF);
        /* Clear the status bits in EFLs. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
        /* OR in the flags we collected. */
        off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
        iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);
        RT_NOREF(cOpBits, idxRegResult);

#elif defined(RT_ARCH_ARM64)
        /*
         * Calculate flags.
         */
        uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);

        /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be an AND immediate, so use idxTmpReg for the constant. */
        off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, ~X86_EFL_STATUS_BITS);
        off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxTmpReg);

        /* Calculate ZF: cmp result, #0; cset tmp, eq; orr efl, efl, tmp, lsl #X86_EFL_ZF_BIT */
        if (cOpBits > 32)
            off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, idxRegResult, ARMV8_A64_REG_XZR);
        else
            off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, idxRegResult, ARMV8_A64_REG_XZR);
        pCodeBuf[off++] = Armv8A64MkInstrCSet(idxTmpReg, kArmv8InstrCond_Eq, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegEfl, idxRegEfl, idxTmpReg, false /*f64Bit*/, X86_EFL_ZF_BIT);

        /* Calculate SF: We could use the native N flag, but it's just as simple to calculate it by shifting. */
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxRegResult, cOpBits - 1, cOpBits > 32 /*f64Bit*/);
# if 0 /* BFI and ORR should have the same performance characteristics, so use BFI like we'll have to do for SUB/ADD/++. */
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegEfl, idxRegEfl, idxTmpReg, false /*f64Bit*/, X86_EFL_SF_BIT);
# else
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_SF_BIT, 1, false /*f64Bit*/);
# endif

        /* Calculate 8-bit parity of the result. */
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
                                             4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
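        /* Note (illustrative): the three EORs above fold the low 8 result bits so
           that tmp bit 0 = 1 when their population count is odd.  x86 PF is set on
           EVEN parity, hence the inverting EOR with 1 right below. */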
        Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);

        iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);
#else
# error "port me"
#endif
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    }
    return off;
}
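
#if 0 /* Plain C sketch of the EOR-folding parity calculation above; illustrative only, not compiled. */
DECL_FORCE_INLINE(uint32_t) iemNativeSketchCalcPf(uint32_t uResult)
{
    uint32_t uFold = uResult ^ (uResult >> 4);  /* fold bits 7:4 into 3:0 */
    uFold ^= uFold >> 2;                        /* fold bits 3:2 into 1:0 */
    uFold ^= uFold >> 1;                        /* bit 0 is now 1 iff bits 7:0 had odd parity */
    return (uFold & 1) ^ 1;                     /* x86 PF is set on even parity, so invert */
}
#endif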


/**
 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
 *
 * It takes liveness stuff into account.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitEFlagsForArithmetic(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl, uint8_t idxRegEflIn
#ifndef RT_ARCH_AMD64
                                 , uint8_t cOpBits, uint8_t idxRegResult, uint8_t idxRegDstIn, uint8_t idxRegSrc
                                 , bool fInvertCarry, uint64_t uImmSrc
#endif
                                 )
{
#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    if (1) /** @todo check if all bits are clobbered. */
#endif
    {
#ifdef RT_ARCH_AMD64
        /*
         * Collect flags and merge them with eflags.
         */
        PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        /* pushf - do this before any reg allocations as they may emit instructions too. */
        pCodeBuf[off++] = 0x9c;

        uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
                                : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
        /* pop tmp */
        if (idxTmpReg >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
        /* Isolate the flags we want. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_STATUS_BITS);
        /* Clear the status bits in EFLs. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
        /* OR in the flags we collected. */
        off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
        if (idxRegEflIn != idxRegEfl)
            iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);

#elif defined(RT_ARCH_ARM64)
        /*
         * Calculate flags.
         */
        uint8_t const idxRegEfl  = idxRegEflIn != UINT8_MAX ? idxRegEflIn
                                 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg  = iemNativeRegAllocTmp(pReNative, &off);
        uint8_t const idxTmpReg2 = cOpBits >= 32 ? UINT8_MAX : iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);

        /* Invert CF (stored inverted on ARM) and load the flags into the temporary register. */
        if (fInvertCarry)
            pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
        pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */

        if (cOpBits >= 32)
        {
            /* V -> OF */
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 28);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1, false /*f64Bit*/);

            /* C -> CF */
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 1);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_CF_BIT, 1, false /*f64Bit*/);
        }

        /* N,Z -> SF,ZF */
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits >= 32 ? 1 : 30);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);

        /* For ADC and SBB we have to calculate overflow and carry ourselves. */
        if (cOpBits < 32)
        {
            /* Since the carry flag is the zero'th flag, we just use BFXIL to copy it over. */
            AssertCompile(X86_EFL_CF_BIT == 0);
            pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxRegEfl, idxRegResult, cOpBits, 1, false /*f64Bit*/);

            /* The overflow flag is more work as we have to compare the signed bits for
               both inputs and the result. See IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.

               Formula: ~(a_uDst ^ a_uSrcOf) & (a_uResult ^ a_uDst)
               With a_uSrcOf as a_uSrc for additions and ~a_uSrc for subtractions.

               It is a bit simpler when the right (source) side is constant:
                 adc: S D R -> OF                  sbb: S D R -> OF
                      0 0 0 ->  0 \                     0 0 0 ->  0 \
                      0 0 1 ->  1  \                    0 0 1 ->  0  \
                      0 1 0 ->  0  / and not(D), R      0 1 0 ->  1  / and D, not(R)
                      0 1 1 ->  0 /                     0 1 1 ->  0 /
                      1 0 0 ->  0 \                     1 0 0 ->  0 \
                      1 0 1 ->  0  \ and D, not(R)      1 0 1 ->  1  \ and not(D), R
                      1 1 0 ->  1  /                    1 1 0 ->  0  /
                      1 1 1 ->  0 /                     1 1 1 ->  0 / */
            if (idxRegSrc != UINT8_MAX)
            {
                if (fInvertCarry) /* sbb: ~((a_uDst) ^ ~(a_uSrcOf)) -> (a_uDst) ^ (a_uSrcOf); HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false);
                else              /* adc: ~((a_uDst) ^ (a_uSrcOf))  -> (a_uDst) ^ ~(a_uSrcOf) */
                    pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegSrc, false);
                pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg2, idxRegDstIn, idxRegResult, false); /* (a_uDst) ^ (a_uResult) */
                pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpReg, idxTmpReg, idxTmpReg2, false /*f64Bit*/);
            }
            else if (uImmSrc & RT_BIT_32(cOpBits - 1))
            {
                if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
                else
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
            }
            else
            {
                if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
                else
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
            }
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits - 1, false /*f64Bit*/);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1);
            iemNativeRegFreeTmp(pReNative, idxTmpReg2);
        }

        /* Calculate 8-bit parity of the result. */
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
                                             4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);

        /* Calculate the auxiliary carry/borrow. This is related to 8-bit BCD.
           General formula: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
               S D R
               0 0 0 -> 0;  \
               0 0 1 -> 1;   \  regular
               0 1 0 -> 1;   /  xor R, D
               0 1 1 -> 0;  /
               1 0 0 -> 1;  \
               1 0 1 -> 0;   \  invert one of the two
               1 1 0 -> 0;   /  xor not(R), D
               1 1 1 -> 1;  /
           a_uSrc[bit 4]=0: ((uint32_t)(a_uResult)  ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
           a_uSrc[bit 4]=1: ((uint32_t)~(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
         */
        if (idxRegSrc != UINT8_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false /*f64Bit*/);
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxRegResult, false /*f64Bit*/);
        }
        else if (uImmSrc & X86_EFL_AF)
            pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, X86_EFL_AF_BIT, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_AF_BIT, 1, false /*f64Bit*/);

        if (idxRegEflIn != idxRegEfl)
            iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);
#else
# error "port me"
#endif
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    }
    return off;
}
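
#if 0 /* Plain C reference for the OF/AF bits computed above; illustrative only, not compiled.
         Mirrors IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC; fSub selects sub/sbb semantics. */
DECL_FORCE_INLINE(uint32_t) iemNativeSketchCalcOfAf(uint32_t uDst, uint32_t uSrc, uint32_t uResult,
                                                    uint8_t cOpBits, bool fSub)
{
    uint32_t const uSrcOf = fSub ? ~uSrc : uSrc;                                   /* invert source for subtraction */
    uint32_t const fOf    = (~(uDst ^ uSrcOf) & (uResult ^ uDst)) >> (cOpBits - 1) & 1;
    uint32_t const fAf    = ((uResult ^ uSrc ^ uDst) >> X86_EFL_AF_BIT) & 1;       /* carry out of bit 3 */
    return (fOf << X86_EFL_OF_BIT) | (fAf << X86_EFL_AF_BIT);
}
#endif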


/**
 * The AND instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_and_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized AND instruction and harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x22, 0x23, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit cases. */
    /** @todo we should use ANDS on ARM64 and get the ZF for free for all
     *        variants, and SF for 32-bit and 64-bit. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The AND instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_and_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
    return off;
}


/**
 * The TEST instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_test_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = idxVarSrc == idxVarDst ? idxRegDst /* special case of 'test samereg,samereg' */
                            : iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized TEST instruction and harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x84, 0x85, cOpBits, idxRegSrc, idxRegDst);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit cases. We also
       need to keep the result in order to calculate the flags. */
    /** @todo we should use ANDS on ARM64 and get the ZF for free for all
     *        variants, and SF for 32-bit and 64-bit. */
    uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#else
# error "Port me"
#endif
    if (idxVarSrc != idxVarDst)
        iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, UINT8_MAX);
#else
    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult);
    iemNativeRegFreeTmp(pReNative, idxRegResult);
#endif
    return off;
}


/**
 * The TEST instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_test_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
    return off;
}


/**
 * The OR instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_or_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                         uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized OR instruction and harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x0a, 0x0b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit ORR for the 8-bit and 16-bit cases. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The OR instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_or_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                         uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
    return off;
}


/**
 * The XOR instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_xor_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized XOR instruction and harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x32, 0x33, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit EOR for the 8-bit and 16-bit cases. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The XOR instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_xor_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
    return off;
}


/**
 * The ADD instruction will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_add_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x02, 0x03, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in order
       to calculate the right flags, even though we use ADDS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    if (cOpBits >= 32)
    {
        off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
                                                true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    return off;
}
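
/* Note (illustrative): shifting sub-32-bit operands into the top of a 32-bit register
   makes the host's carry/overflow out of bit 31 coincide with the guest's carry/overflow
   out of bit 7/15, so ADDS/SUBS yield the correct NZCV.  E.g. for an 8-bit 0x80 + 0x80,
   (0x80 << 24) + (0x80 << 24) sets C and V exactly like the 8-bit add does. */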


/**
 * The ADD instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_add_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in order
       to calculate the right flags, even though we use ADDS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
    {
        if (uImmOp <= 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
        else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
                                                       true /*fSetFlags*/, true /*fShift12*/);
        else
        {
            uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
            pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
            pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
            iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
        }
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp << cShift);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegTmpImm, idxRegDstIn, false /*f64Bit*/, true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    return off;
}


/**
 * The ADC instruction takes CF as input and will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_adc_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);

    /* Use the BT instruction to set CF according to idxRegEfl. */
    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x12, 0x13, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl and
       then ADCS for the calculation. We need all inputs and the result for the
       two flags (AF, PF) that can't be directly derived from PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also adding in the carry flag here, shifting operands up
           doesn't work. So, we have to calculate carry & overflow manually. */
        pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}
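
/* Note (illustrative): RMIF rotates the first operand right by the immediate and inserts
   the low four bits of the result into NZCV under the given mask (bit 3=N ... bit 0=V).
   Rotating by (X86_EFL_CF_BIT - 1) & 63 = 63 moves EFLAGS.CF (bit 0) up to bit 1, which
   the fMask=C (bit 1) selects as PSTATE.C, i.e. exactly the carry input ADCS/SBCS consume. */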


/**
 * The ADC instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_adc_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);

    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 2, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl
       and then ADCS for the calculation. We need all inputs and the result
       for the two flags (AF, PF) that can't be directly derived from
       PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    uint8_t const idxRegImm   = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also adding in the carry flag here, shifting operands up
           doesn't work. So, we have to calculate carry & overflow manually. */
        pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeRegFreeTmp(pReNative, idxRegImm);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}


/**
 * The SUB instruction will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sub_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS values. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x2a, 0x2b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in order
       to calculate the right flags, even though we use SUBS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    if (cOpBits >= 32)
    {
        off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
                                                true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    return off;
}


/**
 * The SUB instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sub_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS values. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 5, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in order
       to calculate the right flags, even though we use SUBS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
    {
        if (uImmOp <= 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
        else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
                                                       true /*fSetFlags*/, true /*fShift12*/);
        else
        {
            uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
            pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
            pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
            iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
        }
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
        pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    return off;
}


/**
 * The CMP instruction will set all status flags, but modifies no registers.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_cmp_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS values. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x3a, 0x3b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the actual result as well as both input operands in order
       to calculate the right flags, even though we use SUBS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegResult, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegSrc, false /*f64Bit*/,
                                                true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
        cOpBits = 32;
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
                                           idxRegDst, idxRegSrc, true /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegResult);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    return off;
}


/**
 * The CMP instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_cmp_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS values. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 7, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the actual result as well as both input operands in order
       to calculate the right flags, even though we use SUBS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (cOpBits >= 32)
    {
        if (uImmOp <= 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
        else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
                                                       true /*fSetFlags*/, true /*fShift12*/);
        else
        {
            uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
            pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
            pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
            iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
        }
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegResult, idxRegDst, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
        cOpBits = 32;
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
                                           idxRegDst, UINT8_MAX, true /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegResult);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    return off;
}


/**
 * The SBB instruction takes CF as input and will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sbb_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);

    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x1a, 0x1b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
       idxRegEfl and then SBCS for the calculation. We need all inputs and
       the result for the two flags (AF, PF) that can't be directly derived
       from PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also subtracting the borrow (carry flag) here, shifting
           operands up doesn't work. So, we have to calculate carry & overflow manually. */
        pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}
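
/* Note (illustrative): x86 CF is a borrow flag for subtractions, while the ARM C flag is
   the inverse (no-borrow).  Hence the CFINV after loading CF via RMIF above, so SBCS sees
   the carry in ARM polarity, and the fInvertCarry=true passed to the flag calculation, so
   the borrow is stored back into EFLAGS in x86 polarity. */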


/**
 * The SBB instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sbb_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);

    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 3, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
       idxRegEfl and then SBCS for the calculation. We need all inputs and
       the result for the two flags (AF, PF) that can't be directly derived
       from PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    uint8_t const idxRegImm   = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also subtracting the borrow (carry flag) here, shifting
           operands up doesn't work. So, we have to calculate carry & overflow manually. */
        pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeRegFreeTmp(pReNative, idxRegImm);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_imul_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_popcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                             uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_tzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                            uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_lzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                            uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


#endif /* !VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h */