VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h@103799

Last change on this file since 103799 was 103799, checked in by vboxsync, 9 months ago

VMM/IEM: Implemented iemNativeEmit_test_r_i_efl and enabled it for both hosts. bugref:10376

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 64.4 KB
1/* $Id: IEMAllN8veEmit-x86.h 103799 2024-03-11 22:23:37Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, x86 Target - Code Emitters.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
29#define VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34
35#ifdef RT_ARCH_AMD64
36
37/**
38 * Emits a ModR/M instruction with one opcode byte and only register operands.
39 */
40DECL_FORCE_INLINE(uint32_t)
41iemNativeEmitAmd64OneByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOther,
42 uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
43{
44 Assert(idxRegReg < 16); Assert(idxRegRm < 16);
45 switch (cOpBits)
46 {
47 case 16:
48 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
49 RT_FALL_THRU();
50 case 32:
51 if (idxRegReg >= 8 || idxRegRm >= 8)
52 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
53 pCodeBuf[off++] = bOpcodeOther;
54 break;
55
56 default: AssertFailed(); RT_FALL_THRU();
57 case 64:
58 pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
59 pCodeBuf[off++] = bOpcodeOther;
60 break;
61
62 case 8:
63 if (idxRegReg >= 8 || idxRegRm >= 8)
64 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
65 else if (idxRegReg >= 4 || idxRegRm >= 4)
66 pCodeBuf[off++] = X86_OP_REX;
67 pCodeBuf[off++] = bOpcode8;
68 break;
69 }
70 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
71 return off;
72}
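/*
 * [Editor's illustration, not part of the original file.] For a concrete feel
 * of the emitter above: calling it with bOpcodeOther=0x23 (AND r32, r/m32),
 * cOpBits=32, idxRegReg=0 (eax) and idxRegRm=9 (r9d) produces 41 23 C1, i.e.
 * 'and eax, r9d' (REX.B, opcode, ModRM mod=11/reg=000/rm=001). A minimal
 * stand-alone model of the REX prefix selection, assuming only the standard
 * AMD64 encoding rules:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static uint8_t amd64ModelRexForRR(unsigned cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
{
    uint8_t bRex = 0x40;                        /* REX base */
    if (cOpBits == 64)  bRex |= 0x08;           /* REX.W - 64-bit operand size */
    if (idxRegReg >= 8) bRex |= 0x04;           /* REX.R - extends ModRM.reg */
    if (idxRegRm  >= 8) bRex |= 0x01;           /* REX.B - extends ModRM.rm */
    /* A bare 0x40 is only emitted for 8-bit ops touching spl/bpl/sil/dil
       (register index 4..7), matching the cOpBits==8 case above. */
    return bRex;
}
#endif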
73
74
75/**
76 * Emits a ModR/M instruction with two opcode bytes and only register operands.
77 */
78DECL_FORCE_INLINE(uint32_t)
79iemNativeEmitAmd64TwoByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
80 uint8_t bOpcode0, uint8_t bOpcode8, uint8_t bOpcodeOther,
81 uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
82{
83 Assert(idxRegReg < 16); Assert(idxRegRm < 16);
84 switch (cOpBits)
85 {
86 case 16:
87 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
88 RT_FALL_THRU();
89 case 32:
90 if (idxRegReg >= 8 || idxRegRm >= 8)
91 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
92 pCodeBuf[off++] = bOpcode0;
93 pCodeBuf[off++] = bOpcodeOther;
94 break;
95
96 default: AssertFailed(); RT_FALL_THRU();
97 case 64:
98 pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
99 pCodeBuf[off++] = bOpcode0;
100 pCodeBuf[off++] = bOpcodeOther;
101 break;
102
103 case 8:
104 if (idxRegReg >= 8 || idxRegRm >= 8)
105 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
106 else if (idxRegReg >= 4 || idxRegRm >= 4)
107 pCodeBuf[off++] = X86_OP_REX;
108 pCodeBuf[off++] = bOpcode0;
109 pCodeBuf[off++] = bOpcode8;
110 break;
111 }
112 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
113 return off;
114}
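/*
 * [Editor's illustration, not part of the original file.] The two-byte form
 * serves the 0x0F opcode map. The ADC/SBB emitters below call it with
 * bOpcode0=0x0f, bOpcodeOther=0xba, cOpBits=32, idxRegReg=4 (the /4 opcode
 * extension for BT) and idxRegRm=idxRegEfl, then append an imm8. If, say,
 * idxRegEfl were 3 (ebx), that would encode as 0F BA E3 ib, i.e.
 * 'bt ebx, imm8', where 0xE3 is ModRM mod=11, reg=100 (/4), rm=011.
 */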
115
116
117/**
118 * Emits one of three opcodes with an immediate.
119 *
120 * These are expected to use the /idxRegReg form (ModR/M reg field = opcode extension).
121 */
122DECL_FORCE_INLINE(uint32_t)
123iemNativeEmitAmd64OneByteModRmInstrRIEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOtherImm8,
124 uint8_t bOpcodeOther, uint8_t cOpBits, uint8_t cImmBits, uint8_t idxRegReg,
125 uint8_t idxRegRm, uint64_t uImmOp)
126{
127 Assert(idxRegReg < 8); Assert(idxRegRm < 16);
128 if ( cImmBits == 8
129 || (uImmOp <= (uint64_t)0x7f && bOpcodeOtherImm8 != 0xcc))
130 {
131 switch (cOpBits)
132 {
133 case 16:
134 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
135 RT_FALL_THRU();
136 case 32:
137 if (idxRegRm >= 8)
138 pCodeBuf[off++] = X86_OP_REX_B;
139 pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
140 break;
141
142 default: AssertFailed(); RT_FALL_THRU();
143 case 64:
144 pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
145 pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
146 break;
147
148 case 8:
149 if (idxRegRm >= 8)
150 pCodeBuf[off++] = X86_OP_REX_B;
151 else if (idxRegRm >= 4)
152 pCodeBuf[off++] = X86_OP_REX;
153 pCodeBuf[off++] = bOpcode8; Assert(bOpcode8 != 0xcc);
154 break;
155 }
156 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
157 pCodeBuf[off++] = (uint8_t)uImmOp;
158 }
159 else
160 {
161 switch (cOpBits)
162 {
163 case 32:
164 if (idxRegRm >= 8)
165 pCodeBuf[off++] = X86_OP_REX_B;
166 break;
167
168 default: AssertFailed(); RT_FALL_THRU();
169 case 64:
170 pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
171 break;
172
173 case 16:
174 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
175 if (idxRegRm >= 8)
176 pCodeBuf[off++] = X86_OP_REX_B;
177 pCodeBuf[off++] = bOpcodeOther;
178 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
179 pCodeBuf[off++] = RT_BYTE1(uImmOp);
180 pCodeBuf[off++] = RT_BYTE2(uImmOp);
181 Assert(cImmBits == 16);
182 return off;
183 }
184 pCodeBuf[off++] = bOpcodeOther;
185 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
186 pCodeBuf[off++] = RT_BYTE1(uImmOp);
187 pCodeBuf[off++] = RT_BYTE2(uImmOp);
188 pCodeBuf[off++] = RT_BYTE3(uImmOp);
189 pCodeBuf[off++] = RT_BYTE4(uImmOp);
190 Assert(cImmBits == 32);
191 }
192 return off;
193}
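/*
 * [Editor's illustration, not part of the original file.] The emitter above
 * prefers the sign-extended imm8 form when the immediate fits. For the ALU
 * /4 (AND) encoding used by iemNativeEmit_and_r_i_efl below:
 *
 *     and ecx, 0x7f        ->  83 E1 7F           (bOpcodeOtherImm8 = 0x83)
 *     and ecx, 0x12345678  ->  81 E1 78 56 34 12  (bOpcodeOther     = 0x81)
 *
 * Callers without an imm8 form (e.g. TEST) pass the 0xcc (int3) sentinel as
 * bOpcodeOtherImm8; the asserts above guarantee the sentinel is never emitted
 * and the full-width immediate path is taken instead.
 */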
194
195#endif /* RT_ARCH_AMD64 */
196
197/**
198 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGICAL.
199 *
200 * It takes the liveness analysis into account.
201 */
202DECL_INLINE_THROW(uint32_t)
203iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl
204#ifndef RT_ARCH_AMD64
205 , uint8_t cOpBits, uint8_t idxRegResult, bool fNativeFlags = false
206#endif
207 )
208{
209#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
210 if (1) /** @todo check if all bits are clobbered. */
211#endif
212 {
213#ifdef RT_ARCH_AMD64
214 /*
215 * Collect flags and merge them with eflags.
216 */
217 /** @todo we could alternatively use SAHF here when host rax is free, since
218 * OF is cleared. */
219 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
220 /* pushf - do this before any reg allocations as they may emit instructions too. */
221 pCodeBuf[off++] = 0x9c;
222
223 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
224 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
225 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
226 /* pop tmp */
227 if (idxTmpReg >= 8)
228 pCodeBuf[off++] = X86_OP_REX_B;
229 pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
230 /* and tmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF */
231 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF);
232 /* Clear the status bits in EFLs. */
233 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
234 /* OR in the flags we collected. */
235 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
236 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
237 iemNativeRegFreeTmp(pReNative, idxTmpReg);
238
239#elif defined(RT_ARCH_ARM64)
240 /*
241 * Calculate flags.
242 */
243 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
244 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
245 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
246
247 /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be encoded as an AND immediate, so use idxTmpReg for the constant. */
248 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, ~X86_EFL_STATUS_BITS);
249 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxTmpReg);
250
251 /* N,Z -> SF,ZF */
252 if (cOpBits < 32)
253 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegResult, cOpBits > 8); /* sets NZ */
254 else if (!fNativeFlags)
255 pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, idxRegResult, idxRegResult, cOpBits > 32 /*f64Bit*/);
256 pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
257 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 30);
258 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
259 AssertCompile(X86_EFL_ZF_BIT + 1 == X86_EFL_SF_BIT);
260
261 /* Calculate 8-bit parity of the result. */
262 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
263 4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
264 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
265 2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
266 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
267 1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
268 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
269 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
270 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
271
272 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
273 iemNativeRegFreeTmp(pReNative, idxTmpReg);
274#else
275# error "port me"
276#endif
277 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
278 }
279 return off;
280}
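/*
 * [Editor's sketch, not part of the original file.] The three EOR+LSR steps
 * above fold the low 8 result bits down to bit 0, giving their odd-parity,
 * and the final EOR-immediate inverts that bit since X86_EFL_PF means even
 * parity. A plain C model of the same computation:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static unsigned x86ModelCalcPf(uint32_t uResult)
{
    uint32_t u = uResult ^ (uResult >> 4);  /* fold bits 7:4 onto 3:0 */
    u ^= u >> 2;                            /* fold bits 3:2 onto 1:0 */
    u ^= u >> 1;                            /* bit 0 = XOR of result bits 7:0 */
    return (u ^ 1) & 1;                     /* invert: PF=1 for even parity */
}
#endif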
281
282
283/**
284 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
285 *
286 * It takes the liveness analysis into account.
287 */
288DECL_FORCE_INLINE_THROW(uint32_t)
289iemNativeEmitEFlagsForArithmetic(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl, uint8_t idxRegEflIn
290#ifndef RT_ARCH_AMD64
291 , uint8_t cOpBits, uint8_t idxRegResult, uint8_t idxRegDstIn, uint8_t idxRegSrc
292 , bool fInvertCarry, uint64_t uImmSrc
293#endif
294 )
295{
296#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
297 if (1) /** @todo check if all bits are clobbered. */
298#endif
299 {
300#ifdef RT_ARCH_AMD64
301 /*
302 * Collect flags and merge them with eflags.
303 */
304 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
305 /* pushf - do this before any reg allocations as they may emit instructions too. */
306 pCodeBuf[off++] = 0x9c;
307
308 uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
309 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
310 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
311 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
312 /* pop tmp */
313 if (idxTmpReg >= 8)
314 pCodeBuf[off++] = X86_OP_REX_B;
315 pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
316 /* Isolate the flags we want. */
317 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_STATUS_BITS);
318 /* Clear the status bits in EFLs. */
319 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
320 /* OR in the flags we collected. */
321 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
322 if (idxRegEflIn != idxRegEfl)
323 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
324 iemNativeRegFreeTmp(pReNative, idxTmpReg);
325
326#elif defined(RT_ARCH_ARM64)
327 /*
328 * Calculate flags.
329 */
330 uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
331 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
332 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
333 uint8_t const idxTmpReg2 = cOpBits >= 32 ? UINT8_MAX : iemNativeRegAllocTmp(pReNative, &off);
334 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
335
336 /* Invert CF (stored inverted on ARM) and load the flags into the temporary register. */
337 if (fInvertCarry)
338 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
339 pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
340
341 if (cOpBits >= 32)
342 {
343 /* V -> OF */
344 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 28);
345 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1, false /*f64Bit*/);
346
347 /* C -> CF */
348 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 1);
349 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_CF_BIT, 1, false /*f64Bit*/);
350 }
351
352 /* N,Z -> SF,ZF */
353 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits >= 32 ? 1 : 30);
354 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
355
356 /* For ADC and SBB we have to calculate overflow and carry ourselves. */
357 if (cOpBits < 32)
358 {
359 /* Since the carry flag is the zeroth flag, we just use BFXIL to copy it over. */
360 AssertCompile(X86_EFL_CF_BIT == 0);
361 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxRegEfl, idxRegResult, cOpBits, 1, false /*f64Bit*/);
362
363 /* The overflow flag is more work as we have to compare the signed bits for
364 both inputs and the result. See IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
365
366 Formula: ~(a_uDst ^ a_uSrcOf) & (a_uResult ^ a_uDst)
367 With a_uSrcOf as a_uSrc for additions and ~a_uSrc for subtractions.
368
369 It is a bit simpler when the right (source) side is constant:
370 adc: S D R -> OF sbb: S D R -> OF
371 0 0 0 -> 0 \ 0 0 0 -> 0 \
372 0 0 1 -> 1 \ 0 0 1 -> 0 \
373 0 1 0 -> 0 / and not(D), R 0 1 0 -> 1 / and D, not(R)
374 0 1 1 -> 0 / 0 1 1 -> 0 /
375 1 0 0 -> 0 \ 1 0 0 -> 0 \
376 1 0 1 -> 0 \ and D, not(R) 1 0 1 -> 1 \ and not(D), R
377 1 1 0 -> 1 / 1 1 0 -> 0 /
378 1 1 1 -> 0 / 1 1 1 -> 0 / */
379 if (idxRegSrc != UINT8_MAX)
380 {
381 if (fInvertCarry) /* sbb: ~((a_uDst) ^ ~(a_uSrcOf)) -> (a_uDst) ^ (a_uSrcOf); HACK ALERT: fInvertCarry == sbb */
382 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false);
383 else /* adc: ~((a_uDst) ^ (a_uSrcOf)) -> (a_uDst) ^ ~(a_uSrcOf) */
384 pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegSrc, false);
385 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg2, idxRegDstIn, idxRegResult, false); /* (a_uDst) ^ (a_uResult) */
386 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpReg, idxTmpReg, idxTmpReg2, false /*f64Bit*/);
387 }
388 else if (uImmSrc & RT_BIT_32(cOpBits - 1))
389 {
390 if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
391 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
392 else
393 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
394 }
395 else
396 {
397 if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
398 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
399 else
400 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
401 }
402 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits - 1, false /*f64Bit*/);
403 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1);
404 iemNativeRegFreeTmp(pReNative, idxTmpReg2);
405 }
406
407 /* Calculate 8-bit parity of the result. */
408 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
409 4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
410 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
411 2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
412 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
413 1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
414 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
415 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
416 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
417
418 /* Calculate the auxiliary carry/borrow. This is related to 8-bit BCD.
419 General formula: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
420 S D R
421 0 0 0 -> 0; \
422 0 0 1 -> 1; \ regular
423 0 1 0 -> 1; / xor R, D
424 0 1 1 -> 0; /
425 1 0 0 -> 1; \
426 1 0 1 -> 0; \ invert one of the two
427 1 1 0 -> 0; / xor not(R), D
428 1 1 1 -> 1; /
429 a_uSrc[bit 4]=0: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
430 a_uSrc[bit 4]=1: ((uint32_t)~(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
431 */
432
433 if (idxRegSrc != UINT8_MAX)
434 {
435 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false /*f64Bit*/);
436 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxRegResult, false /*f64Bit*/);
437 }
438 else if (uImmSrc & X86_EFL_AF)
439 pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
440 else
441 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
442 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, X86_EFL_AF_BIT, false /*f64Bit*/);
443 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_AF_BIT, 1, false /*f64Bit*/);
444
445 if (idxRegEflIn != idxRegEfl)
446 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
447 iemNativeRegFreeTmp(pReNative, idxTmpReg);
448#else
449# error "port me"
450#endif
451 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
452 }
453 return off;
454
455}
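/*
 * [Editor's sketch, not part of the original file.] The CF/OF computed in the
 * sub-32-bit path above, plus the AF formula, restated in plain C for an
 * addition performed in a wider register (cOpBits < 32). For subtractions,
 * substitute ~uSrc in the OF term and remember that the x86 CF is a borrow:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static void x86ModelAddFlags(uint32_t uDst, uint32_t uSrc, unsigned cOpBits,
                             unsigned *pfCf, unsigned *pfOf, unsigned *pfAf)
{
    uint32_t const uResult = uDst + uSrc;                /* raw, unmasked */
    *pfCf = (uResult >> cOpBits) & 1;                    /* the BFXIL above */
    *pfOf = ((~(uDst ^ uSrc) & (uResult ^ uDst)) >> (cOpBits - 1)) & 1;
    *pfAf = ((uDst ^ uSrc ^ uResult) >> 4) & 1;          /* X86_EFL_AF_BIT == 4 */
}
#endif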
456
457
458/**
459 * The AND instruction will clear OF, CF and AF (the latter is undefined) and
460 * set the other flags according to the result.
461 */
462DECL_INLINE_THROW(uint32_t)
463iemNativeEmit_and_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
464 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
465{
466 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
467 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
468#ifdef RT_ARCH_AMD64
469 /* On AMD64 we just use the correctly sized AND instruction and harvest the EFLAGS. */
470 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
471 0x22, 0x23, cOpBits, idxRegDst, idxRegSrc);
472 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
473 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
474
475 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
476
477#elif defined(RT_ARCH_ARM64)
478 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones as well. */
479 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
480 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
481 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
482 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
483
484 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, true /*fNativeFlags*/);
485#else
486# error "Port me"
487#endif
488 iemNativeVarRegisterRelease(pReNative, idxVarDst);
489 return off;
490}
491
492
493/**
494 * The AND instruction with immediate value as right operand.
495 */
496DECL_INLINE_THROW(uint32_t)
497iemNativeEmit_and_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
498 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
499{
500 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
501#ifdef RT_ARCH_AMD64
502 /* On AMD64 we just use the correctly sized AND instruction and harvest the EFLAGS. */
503 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
504 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 4, idxRegDst, uImmOp);
505 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
506
507 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
508
509#elif defined(RT_ARCH_ARM64)
510 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones as well. */
511 uint32_t uImmSizeLen, uImmRotations;
512 if ( cOpBits > 32
513 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
514 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
515 {
516 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
517 if (cOpBits >= 32)
518 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
519 else
520 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
521 }
522 else
523 {
524 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
525 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
526 if (cOpBits >= 32)
527 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
528 else
529 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
530 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
531 }
532 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
533
534 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, cOpBits >= 32 /*fNativeFlags*/);
535 RT_NOREF_PV(cImmBits);
536
537#else
538# error "Port me"
539#endif
540 iemNativeVarRegisterRelease(pReNative, idxVarDst);
541 return off;
542}
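/*
 * [Editor's sketch, not part of the original file.] Whether a 32-bit mask can
 * be an ARM64 logical immediate, for the simplest case of a 32-bit element
 * size (the real Armv8A64ConvertMask32ToImmRImmS also tries the repeating
 * 2/4/8/16-bit element sizes). Encodable values are rotations of 2^n - 1:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdbool.h>
#include <stdint.h>
static bool isOneRunOfOnes32(uint32_t x)        /* contiguous run, no wraparound */
{
    return x != 0 && (((x & (0u - x)) + x) & x) == 0;
}
static bool isArm64LogImm32NoRepeat(uint32_t v)
{
    return v != 0 && v != UINT32_MAX            /* all-zeros/all-ones not encodable */
        && (isOneRunOfOnes32(v) || isOneRunOfOnes32(~v)); /* plain or wrapped run */
}
#endif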
543
544
545/**
546 * The TEST instruction will clear OF, CF and AF (the latter is undefined) and
547 * set the other flags according to the result.
548 */
549DECL_INLINE_THROW(uint32_t)
550iemNativeEmit_test_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
551 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
552{
553 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
554 uint8_t const idxRegSrc = idxVarSrc == idxVarDst ? idxRegDst /* special case of 'test samereg,samereg' */
555 : iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
556#ifdef RT_ARCH_AMD64
557 /* On AMD64 we just use the correctly sized TEST instruction and harvest the EFLAGS. */
558 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
559 0x84, 0x85, cOpBits, idxRegSrc, idxRegDst);
560 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
561
562#elif defined(RT_ARCH_ARM64)
563 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones as well. We also
564 need to keep the result in order to calculate the flags. */
565 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
566 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
567 if (cOpBits >= 32)
568 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
569 else
570 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
571 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
572
573#else
574# error "Port me"
575#endif
576 if (idxVarSrc != idxVarDst)
577 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
578 iemNativeVarRegisterRelease(pReNative, idxVarDst);
579
580#ifdef RT_ARCH_AMD64
581 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
582#else
583 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
584 iemNativeRegFreeTmp(pReNative, idxRegResult);
585#endif
586 return off;
587}
588
589
590/**
591 * The TEST instruction with immediate value as right operand.
592 */
593DECL_INLINE_THROW(uint32_t)
594iemNativeEmit_test_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
595 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
596{
597 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
598#ifdef RT_ARCH_AMD64
599 /* On AMD64 we just use the correctly sized TEST instruction and harvest the EFLAGS. */
600 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
601 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0xf6, 0xcc, 0xf7, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
602 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
603 iemNativeVarRegisterRelease(pReNative, idxVarDst);
604
605 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
606
607#elif defined(RT_ARCH_ARM64)
608 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones as well. We also
609 need to keep the result in order to calculate the flags. */
610 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
611 uint32_t uImmSizeLen, uImmRotations;
612 if ( cOpBits > 32
613 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
614 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
615 {
616 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
617 if (cOpBits >= 32)
618 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
619 else
620 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
621 }
622 else
623 {
624 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
625 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
626 if (cOpBits >= 32)
627 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
628 else
629 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
630 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
631 }
632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
633 iemNativeVarRegisterRelease(pReNative, idxVarDst);
634
635 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
636
637 iemNativeRegFreeTmp(pReNative, idxRegResult);
638 RT_NOREF_PV(cImmBits);
639
640#else
641# error "Port me"
642#endif
643 return off;
644}
645
646
647/**
648 * The OR instruction will clear OF, CF and AF (the latter is undefined) and
649 * set the other flags according to the result.
650 */
651DECL_INLINE_THROW(uint32_t)
652iemNativeEmit_or_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
653 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
654{
655 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
656 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
657#ifdef RT_ARCH_AMD64
658 /* On AMD64 we just use the correctly sized OR instruction and harvest the EFLAGS. */
659 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
660 0x0a, 0x0b, cOpBits, idxRegDst, idxRegSrc);
661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
662 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
663
664 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
665
666#elif defined(RT_ARCH_ARM64)
667 /* On ARM64 we use 32-bit OR for the 8-bit and 16-bit ones as well. */
668 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
669 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
670 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
671 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
672
673 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
674
675#else
676# error "Port me"
677#endif
678 iemNativeVarRegisterRelease(pReNative, idxVarDst);
679 return off;
680}
681
682
683/**
684 * The OR instruction with immediate value as right operand.
685 */
686DECL_INLINE_THROW(uint32_t)
687iemNativeEmit_or_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
688 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
689{
690 RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
691 return off;
692}
693
694
695/**
696 * The XOR instruction will clear OF, CF and AF (the latter is undefined) and
697 * set the other flags according to the result.
698 */
699DECL_INLINE_THROW(uint32_t)
700iemNativeEmit_xor_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
701 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
702{
703 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
704 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
705#ifdef RT_ARCH_AMD64
706 /* On AMD64 we just use the correctly sized XOR instruction and harvest the EFLAGS. */
707 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
708 0x32, 0x33, cOpBits, idxRegDst, idxRegSrc);
709 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
710 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
711
712 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
713
714#elif defined(RT_ARCH_ARM64)
715 /* On ARM64 we use 32-bit EOR for the 8-bit and 16-bit ones as well. */
716 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
717 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
718 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
719 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
720
721 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
722
723#else
724# error "Port me"
725#endif
726 iemNativeVarRegisterRelease(pReNative, idxVarDst);
727 return off;
728}
729
730
731/**
732 * The XOR instruction with immediate value as right operand.
733 */
734DECL_INLINE_THROW(uint32_t)
735iemNativeEmit_xor_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
736 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
737{
738 RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
739 return off;
740}
741
742
743/**
744 * The ADD instruction will set all status flags.
745 */
746DECL_INLINE_THROW(uint32_t)
747iemNativeEmit_add_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
748 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
749{
750 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
751 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
752
753#ifdef RT_ARCH_AMD64
754 /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS value. */
755 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
756 0x02, 0x03, cOpBits, idxRegDst, idxRegSrc);
757 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
758
759 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
760 iemNativeVarRegisterRelease(pReNative, idxVarDst);
761
762 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
763
764#elif defined(RT_ARCH_ARM64)
765 /* On ARM64 we'll need the two input operands as well as the result in order
766 to calculate the right flags, even if we use ADDS and translate NZCV into
767 OF, CF, ZF and SF. */
768 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
769 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
770 if (cOpBits >= 32)
771 {
772 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
773 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
774 }
775 else
776 {
777 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
778 uint32_t const cShift = 32 - cOpBits;
779 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
780 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
781 true /*fSetFlags*/, cShift);
782 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
783 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
784 cOpBits = 32;
785 }
786 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
787
788 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
789 idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);
790
791 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
792 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
793 iemNativeVarRegisterRelease(pReNative, idxVarDst);
794
795#else
796# error "port me"
797#endif
798 return off;
799}
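/*
 * [Editor's sketch, not part of the original file.] Why the shift-up trick in
 * the sub-32-bit path above works: left-aligning both operands makes the
 * carry out of bit 7 (or 15) become the carry out of bit 31, and likewise for
 * signed overflow, so a 32-bit ADDS produces all four NZCV flags for the
 * narrow operation. In plain C, for an 8-bit add:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static uint8_t x86ModelAdd8ViaShift(uint8_t uDst, uint8_t uSrc)
{
    uint32_t const uDstSh = (uint32_t)uDst << 24;   /* the ORR with shift above */
    uint32_t const uSrcSh = (uint32_t)uSrc << 24;   /* the shifted ADDS operand */
    uint32_t const uResSh = uDstSh + uSrcSh;        /* NZCV now matches the 8-bit op */
    return (uint8_t)(uResSh >> 24);                 /* the trailing LSR */
}
#endif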
800
801
802/**
803 * The ADD instruction with immediate value as right operand.
804 */
805DECL_INLINE_THROW(uint32_t)
806iemNativeEmit_add_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
807 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
808{
809 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
810
811#ifdef RT_ARCH_AMD64
812 /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS value. */
813 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
814 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
815 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
816
817 iemNativeVarRegisterRelease(pReNative, idxVarDst);
818
819 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
820
821#elif defined(RT_ARCH_ARM64)
822 /* On ARM64 we'll need the two input operands as well as the result in order
823 to calculate the right flags, even if we use ADDS and translate NZCV into
824 OF, CF, ZF and SF. */
825 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
826 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
827 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
828 if (cOpBits >= 32)
829 {
830 if (uImmOp <= 0xfffU)
831 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
832 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
833 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
834 true /*fSetFlags*/, true /*fShift12*/);
835 else
836 {
837 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
838 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
839 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
840 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
841 }
842 }
843 else
844 {
845 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
846 uint32_t const cShift = 32 - cOpBits;
847 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp << cShift);
848 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
849 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegTmpImm, idxRegDstIn, false /*f64Bit*/, true /*fSetFlags*/, cShift);
850 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
851 cOpBits = 32;
852 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
853 }
854 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
855
856 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
857 idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);
858
859 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
860 iemNativeVarRegisterRelease(pReNative, idxVarDst);
861 RT_NOREF(cImmBits);
862
863#else
864# error "port me"
865#endif
866 return off;
867}
868
869
870/**
871 * The ADC instruction takes CF as input and will set all status flags.
872 */
873DECL_INLINE_THROW(uint32_t)
874iemNativeEmit_adc_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
875 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
876{
877 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
878 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
879 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
880
881#ifdef RT_ARCH_AMD64
882 /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
883 with matching size to get the correct flags. */
884 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
885
886 /* Use the BT instruction to set CF according to idxRegEfl. */
887 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
888 pCodeBuf[off++] = X86_EFL_CF_BIT;
889
890 off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x12, 0x13, cOpBits, idxRegDst, idxRegSrc);
891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
892
893 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
894 iemNativeVarRegisterRelease(pReNative, idxVarDst);
895
896 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
897
898#elif defined(RT_ARCH_ARM64)
899 /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl and
900 then ADCS for the calculation. We need all inputs and result for the two
901 flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
902 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
903 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
904
905 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
906 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
907 if (cOpBits >= 32)
908 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
909 else
910 {
911 /* Since we're also adding in the carry flag here, shifting operands up
912 doesn't work. So, we have to calculate carry & overflow manually. */
913 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
914 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
915 }
916 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
917
918 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
919 idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);
920
921 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
922 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
923 if (cOpBits < 32)
924 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
925 iemNativeVarRegisterRelease(pReNative, idxVarDst);
926
927#else
928# error "port me"
929#endif
930 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
931 return off;
932}
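/*
 * [Editor's sketch, not part of the original file.] A model of the RMIF use
 * above. RMIF rotates the source register right and inserts bits 3:0 of the
 * result into PSTATE.{N,Z,C,V} under a 4-bit mask (bit 3=N .. bit 0=V).
 * Rotating EFLAGS right by 63 (i.e. left by 1) moves X86_EFL_CF_BIT (bit 0)
 * to bit 1, which the RT_BIT_32(1) mask then copies into PSTATE.C:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static unsigned armModelRmif(uint64_t uReg, unsigned cShift, unsigned fMask, unsigned fNzcv)
{
    uint64_t const uRot = (uReg >> cShift) | (uReg << ((64 - cShift) & 63));
    return (fNzcv & ~fMask) | ((unsigned)uRot & 0xf & fMask);
}
/* armModelRmif(fEfl, 63, 2 /=RT_BIT_32(1)/, fNzcv) sets PSTATE.C = EFLAGS.CF. */
#endif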
933
934
935/**
936 * The ADC instruction with immediate value as right operand.
937 */
938DECL_INLINE_THROW(uint32_t)
939iemNativeEmit_adc_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
940 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
941{
942 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
943 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
944
945#ifdef RT_ARCH_AMD64
946 /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
947 with matching size to get the correct flags. */
948 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
949
950 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
951 pCodeBuf[off++] = X86_EFL_CF_BIT;
952
953 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 2, idxRegDst, uImmOp);
954 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
955
956 iemNativeVarRegisterRelease(pReNative, idxVarDst);
957
958 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
959
960#elif defined(RT_ARCH_ARM64)
961 /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl
962 and then ADCS for the calculation. We need all inputs and result for
963 the two flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
964 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
965 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
966 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
967
968 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
969 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
970 if (cOpBits >= 32)
971 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
972 else
973 {
974 /* Since we're also adding in the carry flag here, shifting operands up
975 doesn't work. So, we have to calculate carry & overflow manually. */
976 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
977 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
978 }
979 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
980
981 iemNativeRegFreeTmp(pReNative, idxRegImm);
982
983 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
984 idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);
985
986 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
987 if (cOpBits < 32)
988 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
989 iemNativeVarRegisterRelease(pReNative, idxVarDst);
990 RT_NOREF(cImmBits);
991
992#else
993# error "port me"
994#endif
995 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
996 return off;
997}
998
999
1000/**
1001 * The SUB instruction will set all status flags.
1002 */
1003DECL_INLINE_THROW(uint32_t)
1004iemNativeEmit_sub_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1005 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1006{
1007 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1008 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1009
1010#ifdef RT_ARCH_AMD64
1011 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS value. */
1012 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1013 0x2a, 0x2b, cOpBits, idxRegDst, idxRegSrc);
1014 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1015
1016 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1017 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1018
1019 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1020
1021#elif defined(RT_ARCH_ARM64)
1022 /* On ARM64 we'll need the two input operands as well as the result in order
1023 to calculate the right flags, even if we use SUBS and translate NZCV into
1024 OF, CF, ZF and SF. */
1025 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1026 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1027 if (cOpBits >= 32)
1028 {
1029 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1030 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1031 }
1032 else
1033 {
1034 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1035 uint32_t const cShift = 32 - cOpBits;
1036 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1037 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
1038 true /*fSetFlags*/, cShift);
1039 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1040 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1041 cOpBits = 32;
1042 }
1043 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1044
1045 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1046 idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);
1047
1048 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1049 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1050 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1051
1052#else
1053# error "port me"
1054#endif
1055 return off;
1056}
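/*
 * [Editor's note, not part of the original file.] The fInvertCarry=true here
 * reflects the differing borrow conventions: after a subtraction ARM sets
 * PSTATE.C when there was NO borrow, while x86 sets CF when there WAS one,
 * hence the CFINV issued in the flags helper above. In plain C:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static void x86ModelSubCarry(uint32_t uDst, uint32_t uSrc, unsigned *pfArmC, unsigned *pfX86Cf)
{
    *pfArmC  = uDst >= uSrc;    /* ARM: C = not-borrow after SUBS */
    *pfX86Cf = uDst <  uSrc;    /* x86: CF = borrow after SUB     */
}
#endif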
1057
1058
1059/**
1060 * The SUB instruction with immediate value as right operand.
1061 */
1062DECL_INLINE_THROW(uint32_t)
1063iemNativeEmit_sub_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1064 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1065{
1066 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1067
1068#ifdef RT_ARCH_AMD64
1069 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS value. */
1070 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1071 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 5, idxRegDst, uImmOp);
1072 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1073
1074 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1075
1076 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1077
1078#elif defined(RT_ARCH_ARM64)
1079 /* On ARM64 we'll need the two input operands as well as the result in order
1080 to calculate the right flags, even if we use SUBS and translate NZCV into
1081 OF, CF, ZF and SF. */
1082 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1083 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1084 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1085 if (cOpBits >= 32)
1086 {
1087 if (uImmOp <= 0xfffU)
1088 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1089 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1090 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
1091 true /*fSetFlags*/, true /*fShift12*/);
1092 else
1093 {
1094 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1095 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1096 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1097 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1098 }
1099 }
1100 else
1101 {
1102 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1103 uint32_t const cShift = 32 - cOpBits;
1104 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1105 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1106 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1107 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
1108 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1109 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1110 cOpBits = 32;
1111 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1112 }
1113 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1114
1115 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1116 idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1117
1118 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1119 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1120 RT_NOREF(cImmBits);
1121
1122#else
1123# error "port me"
1124#endif
1125 return off;
1126}
1127
1128
1129/**
1130 * The CMP instruction will set all status flags, but modifies no registers.
1131 */
1132DECL_INLINE_THROW(uint32_t)
1133iemNativeEmit_cmp_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1134 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1135{
1136 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1137 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1138
1139#ifdef RT_ARCH_AMD64
1140 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS value. */
1141 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1142 0x3a, 0x3b, cOpBits, idxRegDst, idxRegSrc);
1143 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1144
1145 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1146 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1147
1148 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1149
1150#elif defined(RT_ARCH_ARM64)
1151 /* On ARM64 we'll need the actual result as well as both input operands in order
1152 to calculate the right flags, even if we use SUBS and translate NZCV into
1153 OF, CF, ZF and SF. */
1154 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1155 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1156 if (cOpBits >= 32)
1157 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1158 else
1159 {
1160 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1161 uint32_t const cShift = 32 - cOpBits;
1162 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegResult, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1163 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegSrc, false /*f64Bit*/,
1164 true /*fSetFlags*/, cShift);
1165 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
1166 cOpBits = 32;
1167 }
1168 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1169
1170 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1171 idxRegDst, idxRegSrc, true /*fInvertCarry*/, 0);
1172
1173 iemNativeRegFreeTmp(pReNative, idxRegResult);
1174 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1175 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1176
1177#else
1178# error "port me"
1179#endif
1180 return off;
1181}
1182
1183
1184/**
1185 * The CMP instruction with immediate value as right operand.
1186 */
1187DECL_INLINE_THROW(uint32_t)
1188iemNativeEmit_cmp_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1189 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1190{
1191 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1192
1193#ifdef RT_ARCH_AMD64
1194 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS value. */
1195 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1196 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 7, idxRegDst, uImmOp);
1197 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1198
1199 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1200
1201 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1202
1203#elif defined(RT_ARCH_ARM64)
1204 /* On ARM64 we'll need the actual result as well as both input operands in order
1205 to calculate the right flags, even if we use SUBS and translate NZCV into
1206 OF, CF, ZF and SF. */
1207 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1208 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1209 if (cOpBits >= 32)
1210 {
1211 if (uImmOp <= 0xfffU)
1212 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1213 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1214 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
1215 true /*fSetFlags*/, true /*fShift12*/);
1216 else
1217 {
1218 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1219 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1220 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1221 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1222 }
1223 }
1224 else
1225 {
1226 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1227 uint32_t const cShift = 32 - cOpBits;
1228 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1229 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1230 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegResult, idxRegDst, cShift, false /*f64Bit*/);
1231 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
1232 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
1233 cOpBits = 32;
1234 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1235 }
1236 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1237
1238 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1239 idxRegDst, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1240
1241 iemNativeRegFreeTmp(pReNative, idxRegResult);
1242 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1243 RT_NOREF(cImmBits);
1244
1245#else
1246# error "port me"
1247#endif
1248 return off;
1249}
1250
1251
1252/**
1253 * The SBB instruction takes CF as input and will set all status flags.
1254 */
1255DECL_INLINE_THROW(uint32_t)
1256iemNativeEmit_sbb_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1257 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1258{
1259 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1260 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1261 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1262
1263#ifdef RT_ARCH_AMD64
1264 /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
1265 with matching size to get the correct flags. */
1266 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1267
1268 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1269 pCodeBuf[off++] = X86_EFL_CF_BIT;
1270
1271 off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x1a, 0x1b, cOpBits, idxRegDst, idxRegSrc);
1272 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1273
1274 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1275 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1276
1277 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1278
1279#elif defined(RT_ARCH_ARM64)
1280 /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
1281 idxRegEfl and then SBCS for the calculation. We need all inputs and
1282 result for the two flags (AF,PF) that can't be directly derived from
1283 PSTATE.NZCV. */
1284 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1285 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1286
1287 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1288 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
1289 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1290 if (cOpBits >= 32)
1291 pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
1292 else
1293 {
1294 /* Since we're also subtracting the borrow flag here, shifting operands up
1295 doesn't work. So, we have to calculate carry & overflow manually. */
1296 pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
1297 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1298 }
1299 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1300
1301 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1302 idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);
1303
1304 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1305 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1306 if (cOpBits < 32)
1307 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1308 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1309
1310#else
1311# error "port me"
1312#endif
1313 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1314 return off;
1315}
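/*
 * [Editor's sketch, not part of the original file.] In the sub-32-bit SBC
 * path above, the raw 32-bit result directly carries the x86 borrow: on a
 * borrow the two's complement result goes negative and bit cOpBits (e.g.
 * bit 8 for an 8-bit op) ends up set, which is exactly what the BFXIL in the
 * flags helper copies into CF. That is also why the result is masked with
 * AndGpr32ByImm afterwards:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static uint8_t x86ModelSbb8(uint8_t uDst, uint8_t uSrc, unsigned fCarryIn, unsigned *pfCf)
{
    uint32_t const uRaw = (uint32_t)uDst - uSrc - fCarryIn; /* wraps on borrow */
    *pfCf = (uRaw >> 8) & 1;            /* bit 8 = x86 borrow (CF) */
    return (uint8_t)uRaw;               /* the masking step */
}
#endif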
1316
1317
1318/**
1319 * The SBB instruction with immediate value as right operand.
1320 */
1321DECL_INLINE_THROW(uint32_t)
1322iemNativeEmit_sbb_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1323 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1324{
1325 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1326 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1327
1328#ifdef RT_ARCH_AMD64
1329 /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
1330 with matching size to get the correct flags. */
1331 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1332
1333 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1334 pCodeBuf[off++] = X86_EFL_CF_BIT;
1335
1336 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 3, idxRegDst, uImmOp);
1337 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1338
1339 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1340
1341 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1342
1343#elif defined(RT_ARCH_ARM64)
1344 /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
1345 idxRegEfl and then SBCS for the calculation. We need all inputs and
1346 result for the two flags (AF,PF) that can't be directly derived from
1347 PSTATE.NZCV. */
1348 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1349 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1350 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1351
1352 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1353 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
1354 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1355 if (cOpBits >= 32)
1356 pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
1357 else
1358 {
1359 /* Since we're also subtracting the borrow flag here, shifting operands up
1360 doesn't work. So, we have to calculate carry & overflow manually. */
1361 pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
1362 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1363 }
1364 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1365
1366 iemNativeRegFreeTmp(pReNative, idxRegImm);
1367
1368 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1369 idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1370
1371 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1372 if (cOpBits < 32)
1373 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1374 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1375 RT_NOREF(cImmBits);
1376
1377#else
1378# error "port me"
1379#endif
1380 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1381 return off;
1382}
1383
1384
1385DECL_INLINE_THROW(uint32_t)
1386iemNativeEmit_imul_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1387 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1388{
1389 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1390 AssertFailed();
1391 return iemNativeEmitBrk(pReNative, off, 0x666);
1392}
1393
1394
1395DECL_INLINE_THROW(uint32_t)
1396iemNativeEmit_popcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1397 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1398{
1399 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1400 AssertFailed();
1401 return iemNativeEmitBrk(pReNative, off, 0x666);
1402}
1403
1404
1405DECL_INLINE_THROW(uint32_t)
1406iemNativeEmit_tzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1407 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1408{
1409 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1410 AssertFailed();
1411 return iemNativeEmitBrk(pReNative, off, 0x666);
1412}
1413
1414
1415DECL_INLINE_THROW(uint32_t)
1416iemNativeEmit_lzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1417 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1418{
1419 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1420 AssertFailed();
1421 return iemNativeEmitBrk(pReNative, off, 0x666);
1422}
1423
1424
1425#endif /* !VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h */