VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h@103764

Last change on this file since 103764 was 103744, checked in by vboxsync, 14 months ago

VMM/IEM: Implemented iemNativeEmit_adc_r_i_efl and enabled it for both hosts. bugref:10376

/* $Id: IEMAllN8veEmit-x86.h 103744 2024-03-09 02:52:46Z vboxsync $ */
/** @file
 * IEM - Native Recompiler, x86 Target - Code Emitters.
 */

/*
 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
#define VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif


#ifdef RT_ARCH_AMD64

/**
 * Emits a ModR/M instruction with one opcode byte and only register operands.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64OneByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOther,
                                        uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
{
    Assert(idxRegReg < 16); Assert(idxRegRm < 16);
    switch (cOpBits)
    {
        case 16:
            pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
            RT_FALL_THRU();
        case 32:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcodeOther;
            break;

        default: AssertFailed(); RT_FALL_THRU();
        case 64:
            pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcodeOther;
            break;

        case 8:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            else if (idxRegReg >= 4 || idxRegRm >= 4)
                pCodeBuf[off++] = X86_OP_REX;
            pCodeBuf[off++] = bOpcode8;
            break;
    }
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
    return off;
}
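
/* Worked example (illustrative, assuming the standard x86-64 encodings): for AND,
   bOpcode8=0x22 / bOpcodeOther=0x23, the emitter produces e.g.
       cOpBits=16, reg=0 (ax),  rm=1 (cx)  -> 66 23 C1   (and ax, cx)
       cOpBits=64, reg=8 (r8),  rm=1 (rcx) -> 4C 23 C1   (and r8, rcx)
       cOpBits=8,  reg=6 (sil), rm=5 (bpl) -> 40 22 F5   (and sil, bpl; the bare REX
                                                          selects the new low byte regs)
   Mod is always 3 (register direct), so no SIB or displacement bytes follow. */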


/**
 * Emits a ModR/M instruction with two opcode bytes and only register operands.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64TwoByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
                                        uint8_t bOpcode0, uint8_t bOpcode8, uint8_t bOpcodeOther,
                                        uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
{
    Assert(idxRegReg < 16); Assert(idxRegRm < 16);
    switch (cOpBits)
    {
        case 16:
            pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
            RT_FALL_THRU();
        case 32:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcodeOther;
            break;

        default: AssertFailed(); RT_FALL_THRU();
        case 64:
            pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcodeOther;
            break;

        case 8:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            else if (idxRegReg >= 4 || idxRegRm >= 4)
                pCodeBuf[off++] = X86_OP_REX;
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcode8;
            break;
    }
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
    return off;
}


/**
 * Emits one of three opcodes with an immediate.
 *
 * These are expected to use the /idxRegReg form, i.e. the ModR/M reg field
 * holds an opcode extension rather than a register.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64OneByteModRmInstrRIEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOtherImm8,
                                        uint8_t bOpcodeOther, uint8_t cOpBits, uint8_t cImmBits, uint8_t idxRegReg,
                                        uint8_t idxRegRm, uint64_t uImmOp)
{
    Assert(idxRegReg < 8); Assert(idxRegRm < 16);
    if (cImmBits == 8 || uImmOp <= (uint64_t)0x7f)
    {
        switch (cOpBits)
        {
            case 16:
                pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
                RT_FALL_THRU();
            case 32:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                pCodeBuf[off++] = bOpcodeOtherImm8;
                break;

            default: AssertFailed(); RT_FALL_THRU();
            case 64:
                pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
                pCodeBuf[off++] = bOpcodeOtherImm8;
                break;

            case 8:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                else if (idxRegRm >= 4)
                    pCodeBuf[off++] = X86_OP_REX;
                pCodeBuf[off++] = bOpcode8;
                break;
        }
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
        pCodeBuf[off++] = (uint8_t)uImmOp;
    }
    else
    {
        switch (cOpBits)
        {
            case 32:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                break;

            default: AssertFailed(); RT_FALL_THRU();
            case 64:
                pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
                break;

            case 16:
                pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                pCodeBuf[off++] = bOpcodeOther;
                pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
                pCodeBuf[off++] = RT_BYTE1(uImmOp);
                pCodeBuf[off++] = RT_BYTE2(uImmOp);
                Assert(cImmBits == 16);
                return off;
        }
        pCodeBuf[off++] = bOpcodeOther;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
        pCodeBuf[off++] = RT_BYTE1(uImmOp);
        pCodeBuf[off++] = RT_BYTE2(uImmOp);
        pCodeBuf[off++] = RT_BYTE3(uImmOp);
        pCodeBuf[off++] = RT_BYTE4(uImmOp);
        Assert(cImmBits == 32);
    }
    return off;
}
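
/* Worked example (illustrative): with bOpcode8=0x80, bOpcodeOtherImm8=0x83,
   bOpcodeOther=0x81 and idxRegReg=0 (the /0 = ADD opcode extension):
       cOpBits=32, rm=0 (eax), uImmOp=1     -> 83 C0 01            (add eax, 1)
       cOpBits=32, rm=0 (eax), uImmOp=0x100 -> 81 C0 00 01 00 00   (add eax, 0x100)
   Immediates that fit in a sign-extended byte take the short imm8 form, the rest
   the full imm16/imm32 form. */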

#endif /* RT_ARCH_AMD64 */

/**
 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGICAL.
 *
 * It takes liveness stuff into account.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl,
                              uint8_t cOpBits, uint8_t idxRegResult)
{
#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    if (1) /** @todo check if all bits are clobbered. */
#endif
    {
#ifdef RT_ARCH_AMD64
        /*
         * Collect flags and merge them with eflags.
         */
        /** @todo we could alternatively use SAHF here when host rax is free,
         *        since OF is cleared. */
        PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        /* pushf - do this before any reg allocations as they may emit instructions too. */
        pCodeBuf[off++] = 0x9c;

        uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
        /* pop tmp */
        if (idxTmpReg >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
        /* and tmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF);
        /* Clear the status bits in EFLs. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
        /* OR in the flags we collected. */
        off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
        iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);
        RT_NOREF(cOpBits, idxRegResult);

#elif defined(RT_ARCH_ARM64)
        /*
         * Calculate flags.
         */
        uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);

        /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be an AND immediate, so use idxTmpReg for the constant. */
        off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, ~X86_EFL_STATUS_BITS);
        off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxTmpReg);

        /* Calculate ZF: cmp result, #0; cset tmp, eq; orr efl, efl, tmp, lsl #X86_EFL_ZF_BIT */
        if (cOpBits > 32)
            off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, idxRegResult, ARMV8_A64_REG_XZR);
        else
            off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, idxRegResult, ARMV8_A64_REG_XZR);
        pCodeBuf[off++] = Armv8A64MkInstrCSet(idxTmpReg, kArmv8InstrCond_Eq, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegEfl, idxRegEfl, idxTmpReg, false /*f64Bit*/, X86_EFL_ZF_BIT);

        /* Calculate SF: We could use the native N flag, but it's just as simple to calculate it by shifting. */
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxRegResult, cOpBits - 1, cOpBits > 32 /*f64Bit*/);
# if 0 /* BFI and ORR should have the same performance characteristics, so use BFI like we'll have to do for SUB/ADD/++. */
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegEfl, idxRegEfl, idxTmpReg, false /*f64Bit*/, X86_EFL_SF_BIT);
# else
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_SF_BIT, 1, false /*f64Bit*/);
# endif

        /* Calculate 8-bit parity of the result. */
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
                                             4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
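        /* Note (illustrative): the three EORs above fold the low 8 result bits so
           that tmp bit 0 = 1 when their population count is odd.  x86 PF is set on
           EVEN parity, hence the inverting EOR with 1 right below. */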
        Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);

        iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);
#else
# error "port me"
#endif
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    }
    return off;
}
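
#if 0 /* Plain C sketch of the EOR-folding parity calculation above; illustrative only, not compiled. */
DECL_FORCE_INLINE(uint32_t) iemNativeSketchCalcPf(uint32_t uResult)
{
    uint32_t uFold = uResult ^ (uResult >> 4);  /* fold bits 7:4 into 3:0 */
    uFold ^= uFold >> 2;                        /* fold bits 3:2 into 1:0 */
    uFold ^= uFold >> 1;                        /* bit 0 is now 1 iff bits 7:0 had odd parity */
    return (uFold & 1) ^ 1;                     /* x86 PF is set on even parity, so invert */
}
#endif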


/**
 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
 *
 * It takes liveness stuff into account.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitEFlagsForArithmetic(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl, uint8_t idxRegEflIn
#ifndef RT_ARCH_AMD64
                                 , uint8_t cOpBits, uint8_t idxRegResult, uint8_t idxRegDstIn, uint8_t idxRegSrc
                                 , bool fInvertCarry, uint64_t uImmSrc
#endif
                                 )
{
#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    if (1) /** @todo check if all bits are clobbered. */
#endif
    {
#ifdef RT_ARCH_AMD64
        /*
         * Collect flags and merge them with eflags.
         */
        PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        /* pushf - do this before any reg allocations as they may emit instructions too. */
        pCodeBuf[off++] = 0x9c;

        uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
                                : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
        /* pop tmp */
        if (idxTmpReg >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
        /* Isolate the flags we want. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_STATUS_BITS);
        /* Clear the status bits in EFLs. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
        /* OR in the flags we collected. */
        off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
        if (idxRegEflIn != idxRegEfl)
            iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);

#elif defined(RT_ARCH_ARM64)
        /*
         * Calculate flags.
         */
        uint8_t const idxRegEfl  = idxRegEflIn != UINT8_MAX ? idxRegEflIn
                                 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg  = iemNativeRegAllocTmp(pReNative, &off);
        uint8_t const idxTmpReg2 = cOpBits >= 32 ? UINT8_MAX : iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);

        /* Invert CF (stored inverted on ARM) and load the flags into the temporary register. */
        if (fInvertCarry)
            pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
        pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */

        if (cOpBits >= 32)
        {
            /* V -> OF */
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 28);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1, false /*f64Bit*/);

            /* C -> CF */
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 1);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_CF_BIT, 1, false /*f64Bit*/);
        }

        /* N,Z -> SF,ZF */
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits >= 32 ? 1 : 30);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);

        /* For ADC and SBB we have to calculate overflow and carry ourselves. */
        if (cOpBits < 32)
        {
            /* Since the carry flag is the zero'th flag, we just use BFXIL to copy it over. */
            AssertCompile(X86_EFL_CF_BIT == 0);
            pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxRegEfl, idxRegResult, cOpBits, 1, false /*f64Bit*/);

            /* The overflow flag is more work as we have to compare the signed bits for
               both inputs and the result. See IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.

               Formula: ~(a_uDst ^ a_uSrcOf) & (a_uResult ^ a_uDst)
               With a_uSrcOf as a_uSrc for additions and ~a_uSrc for subtractions.

               It is a bit simpler when the right (source) side is constant:
                 adc: S D R -> OF                  sbb: S D R -> OF
                      0 0 0 ->  0 \                     0 0 0 ->  0 \
                      0 0 1 ->  1  \                    0 0 1 ->  0  \
                      0 1 0 ->  0  / and not(D), R      0 1 0 ->  1  / and D, not(R)
                      0 1 1 ->  0 /                     0 1 1 ->  0 /
                      1 0 0 ->  0 \                     1 0 0 ->  0 \
                      1 0 1 ->  0  \ and D, not(R)      1 0 1 ->  1  \ and not(D), R
                      1 1 0 ->  1  /                    1 1 0 ->  0  /
                      1 1 1 ->  0 /                     1 1 1 ->  0 / */
            if (idxRegSrc != UINT8_MAX)
            {
                if (fInvertCarry) /* sbb: ~((a_uDst) ^ ~(a_uSrcOf)) -> (a_uDst) ^ (a_uSrcOf); HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false);
                else              /* adc: ~((a_uDst) ^ (a_uSrcOf))  -> (a_uDst) ^ ~(a_uSrcOf) */
                    pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegSrc, false);
                pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg2, idxRegDstIn, idxRegResult, false); /* (a_uDst) ^ (a_uResult) */
                pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpReg, idxTmpReg, idxTmpReg2, false /*f64Bit*/);
            }
            else if (uImmSrc & RT_BIT_32(cOpBits - 1))
            {
                if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
                else
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
            }
            else
            {
                if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
                else
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
            }
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits - 1, false /*f64Bit*/);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1);
            iemNativeRegFreeTmp(pReNative, idxTmpReg2);
        }

        /* Calculate 8-bit parity of the result. */
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
                                             4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);

        /* Calculate the auxiliary carry/borrow. This is related to 8-bit BCD.
           General formula: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
               S D R
               0 0 0 -> 0;  \
               0 0 1 -> 1;   \  regular
               0 1 0 -> 1;   /  xor R, D
               0 1 1 -> 0;  /
               1 0 0 -> 1;  \
               1 0 1 -> 0;   \  invert one of the two
               1 1 0 -> 0;   /  xor not(R), D
               1 1 1 -> 1;  /
           a_uSrc[bit 4]=0: ((uint32_t)(a_uResult)  ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
           a_uSrc[bit 4]=1: ((uint32_t)~(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
         */
        if (idxRegSrc != UINT8_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false /*f64Bit*/);
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxRegResult, false /*f64Bit*/);
        }
        else if (uImmSrc & X86_EFL_AF)
            pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, X86_EFL_AF_BIT, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_AF_BIT, 1, false /*f64Bit*/);

        if (idxRegEflIn != idxRegEfl)
            iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);
#else
# error "port me"
#endif
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    }
    return off;
}
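
#if 0 /* Plain C reference for the OF/AF bits computed above; illustrative only, not compiled.
         Mirrors IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC; fSub selects sub/sbb semantics. */
DECL_FORCE_INLINE(uint32_t) iemNativeSketchCalcOfAf(uint32_t uDst, uint32_t uSrc, uint32_t uResult,
                                                    uint8_t cOpBits, bool fSub)
{
    uint32_t const uSrcOf = fSub ? ~uSrc : uSrc;                                   /* invert source for subtraction */
    uint32_t const fOf    = (~(uDst ^ uSrcOf) & (uResult ^ uDst)) >> (cOpBits - 1) & 1;
    uint32_t const fAf    = ((uResult ^ uSrc ^ uDst) >> X86_EFL_AF_BIT) & 1;       /* carry out of bit 3 */
    return (fOf << X86_EFL_OF_BIT) | (fAf << X86_EFL_AF_BIT);
}
#endif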


/**
 * The AND instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_and_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized AND instruction and harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x22, 0x23, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit cases. */
    /** @todo we should use ANDS on ARM64 and get the ZF for free for all
     *        variants, and SF for 32-bit and 64-bit. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The AND instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_and_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
    return off;
}


/**
 * The TEST instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_test_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = idxVarSrc == idxVarDst ? idxRegDst /* special case of 'test samereg,samereg' */
                            : iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized TEST instruction and harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x84, 0x85, cOpBits, idxRegSrc, idxRegDst);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit cases. We also
       need to keep the result in order to calculate the flags. */
    /** @todo we should use ANDS on ARM64 and get the ZF for free for all
     *        variants, and SF for 32-bit and 64-bit. */
    uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#else
# error "Port me"
#endif
    if (idxVarSrc != idxVarDst)
        iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, UINT8_MAX);
#else
    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult);
    iemNativeRegFreeTmp(pReNative, idxRegResult);
#endif
    return off;
}


/**
 * The TEST instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_test_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
    return off;
}


/**
 * The OR instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_or_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                         uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized OR instruction and harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x0a, 0x0b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit ORR for the 8-bit and 16-bit cases. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The OR instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_or_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                         uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
    return off;
}


/**
 * The XOR instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_xor_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized XOR instruction and harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x32, 0x33, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit EOR for the 8-bit and 16-bit cases. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The XOR instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_xor_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
    return off;
}


/**
 * The ADD instruction will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_add_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x02, 0x03, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in order
       to calculate the right flags, even though we use ADDS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    if (cOpBits >= 32)
    {
        off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
                                                true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    return off;
}
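
/* Note (illustrative): shifting sub-32-bit operands into the top of a 32-bit register
   makes the host's carry/overflow out of bit 31 coincide with the guest's carry/overflow
   out of bit 7/15, so ADDS/SUBS yield the correct NZCV.  E.g. for an 8-bit 0x80 + 0x80,
   (0x80 << 24) + (0x80 << 24) sets C and V exactly like the 8-bit add does. */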


/**
 * The ADD instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_add_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in order
       to calculate the right flags, even though we use ADDS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
    {
        if (uImmOp <= 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
        else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
                                                       true /*fSetFlags*/, true /*fShift12*/);
        else
        {
            uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
            pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
            pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
            iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
        }
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp << cShift);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegTmpImm, idxRegDstIn, false /*f64Bit*/, true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    return off;
}


/**
 * The ADC instruction takes CF as input and will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_adc_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);

    /* Use the BT instruction to set CF according to idxRegEfl. */
    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x12, 0x13, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl and
       then ADCS for the calculation. We need all inputs and the result for the
       two flags (AF, PF) that can't be directly derived from PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also adding in the carry flag here, shifting operands up
           doesn't work. So, we have to calculate carry & overflow manually. */
        pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}
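
/* Note (illustrative): RMIF rotates the first operand right by the immediate and inserts
   the low four bits of the result into NZCV under the given mask (bit 3=N ... bit 0=V).
   Rotating by (X86_EFL_CF_BIT - 1) & 63 = 63 moves EFLAGS.CF (bit 0) up to bit 1, which
   the fMask=C (bit 1) selects as PSTATE.C, i.e. exactly the carry input ADCS/SBCS consume. */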


/**
 * The ADC instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_adc_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);

    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 2, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl
       and then ADCS for the calculation. We need all inputs and the result
       for the two flags (AF, PF) that can't be directly derived from
       PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    uint8_t const idxRegImm   = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also adding in the carry flag here, shifting operands up
           doesn't work. So, we have to calculate carry & overflow manually. */
        pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeRegFreeTmp(pReNative, idxRegImm);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}


/**
 * The SUB instruction will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sub_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS values. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x2a, 0x2b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in order
       to calculate the right flags, even though we use SUBS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    if (cOpBits >= 32)
    {
        off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
                                                true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    return off;
}


/**
 * The SUB instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sub_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS values. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 5, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in order
       to calculate the right flags, even though we use SUBS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
    {
        if (uImmOp <= 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
        else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
                                                       true /*fSetFlags*/, true /*fShift12*/);
        else
        {
            uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
            pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
            pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
            iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
        }
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
        pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    return off;
}


/**
 * The CMP instruction will set all status flags, but modifies no registers.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_cmp_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS values. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x3a, 0x3b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the actual result as well as both input operands in order
       to calculate the right flags, even though we use SUBS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegResult, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegSrc, false /*f64Bit*/,
                                                true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
        cOpBits = 32;
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
                                           idxRegDst, idxRegSrc, true /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegResult);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    return off;
}


/**
 * The CMP instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_cmp_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS values. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 7, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the actual result as well as both input operands in order
       to calculate the right flags, even though we use SUBS and translate NZCV
       into OF, CF, ZF and SF. */
    uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (cOpBits >= 32)
    {
        if (uImmOp <= 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
        else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
                                                       true /*fSetFlags*/, true /*fShift12*/);
        else
        {
            uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
            pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
            pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
            iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
        }
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegResult, idxRegDst, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
        cOpBits = 32;
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
                                           idxRegDst, UINT8_MAX, true /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegResult);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    return off;
}


/**
 * The SBB instruction takes CF as input and will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sbb_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);

    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x1a, 0x1b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
       idxRegEfl and then SBCS for the calculation. We need all inputs and
       the result for the two flags (AF, PF) that can't be directly derived
       from PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also subtracting the borrow (carry flag) here, shifting
           operands up doesn't work. So, we have to calculate carry & overflow manually. */
        pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}
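
/* Note (illustrative): x86 CF is a borrow flag for subtractions, while the ARM C flag is
   the inverse (no-borrow).  Hence the CFINV after loading CF via RMIF above, so SBCS sees
   the carry in ARM polarity, and the fInvertCarry=true passed to the flag calculation, so
   the borrow is stored back into EFLAGS in x86 polarity. */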


/**
 * The SBB instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sbb_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);

    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 3, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
       idxRegEfl and then SBCS for the calculation. We need all inputs and
       the result for the two flags (AF, PF) that can't be directly derived
       from PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    uint8_t const idxRegImm   = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also subtracting the borrow (carry flag) here, shifting
           operands up doesn't work. So, we have to calculate carry & overflow manually. */
        pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeRegFreeTmp(pReNative, idxRegImm);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_imul_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_popcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                             uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_tzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                            uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_lzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                            uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


#endif /* !VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h */