VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h@103886

Last change on this file since 103886 was 103828, checked in by vboxsync, 13 months ago

VMM/IEM: Implemented simple (wholesale) status flag update skipping for arithmetic operations with the native emitter. bugref:10375

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 68.7 KB
/* $Id: IEMAllN8veEmit-x86.h 103828 2024-03-13 14:01:20Z vboxsync $ */
/** @file
 * IEM - Native Recompiler, x86 Target - Code Emitters.
 */

/*
 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
#define VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif


#ifdef RT_ARCH_AMD64

/**
 * Emits a ModR/M instruction with one opcode byte and only register operands.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64OneByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOther,
                                        uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
{
    Assert(idxRegReg < 16); Assert(idxRegRm < 16);
    switch (cOpBits)
    {
        case 16:
            pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
            RT_FALL_THRU();
        case 32:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcodeOther;
            break;

        default: AssertFailed(); RT_FALL_THRU();
        case 64:
            pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcodeOther;
            break;

        case 8:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            else if (idxRegReg >= 4 || idxRegRm >= 4)
                pCodeBuf[off++] = X86_OP_REX;
            pCodeBuf[off++] = bOpcode8;
            break;
    }
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
    return off;
}
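
/* Worked example (editorial, not in the original source): calling this with
   bOpcodeOther=0x23 (AND r32, r/m32), cOpBits=32, idxRegReg=0 (eax) and
   idxRegRm=1 (ecx) emits the two bytes 23 C1, i.e. 'and eax, ecx'; no REX
   prefix is needed since both register indexes are below 8. */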


/**
 * Emits a ModR/M instruction with two opcode bytes and only register operands.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64TwoByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
                                        uint8_t bOpcode0, uint8_t bOpcode8, uint8_t bOpcodeOther,
                                        uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
{
    Assert(idxRegReg < 16); Assert(idxRegRm < 16);
    switch (cOpBits)
    {
        case 16:
            pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
            RT_FALL_THRU();
        case 32:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcodeOther;
            break;

        default: AssertFailed(); RT_FALL_THRU();
        case 64:
            pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcodeOther;
            break;

        case 8:
            if (idxRegReg >= 8 || idxRegRm >= 8)
                pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
            else if (idxRegReg >= 4 || idxRegRm >= 4)
                pCodeBuf[off++] = X86_OP_REX;
            pCodeBuf[off++] = bOpcode0;
            pCodeBuf[off++] = bOpcode8;
            break;
    }
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
    return off;
}
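
/* Worked example (editorial, not in the original source): the ADC/SBB emitters
   below use this with bOpcode0=0x0f, bOpcodeOther=0xba and idxRegReg=4 to
   encode BT r/m32, imm8.  For idxRegRm=2 (edx) that produces 0F BA E2,
   followed by the caller-supplied immediate byte. */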


/**
 * Emits one of three opcodes with an immediate.
 *
 * These are expected to be of the /idxRegReg form, i.e. ModR/M.reg holds the
 * opcode extension rather than a register.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64OneByteModRmInstrRIEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOtherImm8,
                                        uint8_t bOpcodeOther, uint8_t cOpBits, uint8_t cImmBits, uint8_t idxRegReg,
                                        uint8_t idxRegRm, uint64_t uImmOp)
{
    Assert(idxRegReg < 8); Assert(idxRegRm < 16);
    if (   cImmBits == 8
        || (uImmOp <= (uint64_t)0x7f && bOpcodeOtherImm8 != 0xcc))
    {
        switch (cOpBits)
        {
            case 16:
                pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
                RT_FALL_THRU();
            case 32:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
                break;

            default: AssertFailed(); RT_FALL_THRU();
            case 64:
                pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
                pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
                break;

            case 8:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                else if (idxRegRm >= 4)
                    pCodeBuf[off++] = X86_OP_REX;
                pCodeBuf[off++] = bOpcode8; Assert(bOpcode8 != 0xcc);
                break;
        }
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
        pCodeBuf[off++] = (uint8_t)uImmOp;
    }
    else
    {
        switch (cOpBits)
        {
            case 32:
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                break;

            default: AssertFailed(); RT_FALL_THRU();
            case 64:
                pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
                break;

            case 16:
                pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
                if (idxRegRm >= 8)
                    pCodeBuf[off++] = X86_OP_REX_B;
                pCodeBuf[off++] = bOpcodeOther;
                pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
                pCodeBuf[off++] = RT_BYTE1(uImmOp);
                pCodeBuf[off++] = RT_BYTE2(uImmOp);
                Assert(cImmBits == 16);
                return off;
        }
        pCodeBuf[off++] = bOpcodeOther;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
        pCodeBuf[off++] = RT_BYTE1(uImmOp);
        pCodeBuf[off++] = RT_BYTE2(uImmOp);
        pCodeBuf[off++] = RT_BYTE3(uImmOp);
        pCodeBuf[off++] = RT_BYTE4(uImmOp);
        Assert(cImmBits == 32);
    }
    return off;
}
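
/* Worked example (editorial, not in the original source): with bOpcode8=0x80,
   bOpcodeOtherImm8=0x83, bOpcodeOther=0x81, cOpBits=32, cImmBits=32,
   idxRegReg=4 (the AND opcode extension), idxRegRm=2 and uImmOp=0x7f, the
   sign-extendable imm8 path is taken and the bytes 83 E2 7F are emitted,
   i.e. 'and edx, 0x7f'.  Passing 0xcc as bOpcodeOtherImm8, as the TEST
   emitter below does, marks the instruction as having no imm8 form and
   forces the full-width immediate encoding. */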

#endif /* RT_ARCH_AMD64 */

/**
 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGICAL.
 *
 * It takes liveness stuff into account.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl
#ifndef RT_ARCH_AMD64
                              , uint8_t cOpBits, uint8_t idxRegResult, bool fNativeFlags = false
#endif
                              )
{
#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
    /** @todo */
#endif
    {
#ifdef RT_ARCH_AMD64
        /*
         * Collect flags and merge them with eflags.
         */
        /** @todo we could alternatively use SAHF here when host rax is free,
         *        since OF is cleared. */
        PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        /* pushf - do this before any reg allocations as they may emit instructions too. */
        pCodeBuf[off++] = 0x9c;

        uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
        /* pop tmp */
        if (idxTmpReg >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
        /* and tmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF);
        /* Clear the status bits in EFLs. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
        /* OR in the flags we collected. */
        off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
        iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);

#elif defined(RT_ARCH_ARM64)
        /*
         * Calculate flags.
         */
        uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);

        /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be an AND immediate, so use idxTmpReg for the constant. */
        off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, ~X86_EFL_STATUS_BITS);
        off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxTmpReg);

        /* N,Z -> SF,ZF */
        if (cOpBits < 32)
            pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegResult, cOpBits > 8); /* sets NZ */
        else if (!fNativeFlags)
            pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, idxRegResult, idxRegResult, cOpBits > 32 /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 30);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
        AssertCompile(X86_EFL_ZF_BIT + 1 == X86_EFL_SF_BIT);
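        /* Editorial note (not in the original source): the LSR by 30 moves
           NZCV.Z (bit 30) to bit 0 and NZCV.N (bit 31) to bit 1, so the single
           two-bit BFI above lands them in EFLAGS.ZF and EFLAGS.SF at once;
           the AssertCompile guards that ZF and SF really are adjacent. */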

        /* Calculate 8-bit parity of the result. */
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
                                             4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
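        /* Worked example (editorial, not in the original source): for an 8-bit
           result 0x53 (0101 0011b, four bits set) the folding EORs give
           0x53^0x05=0x56, 0x56^0x15=0x43 and 0x43^0x21=0x62, leaving the XOR
           of all eight bits in bit 0 (here 0).  The final EOR with immediate 1
           inverts it, yielding PF=1 for an even number of set bits, matching
           x86 parity semantics. */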

        iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);
#else
# error "port me"
#endif
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
        off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
# endif
    }
    return off;
}


/**
 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
 *
 * It takes liveness stuff into account.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitEFlagsForArithmetic(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl, uint8_t idxRegEflIn
#ifndef RT_ARCH_AMD64
                                 , uint8_t cOpBits, uint8_t idxRegResult, uint8_t idxRegDstIn, uint8_t idxRegSrc
                                 , bool fInvertCarry, uint64_t uImmSrc
#endif
                                 )
{
#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
    /*
     * See if we can skip this wholesale.
     */
    PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
    if (IEMLIVENESS_STATE_ARE_STATUS_EFL_TO_BE_CLOBBERED(pLivenessEntry))
    {
        STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflArithmeticSkipped);
# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
        off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, X86_EFL_STATUS_BITS, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
# endif
    }
    else
#endif
    {
#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
        uint32_t fSkipped = 0;
#endif
#ifdef RT_ARCH_AMD64
        /*
         * Collect flags and merge them with eflags.
         */
        PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        /* pushf - do this before any reg allocations as they may emit instructions too. */
        pCodeBuf[off++] = 0x9c;

        uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
                                : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
        /* pop tmp */
        if (idxTmpReg >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
        /* Isolate the flags we want. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_STATUS_BITS);
        /* Clear the status bits in EFLs. */
        off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
        /* OR in the flags we collected. */
        off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
        if (idxRegEflIn != idxRegEfl)
            iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);

#elif defined(RT_ARCH_ARM64)
        /*
         * Calculate flags.
         */
        uint8_t const idxRegEfl  = idxRegEflIn != UINT8_MAX ? idxRegEflIn
                                 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
        uint8_t const idxTmpReg  = iemNativeRegAllocTmp(pReNative, &off);
        uint8_t const idxTmpReg2 = cOpBits >= 32 ? UINT8_MAX : iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);

        /* Invert CF (it is stored inverted on ARM for subtractions) and load the flags into the temporary register. */
        if (fInvertCarry)
            pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
        pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */

        if (cOpBits >= 32)
        {
            /* V -> OF */
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 28);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1, false /*f64Bit*/);

            /* C -> CF */
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 1);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_CF_BIT, 1, false /*f64Bit*/);
        }

        /* N,Z -> SF,ZF */
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits >= 32 ? 1 : 30);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);

        /* For ADC and SBB we have to calculate overflow and carry ourselves. */
        if (cOpBits < 32)
        {
            /* Since the carry flag is the zeroth EFLAGS bit, we just use BFXIL to copy it over. */
            AssertCompile(X86_EFL_CF_BIT == 0);
            pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxRegEfl, idxRegResult, cOpBits, 1, false /*f64Bit*/);

            /* The overflow flag is more work as we have to compare the signed bits for
               both inputs and the result. See IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.

               Formula: ~(a_uDst ^ a_uSrcOf) & (a_uResult ^ a_uDst)
               With a_uSrcOf as a_uSrc for additions and ~a_uSrc for subtractions.

               It is a bit simpler when the right (source) side is constant:
                 adc: S D R -> OF                      sbb: S D R -> OF
                      0 0 0 ->  0 \                         0 0 0 ->  0 \
                      0 0 1 ->  1  \                        0 0 1 ->  0  \
                      0 1 0 ->  0  / and not(D), R          0 1 0 ->  1  / and D, not(R)
                      0 1 1 ->  0 /                         0 1 1 ->  0 /
                      1 0 0 ->  0 \                         1 0 0 ->  0 \
                      1 0 1 ->  0  \ and D, not(R)          1 0 1 ->  1  \ and not(D), R
                      1 1 0 ->  1  /                        1 1 0 ->  0  /
                      1 1 1 ->  0 /                         1 1 1 ->  0 /   */
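            /* Worked example (editorial, not in the original source; 8-bit adc):
               uDst=0x50, uSrcOf=0x50, uResult=0xA1 (0x50 + 0x50 + CF=1).
               ~(0x50 ^ 0x50) & (0xA1 ^ 0x50) = 0xFF & 0xF1 = 0xF1; bit 7 is
               set, so OF=1: two positive inputs yielded a negative result. */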
            if (idxRegSrc != UINT8_MAX)
            {
                if (fInvertCarry) /* sbb: ~((a_uDst) ^ ~(a_uSrcOf)) -> (a_uDst) ^ (a_uSrcOf); HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false);
                else              /* adc: ~((a_uDst) ^ (a_uSrcOf)) -> (a_uDst) ^ ~(a_uSrcOf) */
                    pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegSrc, false);
                pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg2, idxRegDstIn, idxRegResult, false); /* (a_uDst) ^ (a_uResult) */
                pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpReg, idxTmpReg, idxTmpReg2, false /*f64Bit*/);
            }
            else if (uImmSrc & RT_BIT_32(cOpBits - 1))
            {
                if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
                else
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
            }
            else
            {
                if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
                else
                    pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
            }
            pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits - 1, false /*f64Bit*/);
            pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1);
            iemNativeRegFreeTmp(pReNative, idxTmpReg2);
        }

        /* Calculate 8-bit parity of the result. */
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
                                             4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
                                             1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
        Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);

        /* Calculate auxiliary carry/borrow.  This is related to 8-bit BCD.
           General formula: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
               S D R
               0 0 0 -> 0;  \
               0 0 1 -> 1;   \  regular
               0 1 0 -> 1;   /    xor R, D
               0 1 1 -> 0;  /
               1 0 0 -> 1;  \
               1 0 1 -> 0;   \  invert one of the two
               1 1 0 -> 0;   /    xor not(R), D
               1 1 1 -> 1;  /
           a_uSrc[bit 4]=0: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
           a_uSrc[bit 4]=1: ((uint32_t)~(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
         */
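        /* Worked example (editorial, not in the original source; 8-bit):
           0x0F + 0x01 = 0x10, and (0x10 ^ 0x01 ^ 0x0F) = 0x1E has bit 4
           (X86_EFL_AF) set: the addition carried out of the low nibble,
           which is exactly what AF tracks. */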

        if (idxRegSrc != UINT8_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false /*f64Bit*/);
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxRegResult, false /*f64Bit*/);
        }
        else if (uImmSrc & X86_EFL_AF)
            pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, X86_EFL_AF_BIT, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_AF_BIT, 1, false /*f64Bit*/);

        if (idxRegEflIn != idxRegEfl)
            iemNativeVarRegisterRelease(pReNative, idxVarEfl);
        iemNativeRegFreeTmp(pReNative, idxTmpReg);

#else
# error "port me"
#endif
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
        off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, fSkipped, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
#endif
    }
    return off;

}


/**
 * The AND instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_and_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized AND instruction to harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x22, 0x23, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, true /*fNativeFlags*/);
#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The AND instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_and_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized AND instruction to harvest the EFLAGS. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 4, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones, and of course
       the immediate variant when possible, to save a register load. */
    uint32_t uImmSizeLen, uImmRotations;
    if (   cOpBits > 32
        ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
        : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        if (cOpBits >= 32)
            pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
    }
    else
    {
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        if (cOpBits >= 32)
            pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, cOpBits >= 32 /*fNativeFlags*/);
    RT_NOREF_PV(cImmBits);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The TEST instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_test_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = idxVarSrc == idxVarDst ? idxRegDst /* special case of 'test samereg,samereg' */
                            : iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized TEST instruction to harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x84, 0x85, cOpBits, idxRegSrc, idxRegDst);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones.  We also need
       to keep the result in order to calculate the flags. */
    uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    else
        pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#else
# error "Port me"
#endif
    if (idxVarSrc != idxVarDst)
        iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
#else
    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
    iemNativeRegFreeTmp(pReNative, idxRegResult);
#endif
    return off;
}


/**
 * The TEST instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_test_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized TEST instruction to harvest the EFLAGS. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0xf6, 0xcc, 0xf7, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones, and of course
       the immediate variant when possible, to save a register load.  We also
       need to keep the result in order to calculate the flags. */
    uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    uint32_t uImmSizeLen, uImmRotations;
    if (   cOpBits > 32
        ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
        : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        if (cOpBits >= 32)
            pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
    }
    else
    {
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        if (cOpBits >= 32)
            pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);

    iemNativeRegFreeTmp(pReNative, idxRegResult);
    RT_NOREF_PV(cImmBits);

#else
# error "Port me"
#endif
    return off;
}


/**
 * The OR instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_or_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                         uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized OR instruction to harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x0a, 0x0b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit OR for the 8-bit and 16-bit ones. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The OR instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_or_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                         uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized OR instruction to harvest the EFLAGS. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 1, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit OR for the 8-bit and 16-bit ones, and of course
       the immediate variant when possible, to save a register load. */
    uint32_t uImmSizeLen, uImmRotations;
    if (   cOpBits > 32
        ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
        : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
    }
    else
    {
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
    RT_NOREF_PV(cImmBits);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The XOR instruction will clear OF, CF and AF (the latter is undefined) and
 * set the other flags according to the result.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_xor_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized XOR instruction to harvest the EFLAGS. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x32, 0x33, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit EOR for the 8-bit and 16-bit ones. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The XOR instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_xor_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized XOR instruction to harvest the EFLAGS. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 6, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use 32-bit EOR for the 8-bit and 16-bit ones, and of course
       the immediate variant when possible, to save a register load. */
    uint32_t uImmSizeLen, uImmRotations;
    if (   cOpBits > 32
        ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
        : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
    }
    else
    {
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
    RT_NOREF_PV(cImmBits);

#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}


/**
 * The ADD instruction will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_add_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
    off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
                                                  0x02, 0x03, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in
       order to calculate the right flags, even though we use ADDS and
       translate NZCV into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    if (cOpBits >= 32)
    {
        off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
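        /* Editorial note (not in the original source): with both operands
           placed in the top cOpBits bits of a 32-bit register, the 32-bit
           carry and signed overflow coincide with the cOpBits-wide ones, so
           ADDS reports correct C and V for 8/16-bit operations; the inputs
           and the result are shifted back down afterwards. */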
        uint32_t const cShift = 32 - cOpBits;
        pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
                                                true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    return off;
}


/**
 * The ADD instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_add_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the two input operands as well as the result in
       order to calculate the right flags, even though we use ADDS and
       translate NZCV into OF, CF, ZF and SF. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
    {
        if (uImmOp <= 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
        else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
                                                       true /*fSetFlags*/, true /*fShift12*/);
        else
        {
            uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
            pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
            pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
            iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
        }
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift = 32 - cOpBits;
        uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp << cShift);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
        pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegTmpImm, idxRegDstIn, false /*f64Bit*/, true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
        cOpBits = 32;
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    return off;
}


/**
 * The ADC instruction takes CF as input and will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_adc_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);

    /* Use the BT instruction to set CF according to idxRegEfl. */
    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x12, 0x13, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl and
       then ADCS for the calculation.  We need all inputs and the result for the
       two flags (AF, PF) that can't be directly derived from PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
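    /* Editorial note (not in the original source): RMIF rotates the source
       right by (X86_EFL_CF_BIT - 1) & 63 = 63, i.e. left by one, moving
       EFLAGS.CF (bit 0) up to bit 1, and the mask RT_BIT_32(1) then inserts
       that bit into PSTATE.C (NZCV nibble order: N=3, Z=2, C=1, V=0). */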
1001 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1002 if (cOpBits >= 32)
1003 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
1004 else
1005 {
1006 /* Since we're also adding in the carry flag here, shifting operands up
1007 doesn't work. So, we have to calculate carry & overflow manually. */
1008 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
1009 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1010 }
1011 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1012
1013 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1014 idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);
1015
1016 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1017 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1018 if (cOpBits < 32)
1019 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1020 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1021
1022#else
1023# error "port me"
1024#endif
1025 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1026 return off;
1027}
1028
1029
1030/**
1031 * The ADC instruction with immediate value as right operand.
1032 */
1033DECL_INLINE_THROW(uint32_t)
1034iemNativeEmit_adc_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1035 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1036{
1037 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1038 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1039
1040#ifdef RT_ARCH_AMD64
1041 /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
1042 with matching size to get the correct flags. */
1043 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1044
1045 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1046 pCodeBuf[off++] = X86_EFL_CF_BIT;
1047
1048 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 2, idxRegDst, uImmOp);
1049 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1050
1051 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1052
1053 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1054
1055#elif defined(RT_ARCH_ARM64)
1056 /* On ARM64 we use the RMIF instructions to load PSTATE.CF from idxRegEfl
1057 and then ADCS for the calculation. We need all inputs and result for
1058 the two flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
1059 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1060 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1061 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1062
1063 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1064 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1065 if (cOpBits >= 32)
1066 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
1067 else
1068 {
1069 /* Since we're also adding in the carry flag here, shifting operands up
1070 doesn't work. So, we have to calculate carry & overflow manually. */
1071 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
1072 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1073 }
1074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1075
1076 iemNativeRegFreeTmp(pReNative, idxRegImm);
1077
1078 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1079 idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);
1080
1081 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1082 if (cOpBits < 32)
1083 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1084 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1085 RT_NOREF(cImmBits);
1086
1087#else
1088# error "port me"
1089#endif
1090 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1091 return off;
1092}
1093
1094
1095/**
1096 * The SUB instruction will set all status flags.
1097 */
1098DECL_INLINE_THROW(uint32_t)
1099iemNativeEmit_sub_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1100 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1101{
1102 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1103 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1104
1105#ifdef RT_ARCH_AMD64
1106 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS.SF value. */
1107 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1108 0x2a, 0x2b, cOpBits, idxRegDst, idxRegSrc);
1109 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1110
1111 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1112 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1113
1114 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1115
1116#elif defined(RT_ARCH_ARM64)
1117 /* On ARM64 we'll need the two input operands as well as the result in order
1118 to calculate the right flags, even if we use SUBS and translates NZCV into
1119 OF, CF, ZF and SF. */
1120 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1121 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1122 if (cOpBits >= 32)
1123 {
1124 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1125 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1126 }
1127 else
1128 {
1129 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1130 uint32_t const cShift = 32 - cOpBits;
1131 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1132 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
1133 true /*fSetFlags*/, cShift);
1134 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1135 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1136 cOpBits = 32;
1137 }
1138 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1139
1140 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1141 idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);
1142
1143 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1144 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1145 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1146
1147#else
1148# error "port me"
1149#endif
1150 return off;
1151}
1152
1153
1154/**
1155 * The SUB instruction with immediate value as right operand.
1156 */
1157DECL_INLINE_THROW(uint32_t)
1158iemNativeEmit_sub_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1159 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1160{
1161 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1162
1163#ifdef RT_ARCH_AMD64
1164 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS.SF value. */
1165 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1166 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 5, idxRegDst, uImmOp);
1167 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1168
1169 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1170
1171 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1172
1173#elif defined(RT_ARCH_ARM64)
1174 /* On ARM64 we'll need the two input operands as well as the result in order
1175 to calculate the right flags, even if we use SUBS and translates NZCV into
1176 OF, CF, ZF and SF. */
1177 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1178 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1179 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1180 if (cOpBits >= 32)
1181 {
1182 if (uImmOp <= 0xfffU)
1183 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1184 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1185 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
1186 true /*fSetFlags*/, true /*fShift12*/);
1187 else
1188 {
1189 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1190 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1191 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1192 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1193 }
1194 }
1195 else
1196 {
1197 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1198 uint32_t const cShift = 32 - cOpBits;
1199 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1200 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1201 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1202 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
1203 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1204 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1205 cOpBits = 32;
1206 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1207 }
1208 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1209
1210 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1211 idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1212
1213 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1214 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1215 RT_NOREF(cImmBits);
1216
1217#else
1218# error "port me"
1219#endif
1220 return off;
1221}
1222
1223
1224/**
1225 * The CMP instruction will set all status flags, but modifies no registers.
1226 */
1227DECL_INLINE_THROW(uint32_t)
1228iemNativeEmit_cmp_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1229 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1230{
1231 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1232 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1233
1234#ifdef RT_ARCH_AMD64
1235 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS.SF value. */
1236 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1237 0x3a, 0x3b, cOpBits, idxRegDst, idxRegSrc);
1238 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1239
1240 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1241 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1242
1243 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1244
1245#elif defined(RT_ARCH_ARM64)
1246 /* On ARM64 we'll need the actual result as well as both input operands in order
1247 to calculate the right flags, even if we use SUBS and translates NZCV into
1248 OF, CF, ZF and SF. */
1249 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1250 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1251 if (cOpBits >= 32)
1252 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1253 else
1254 {
1255 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1256 uint32_t const cShift = 32 - cOpBits;
1257 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegResult, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1258 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegSrc, false /*f64Bit*/,
1259 true /*fSetFlags*/, cShift);
1260 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
1261 cOpBits = 32;
1262 }
1263 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1264
1265 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1266 idxRegDst, idxRegSrc, true /*fInvertCarry*/, 0);
1267
1268 iemNativeRegFreeTmp(pReNative, idxRegResult);
1269 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1270 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1271
1272#else
1273# error "port me"
1274#endif
1275 return off;
1276}
1277
1278
1279/**
1280 * The CMP instruction with immediate value as right operand.
1281 */
1282DECL_INLINE_THROW(uint32_t)
1283iemNativeEmit_cmp_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1284 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1285{
1286 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1287
1288#ifdef RT_ARCH_AMD64
1289 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS.SF value. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 7, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we'll need the actual result as well as both input operands in order
       to calculate the right flags, even though we use SUBS and translate NZCV into
       OF, CF, ZF and SF. */
    uint8_t const   idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR pCodeBuf     = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (cOpBits >= 32)
    {
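        /* ARM64 SUB (immediate) encodes only a 12-bit immediate, optionally shifted
           left by 12; anything else must first be loaded into a temporary register. */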
        if (uImmOp <= 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
        else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
                                                       true /*fSetFlags*/, true /*fShift12*/);
        else
        {
            uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
            pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
            pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
            iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
        }
    }
    else
    {
        /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
        uint32_t const cShift       = 32 - cOpBits;
        uint8_t const  idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
        pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegResult, idxRegDst, cShift, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
        pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
        cOpBits = 32;
        iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
                                           idxRegDst, UINT8_MAX, true /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegResult);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    return off;
}


/**
 * The SBB instruction takes CF as input and will set all status flags.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sbb_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
       with matching size to get the correct flags. */
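    /* BT r/m32, imm8 (0F BA /4 ib) copies the selected bit, i.e. X86_EFL_CF_BIT of
       the guest EFLAGS value, into the host CF without modifying the operand. */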
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);

    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x1a, 0x1b, cOpBits, idxRegDst, idxRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
       idxRegEfl and then SBCS for the calculation.  We need all inputs and
       result for the two flags (AF,PF) that can't be directly derived from
       PSTATE.NZCV. */
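    /* RMIF rotates idxRegEfl right by 63 (i.e. left by one), moving the x86 CF from
       bit 0 into bit 1, which the mask RT_BIT_32(1) selects as PSTATE.C.  CFINV then
       inverts it, since AArch64 represents a subtraction borrow as C=0. */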
    uint8_t const         idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    PIEMNATIVEINSTR const pCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 5);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also adding in the carry flag here, shifting operands up
           doesn't work. So, we have to calculate carry & overflow manually. */
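        /* SETF8/SETF16 fix up N and Z for the narrow result; the x86 CF and OF are
           computed later by iemNativeEmitEFlagsForArithmetic from the saved inputs. */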
        pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
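    /* For 8- and 16-bit operands the SBC above produced a full 32-bit result, so
       mask it back down to the operand width before returning the register. */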
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}


/**
 * The SBB instruction with immediate value as right operand.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_sbb_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                          uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
{
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
    uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
       with matching size to get the correct flags. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);

    off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
    pCodeBuf[off++] = X86_EFL_CF_BIT;

    off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 3, idxRegDst, uImmOp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeVarRegisterRelease(pReNative, idxVarDst);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);

#elif defined(RT_ARCH_ARM64)
    /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
       idxRegEfl and then SBCS for the calculation.  We need all inputs and
       result for the two flags (AF,PF) that can't be directly derived from
       PSTATE.NZCV. */
    uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
    uint8_t const idxRegImm   = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
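    /* SBCS has no immediate form, so the immediate operand must be materialized
       in a temporary register for the subtraction below. */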
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);

    pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
    pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
    if (cOpBits >= 32)
        pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
    else
    {
        /* Since we're also adding in the carry flag here, shifting operands up
           doesn't work. So, we have to calculate carry & overflow manually. */
        pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't. */
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    iemNativeRegFreeTmp(pReNative, idxRegImm);

    off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
                                           idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);

    iemNativeRegFreeTmp(pReNative, idxRegDstIn);
    if (cOpBits < 32)
        off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    RT_NOREF(cImmBits);

#else
# error "port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarEfl);
    return off;
}

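/*
 * The following emitters have not been implemented natively yet.  They hit
 * AssertFailed() and emit a breakpoint (marker 0x666) so any unexpected use
 * is caught immediately at runtime.
 */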
DECL_INLINE_THROW(uint32_t)
iemNativeEmit_imul_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                           uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_popcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                             uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_tzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                            uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


DECL_INLINE_THROW(uint32_t)
iemNativeEmit_lzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                            uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
{
    RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
    AssertFailed();
    return iemNativeEmitBrk(pReNative, off, 0x666);
}


#endif /* !VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h */