IEMAllN8veEmit-x86.h@ 104030

Last change on this file since 104030 was 104029, checked in by vboxsync, 11 months ago
VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h: Build fix, bugref:10391
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 69.2 KB

Line
1	/* $Id: IEMAllN8veEmit-x86.h 104029 2024-03-24 18:37:07Z vboxsync $ */
2	/** @file
3	* IEM - Native Recompiler, x86 Target - Code Emitters.
4	*/
5
6	/*
7	* Copyright (C) 2023-2024 Oracle and/or its affiliates.
8	*
9	* This file is part of VirtualBox base platform packages, as
10	* available from https://www.virtualbox.org.
11	*
12	* This program is free software; you can redistribute it and/or
13	* modify it under the terms of the GNU General Public License
14	* as published by the Free Software Foundation, in version 3 of the
15	* License.
16	*
17	* This program is distributed in the hope that it will be useful, but
18	* WITHOUT ANY WARRANTY; without even the implied warranty of
19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	* General Public License for more details.
21	*
22	* You should have received a copy of the GNU General Public License
23	* along with this program; if not, see <https://www.gnu.org/licenses>.
24	*
25	* SPDX-License-Identifier: GPL-3.0-only
26	*/
27
28	#ifndef VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
29	#define VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
30	#ifndef RT_WITHOUT_PRAGMA_ONCE
31	# pragma once
32	#endif
33
34
35	#ifdef RT_ARCH_AMD64
36
37	/**
38	* Emits an ModR/M instruction with one opcode byte and only register operands.
39	*/
40	DECL_FORCE_INLINE(uint32_t)
41	iemNativeEmitAmd64OneByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOther,
42	uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
43	{
44	Assert(idxRegReg < 16); Assert(idxRegRm < 16);
45	switch (cOpBits)
46	{
47	case 16:
48	pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
49	RT_FALL_THRU();
50	case 32:
51	if (idxRegReg >= 8 \|\| idxRegRm >= 8)
52	pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) \| (idxRegRm >= 8 ? X86_OP_REX_B : 0);
53	pCodeBuf[off++] = bOpcodeOther;
54	break;
55
56	default: AssertFailed(); RT_FALL_THRU();
57	case 64:
58	pCodeBuf[off++] = X86_OP_REX_W \| (idxRegReg >= 8 ? X86_OP_REX_R : 0) \| (idxRegRm >= 8 ? X86_OP_REX_B : 0);
59	pCodeBuf[off++] = bOpcodeOther;
60	break;
61
62	case 8:
63	if (idxRegReg >= 8 \|\| idxRegRm >= 8)
64	pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) \| (idxRegRm >= 8 ? X86_OP_REX_B : 0);
65	else if (idxRegReg >= 4 \|\| idxRegRm >= 4)
66	pCodeBuf[off++] = X86_OP_REX;
67	pCodeBuf[off++] = bOpcode8;
68	break;
69	}
70	pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
71	return off;
72	}
73
74
75	/**
76	* Emits an ModR/M instruction with two opcode bytes and only register operands.
77	*/
78	DECL_FORCE_INLINE(uint32_t)
79	iemNativeEmitAmd64TwoByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
80	uint8_t bOpcode0, uint8_t bOpcode8, uint8_t bOpcodeOther,
81	uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
82	{
83	Assert(idxRegReg < 16); Assert(idxRegRm < 16);
84	switch (cOpBits)
85	{
86	case 16:
87	pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
88	RT_FALL_THRU();
89	case 32:
90	if (idxRegReg >= 8 \|\| idxRegRm >= 8)
91	pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) \| (idxRegRm >= 8 ? X86_OP_REX_B : 0);
92	pCodeBuf[off++] = bOpcode0;
93	pCodeBuf[off++] = bOpcodeOther;
94	break;
95
96	default: AssertFailed(); RT_FALL_THRU();
97	case 64:
98	pCodeBuf[off++] = X86_OP_REX_W \| (idxRegReg >= 8 ? X86_OP_REX_R : 0) \| (idxRegRm >= 8 ? X86_OP_REX_B : 0);
99	pCodeBuf[off++] = bOpcode0;
100	pCodeBuf[off++] = bOpcodeOther;
101	break;
102
103	case 8:
104	if (idxRegReg >= 8 \|\| idxRegRm >= 8)
105	pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) \| (idxRegRm >= 8 ? X86_OP_REX_B : 0);
106	else if (idxRegReg >= 4 \|\| idxRegRm >= 4)
107	pCodeBuf[off++] = X86_OP_REX;
108	pCodeBuf[off++] = bOpcode0;
109	pCodeBuf[off++] = bOpcode8;
110	break;
111	}
112	pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
113	return off;
114	}
115
116
117	/**
118	* Emits one of three opcodes with an immediate.
119	*
120	* These are expected to be a /idxRegReg form.
121	*/
122	DECL_FORCE_INLINE(uint32_t)
123	iemNativeEmitAmd64OneByteModRmInstrRIEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOtherImm8,
124	uint8_t bOpcodeOther, uint8_t cOpBits, uint8_t cImmBits, uint8_t idxRegReg,
125	uint8_t idxRegRm, uint64_t uImmOp)
126	{
127	Assert(idxRegReg < 8); Assert(idxRegRm < 16);
128	if ( cImmBits == 8
129	\|\| (uImmOp <= (uint64_t)0x7f && bOpcodeOtherImm8 != 0xcc))
130	{
131	switch (cOpBits)
132	{
133	case 16:
134	pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
135	RT_FALL_THRU();
136	case 32:
137	if (idxRegRm >= 8)
138	pCodeBuf[off++] = X86_OP_REX_B;
139	pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
140	break;
141
142	default: AssertFailed(); RT_FALL_THRU();
143	case 64:
144	pCodeBuf[off++] = X86_OP_REX_W \| (idxRegRm >= 8 ? X86_OP_REX_B : 0);
145	pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
146	break;
147
148	case 8:
149	if (idxRegRm >= 8)
150	pCodeBuf[off++] = X86_OP_REX_B;
151	else if (idxRegRm >= 4)
152	pCodeBuf[off++] = X86_OP_REX;
153	pCodeBuf[off++] = bOpcode8; Assert(bOpcode8 != 0xcc);
154	break;
155	}
156	pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
157	pCodeBuf[off++] = (uint8_t)uImmOp;
158	}
159	else
160	{
161	switch (cOpBits)
162	{
163	case 32:
164	if (idxRegRm >= 8)
165	pCodeBuf[off++] = X86_OP_REX_B;
166	break;
167
168	default: AssertFailed(); RT_FALL_THRU();
169	case 64:
170	pCodeBuf[off++] = X86_OP_REX_W \| (idxRegRm >= 8 ? X86_OP_REX_B : 0);
171	break;
172
173	case 16:
174	pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
175	if (idxRegRm >= 8)
176	pCodeBuf[off++] = X86_OP_REX_B;
177	pCodeBuf[off++] = bOpcodeOther;
178	pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
179	pCodeBuf[off++] = RT_BYTE1(uImmOp);
180	pCodeBuf[off++] = RT_BYTE2(uImmOp);
181	Assert(cImmBits == 16);
182	return off;
183	}
184	pCodeBuf[off++] = bOpcodeOther;
185	pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
186	pCodeBuf[off++] = RT_BYTE1(uImmOp);
187	pCodeBuf[off++] = RT_BYTE2(uImmOp);
188	pCodeBuf[off++] = RT_BYTE3(uImmOp);
189	pCodeBuf[off++] = RT_BYTE4(uImmOp);
190	Assert(cImmBits == 32);
191	}
192	return off;
193	}
194
195	#endif /* RT_ARCH_AMD64 */
196
197	/**
198	* This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGICAL.
199	*
200	* It takes liveness stuff into account.
201	*/
202	DECL_INLINE_THROW(uint32_t)
203	iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl
204	#ifndef RT_ARCH_AMD64
205	, uint8_t cOpBits, uint8_t idxRegResult, bool fNativeFlags = false
206	#endif
207	)
208	{
209	#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
210	/*
211	* See if we can skip this wholesale.
212	*/
213	PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
214	if (IEMLIVENESS_STATE_ARE_STATUS_EFL_TO_BE_CLOBBERED(pLivenessEntry))
215	{
216	STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflSkippedLogical);
217	# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
218	off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, X86_EFL_STATUS_BITS, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
219	# endif
220	}
221	else
222	#endif
223	{
224	#ifdef RT_ARCH_AMD64
225	/*
226	* Collect flags and merge them with eflags.
227	*/
228	/** @todo we could alternatively use SAHF here when host rax is free since,
229	* OF is cleared. */
230	PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
231	/* pushf - do this before any reg allocations as they may emit instructions too. */
232	pCodeBuf[off++] = 0x9c;
233
234	uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /fInitialized/);
235	uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
236	pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
237	/* pop tmp */
238	if (idxTmpReg >= 8)
239	pCodeBuf[off++] = X86_OP_REX_B;
240	pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
241	/* and tmp, X86_EFL_PF \| X86_EFL_ZF \| X86_EFL_SF */
242	off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_PF \| X86_EFL_ZF \| X86_EFL_SF);
243	/* Clear the status bits in EFLs. */
244	off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
245	/* OR in the flags we collected. */
246	off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
247	iemNativeVarRegisterRelease(pReNative, idxVarEfl);
248	iemNativeRegFreeTmp(pReNative, idxTmpReg);
249
250	#elif defined(RT_ARCH_ARM64)
251	/*
252	* Calculate flags.
253	*/
254	uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /fInitialized/);
255	uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
256	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
257
258	/* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be AND immediate, so use idxTmpReg for constant. */
259	off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, ~X86_EFL_STATUS_BITS);
260	off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxTmpReg);
261
262	/* N,Z -> SF,ZF */
263	if (cOpBits < 32)
264	pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegResult, cOpBits > 8); /* sets NZ */
265	else if (!fNativeFlags)
266	pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, idxRegResult, idxRegResult, cOpBits > 32 /f64Bit/);
267	pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
268	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 30);
269	pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /f64Bit/);
270	AssertCompile(X86_EFL_ZF_BIT + 1 == X86_EFL_SF_BIT);
271
272	/* Calculate 8-bit parity of the result. */
273	pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /f64Bit/,
274	4 /offShift6/, kArmv8A64InstrShift_Lsr);
275	pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /f64Bit/,
276	2 /offShift6/, kArmv8A64InstrShift_Lsr);
277	pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /f64Bit/,
278	1 /offShift6/, kArmv8A64InstrShift_Lsr);
279	Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
280	pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /f64Bit/);
281	pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /f64Bit/);
282
283	iemNativeVarRegisterRelease(pReNative, idxVarEfl);
284	iemNativeRegFreeTmp(pReNative, idxTmpReg);
285	#else
286	# error "port me"
287	#endif
288	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
289
290	# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
291	off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
292	# endif
293	}
294	return off;
295	}
296
297
298	/**
299	* This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
300	*
301	* It takes liveness stuff into account.
302	*/
303	DECL_FORCE_INLINE_THROW(uint32_t)
304	iemNativeEmitEFlagsForArithmetic(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl, uint8_t idxRegEflIn
305	#ifndef RT_ARCH_AMD64
306	, uint8_t cOpBits, uint8_t idxRegResult, uint8_t idxRegDstIn, uint8_t idxRegSrc
307	, bool fInvertCarry, uint64_t uImmSrc
308	#endif
309	)
310	{
311	#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
312	/*
313	* See if we can skip this wholesale.
314	*/
315	PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
316	if (IEMLIVENESS_STATE_ARE_STATUS_EFL_TO_BE_CLOBBERED(pLivenessEntry))
317	{
318	STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflSkippedArithmetic);
319	# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
320	off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, X86_EFL_STATUS_BITS, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
321	# endif
322	}
323	else
324	#endif
325	{
326	#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
327	uint32_t fSkipped = 0;
328	#endif
329	#ifdef RT_ARCH_AMD64
330	/*
331	* Collect flags and merge them with eflags.
332	*/
333	PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
334	/* pushf - do this before any reg allocations as they may emit instructions too. */
335	pCodeBuf[off++] = 0x9c;
336
337	uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
338	: iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /fInitialized/);
339	uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
340	pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
341	/* pop tmp */
342	if (idxTmpReg >= 8)
343	pCodeBuf[off++] = X86_OP_REX_B;
344	pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
345	/* Isolate the flags we want. */
346	off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_STATUS_BITS);
347	/* Clear the status bits in EFLs. */
348	off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
349	/* OR in the flags we collected. */
350	off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
351	if (idxRegEflIn != idxRegEfl)
352	iemNativeVarRegisterRelease(pReNative, idxVarEfl);
353	iemNativeRegFreeTmp(pReNative, idxTmpReg);
354
355	#elif defined(RT_ARCH_ARM64)
356	/*
357	* Calculate flags.
358	*/
359	uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
360	: iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /fInitialized/);
361	uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
362	uint8_t const idxTmpReg2 = cOpBits >= 32 ? UINT8_MAX : iemNativeRegAllocTmp(pReNative, &off);
363	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
364
365	/* Invert CF (stored inved on ARM) and load the flags into the temporary register. */
366	if (fInvertCarry)
367	pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
368	pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
369
370	if (cOpBits >= 32)
371	{
372	/* V -> OF */
373	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 28);
374	pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1, false /f64Bit/);
375
376	/* C -> CF */
377	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 1);
378	pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_CF_BIT, 1, false /f64Bit/);
379	}
380
381	/* N,Z -> SF,ZF */
382	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits >= 32 ? 1 : 30);
383	pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /f64Bit/);
384
385	/* For ADC and SBB we have to calculate overflow and carry our selves. */
386	if (cOpBits < 32)
387	{
388	/* Since the carry flag is the zero'th flag, we just use BFXIL got copy it over. */
389	AssertCompile(X86_EFL_CF_BIT == 0);
390	pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxRegEfl, idxRegResult, cOpBits, 1, false /f64Bit/);
391
392	/* The overflow flag is more work as we have to compare the signed bits for
393	both inputs and the result. See IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
394
395	Formula: ~(a_uDst ^ a_uSrcOf) & (a_uResult ^ a_uDst)
396	With a_uSrcOf as a_uSrc for additions and ~a_uSrc for subtractions.
397
398	It is a bit simpler when the right (source) side is constant:
399	adc: S D R -> OF sbb: S D R -> OF
400	0 0 0 -> 0 \ 0 0 0 -> 0 \
401	0 0 1 -> 1 \ 0 0 1 -> 0 \
402	0 1 0 -> 0 / and not(D), R 0 1 0 -> 1 / and D, not(R)
403	0 1 1 -> 0 / 0 1 1 -> 0 /
404	1 0 0 -> 0 \ 1 0 0 -> 0 \
405	1 0 1 -> 0 \ and D, not(R) 1 0 1 -> 1 \ and not(D), R
406	1 1 0 -> 1 / 1 1 0 -> 0 /
407	1 1 1 -> 0 / 1 1 1 -> 0 / */
408	if (idxRegSrc != UINT8_MAX)
409	{
410	if (fInvertCarry) /* sbb: ~((a_uDst) ^ ~(a_uSrcOf)) -> (a_uDst) ^ (a_uSrcOf); HACK ALERT: fInvertCarry == sbb */
411	pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false);
412	else /* adc: ~((a_uDst) ^ (a_uSrcOf)) -> (a_uDst) ^ ~(a_uSrcOf) */
413	pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegSrc, false);
414	pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg2, idxRegDstIn, idxRegResult, false); /* (a_uDst) ^ (a_uResult) */
415	pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpReg, idxTmpReg, idxTmpReg2, false /f64Bit/);
416	}
417	else if (uImmSrc & RT_BIT_32(cOpBits - 1))
418	{
419	if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
420	pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
421	else
422	pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
423	}
424	else
425	{
426	if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
427	pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
428	else
429	pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
430	}
431	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits - 1, false /f64Bit/);
432	pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1);
433	iemNativeRegFreeTmp(pReNative, idxTmpReg2);
434	}
435
436	/* Calculate 8-bit parity of the result. */
437	pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /f64Bit/,
438	4 /offShift6/, kArmv8A64InstrShift_Lsr);
439	pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /f64Bit/,
440	2 /offShift6/, kArmv8A64InstrShift_Lsr);
441	pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /f64Bit/,
442	1 /offShift6/, kArmv8A64InstrShift_Lsr);
443	Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
444	pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /f64Bit/);
445	pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /f64Bit/);
446
447	/* Calculate auxilary carry/borrow. This is related to 8-bit BCD.
448	General formula: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
449	S D R
450	0 0 0 -> 0; \
451	0 0 1 -> 1; \ regular
452	0 1 0 -> 1; / xor R, D
453	0 1 1 -> 0; /
454	1 0 0 -> 1; \
455	1 0 1 -> 0; \ invert one of the two
456	1 1 0 -> 0; / xor not(R), D
457	1 1 1 -> 1; /
458	a_uSrc[bit 4]=0: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
459	a_uSrc[bit 4]=1: ((uint32_t)~(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
460	*/
461
462	if (idxRegSrc != UINT8_MAX)
463	{
464	pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false /f64Bit/);
465	pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxRegResult, false /f64Bit/);
466	}
467	else if (uImmSrc & X86_EFL_AF)
468	pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegResult, false /f64Bit/);
469	else
470	pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegResult, false /f64Bit/);
471	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, X86_EFL_AF_BIT, false /f64Bit/);
472	pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_AF_BIT, 1, false /f64Bit/);
473
474	if (idxRegEflIn != idxRegEfl)
475	iemNativeVarRegisterRelease(pReNative, idxVarEfl);
476	iemNativeRegFreeTmp(pReNative, idxTmpReg);
477
478	#else
479	# error "port me"
480	#endif
481	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
482
483	#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
484	off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, fSkipped, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
485	#endif
486	}
487	return off;
488
489	}
490
491
492	/**
493	* The AND instruction will clear OF, CF and AF (latter is undefined) and
494	* set the other flags according to the result.
495	*/
496	DECL_INLINE_THROW(uint32_t)
497	iemNativeEmit_and_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
498	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
499	{
500	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
501	uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /fInitialized/);
502	#ifdef RT_ARCH_AMD64
503	/* On AMD64 we just use the correctly sized AND instruction harvest the EFLAGS. */
504	off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
505	0x22, 0x23, cOpBits, idxRegDst, idxRegSrc);
506	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
507	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
508
509	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
510
511	#elif defined(RT_ARCH_ARM64)
512	/* On ARM64 we use 32-bit AND for the 8-bit and 16-bit bit ones. */
513	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
514	pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /f64Bit/);
515	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
516	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
517
518	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, true /fNativeFlags/);
519	#else
520	# error "Port me"
521	#endif
522	iemNativeVarRegisterRelease(pReNative, idxVarDst);
523	return off;
524	}
525
526
527	/**
528	* The AND instruction with immediate value as right operand.
529	*/
530	DECL_INLINE_THROW(uint32_t)
531	iemNativeEmit_and_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
532	uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
533	{
534	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
535	#ifdef RT_ARCH_AMD64
536	/* On AMD64 we just use the correctly sized AND instruction harvest the EFLAGS. */
537	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
538	off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 4, idxRegDst, uImmOp);
539	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
540
541	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
542
543	#elif defined(RT_ARCH_ARM64)
544	/* On ARM64 we use 32-bit AND for the 8-bit and 16-bit bit ones, and of
545	course the immediate variant when possible to save a register load. */
546	uint32_t uImmSizeLen, uImmRotations;
547	if ( cOpBits > 32
548	? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
549	: Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
550	{
551	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
552	if (cOpBits >= 32)
553	pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /f64Bit/);
554	else
555	pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /f64Bit/);
556	}
557	else
558	{
559	uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
560	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
561	if (cOpBits >= 32)
562	pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /f64Bit/);
563	else
564	pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /f64Bit/);
565	iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
566	}
567	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
568
569	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, cOpBits >= 32 /fNativeFlags/);
570	RT_NOREF_PV(cImmBits);
571
572	#else
573	# error "Port me"
574	#endif
575	iemNativeVarRegisterRelease(pReNative, idxVarDst);
576	return off;
577	}
578
579
580	/**
581	* The TEST instruction will clear OF, CF and AF (latter is undefined) and
582	* set the other flags according to the result.
583	*/
584	DECL_INLINE_THROW(uint32_t)
585	iemNativeEmit_test_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
586	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
587	{
588	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
589	uint8_t const idxRegSrc = idxVarSrc == idxVarDst ? idxRegDst /* special case of 'test samereg,samereg' */
590	: iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /fInitialized/);
591	#ifdef RT_ARCH_AMD64
592	/* On AMD64 we just use the correctly sized TEST instruction harvest the EFLAGS. */
593	off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
594	0x84, 0x85, cOpBits, idxRegSrc, idxRegDst);
595	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
596
597	#elif defined(RT_ARCH_ARM64)
598	/* On ARM64 we use 32-bit AND for the 8-bit and 16-bit bit ones. We also
599	need to keep the result in order to calculate the flags. */
600	uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
601	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
602	if (cOpBits >= 32)
603	pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /f64Bit/);
604	else
605	pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /f64Bit/);
606	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
607
608	#else
609	# error "Port me"
610	#endif
611	if (idxVarSrc != idxVarDst)
612	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
613	iemNativeVarRegisterRelease(pReNative, idxVarDst);
614
615	#ifdef RT_ARCH_AMD64
616	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
617	#else
618	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /fNativeFlags/);
619	iemNativeRegFreeTmp(pReNative, idxRegResult);
620	#endif
621	return off;
622	}
623
624
625	/**
626	* The TEST instruction with immediate value as right operand.
627	*/
628	DECL_INLINE_THROW(uint32_t)
629	iemNativeEmit_test_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
630	uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
631	{
632	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
633	#ifdef RT_ARCH_AMD64
634	/* On AMD64 we just use the correctly sized AND instruction harvest the EFLAGS. */
635	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
636	off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0xf6, 0xcc, 0xf7, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
637	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
638	iemNativeVarRegisterRelease(pReNative, idxVarDst);
639
640	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
641
642	#elif defined(RT_ARCH_ARM64)
643	/* On ARM64 we use 32-bit AND for the 8-bit and 16-bit bit ones, and of
644	course the immediate variant when possible to save a register load.
645	We also need to keep the result in order to calculate the flags. */
646	uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
647	uint32_t uImmSizeLen, uImmRotations;
648	if ( cOpBits > 32
649	? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
650	: Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
651	{
652	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
653	if (cOpBits >= 32)
654	pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /f64Bit/);
655	else
656	pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /f64Bit/);
657	}
658	else
659	{
660	uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
661	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
662	if (cOpBits >= 32)
663	pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /f64Bit/);
664	else
665	pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /f64Bit/);
666	iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
667	}
668	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
669	iemNativeVarRegisterRelease(pReNative, idxVarDst);
670
671	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /fNativeFlags/);
672
673	iemNativeRegFreeTmp(pReNative, idxRegResult);
674	RT_NOREF_PV(cImmBits);
675
676	#else
677	# error "Port me"
678	#endif
679	return off;
680	}
681
682
683	/**
684	* The OR instruction will clear OF, CF and AF (latter is undefined) and
685	* set the other flags according to the result.
686	*/
687	DECL_INLINE_THROW(uint32_t)
688	iemNativeEmit_or_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
689	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
690	{
691	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
692	uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /fInitialized/);
693	#ifdef RT_ARCH_AMD64
694	/* On AMD64 we just use the correctly sized OR instruction harvest the EFLAGS. */
695	off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
696	0x0a, 0x0b, cOpBits, idxRegDst, idxRegSrc);
697	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
698	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
699
700	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
701
702	#elif defined(RT_ARCH_ARM64)
703	/* On ARM64 we use 32-bit OR for the 8-bit and 16-bit bit ones. */
704	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
705	pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /f64Bit/);
706	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
707	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
708
709	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
710
711	#else
712	# error "Port me"
713	#endif
714	iemNativeVarRegisterRelease(pReNative, idxVarDst);
715	return off;
716	}
717
718
719	/**
720	* The OR instruction with immediate value as right operand.
721	*/
722	DECL_INLINE_THROW(uint32_t)
723	iemNativeEmit_or_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
724	uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
725	{
726	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
727	#ifdef RT_ARCH_AMD64
728	/* On AMD64 we just use the correctly sized OR instruction harvest the EFLAGS. */
729	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
730	off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 1, idxRegDst, uImmOp);
731	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
732
733	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
734
735	#elif defined(RT_ARCH_ARM64)
736	/* On ARM64 we use 32-bit OR for the 8-bit and 16-bit bit ones, and of
737	course the immediate variant when possible to save a register load. */
738	uint32_t uImmSizeLen, uImmRotations;
739	if ( cOpBits > 32
740	? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
741	: Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
742	{
743	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
744	pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /f64Bit/);
745	}
746	else
747	{
748	uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
749	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
750	pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /f64Bit/);
751	iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
752	}
753	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
754
755	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
756	RT_NOREF_PV(cImmBits);
757
758	#else
759	# error "Port me"
760	#endif
761	iemNativeVarRegisterRelease(pReNative, idxVarDst);
762	return off;
763	}
764
765
766	/**
767	* The XOR instruction will clear OF, CF and AF (latter is undefined) and
768	* set the other flags according to the result.
769	*/
770	DECL_INLINE_THROW(uint32_t)
771	iemNativeEmit_xor_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
772	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
773	{
774	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
775	uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /fInitialized/);
776	#ifdef RT_ARCH_AMD64
777	/* On AMD64 we just use the correctly sized OR instruction harvest the EFLAGS. */
778	off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
779	0x32, 0x33, cOpBits, idxRegDst, idxRegSrc);
780	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
781	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
782
783	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
784
785	#elif defined(RT_ARCH_ARM64)
786	/* On ARM64 we use 32-bit OR for the 8-bit and 16-bit bit ones. */
787	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
788	pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /f64Bit/);
789	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
790	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
791
792	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
793
794	#else
795	# error "Port me"
796	#endif
797	iemNativeVarRegisterRelease(pReNative, idxVarDst);
798	return off;
799	}
800
801
802	/**
803	* The XOR instruction with immediate value as right operand.
804	*/
805	DECL_INLINE_THROW(uint32_t)
806	iemNativeEmit_xor_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
807	uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
808	{
809	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
810	#ifdef RT_ARCH_AMD64
811	/* On AMD64 we just use the correctly sized XOR instruction harvest the EFLAGS. */
812	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
813	off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 6, idxRegDst, uImmOp);
814	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
815
816	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
817
818	#elif defined(RT_ARCH_ARM64)
819	/* On ARM64 we use 32-bit OR for the 8-bit and 16-bit bit ones, and of
820	course the immediate variant when possible to save a register load. */
821	uint32_t uImmSizeLen, uImmRotations;
822	if ( cOpBits > 32
823	? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
824	: Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
825	{
826	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
827	pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /f64Bit/);
828	}
829	else
830	{
831	uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
832	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
833	pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /f64Bit/);
834	iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
835	}
836	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
837
838	off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
839	RT_NOREF_PV(cImmBits);
840
841	#else
842	# error "Port me"
843	#endif
844	iemNativeVarRegisterRelease(pReNative, idxVarDst);
845	return off;
846	}
847
848
849	/**
850	* The ADD instruction will set all status flags.
851	*/
852	DECL_INLINE_THROW(uint32_t)
853	iemNativeEmit_add_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
854	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
855	{
856	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
857	uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /fInitialized/);
858
859	#ifdef RT_ARCH_AMD64
860	/* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS.SF value. */
861	off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
862	0x02, 0x03, cOpBits, idxRegDst, idxRegSrc);
863	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
864
865	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
866	iemNativeVarRegisterRelease(pReNative, idxVarDst);
867
868	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
869
870	#elif defined(RT_ARCH_ARM64)
871	/* On ARM64 we'll need the two input operands as well as the result in order
872	to calculate the right flags, even if we use ADDS and translates NZCV into
873	OF, CF, ZF and SF. */
874	uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
875	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
876	if (cOpBits >= 32)
877	{
878	off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
879	pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /f64Bit/, true /fSetFlags/);
880	}
881	else
882	{
883	/* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
884	uint32_t const cShift = 32 - cOpBits;
885	pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /f64Bit/, cShift);
886	pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDstIn, idxRegSrc, false /f64Bit/,
887	true /fSetFlags/, cShift);
888	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /f64Bit/);
889	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /f64Bit/);
890	cOpBits = 32;
891	}
892	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
893
894	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
895	idxRegDstIn, idxRegSrc, false /fInvertCarry/, 0);
896
897	iemNativeRegFreeTmp(pReNative, idxRegDstIn);
898	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
899	iemNativeVarRegisterRelease(pReNative, idxVarDst);
900
901	#else
902	# error "port me"
903	#endif
904	return off;
905	}
906
907
908	/**
909	* The ADD instruction with immediate value as right operand.
910	*/
911	DECL_INLINE_THROW(uint32_t)
912	iemNativeEmit_add_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
913	uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
914	{
915	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
916
917	#ifdef RT_ARCH_AMD64
918	/* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS.SF value. */
919	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
920	off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
921	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
922
923	iemNativeVarRegisterRelease(pReNative, idxVarDst);
924
925	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
926
927	#elif defined(RT_ARCH_ARM64)
928	/* On ARM64 we'll need the two input operands as well as the result in order
929	to calculate the right flags, even if we use ADDS and translates NZCV into
930	OF, CF, ZF and SF. */
931	uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
932	PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
933	off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
934	if (cOpBits >= 32)
935	{
936	if (uImmOp <= 0xfffU)
937	pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /f64Bit/, true /fSetFlags/);
938	else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
939	pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /f64Bit/,
940	true /fSetFlags/, true /fShift12/);
941	else
942	{
943	uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
944	pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
945	pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /f64Bit/, true /fSetFlags/);
946	iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
947	}
948	}
949	else
950	{
951	/* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
952	uint32_t const cShift = 32 - cOpBits;
953	uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp << cShift);
954	pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
955	pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegTmpImm, idxRegDstIn, false /f64Bit/, true /fSetFlags/, cShift);
956	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /f64Bit/);
957	cOpBits = 32;
958	iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
959	}
960	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
961
962	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
963	idxRegDstIn, UINT8_MAX, false /fInvertCarry/, uImmOp);
964
965	iemNativeRegFreeTmp(pReNative, idxRegDstIn);
966	iemNativeVarRegisterRelease(pReNative, idxVarDst);
967	RT_NOREF(cImmBits);
968
969	#else
970	# error "port me"
971	#endif
972	return off;
973	}
974
975
976	/**
977	* The ADC instruction takes CF as input and will set all status flags.
978	*/
979	DECL_INLINE_THROW(uint32_t)
980	iemNativeEmit_adc_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
981	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
982	{
983	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
984	uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /fInitialized/);
985	uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /fInitialized/);
986
987	#ifdef RT_ARCH_AMD64
988	/* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
989	with matching size to get the correct flags. */
990	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
991
992	/* Use the BT instruction to set CF according to idxRegEfl. */
993	off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /cOpBits/, 4, idxRegEfl);
994	pCodeBuf[off++] = X86_EFL_CF_BIT;
995
996	off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x12, 0x13, cOpBits, idxRegDst, idxRegSrc);
997	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
998
999	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1000	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1001
1002	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1003
1004	#elif defined(RT_ARCH_ARM64)
1005	/* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl and
1006	then ADCS for the calculation. We need all inputs and result for the two
1007	flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
1008	uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1009	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1010
1011	pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /fMask=C/);
1012	off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1013	if (cOpBits >= 32)
1014	pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /f64Bit/);
1015	else
1016	{
1017	/* Since we're also adding in the carry flag here, shifting operands up
1018	doesn't work. So, we have to calculate carry & overflow manually. */
1019	pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegSrc, false /f64Bit/);
1020	pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1021	}
1022	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1023
1024	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1025	idxRegDstIn, idxRegSrc, false /fInvertCarry/, 0);
1026
1027	iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1028	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1029	if (cOpBits < 32)
1030	off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1031	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1032
1033	#else
1034	# error "port me"
1035	#endif
1036	iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1037	return off;
1038	}
1039
1040
1041	/**
1042	* The ADC instruction with immediate value as right operand.
1043	*/
1044	DECL_INLINE_THROW(uint32_t)
1045	iemNativeEmit_adc_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1046	uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1047	{
1048	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
1049	uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /fInitialized/);
1050
1051	#ifdef RT_ARCH_AMD64
1052	/* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
1053	with matching size to get the correct flags. */
1054	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1055
1056	off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /cOpBits/, 4, idxRegEfl);
1057	pCodeBuf[off++] = X86_EFL_CF_BIT;
1058
1059	off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 2, idxRegDst, uImmOp);
1060	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1061
1062	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1063
1064	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1065
1066	#elif defined(RT_ARCH_ARM64)
1067	/* On ARM64 we use the RMIF instructions to load PSTATE.CF from idxRegEfl
1068	and then ADCS for the calculation. We need all inputs and result for
1069	the two flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
1070	uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1071	uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1072	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1073
1074	pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /fMask=C/);
1075	off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1076	if (cOpBits >= 32)
1077	pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /f64Bit/);
1078	else
1079	{
1080	/* Since we're also adding in the carry flag here, shifting operands up
1081	doesn't work. So, we have to calculate carry & overflow manually. */
1082	pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegImm, false /f64Bit/);
1083	pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1084	}
1085	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1086
1087	iemNativeRegFreeTmp(pReNative, idxRegImm);
1088
1089	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1090	idxRegDstIn, UINT8_MAX, false /fInvertCarry/, uImmOp);
1091
1092	iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1093	if (cOpBits < 32)
1094	off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1095	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1096	RT_NOREF(cImmBits);
1097
1098	#else
1099	# error "port me"
1100	#endif
1101	iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1102	return off;
1103	}
1104
1105
1106	/**
1107	* The SUB instruction will set all status flags.
1108	*/
1109	DECL_INLINE_THROW(uint32_t)
1110	iemNativeEmit_sub_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1111	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1112	{
1113	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
1114	uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /fInitialized/);
1115
1116	#ifdef RT_ARCH_AMD64
1117	/* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS.SF value. */
1118	off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1119	0x2a, 0x2b, cOpBits, idxRegDst, idxRegSrc);
1120	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1121
1122	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1123	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1124
1125	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1126
1127	#elif defined(RT_ARCH_ARM64)
1128	/* On ARM64 we'll need the two input operands as well as the result in order
1129	to calculate the right flags, even if we use SUBS and translates NZCV into
1130	OF, CF, ZF and SF. */
1131	uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1132	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1133	if (cOpBits >= 32)
1134	{
1135	off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1136	pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /f64Bit/, true /fSetFlags/);
1137	}
1138	else
1139	{
1140	/* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1141	uint32_t const cShift = 32 - cOpBits;
1142	pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /f64Bit/, cShift);
1143	pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegSrc, false /f64Bit/,
1144	true /fSetFlags/, cShift);
1145	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /f64Bit/);
1146	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /f64Bit/);
1147	cOpBits = 32;
1148	}
1149	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1150
1151	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1152	idxRegDstIn, idxRegSrc, true /fInvertCarry/, 0);
1153
1154	iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1155	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1156	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1157
1158	#else
1159	# error "port me"
1160	#endif
1161	return off;
1162	}
1163
1164
1165	/**
1166	* The SUB instruction with immediate value as right operand.
1167	*/
1168	DECL_INLINE_THROW(uint32_t)
1169	iemNativeEmit_sub_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1170	uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1171	{
1172	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
1173
1174	#ifdef RT_ARCH_AMD64
1175	/* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS.SF value. */
1176	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1177	off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 5, idxRegDst, uImmOp);
1178	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1179
1180	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1181
1182	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1183
1184	#elif defined(RT_ARCH_ARM64)
1185	/* On ARM64 we'll need the two input operands as well as the result in order
1186	to calculate the right flags, even if we use SUBS and translates NZCV into
1187	OF, CF, ZF and SF. */
1188	uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1189	PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1190	off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1191	if (cOpBits >= 32)
1192	{
1193	if (uImmOp <= 0xfffU)
1194	pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /f64Bit/, true /fSetFlags/);
1195	else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1196	pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /f64Bit/,
1197	true /fSetFlags/, true /fShift12/);
1198	else
1199	{
1200	uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1201	pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1202	pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /f64Bit/, true /fSetFlags/);
1203	iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1204	}
1205	}
1206	else
1207	{
1208	/* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1209	uint32_t const cShift = 32 - cOpBits;
1210	uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1211	pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1212	pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegDstIn, idxRegDstIn, cShift, false /f64Bit/);
1213	pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegTmpImm, false /f64Bit/, true /fSetFlags/, cShift);
1214	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /f64Bit/);
1215	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /f64Bit/);
1216	cOpBits = 32;
1217	iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1218	}
1219	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1220
1221	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1222	idxRegDstIn, UINT8_MAX, true /fInvertCarry/, uImmOp);
1223
1224	iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1225	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1226	RT_NOREF(cImmBits);
1227
1228	#else
1229	# error "port me"
1230	#endif
1231	return off;
1232	}
1233
1234
1235	/**
1236	* The CMP instruction will set all status flags, but modifies no registers.
1237	*/
1238	DECL_INLINE_THROW(uint32_t)
1239	iemNativeEmit_cmp_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1240	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1241	{
1242	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
1243	uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /fInitialized/);
1244
1245	#ifdef RT_ARCH_AMD64
1246	/* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS.SF value. */
1247	off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1248	0x3a, 0x3b, cOpBits, idxRegDst, idxRegSrc);
1249	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1250
1251	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1252	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1253
1254	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1255
1256	#elif defined(RT_ARCH_ARM64)
1257	/* On ARM64 we'll need the actual result as well as both input operands in order
1258	to calculate the right flags, even if we use SUBS and translates NZCV into
1259	OF, CF, ZF and SF. */
1260	uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1261	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1262	if (cOpBits >= 32)
1263	pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /f64Bit/, true /fSetFlags/);
1264	else
1265	{
1266	/* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1267	uint32_t const cShift = 32 - cOpBits;
1268	pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegResult, ARMV8_A64_REG_XZR, idxRegDst, false /f64Bit/, cShift);
1269	pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegSrc, false /f64Bit/,
1270	true /fSetFlags/, cShift);
1271	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /f64Bit/);
1272	cOpBits = 32;
1273	}
1274	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1275
1276	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1277	idxRegDst, idxRegSrc, true /fInvertCarry/, 0);
1278
1279	iemNativeRegFreeTmp(pReNative, idxRegResult);
1280	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1281	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1282
1283	#else
1284	# error "port me"
1285	#endif
1286	return off;
1287	}
1288
1289
1290	/**
1291	* The CMP instruction with immediate value as right operand.
1292	*/
1293	DECL_INLINE_THROW(uint32_t)
1294	iemNativeEmit_cmp_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1295	uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1296	{
1297	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
1298
1299	#ifdef RT_ARCH_AMD64
1300	/* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS.SF value. */
1301	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1302	off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 7, idxRegDst, uImmOp);
1303	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1304
1305	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1306
1307	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1308
1309	#elif defined(RT_ARCH_ARM64)
1310	/* On ARM64 we'll need the actual result as well as both input operands in order
1311	to calculate the right flags, even if we use SUBS and translates NZCV into
1312	OF, CF, ZF and SF. */
1313	uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1314	PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1315	if (cOpBits >= 32)
1316	{
1317	if (uImmOp <= 0xfffU)
1318	pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp, cOpBits > 32 /f64Bit/, true /fSetFlags/);
1319	else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1320	pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp >> 12, cOpBits > 32 /f64Bit/,
1321	true /fSetFlags/, true /fShift12/);
1322	else
1323	{
1324	uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1325	pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1326	pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /f64Bit/, true /fSetFlags/);
1327	iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1328	}
1329	}
1330	else
1331	{
1332	/* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1333	uint32_t const cShift = 32 - cOpBits;
1334	uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1335	pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1336	pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegResult, idxRegDst, cShift, false /f64Bit/);
1337	pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegTmpImm, false /f64Bit/, true /fSetFlags/, cShift);
1338	pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /f64Bit/);
1339	cOpBits = 32;
1340	iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1341	}
1342	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1343
1344	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1345	idxRegDst, UINT8_MAX, true /fInvertCarry/, uImmOp);
1346
1347	iemNativeRegFreeTmp(pReNative, idxRegResult);
1348	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1349	RT_NOREF(cImmBits);
1350
1351	#else
1352	# error "port me"
1353	#endif
1354	return off;
1355	}
1356
1357
1358	/**
1359	* The SBB instruction takes CF as input and will set all status flags.
1360	*/
1361	DECL_INLINE_THROW(uint32_t)
1362	iemNativeEmit_sbb_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1363	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1364	{
1365	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
1366	uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /fInitialized/);
1367	uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /fInitialized/);
1368
1369	#ifdef RT_ARCH_AMD64
1370	/* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
1371	with matching size to get the correct flags. */
1372	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1373
1374	off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /cOpBits/, 4, idxRegEfl);
1375	pCodeBuf[off++] = X86_EFL_CF_BIT;
1376
1377	off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x1a, 0x1b, cOpBits, idxRegDst, idxRegSrc);
1378	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1379
1380	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1381	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1382
1383	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1384
1385	#elif defined(RT_ARCH_ARM64)
1386	/* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
1387	idxRegEfl and then SBCS for the calculation. We need all inputs and
1388	result for the two flags (AF,PF) that can't be directly derived from
1389	PSTATE.NZCV. */
1390	uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1391	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1392
1393	pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /fMask=C/);
1394	pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
1395	off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1396	if (cOpBits >= 32)
1397	pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /f64Bit/);
1398	else
1399	{
1400	/* Since we're also adding in the carry flag here, shifting operands up
1401	doesn't work. So, we have to calculate carry & overflow manually. */
1402	pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegSrc, false /f64Bit/);
1403	pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1404	}
1405	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1406
1407	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1408	idxRegDstIn, idxRegSrc, true /fInvertCarry/, 0);
1409
1410	iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1411	iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1412	if (cOpBits < 32)
1413	off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1414	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1415
1416	#else
1417	# error "port me"
1418	#endif
1419	iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1420	return off;
1421	}
1422
1423
1424	/**
1425	* The SBB instruction with immediate value as right operand.
1426	*/
1427	DECL_INLINE_THROW(uint32_t)
1428	iemNativeEmit_sbb_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1429	uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1430	{
1431	uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /fInitialized/);
1432	uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /fInitialized/);
1433
1434	#ifdef RT_ARCH_AMD64
1435	/* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
1436	with matching size to get the correct flags. */
1437	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1438
1439	off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /cOpBits/, 4, idxRegEfl);
1440	pCodeBuf[off++] = X86_EFL_CF_BIT;
1441
1442	off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 3, idxRegDst, uImmOp);
1443	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1444
1445	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1446
1447	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1448
1449	#elif defined(RT_ARCH_ARM64)
1450	/* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
1451	idxRegEfl and then SBCS for the calculation. We need all inputs and
1452	result for the two flags (AF,PF) that can't be directly derived from
1453	PSTATE.NZCV. */
1454	uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1455	uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1456	PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1457
1458	pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /fMask=C/);
1459	pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
1460	off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1461	if (cOpBits >= 32)
1462	pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /f64Bit/);
1463	else
1464	{
1465	/* Since we're also adding in the carry flag here, shifting operands up
1466	doesn't work. So, we have to calculate carry & overflow manually. */
1467	pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegImm, false /f64Bit/);
1468	pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1469	}
1470	IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1471
1472	iemNativeRegFreeTmp(pReNative, idxRegImm);
1473
1474	off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1475	idxRegDstIn, UINT8_MAX, true /fInvertCarry/, uImmOp);
1476
1477	iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1478	if (cOpBits < 32)
1479	off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1480	iemNativeVarRegisterRelease(pReNative, idxVarDst);
1481	RT_NOREF(cImmBits);
1482
1483	#else
1484	# error "port me"
1485	#endif
1486	iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1487	return off;
1488	}
1489
1490
1491	DECL_INLINE_THROW(uint32_t)
1492	iemNativeEmit_imul_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1493	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1494	{
1495	RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1496	AssertFailed();
1497	return iemNativeEmitBrk(pReNative, off, 0x666);
1498	}
1499
1500
1501	DECL_INLINE_THROW(uint32_t)
1502	iemNativeEmit_popcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1503	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1504	{
1505	RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1506	AssertFailed();
1507	return iemNativeEmitBrk(pReNative, off, 0x666);
1508	}
1509
1510
1511	DECL_INLINE_THROW(uint32_t)
1512	iemNativeEmit_tzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1513	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1514	{
1515	RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1516	AssertFailed();
1517	return iemNativeEmitBrk(pReNative, off, 0x666);
1518	}
1519
1520
1521	DECL_INLINE_THROW(uint32_t)
1522	iemNativeEmit_lzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1523	uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1524	{
1525	RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1526	AssertFailed();
1527	return iemNativeEmitBrk(pReNative, off, 0x666);
1528	}
1529
1530
1531	#endif /* !VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h */

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h@ 104030

Download in other formats: