VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h@103799

Last change on this file since 103799 was 103799, checked in by vboxsync, 9 months ago

VMM/IEM: Implemented iemNativeEmit_test_r_i_efl and enabled it for both hosts. bugref:10376

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 64.4 KB
1/* $Id: IEMAllN8veEmit-x86.h 103799 2024-03-11 22:23:37Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, x86 Target - Code Emitters.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
29#define VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34
35#ifdef RT_ARCH_AMD64
36
37/**
38 * Emits a ModR/M instruction with one opcode byte and only register operands.
39 */
40DECL_FORCE_INLINE(uint32_t)
41iemNativeEmitAmd64OneByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOther,
42 uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
43{
44 Assert(idxRegReg < 16); Assert(idxRegRm < 16);
45 switch (cOpBits)
46 {
47 case 16:
48 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
49 RT_FALL_THRU();
50 case 32:
51 if (idxRegReg >= 8 || idxRegRm >= 8)
52 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
53 pCodeBuf[off++] = bOpcodeOther;
54 break;
55
56 default: AssertFailed(); RT_FALL_THRU();
57 case 64:
58 pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
59 pCodeBuf[off++] = bOpcodeOther;
60 break;
61
62 case 8:
63 if (idxRegReg >= 8 || idxRegRm >= 8)
64 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
65 else if (idxRegReg >= 4 || idxRegRm >= 4)
66 pCodeBuf[off++] = X86_OP_REX;
67 pCodeBuf[off++] = bOpcode8;
68 break;
69 }
70 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
71 return off;
72}
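/*
 * [Editor's illustration, not part of the original file.] For a concrete feel
 * of the emitter above: calling it with bOpcodeOther=0x23 (AND r32, r/m32),
 * cOpBits=32, idxRegReg=0 (eax) and idxRegRm=9 (r9d) produces 41 23 C1, i.e.
 * 'and eax, r9d' (REX.B, opcode, ModRM mod=11/reg=000/rm=001). A minimal
 * stand-alone model of the REX prefix selection, assuming only the standard
 * AMD64 encoding rules:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static uint8_t amd64ModelRexForRR(unsigned cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
{
    uint8_t bRex = 0x40;                        /* REX base */
    if (cOpBits == 64)  bRex |= 0x08;           /* REX.W - 64-bit operand size */
    if (idxRegReg >= 8) bRex |= 0x04;           /* REX.R - extends ModRM.reg */
    if (idxRegRm  >= 8) bRex |= 0x01;           /* REX.B - extends ModRM.rm */
    /* A bare 0x40 is only emitted for 8-bit ops touching spl/bpl/sil/dil
       (register index 4..7), matching the cOpBits==8 case above. */
    return bRex;
}
#endif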
73
74
75/**
76 * Emits a ModR/M instruction with two opcode bytes and only register operands.
77 */
78DECL_FORCE_INLINE(uint32_t)
79iemNativeEmitAmd64TwoByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
80 uint8_t bOpcode0, uint8_t bOpcode8, uint8_t bOpcodeOther,
81 uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
82{
83 Assert(idxRegReg < 16); Assert(idxRegRm < 16);
84 switch (cOpBits)
85 {
86 case 16:
87 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
88 RT_FALL_THRU();
89 case 32:
90 if (idxRegReg >= 8 || idxRegRm >= 8)
91 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
92 pCodeBuf[off++] = bOpcode0;
93 pCodeBuf[off++] = bOpcodeOther;
94 break;
95
96 default: AssertFailed(); RT_FALL_THRU();
97 case 64:
98 pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
99 pCodeBuf[off++] = bOpcode0;
100 pCodeBuf[off++] = bOpcodeOther;
101 break;
102
103 case 8:
104 if (idxRegReg >= 8 || idxRegRm >= 8)
105 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
106 else if (idxRegReg >= 4 || idxRegRm >= 4)
107 pCodeBuf[off++] = X86_OP_REX;
108 pCodeBuf[off++] = bOpcode0;
109 pCodeBuf[off++] = bOpcode8;
110 break;
111 }
112 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
113 return off;
114}
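/*
 * [Editor's illustration, not part of the original file.] The two-byte form
 * serves the 0x0F opcode map. The ADC/SBB emitters below call it with
 * bOpcode0=0x0f, bOpcodeOther=0xba, cOpBits=32, idxRegReg=4 (the /4 opcode
 * extension for BT) and idxRegRm=idxRegEfl, then append an imm8. If, say,
 * idxRegEfl were 3 (ebx), that would encode as 0F BA E3 ib, i.e.
 * 'bt ebx, imm8', where 0xE3 is ModRM mod=11, reg=100 (/4), rm=011.
 */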
115
116
117/**
118 * Emits one of three opcodes with an immediate.
119 *
120 * These are expected to use the /idxRegReg form (ModR/M reg field = opcode extension).
121 */
122DECL_FORCE_INLINE(uint32_t)
123iemNativeEmitAmd64OneByteModRmInstrRIEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOtherImm8,
124 uint8_t bOpcodeOther, uint8_t cOpBits, uint8_t cImmBits, uint8_t idxRegReg,
125 uint8_t idxRegRm, uint64_t uImmOp)
126{
127 Assert(idxRegReg < 8); Assert(idxRegRm < 16);
128 if ( cImmBits == 8
129 || (uImmOp <= (uint64_t)0x7f && bOpcodeOtherImm8 != 0xcc))
130 {
131 switch (cOpBits)
132 {
133 case 16:
134 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
135 RT_FALL_THRU();
136 case 32:
137 if (idxRegRm >= 8)
138 pCodeBuf[off++] = X86_OP_REX_B;
139 pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
140 break;
141
142 default: AssertFailed(); RT_FALL_THRU();
143 case 64:
144 pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
145 pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
146 break;
147
148 case 8:
149 if (idxRegRm >= 8)
150 pCodeBuf[off++] = X86_OP_REX_B;
151 else if (idxRegRm >= 4)
152 pCodeBuf[off++] = X86_OP_REX;
153 pCodeBuf[off++] = bOpcode8; Assert(bOpcode8 != 0xcc);
154 break;
155 }
156 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
157 pCodeBuf[off++] = (uint8_t)uImmOp;
158 }
159 else
160 {
161 switch (cOpBits)
162 {
163 case 32:
164 if (idxRegRm >= 8)
165 pCodeBuf[off++] = X86_OP_REX_B;
166 break;
167
168 default: AssertFailed(); RT_FALL_THRU();
169 case 64:
170 pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
171 break;
172
173 case 16:
174 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
175 if (idxRegRm >= 8)
176 pCodeBuf[off++] = X86_OP_REX_B;
177 pCodeBuf[off++] = bOpcodeOther;
178 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
179 pCodeBuf[off++] = RT_BYTE1(uImmOp);
180 pCodeBuf[off++] = RT_BYTE2(uImmOp);
181 Assert(cImmBits == 16);
182 return off;
183 }
184 pCodeBuf[off++] = bOpcodeOther;
185 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
186 pCodeBuf[off++] = RT_BYTE1(uImmOp);
187 pCodeBuf[off++] = RT_BYTE2(uImmOp);
188 pCodeBuf[off++] = RT_BYTE3(uImmOp);
189 pCodeBuf[off++] = RT_BYTE4(uImmOp);
190 Assert(cImmBits == 32);
191 }
192 return off;
193}
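/*
 * [Editor's illustration, not part of the original file.] The emitter above
 * prefers the sign-extended imm8 form when the immediate fits. For the ALU
 * /4 (AND) encoding used by iemNativeEmit_and_r_i_efl below:
 *
 *     and ecx, 0x7f        ->  83 E1 7F           (bOpcodeOtherImm8 = 0x83)
 *     and ecx, 0x12345678  ->  81 E1 78 56 34 12  (bOpcodeOther     = 0x81)
 *
 * Callers without an imm8 form (e.g. TEST) pass the 0xcc (int3) sentinel as
 * bOpcodeOtherImm8; the asserts above guarantee the sentinel is never emitted
 * and the full-width immediate path is taken instead.
 */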
194
195#endif /* RT_ARCH_AMD64 */
196
197/**
198 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGICAL.
199 *
200 * It takes the liveness analysis into account.
201 */
202DECL_INLINE_THROW(uint32_t)
203iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl
204#ifndef RT_ARCH_AMD64
205 , uint8_t cOpBits, uint8_t idxRegResult, bool fNativeFlags = false
206#endif
207 )
208{
209#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
210 if (1) /** @todo check if all bits are clobbered. */
211#endif
212 {
213#ifdef RT_ARCH_AMD64
214 /*
215 * Collect flags and merge them with eflags.
216 */
217 /** @todo we could alternatively use SAHF here when host rax is free, since
218 * OF is cleared. */
219 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
220 /* pushf - do this before any reg allocations as they may emit instructions too. */
221 pCodeBuf[off++] = 0x9c;
222
223 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
224 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
225 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
226 /* pop tmp */
227 if (idxTmpReg >= 8)
228 pCodeBuf[off++] = X86_OP_REX_B;
229 pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
230 /* and tmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF */
231 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF);
232 /* Clear the status bits in EFLs. */
233 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
234 /* OR in the flags we collected. */
235 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
236 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
237 iemNativeRegFreeTmp(pReNative, idxTmpReg);
238
239#elif defined(RT_ARCH_ARM64)
240 /*
241 * Calculate flags.
242 */
243 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
244 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
245 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
246
247 /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be encoded as an AND immediate, so use idxTmpReg for the constant. */
248 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, ~X86_EFL_STATUS_BITS);
249 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxTmpReg);
250
251 /* N,Z -> SF,ZF */
252 if (cOpBits < 32)
253 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegResult, cOpBits > 8); /* sets NZ */
254 else if (!fNativeFlags)
255 pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, idxRegResult, idxRegResult, cOpBits > 32 /*f64Bit*/);
256 pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
257 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 30);
258 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
259 AssertCompile(X86_EFL_ZF_BIT + 1 == X86_EFL_SF_BIT);
260
261 /* Calculate 8-bit parity of the result. */
262 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
263 4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
264 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
265 2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
266 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
267 1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
268 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
269 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
270 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
271
272 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
273 iemNativeRegFreeTmp(pReNative, idxTmpReg);
274#else
275# error "port me"
276#endif
277 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
278 }
279 return off;
280}
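/*
 * [Editor's sketch, not part of the original file.] The three EOR+LSR steps
 * above fold the low 8 result bits down to bit 0, giving their odd-parity,
 * and the final EOR-immediate inverts that bit since X86_EFL_PF means even
 * parity. A plain C model of the same computation:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static unsigned x86ModelCalcPf(uint32_t uResult)
{
    uint32_t u = uResult ^ (uResult >> 4);  /* fold bits 7:4 onto 3:0 */
    u ^= u >> 2;                            /* fold bits 3:2 onto 1:0 */
    u ^= u >> 1;                            /* bit 0 = XOR of result bits 7:0 */
    return (u ^ 1) & 1;                     /* invert: PF=1 for even parity */
}
#endif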
281
282
283/**
284 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
285 *
286 * It takes the liveness analysis into account.
287 */
288DECL_FORCE_INLINE_THROW(uint32_t)
289iemNativeEmitEFlagsForArithmetic(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl, uint8_t idxRegEflIn
290#ifndef RT_ARCH_AMD64
291 , uint8_t cOpBits, uint8_t idxRegResult, uint8_t idxRegDstIn, uint8_t idxRegSrc
292 , bool fInvertCarry, uint64_t uImmSrc
293#endif
294 )
295{
296#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
297 if (1) /** @todo check if all bits are clobbered. */
298#endif
299 {
300#ifdef RT_ARCH_AMD64
301 /*
302 * Collect flags and merge them with eflags.
303 */
304 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
305 /* pushf - do this before any reg allocations as they may emit instructions too. */
306 pCodeBuf[off++] = 0x9c;
307
308 uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
309 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
310 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
311 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
312 /* pop tmp */
313 if (idxTmpReg >= 8)
314 pCodeBuf[off++] = X86_OP_REX_B;
315 pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
316 /* Isolate the flags we want. */
317 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_STATUS_BITS);
318 /* Clear the status bits in EFLs. */
319 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
320 /* OR in the flags we collected. */
321 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
322 if (idxRegEflIn != idxRegEfl)
323 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
324 iemNativeRegFreeTmp(pReNative, idxTmpReg);
325
326#elif defined(RT_ARCH_ARM64)
327 /*
328 * Calculate flags.
329 */
330 uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
331 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
332 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
333 uint8_t const idxTmpReg2 = cOpBits >= 32 ? UINT8_MAX : iemNativeRegAllocTmp(pReNative, &off);
334 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
335
336 /* Invert CF (stored inverted on ARM) and load the flags into the temporary register. */
337 if (fInvertCarry)
338 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
339 pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
340
341 if (cOpBits >= 32)
342 {
343 /* V -> OF */
344 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 28);
345 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1, false /*f64Bit*/);
346
347 /* C -> CF */
348 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 1);
349 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_CF_BIT, 1, false /*f64Bit*/);
350 }
351
352 /* N,Z -> SF,ZF */
353 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits >= 32 ? 1 : 30);
354 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
355
356 /* For ADC and SBB we have to calculate overflow and carry ourselves. */
357 if (cOpBits < 32)
358 {
359 /* Since the carry flag is the zeroth flag, we just use BFXIL to copy it over. */
360 AssertCompile(X86_EFL_CF_BIT == 0);
361 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxRegEfl, idxRegResult, cOpBits, 1, false /*f64Bit*/);
362
363 /* The overflow flag is more work as we have to compare the signed bits for
364 both inputs and the result. See IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
365
366 Formula: ~(a_uDst ^ a_uSrcOf) & (a_uResult ^ a_uDst)
367 With a_uSrcOf as a_uSrc for additions and ~a_uSrc for subtractions.
368
369 It is a bit simpler when the right (source) side is constant:
370 adc: S D R -> OF sbb: S D R -> OF
371 0 0 0 -> 0 \ 0 0 0 -> 0 \
372 0 0 1 -> 1 \ 0 0 1 -> 0 \
373 0 1 0 -> 0 / and not(D), R 0 1 0 -> 1 / and D, not(R)
374 0 1 1 -> 0 / 0 1 1 -> 0 /
375 1 0 0 -> 0 \ 1 0 0 -> 0 \
376 1 0 1 -> 0 \ and D, not(R) 1 0 1 -> 1 \ and not(D), R
377 1 1 0 -> 1 / 1 1 0 -> 0 /
378 1 1 1 -> 0 / 1 1 1 -> 0 / */
379 if (idxRegSrc != UINT8_MAX)
380 {
381 if (fInvertCarry) /* sbb: ~((a_uDst) ^ ~(a_uSrcOf)) -> (a_uDst) ^ (a_uSrcOf); HACK ALERT: fInvertCarry == sbb */
382 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false);
383 else /* adc: ~((a_uDst) ^ (a_uSrcOf)) -> (a_uDst) ^ ~(a_uSrcOf) */
384 pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegSrc, false);
385 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg2, idxRegDstIn, idxRegResult, false); /* (a_uDst) ^ (a_uResult) */
386 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpReg, idxTmpReg, idxTmpReg2, false /*f64Bit*/);
387 }
388 else if (uImmSrc & RT_BIT_32(cOpBits - 1))
389 {
390 if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
391 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
392 else
393 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
394 }
395 else
396 {
397 if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
398 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
399 else
400 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
401 }
402 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits - 1, false /*f64Bit*/);
403 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1);
404 iemNativeRegFreeTmp(pReNative, idxTmpReg2);
405 }
406
407 /* Calculate 8-bit parity of the result. */
408 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
409 4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
410 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
411 2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
412 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
413 1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
414 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
415 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
416 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
417
418 /* Calculate the auxiliary carry/borrow. This is related to 8-bit BCD.
419 General formula: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
420 S D R
421 0 0 0 -> 0; \
422 0 0 1 -> 1; \ regular
423 0 1 0 -> 1; / xor R, D
424 0 1 1 -> 0; /
425 1 0 0 -> 1; \
426 1 0 1 -> 0; \ invert one of the two
427 1 1 0 -> 0; / xor not(R), D
428 1 1 1 -> 1; /
429 a_uSrc[bit 4]=0: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
430 a_uSrc[bit 4]=1: ((uint32_t)~(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
431 */
432
433 if (idxRegSrc != UINT8_MAX)
434 {
435 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false /*f64Bit*/);
436 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxRegResult, false /*f64Bit*/);
437 }
438 else if (uImmSrc & X86_EFL_AF)
439 pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
440 else
441 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
442 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, X86_EFL_AF_BIT, false /*f64Bit*/);
443 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_AF_BIT, 1, false /*f64Bit*/);
444
445 if (idxRegEflIn != idxRegEfl)
446 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
447 iemNativeRegFreeTmp(pReNative, idxTmpReg);
448#else
449# error "port me"
450#endif
451 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
452 }
453 return off;
454
455}
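/*
 * [Editor's sketch, not part of the original file.] The CF/OF computed in the
 * sub-32-bit path above, plus the AF formula, restated in plain C for an
 * addition performed in a wider register (cOpBits < 32). For subtractions,
 * substitute ~uSrc in the OF term and remember that the x86 CF is a borrow:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static void x86ModelAddFlags(uint32_t uDst, uint32_t uSrc, unsigned cOpBits,
                             unsigned *pfCf, unsigned *pfOf, unsigned *pfAf)
{
    uint32_t const uResult = uDst + uSrc;                /* raw, unmasked */
    *pfCf = (uResult >> cOpBits) & 1;                    /* the BFXIL above */
    *pfOf = ((~(uDst ^ uSrc) & (uResult ^ uDst)) >> (cOpBits - 1)) & 1;
    *pfAf = ((uDst ^ uSrc ^ uResult) >> 4) & 1;          /* X86_EFL_AF_BIT == 4 */
}
#endif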
456
457
458/**
459 * The AND instruction will clear OF, CF and AF (the latter is undefined) and
460 * set the other flags according to the result.
461 */
462DECL_INLINE_THROW(uint32_t)
463iemNativeEmit_and_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
464 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
465{
466 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
467 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
468#ifdef RT_ARCH_AMD64
469 /* On AMD64 we just use the correctly sized AND instruction and harvest the EFLAGS. */
470 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
471 0x22, 0x23, cOpBits, idxRegDst, idxRegSrc);
472 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
473 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
474
475 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
476
477#elif defined(RT_ARCH_ARM64)
478 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones as well. */
479 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
480 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
481 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
482 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
483
484 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, true /*fNativeFlags*/);
485#else
486# error "Port me"
487#endif
488 iemNativeVarRegisterRelease(pReNative, idxVarDst);
489 return off;
490}
491
492
493/**
494 * The AND instruction with immediate value as right operand.
495 */
496DECL_INLINE_THROW(uint32_t)
497iemNativeEmit_and_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
498 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
499{
500 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
501#ifdef RT_ARCH_AMD64
502 /* On AMD64 we just use the correctly sized AND instruction and harvest the EFLAGS. */
503 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
504 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 4, idxRegDst, uImmOp);
505 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
506
507 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
508
509#elif defined(RT_ARCH_ARM64)
510 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones as well. */
511 uint32_t uImmSizeLen, uImmRotations;
512 if ( cOpBits > 32
513 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
514 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
515 {
516 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
517 if (cOpBits >= 32)
518 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
519 else
520 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
521 }
522 else
523 {
524 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
525 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
526 if (cOpBits >= 32)
527 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
528 else
529 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
530 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
531 }
532 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
533
534 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, cOpBits >= 32 /*fNativeFlags*/);
535 RT_NOREF_PV(cImmBits);
536
537#else
538# error "Port me"
539#endif
540 iemNativeVarRegisterRelease(pReNative, idxVarDst);
541 return off;
542}
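/*
 * [Editor's sketch, not part of the original file.] Whether a 32-bit mask can
 * be an ARM64 logical immediate, for the simplest case of a 32-bit element
 * size (the real Armv8A64ConvertMask32ToImmRImmS also tries the repeating
 * 2/4/8/16-bit element sizes). Encodable values are rotations of 2^n - 1:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdbool.h>
#include <stdint.h>
static bool isOneRunOfOnes32(uint32_t x)        /* contiguous run, no wraparound */
{
    return x != 0 && (((x & (0u - x)) + x) & x) == 0;
}
static bool isArm64LogImm32NoRepeat(uint32_t v)
{
    return v != 0 && v != UINT32_MAX            /* all-zeros/all-ones not encodable */
        && (isOneRunOfOnes32(v) || isOneRunOfOnes32(~v)); /* plain or wrapped run */
}
#endif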
543
544
545/**
546 * The TEST instruction will clear OF, CF and AF (the latter is undefined) and
547 * set the other flags according to the result.
548 */
549DECL_INLINE_THROW(uint32_t)
550iemNativeEmit_test_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
551 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
552{
553 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
554 uint8_t const idxRegSrc = idxVarSrc == idxVarDst ? idxRegDst /* special case of 'test samereg,samereg' */
555 : iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
556#ifdef RT_ARCH_AMD64
557 /* On AMD64 we just use the correctly sized TEST instruction and harvest the EFLAGS. */
558 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
559 0x84, 0x85, cOpBits, idxRegSrc, idxRegDst);
560 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
561
562#elif defined(RT_ARCH_ARM64)
563 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones as well. We also
564 need to keep the result in order to calculate the flags. */
565 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
566 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
567 if (cOpBits >= 32)
568 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
569 else
570 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
571 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
572
573#else
574# error "Port me"
575#endif
576 if (idxVarSrc != idxVarDst)
577 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
578 iemNativeVarRegisterRelease(pReNative, idxVarDst);
579
580#ifdef RT_ARCH_AMD64
581 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
582#else
583 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
584 iemNativeRegFreeTmp(pReNative, idxRegResult);
585#endif
586 return off;
587}
588
589
590/**
591 * The TEST instruction with immediate value as right operand.
592 */
593DECL_INLINE_THROW(uint32_t)
594iemNativeEmit_test_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
595 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
596{
597 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
598#ifdef RT_ARCH_AMD64
599 /* On AMD64 we just use the correctly sized TEST instruction and harvest the EFLAGS. */
600 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
601 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0xf6, 0xcc, 0xf7, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
602 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
603 iemNativeVarRegisterRelease(pReNative, idxVarDst);
604
605 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
606
607#elif defined(RT_ARCH_ARM64)
608 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones as well. We also
609 need to keep the result in order to calculate the flags. */
610 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
611 uint32_t uImmSizeLen, uImmRotations;
612 if ( cOpBits > 32
613 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
614 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
615 {
616 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
617 if (cOpBits >= 32)
618 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
619 else
620 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
621 }
622 else
623 {
624 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
625 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
626 if (cOpBits >= 32)
627 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
628 else
629 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
630 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
631 }
632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
633 iemNativeVarRegisterRelease(pReNative, idxVarDst);
634
635 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
636
637 iemNativeRegFreeTmp(pReNative, idxRegResult);
638 RT_NOREF_PV(cImmBits);
639
640#else
641# error "Port me"
642#endif
643 return off;
644}
645
646
647/**
648 * The OR instruction will clear OF, CF and AF (the latter is undefined) and
649 * set the other flags according to the result.
650 */
651DECL_INLINE_THROW(uint32_t)
652iemNativeEmit_or_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
653 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
654{
655 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
656 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
657#ifdef RT_ARCH_AMD64
658 /* On AMD64 we just use the correctly sized OR instruction and harvest the EFLAGS. */
659 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
660 0x0a, 0x0b, cOpBits, idxRegDst, idxRegSrc);
661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
662 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
663
664 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
665
666#elif defined(RT_ARCH_ARM64)
667 /* On ARM64 we use 32-bit OR for the 8-bit and 16-bit ones as well. */
668 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
669 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
670 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
671 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
672
673 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
674
675#else
676# error "Port me"
677#endif
678 iemNativeVarRegisterRelease(pReNative, idxVarDst);
679 return off;
680}
681
682
683/**
684 * The OR instruction with immediate value as right operand.
685 */
686DECL_INLINE_THROW(uint32_t)
687iemNativeEmit_or_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
688 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
689{
690 RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
691 return off;
692}
693
694
695/**
696 * The XOR instruction will clear OF, CF and AF (the latter is undefined) and
697 * set the other flags according to the result.
698 */
699DECL_INLINE_THROW(uint32_t)
700iemNativeEmit_xor_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
701 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
702{
703 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
704 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
705#ifdef RT_ARCH_AMD64
706 /* On AMD64 we just use the correctly sized XOR instruction and harvest the EFLAGS. */
707 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
708 0x32, 0x33, cOpBits, idxRegDst, idxRegSrc);
709 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
710 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
711
712 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
713
714#elif defined(RT_ARCH_ARM64)
715 /* On ARM64 we use 32-bit EOR for the 8-bit and 16-bit ones as well. */
716 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
717 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
718 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
719 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
720
721 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
722
723#else
724# error "Port me"
725#endif
726 iemNativeVarRegisterRelease(pReNative, idxVarDst);
727 return off;
728}
729
730
731/**
732 * The XOR instruction with immediate value as right operand.
733 */
734DECL_INLINE_THROW(uint32_t)
735iemNativeEmit_xor_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
736 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
737{
738 RT_NOREF(pReNative, off, idxVarDst, uImmOp, idxVarEfl, cOpBits, cImmBits);
739 return off;
740}
741
742
743/**
744 * The ADD instruction will set all status flags.
745 */
746DECL_INLINE_THROW(uint32_t)
747iemNativeEmit_add_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
748 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
749{
750 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
751 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
752
753#ifdef RT_ARCH_AMD64
754 /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS value. */
755 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
756 0x02, 0x03, cOpBits, idxRegDst, idxRegSrc);
757 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
758
759 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
760 iemNativeVarRegisterRelease(pReNative, idxVarDst);
761
762 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
763
764#elif defined(RT_ARCH_ARM64)
765 /* On ARM64 we'll need the two input operands as well as the result in order
766 to calculate the right flags, even if we use ADDS and translate NZCV into
767 OF, CF, ZF and SF. */
768 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
769 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
770 if (cOpBits >= 32)
771 {
772 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
773 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
774 }
775 else
776 {
777 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
778 uint32_t const cShift = 32 - cOpBits;
779 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
780 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
781 true /*fSetFlags*/, cShift);
782 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
783 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
784 cOpBits = 32;
785 }
786 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
787
788 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
789 idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);
790
791 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
792 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
793 iemNativeVarRegisterRelease(pReNative, idxVarDst);
794
795#else
796# error "port me"
797#endif
798 return off;
799}
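/*
 * [Editor's sketch, not part of the original file.] Why the shift-up trick in
 * the sub-32-bit path above works: left-aligning both operands makes the
 * carry out of bit 7 (or 15) become the carry out of bit 31, and likewise for
 * signed overflow, so a 32-bit ADDS produces all four NZCV flags for the
 * narrow operation. In plain C, for an 8-bit add:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static uint8_t x86ModelAdd8ViaShift(uint8_t uDst, uint8_t uSrc)
{
    uint32_t const uDstSh = (uint32_t)uDst << 24;   /* the ORR with shift above */
    uint32_t const uSrcSh = (uint32_t)uSrc << 24;   /* the shifted ADDS operand */
    uint32_t const uResSh = uDstSh + uSrcSh;        /* NZCV now matches the 8-bit op */
    return (uint8_t)(uResSh >> 24);                 /* the trailing LSR */
}
#endif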
800
801
802/**
803 * The ADD instruction with immediate value as right operand.
804 */
805DECL_INLINE_THROW(uint32_t)
806iemNativeEmit_add_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
807 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
808{
809 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
810
811#ifdef RT_ARCH_AMD64
812 /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS value. */
813 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
814 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
815 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
816
817 iemNativeVarRegisterRelease(pReNative, idxVarDst);
818
819 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
820
821#elif defined(RT_ARCH_ARM64)
822 /* On ARM64 we'll need the two input operands as well as the result in order
823 to calculate the right flags, even if we use ADDS and translate NZCV into
824 OF, CF, ZF and SF. */
825 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
826 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
827 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
828 if (cOpBits >= 32)
829 {
830 if (uImmOp <= 0xfffU)
831 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
832 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
833 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
834 true /*fSetFlags*/, true /*fShift12*/);
835 else
836 {
837 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
838 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
839 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
840 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
841 }
842 }
843 else
844 {
845 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
846 uint32_t const cShift = 32 - cOpBits;
847 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp << cShift);
848 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
849 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegTmpImm, idxRegDstIn, false /*f64Bit*/, true /*fSetFlags*/, cShift);
850 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
851 cOpBits = 32;
852 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
853 }
854 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
855
856 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
857 idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);
858
859 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
860 iemNativeVarRegisterRelease(pReNative, idxVarDst);
861 RT_NOREF(cImmBits);
862
863#else
864# error "port me"
865#endif
866 return off;
867}
868
869
870/**
871 * The ADC instruction takes CF as input and will set all status flags.
872 */
873DECL_INLINE_THROW(uint32_t)
874iemNativeEmit_adc_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
875 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
876{
877 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
878 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
879 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
880
881#ifdef RT_ARCH_AMD64
882 /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
883 with matching size to get the correct flags. */
884 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
885
886 /* Use the BT instruction to set CF according to idxRegEfl. */
887 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
888 pCodeBuf[off++] = X86_EFL_CF_BIT;
889
890 off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x12, 0x13, cOpBits, idxRegDst, idxRegSrc);
891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
892
893 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
894 iemNativeVarRegisterRelease(pReNative, idxVarDst);
895
896 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
897
898#elif defined(RT_ARCH_ARM64)
899 /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl and
900 then ADCS for the calculation. We need all inputs and result for the two
901 flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
902 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
903 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
904
905 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
906 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
907 if (cOpBits >= 32)
908 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
909 else
910 {
911 /* Since we're also adding in the carry flag here, shifting operands up
912 doesn't work. So, we have to calculate carry & overflow manually. */
913 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
914 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
915 }
916 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
917
918 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
919 idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);
920
921 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
922 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
923 if (cOpBits < 32)
924 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
925 iemNativeVarRegisterRelease(pReNative, idxVarDst);
926
927#else
928# error "port me"
929#endif
930 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
931 return off;
932}
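/*
 * [Editor's sketch, not part of the original file.] A model of the RMIF use
 * above. RMIF rotates the source register right and inserts bits 3:0 of the
 * result into PSTATE.{N,Z,C,V} under a 4-bit mask (bit 3=N .. bit 0=V).
 * Rotating EFLAGS right by 63 (i.e. left by 1) moves X86_EFL_CF_BIT (bit 0)
 * to bit 1, which the RT_BIT_32(1) mask then copies into PSTATE.C:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static unsigned armModelRmif(uint64_t uReg, unsigned cShift, unsigned fMask, unsigned fNzcv)
{
    uint64_t const uRot = (uReg >> cShift) | (uReg << ((64 - cShift) & 63));
    return (fNzcv & ~fMask) | ((unsigned)uRot & 0xf & fMask);
}
/* armModelRmif(fEfl, 63, 2 /=RT_BIT_32(1)/, fNzcv) sets PSTATE.C = EFLAGS.CF. */
#endif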
933
934
935/**
936 * The ADC instruction with immediate value as right operand.
937 */
938DECL_INLINE_THROW(uint32_t)
939iemNativeEmit_adc_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
940 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
941{
942 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
943 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
944
945#ifdef RT_ARCH_AMD64
946 /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
947 with matching size to get the correct flags. */
948 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
949
950 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
951 pCodeBuf[off++] = X86_EFL_CF_BIT;
952
953 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 2, idxRegDst, uImmOp);
954 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
955
956 iemNativeVarRegisterRelease(pReNative, idxVarDst);
957
958 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
959
960#elif defined(RT_ARCH_ARM64)
961 /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl
962 and then ADCS for the calculation. We need all inputs and result for
963 the two flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
964 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
965 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
966 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
967
968 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
969 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
970 if (cOpBits >= 32)
971 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
972 else
973 {
974 /* Since we're also adding in the carry flag here, shifting operands up
975 doesn't work. So, we have to calculate carry & overflow manually. */
976 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
977 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
978 }
979 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
980
981 iemNativeRegFreeTmp(pReNative, idxRegImm);
982
983 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
984 idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);
985
986 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
987 if (cOpBits < 32)
988 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
989 iemNativeVarRegisterRelease(pReNative, idxVarDst);
990 RT_NOREF(cImmBits);
991
992#else
993# error "port me"
994#endif
995 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
996 return off;
997}
998
999
1000/**
1001 * The SUB instruction will set all status flags.
1002 */
1003DECL_INLINE_THROW(uint32_t)
1004iemNativeEmit_sub_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1005 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1006{
1007 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1008 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1009
1010#ifdef RT_ARCH_AMD64
1011 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS value. */
1012 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1013 0x2a, 0x2b, cOpBits, idxRegDst, idxRegSrc);
1014 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1015
1016 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1017 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1018
1019 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1020
1021#elif defined(RT_ARCH_ARM64)
1022 /* On ARM64 we'll need the two input operands as well as the result in order
1023 to calculate the right flags, even if we use SUBS and translate NZCV into
1024 OF, CF, ZF and SF. */
1025 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1026 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1027 if (cOpBits >= 32)
1028 {
1029 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1030 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1031 }
1032 else
1033 {
1034 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1035 uint32_t const cShift = 32 - cOpBits;
1036 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1037 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
1038 true /*fSetFlags*/, cShift);
1039 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1040 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1041 cOpBits = 32;
1042 }
1043 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1044
1045 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1046 idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);
1047
1048 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1049 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1050 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1051
1052#else
1053# error "port me"
1054#endif
1055 return off;
1056}
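/*
 * [Editor's note, not part of the original file.] The fInvertCarry=true here
 * reflects the differing borrow conventions: after a subtraction ARM sets
 * PSTATE.C when there was NO borrow, while x86 sets CF when there WAS one,
 * hence the CFINV issued in the flags helper above. In plain C:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static void x86ModelSubCarry(uint32_t uDst, uint32_t uSrc, unsigned *pfArmC, unsigned *pfX86Cf)
{
    *pfArmC  = uDst >= uSrc;    /* ARM: C = not-borrow after SUBS */
    *pfX86Cf = uDst <  uSrc;    /* x86: CF = borrow after SUB     */
}
#endif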
1057
1058
1059/**
1060 * The SUB instruction with immediate value as right operand.
1061 */
1062DECL_INLINE_THROW(uint32_t)
1063iemNativeEmit_sub_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1064 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1065{
1066 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1067
1068#ifdef RT_ARCH_AMD64
1069 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS value. */
1070 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1071 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 5, idxRegDst, uImmOp);
1072 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1073
1074 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1075
1076 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1077
1078#elif defined(RT_ARCH_ARM64)
1079 /* On ARM64 we'll need the two input operands as well as the result in order
1080 to calculate the right flags, even if we use SUBS and translate NZCV into
1081 OF, CF, ZF and SF. */
1082 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1083 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1084 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1085 if (cOpBits >= 32)
1086 {
1087 if (uImmOp <= 0xfffU)
1088 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1089 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1090 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
1091 true /*fSetFlags*/, true /*fShift12*/);
1092 else
1093 {
1094 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1095 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1096 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1097 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1098 }
1099 }
1100 else
1101 {
1102 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1103 uint32_t const cShift = 32 - cOpBits;
1104 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1105 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1106 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1107 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
1108 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1109 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1110 cOpBits = 32;
1111 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1112 }
1113 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1114
1115 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1116 idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1117
1118 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1119 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1120 RT_NOREF(cImmBits);
1121
1122#else
1123# error "port me"
1124#endif
1125 return off;
1126}
1127
1128
1129/**
1130 * The CMP instruction will set all status flags, but modifies no registers.
1131 */
1132DECL_INLINE_THROW(uint32_t)
1133iemNativeEmit_cmp_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1134 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1135{
1136 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1137 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1138
1139#ifdef RT_ARCH_AMD64
1140 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS value. */
1141 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1142 0x3a, 0x3b, cOpBits, idxRegDst, idxRegSrc);
1143 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1144
1145 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1146 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1147
1148 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1149
1150#elif defined(RT_ARCH_ARM64)
1151 /* On ARM64 we'll need the actual result as well as both input operands in order
1152 to calculate the right flags, even if we use SUBS and translate NZCV into
1153 OF, CF, ZF and SF. */
1154 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1155 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1156 if (cOpBits >= 32)
1157 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1158 else
1159 {
1160 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1161 uint32_t const cShift = 32 - cOpBits;
1162 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegResult, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1163 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegSrc, false /*f64Bit*/,
1164 true /*fSetFlags*/, cShift);
1165 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
1166 cOpBits = 32;
1167 }
1168 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1169
1170 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1171 idxRegDst, idxRegSrc, true /*fInvertCarry*/, 0);
1172
1173 iemNativeRegFreeTmp(pReNative, idxRegResult);
1174 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1175 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1176
1177#else
1178# error "port me"
1179#endif
1180 return off;
1181}
1182
1183
1184/**
1185 * The CMP instruction with immediate value as right operand.
1186 */
1187DECL_INLINE_THROW(uint32_t)
1188iemNativeEmit_cmp_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1189 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1190{
1191 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1192
1193#ifdef RT_ARCH_AMD64
1194 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS value. */
1195 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1196 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 7, idxRegDst, uImmOp);
1197 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1198
1199 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1200
1201 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1202
1203#elif defined(RT_ARCH_ARM64)
1204 /* On ARM64 we'll need the actual result as well as both input operands in order
1205 to calculate the right flags, even if we use SUBS and translate NZCV into
1206 OF, CF, ZF and SF. */
1207 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1208 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1209 if (cOpBits >= 32)
1210 {
1211 if (uImmOp <= 0xfffU)
1212 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1213 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1214 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
1215 true /*fSetFlags*/, true /*fShift12*/);
1216 else
1217 {
1218 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1219 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1220 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1221 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1222 }
1223 }
1224 else
1225 {
1226 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1227 uint32_t const cShift = 32 - cOpBits;
1228 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1229 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1230 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegResult, idxRegDst, cShift, false /*f64Bit*/);
1231 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
1232 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
1233 cOpBits = 32;
1234 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1235 }
1236 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1237
1238 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1239 idxRegDst, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1240
1241 iemNativeRegFreeTmp(pReNative, idxRegResult);
1242 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1243 RT_NOREF(cImmBits);
1244
1245#else
1246# error "port me"
1247#endif
1248 return off;
1249}
1250
1251
1252/**
1253 * The SBB instruction takes CF as input and will set all status flags.
1254 */
1255DECL_INLINE_THROW(uint32_t)
1256iemNativeEmit_sbb_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1257 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1258{
1259 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1260 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1261 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1262
1263#ifdef RT_ARCH_AMD64
1264 /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
1265 with matching size to get the correct flags. */
1266 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1267
1268 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1269 pCodeBuf[off++] = X86_EFL_CF_BIT;
1270
1271 off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x1a, 0x1b, cOpBits, idxRegDst, idxRegSrc);
1272 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1273
1274 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1275 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1276
1277 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1278
1279#elif defined(RT_ARCH_ARM64)
1280 /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
1281 idxRegEfl and then SBCS for the calculation. We need all inputs and
1282 result for the two flags (AF,PF) that can't be directly derived from
1283 PSTATE.NZCV. */
1284 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1285 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1286
1287 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1288 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
1289 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1290 if (cOpBits >= 32)
1291 pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
1292 else
1293 {
1294 /* Since we're also subtracting the borrow flag here, shifting operands up
1295 doesn't work. So, we have to calculate carry & overflow manually. */
1296 pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
1297 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1298 }
1299 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1300
1301 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1302 idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);
1303
1304 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1305 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1306 if (cOpBits < 32)
1307 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1308 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1309
1310#else
1311# error "port me"
1312#endif
1313 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1314 return off;
1315}
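/*
 * [Editor's sketch, not part of the original file.] In the sub-32-bit SBC
 * path above, the raw 32-bit result directly carries the x86 borrow: on a
 * borrow the two's complement result goes negative and bit cOpBits (e.g.
 * bit 8 for an 8-bit op) ends up set, which is exactly what the BFXIL in the
 * flags helper copies into CF. That is also why the result is masked with
 * AndGpr32ByImm afterwards:
 */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
static uint8_t x86ModelSbb8(uint8_t uDst, uint8_t uSrc, unsigned fCarryIn, unsigned *pfCf)
{
    uint32_t const uRaw = (uint32_t)uDst - uSrc - fCarryIn; /* wraps on borrow */
    *pfCf = (uRaw >> 8) & 1;            /* bit 8 = x86 borrow (CF) */
    return (uint8_t)uRaw;               /* the masking step */
}
#endif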
1316
1317
1318/**
1319 * The SBB instruction with immediate value as right operand.
1320 */
1321DECL_INLINE_THROW(uint32_t)
1322iemNativeEmit_sbb_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1323 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1324{
1325 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1326 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1327
1328#ifdef RT_ARCH_AMD64
1329 /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
1330 with matching size to get the correct flags. */
1331 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1332
1333 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1334 pCodeBuf[off++] = X86_EFL_CF_BIT;
1335
1336 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 3, idxRegDst, uImmOp);
1337 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1338
1339 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1340
1341 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1342
1343#elif defined(RT_ARCH_ARM64)
1344 /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
1345 idxRegEfl and then SBCS for the calculation. We need all inputs and
1346 result for the two flags (AF,PF) that can't be directly derived from
1347 PSTATE.NZCV. */
1348 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1349 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1350 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1351
1352 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1353 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
1354 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1355 if (cOpBits >= 32)
1356 pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
1357 else
1358 {
1359 /* Since we're also subtracting the borrow flag here, shifting operands up
1360 doesn't work. So, we have to calculate carry & overflow manually. */
1361 pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
1362 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1363 }
1364 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1365
1366 iemNativeRegFreeTmp(pReNative, idxRegImm);
1367
1368 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1369 idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1370
1371 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1372 if (cOpBits < 32)
1373 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1374 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1375 RT_NOREF(cImmBits);
1376
1377#else
1378# error "port me"
1379#endif
1380 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1381 return off;
1382}
1383
1384
1385DECL_INLINE_THROW(uint32_t)
1386iemNativeEmit_imul_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1387 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1388{
1389 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1390 AssertFailed();
1391 return iemNativeEmitBrk(pReNative, off, 0x666);
1392}
1393
1394
1395DECL_INLINE_THROW(uint32_t)
1396iemNativeEmit_popcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1397 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1398{
1399 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1400 AssertFailed();
1401 return iemNativeEmitBrk(pReNative, off, 0x666);
1402}
1403
1404
1405DECL_INLINE_THROW(uint32_t)
1406iemNativeEmit_tzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1407 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1408{
1409 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1410 AssertFailed();
1411 return iemNativeEmitBrk(pReNative, off, 0x666);
1412}
1413
1414
1415DECL_INLINE_THROW(uint32_t)
1416iemNativeEmit_lzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1417 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1418{
1419 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1420 AssertFailed();
1421 return iemNativeEmitBrk(pReNative, off, 0x666);
1422}
1423
1424
1425#endif /* !VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h */