VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h@104279

Last change on this file since 104279 was 104279, checked in by vboxsync, 8 months ago

VMM/IEM: Implement a native emitter for the pand,andps,andpd instructions, bugref:10652

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 93.5 KB
1/* $Id: IEMAllN8veEmit-x86.h 104279 2024-04-10 14:22:22Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, x86 Target - Code Emitters.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
29#define VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34
35#ifdef RT_ARCH_AMD64
36
37/**
38 * Emits a ModR/M instruction with one opcode byte and only register operands.
39 */
40DECL_FORCE_INLINE(uint32_t)
41iemNativeEmitAmd64OneByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOther,
42 uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
43{
44 Assert(idxRegReg < 16); Assert(idxRegRm < 16);
45 switch (cOpBits)
46 {
47 case 16:
48 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
49 RT_FALL_THRU();
50 case 32:
51 if (idxRegReg >= 8 || idxRegRm >= 8)
52 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
53 pCodeBuf[off++] = bOpcodeOther;
54 break;
55
56 default: AssertFailed(); RT_FALL_THRU();
57 case 64:
58 pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
59 pCodeBuf[off++] = bOpcodeOther;
60 break;
61
62 case 8:
63 if (idxRegReg >= 8 || idxRegRm >= 8)
64 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
65 else if (idxRegReg >= 4 || idxRegRm >= 4)
66 pCodeBuf[off++] = X86_OP_REX;
67 pCodeBuf[off++] = bOpcode8;
68 break;
69 }
70 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
71 return off;
72}
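/* Example: with bOpcodeOther=0x23 (AND r32, r/m32), cOpBits=32, idxRegReg=0 (eax)
   and idxRegRm=1 (ecx) this emits "23 c1", i.e. 'and eax, ecx'; with idxRegRm=9
   (r9d) a REX.B prefix is emitted first, giving "41 23 c1" = 'and eax, r9d'. */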
73
74
75/**
76 * Emits a ModR/M instruction with two opcode bytes and only register operands.
77 */
78DECL_FORCE_INLINE(uint32_t)
79iemNativeEmitAmd64TwoByteModRmInstrRREx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
80 uint8_t bOpcode0, uint8_t bOpcode8, uint8_t bOpcodeOther,
81 uint8_t cOpBits, uint8_t idxRegReg, uint8_t idxRegRm)
82{
83 Assert(idxRegReg < 16); Assert(idxRegRm < 16);
84 switch (cOpBits)
85 {
86 case 16:
87 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
88 RT_FALL_THRU();
89 case 32:
90 if (idxRegReg >= 8 || idxRegRm >= 8)
91 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
92 pCodeBuf[off++] = bOpcode0;
93 pCodeBuf[off++] = bOpcodeOther;
94 break;
95
96 default: AssertFailed(); RT_FALL_THRU();
97 case 64:
98 pCodeBuf[off++] = X86_OP_REX_W | (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
99 pCodeBuf[off++] = bOpcode0;
100 pCodeBuf[off++] = bOpcodeOther;
101 break;
102
103 case 8:
104 if (idxRegReg >= 8 || idxRegRm >= 8)
105 pCodeBuf[off++] = (idxRegReg >= 8 ? X86_OP_REX_R : 0) | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
106 else if (idxRegReg >= 4 || idxRegRm >= 4)
107 pCodeBuf[off++] = X86_OP_REX;
108 pCodeBuf[off++] = bOpcode0;
109 pCodeBuf[off++] = bOpcode8;
110 break;
111 }
112 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg & 7, idxRegRm & 7);
113 return off;
114}
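/* Example: the ADC emitters below pass bOpcode0=0x0f, bOpcodeOther=0xba and
   idxRegReg=4 to this helper, which together with the trailing immediate byte
   yields the "bt r/m32, imm8" encoding (0F BA /4 ib) used to seed the host carry
   flag from the guest EFLAGS register. */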
115
116
117/**
118 * Emits one of three opcodes with an immediate.
119 *
120 * These are expected to be of the /idxRegReg (opcode extension) form.
121 */
122DECL_FORCE_INLINE(uint32_t)
123iemNativeEmitAmd64OneByteModRmInstrRIEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t bOpcode8, uint8_t bOpcodeOtherImm8,
124 uint8_t bOpcodeOther, uint8_t cOpBits, uint8_t cImmBits, uint8_t idxRegReg,
125 uint8_t idxRegRm, uint64_t uImmOp)
126{
127 Assert(idxRegReg < 8); Assert(idxRegRm < 16);
128 if ( cImmBits == 8
129 || (uImmOp <= (uint64_t)0x7f && bOpcodeOtherImm8 != 0xcc))
130 {
131 switch (cOpBits)
132 {
133 case 16:
134 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
135 RT_FALL_THRU();
136 case 32:
137 if (idxRegRm >= 8)
138 pCodeBuf[off++] = X86_OP_REX_B;
139 pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
140 break;
141
142 default: AssertFailed(); RT_FALL_THRU();
143 case 64:
144 pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
145 pCodeBuf[off++] = bOpcodeOtherImm8; Assert(bOpcodeOtherImm8 != 0xcc);
146 break;
147
148 case 8:
149 if (idxRegRm >= 8)
150 pCodeBuf[off++] = X86_OP_REX_B;
151 else if (idxRegRm >= 4)
152 pCodeBuf[off++] = X86_OP_REX;
153 pCodeBuf[off++] = bOpcode8; Assert(bOpcode8 != 0xcc);
154 break;
155 }
156 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
157 pCodeBuf[off++] = (uint8_t)uImmOp;
158 }
159 else
160 {
161 switch (cOpBits)
162 {
163 case 32:
164 if (idxRegRm >= 8)
165 pCodeBuf[off++] = X86_OP_REX_B;
166 break;
167
168 default: AssertFailed(); RT_FALL_THRU();
169 case 64:
170 pCodeBuf[off++] = X86_OP_REX_W | (idxRegRm >= 8 ? X86_OP_REX_B : 0);
171 break;
172
173 case 16:
174 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
175 if (idxRegRm >= 8)
176 pCodeBuf[off++] = X86_OP_REX_B;
177 pCodeBuf[off++] = bOpcodeOther;
178 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
179 pCodeBuf[off++] = RT_BYTE1(uImmOp);
180 pCodeBuf[off++] = RT_BYTE2(uImmOp);
181 Assert(cImmBits == 16);
182 return off;
183 }
184 pCodeBuf[off++] = bOpcodeOther;
185 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegReg, idxRegRm & 7);
186 pCodeBuf[off++] = RT_BYTE1(uImmOp);
187 pCodeBuf[off++] = RT_BYTE2(uImmOp);
188 pCodeBuf[off++] = RT_BYTE3(uImmOp);
189 pCodeBuf[off++] = RT_BYTE4(uImmOp);
190 Assert(cImmBits == 32);
191 }
192 return off;
193}
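/* Example: with the group-1 opcode triple (bOpcode8=0x80, bOpcodeOtherImm8=0x83,
   bOpcodeOther=0x81) used by the callers below, 'and ecx, 0x70' fits the
   sign-extended imm8 path and encodes as "83 e1 70", whereas 'and ecx, 0x12345'
   takes the imm32 path and encodes as "81 e1 45 23 01 00". */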
194
195#endif /* RT_ARCH_AMD64 */
196
197/**
198 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGICAL.
199 *
200 * It takes liveness information into account.
201 */
202DECL_INLINE_THROW(uint32_t)
203iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl
204#ifndef RT_ARCH_AMD64
205 , uint8_t cOpBits, uint8_t idxRegResult, bool fNativeFlags = false
206#endif
207 )
208{
209#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
210 /*
211 * See if we can skip this wholesale.
212 */
213 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
214 if (IEMLIVENESS_STATE_ARE_STATUS_EFL_TO_BE_CLOBBERED(pLivenessEntry))
215 {
216 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflSkippedLogical);
217# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
218 off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, X86_EFL_STATUS_BITS, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
219# endif
220 }
221 else
222#endif
223 {
224#ifdef RT_ARCH_AMD64
225 /*
226 * Collect flags and merge them with eflags.
227 */
228 /** @todo we could alternatively use SAHF here when host rax is free, since
229 * OF is cleared. */
230 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
231 /* pushf - do this before any reg allocations as they may emit instructions too. */
232 pCodeBuf[off++] = 0x9c;
233
234 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
235 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
236 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
237 /* pop tmp */
238 if (idxTmpReg >= 8)
239 pCodeBuf[off++] = X86_OP_REX_B;
240 pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
241 /* and tmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF */
242 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF);
243 /* Clear the status bits in EFLs. */
244 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
245 /* OR in the flags we collected. */
246 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
247 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
248 iemNativeRegFreeTmp(pReNative, idxTmpReg);
249
250#elif defined(RT_ARCH_ARM64)
251 /*
252 * Calculate flags.
253 */
254 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
255 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
256 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
257
258 /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be encoded as an AND immediate, so use idxTmpReg for the constant. */
259 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, ~X86_EFL_STATUS_BITS);
260 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxTmpReg);
261
262 /* N,Z -> SF,ZF */
263 if (cOpBits < 32)
264 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegResult, cOpBits > 8); /* sets NZ */
265 else if (!fNativeFlags)
266 pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, idxRegResult, idxRegResult, cOpBits > 32 /*f64Bit*/);
267 pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
268 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 30);
269 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
270 AssertCompile(X86_EFL_ZF_BIT + 1 == X86_EFL_SF_BIT);
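        /* After the LSR by 30 the host N and Z flags sit in bits 1:0 of idxTmpReg, so
           the single 2-bit BFI above writes them straight into SF (bit 7) and ZF
           (bit 6); the AssertCompile guards that the two EFLAGS bits stay adjacent. */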
271
272 /* Calculate 8-bit parity of the result. */
273 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
274 4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
275 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
276 2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
277 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
278 1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
279 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
280 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
281 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
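        /* The three EOR+LSR steps above fold the low 8 bits of the result down to a
           single XOR in bit 0 (1 = odd number of set bits); the EorImm with mask 1
           then inverts it to match X86_EFL_PF semantics (PF set on even parity)
           before the BFI copies it into the PF position. */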
282
283 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
284 iemNativeRegFreeTmp(pReNative, idxTmpReg);
285#else
286# error "port me"
287#endif
288 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
289
290# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
291 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
292# endif
293 }
294 return off;
295}
296
297
298/**
299 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
300 *
301 * It takes liveness information into account.
302 */
303DECL_FORCE_INLINE_THROW(uint32_t)
304iemNativeEmitEFlagsForArithmetic(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl, uint8_t idxRegEflIn
305#ifndef RT_ARCH_AMD64
306 , uint8_t cOpBits, uint8_t idxRegResult, uint8_t idxRegDstIn, uint8_t idxRegSrc
307 , bool fInvertCarry, uint64_t uImmSrc
308#endif
309 )
310{
311#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
312 /*
313 * See if we can skip this wholesale.
314 */
315 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
316 if (IEMLIVENESS_STATE_ARE_STATUS_EFL_TO_BE_CLOBBERED(pLivenessEntry))
317 {
318 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflSkippedArithmetic);
319# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
320 off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, X86_EFL_STATUS_BITS, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
321# endif
322 }
323 else
324#endif
325 {
326#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
327 uint32_t fSkipped = 0;
328#endif
329#ifdef RT_ARCH_AMD64
330 /*
331 * Collect flags and merge them with eflags.
332 */
333 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
334 /* pushf - do this before any reg allocations as they may emit instructions too. */
335 pCodeBuf[off++] = 0x9c;
336
337 uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
338 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
339 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
340 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 7 + 7 + 3);
341 /* pop tmp */
342 if (idxTmpReg >= 8)
343 pCodeBuf[off++] = X86_OP_REX_B;
344 pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
345 /* Isolate the flags we want. */
346 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_STATUS_BITS);
347 /* Clear the status bits in EFLs. */
348 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
349 /* OR in the flags we collected. */
350 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
351 if (idxRegEflIn != idxRegEfl)
352 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
353 iemNativeRegFreeTmp(pReNative, idxTmpReg);
354
355#elif defined(RT_ARCH_ARM64)
356 /*
357 * Calculate flags.
358 */
359 uint8_t const idxRegEfl = idxRegEflIn != UINT8_MAX ? idxRegEflIn
360 : iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
361 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
362 uint8_t const idxTmpReg2 = cOpBits >= 32 ? UINT8_MAX : iemNativeRegAllocTmp(pReNative, &off);
363 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
364
365 /* Invert CF (stored inverted on ARM) and load the flags into the temporary register. */
366 if (fInvertCarry)
367 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
368 pCodeBuf[off++] = Armv8A64MkInstrMrs(idxTmpReg, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
369
370 if (cOpBits >= 32)
371 {
372 /* V -> OF */
373 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 28);
374 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1, false /*f64Bit*/);
375
376 /* C -> CF */
377 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, 1);
378 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_CF_BIT, 1, false /*f64Bit*/);
379 }
380
381 /* N,Z -> SF,ZF */
382 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits >= 32 ? 1 : 30);
383 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
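        /* Note: the NZCV snapshot is shifted right in steps (28, then +1, then +1,
           i.e. 30 in total) so each BFI above can pick the next flag(s) out of bit 0
           without re-reading the system register; for sub-32-bit ops the V/C part is
           skipped and a single shift by 30 is used for N and Z. */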
384
385 /* For ADC and SBB we have to calculate overflow and carry ourselves. */
386 if (cOpBits < 32)
387 {
388 /* Since the carry flag is the zeroth EFLAGS bit, we just use BFXIL to copy it over. */
389 AssertCompile(X86_EFL_CF_BIT == 0);
390 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxRegEfl, idxRegResult, cOpBits, 1, false /*f64Bit*/);
391
392 /* The overflow flag is more work as we have to compare the signed bits for
393 both inputs and the result. See IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC.
394
395 Formula: ~(a_uDst ^ a_uSrcOf) & (a_uResult ^ a_uDst)
396 With a_uSrcOf as a_uSrc for additions and ~a_uSrc for subtractions.
397
398 It is a bit simpler when the right (source) side is constant:
399 adc: S D R -> OF sbb: S D R -> OF
400 0 0 0 -> 0 \ 0 0 0 -> 0 \
401 0 0 1 -> 1 \ 0 0 1 -> 0 \
402 0 1 0 -> 0 / and not(D), R 0 1 0 -> 1 / and D, not(R)
403 0 1 1 -> 0 / 0 1 1 -> 0 /
404 1 0 0 -> 0 \ 1 0 0 -> 0 \
405 1 0 1 -> 0 \ and D, not(R) 1 0 1 -> 1 \ and not(D), R
406 1 1 0 -> 1 / 1 1 0 -> 0 /
407 1 1 1 -> 0 / 1 1 1 -> 0 / */
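        /* Put differently: OF is set exactly when a_uDst and a_uSrcOf have the same
           sign bit while a_uResult's sign bit differs from it. */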
408 if (idxRegSrc != UINT8_MAX)
409 {
410 if (fInvertCarry) /* sbb: ~((a_uDst) ^ ~(a_uSrcOf)) -> (a_uDst) ^ (a_uSrcOf); HACK ALERT: fInvertCarry == sbb */
411 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false);
412 else /* adc: ~((a_uDst) ^ (a_uSrcOf)) -> (a_uDst) ^ ~(a_uSrcOf) */
413 pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegSrc, false);
414 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg2, idxRegDstIn, idxRegResult, false); /* (a_uDst) ^ (a_uResult) */
415 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpReg, idxTmpReg, idxTmpReg2, false /*f64Bit*/);
416 }
417 else if (uImmSrc & RT_BIT_32(cOpBits - 1))
418 {
419 if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
420 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
421 else
422 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
423 }
424 else
425 {
426 if (fInvertCarry) /* HACK ALERT: fInvertCarry == sbb */
427 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegDstIn, idxRegResult, false);
428 else
429 pCodeBuf[off++] = Armv8A64MkInstrBic(idxTmpReg, idxRegResult, idxRegDstIn, false);
430 }
431 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, cOpBits - 1, false /*f64Bit*/);
432 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_OF_BIT, 1);
433 iemNativeRegFreeTmp(pReNative, idxTmpReg2);
434 }
435
436 /* Calculate 8-bit parity of the result. */
437 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
438 4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
439 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
440 2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
441 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
442 1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
443 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
444 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
445 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
446
447 /* Calculate auxiliary carry/borrow. This is related to 8-bit BCD.
448 General formula: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
449 S D R
450 0 0 0 -> 0; \
451 0 0 1 -> 1; \ regular
452 0 1 0 -> 1; / xor R, D
453 0 1 1 -> 0; /
454 1 0 0 -> 1; \
455 1 0 1 -> 0; \ invert one of the two
456 1 1 0 -> 0; / xor not(R), D
457 1 1 1 -> 1; /
458 a_uSrc[bit 4]=0: ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
459 a_uSrc[bit 4]=1: ((uint32_t)~(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF;
460 */
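        /* I.e. AF is simply bit 4 of (a_uDst ^ a_uSrc ^ a_uResult); when the source is
           an immediate, its bit 4 is folded in below by picking EOR (bit clear) vs.
           EON (bit set). */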
461
462 if (idxRegSrc != UINT8_MAX)
463 {
464 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegSrc, false /*f64Bit*/);
465 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxRegResult, false /*f64Bit*/);
466 }
467 else if (uImmSrc & X86_EFL_AF)
468 pCodeBuf[off++] = Armv8A64MkInstrEon(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
469 else
470 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegDstIn, idxRegResult, false /*f64Bit*/);
471 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxTmpReg, X86_EFL_AF_BIT, false /*f64Bit*/);
472 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_AF_BIT, 1, false /*f64Bit*/);
473
474 if (idxRegEflIn != idxRegEfl)
475 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
476 iemNativeRegFreeTmp(pReNative, idxTmpReg);
477
478#else
479# error "port me"
480#endif
481 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
482
483#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
484 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, fSkipped, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
485#endif
486 }
487 return off;
488
489}
490
491
492/**
493 * The AND instruction will clear OF, CF and AF (the latter is undefined) and
494 * set the other flags according to the result.
495 */
496DECL_INLINE_THROW(uint32_t)
497iemNativeEmit_and_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
498 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
499{
500 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
501 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
502#ifdef RT_ARCH_AMD64
503 /* On AMD64 we just use the correctly sized AND instruction to harvest the EFLAGS. */
504 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
505 0x22, 0x23, cOpBits, idxRegDst, idxRegSrc);
506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
507 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
508
509 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
510
511#elif defined(RT_ARCH_ARM64)
512 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones. */
513 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
514 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
515 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
516 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
517
518 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, true /*fNativeFlags*/);
519#else
520# error "Port me"
521#endif
522 iemNativeVarRegisterRelease(pReNative, idxVarDst);
523 return off;
524}
525
526
527/**
528 * The AND instruction with immediate value as right operand.
529 */
530DECL_INLINE_THROW(uint32_t)
531iemNativeEmit_and_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
532 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
533{
534 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
535#ifdef RT_ARCH_AMD64
536 /* On AMD64 we just use the correctly sized AND instruction to harvest the EFLAGS. */
537 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
538 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 4, idxRegDst, uImmOp);
539 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
540
541 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
542
543#elif defined(RT_ARCH_ARM64)
544 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones, and of
545 course the immediate variant when possible to save a register load. */
546 uint32_t uImmSizeLen, uImmRotations;
547 if ( cOpBits > 32
548 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
549 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
550 {
551 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
552 if (cOpBits >= 32)
553 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
554 else
555 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
556 }
557 else
558 {
559 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
560 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
561 if (cOpBits >= 32)
562 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
563 else
564 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
565 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
566 }
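    /* For reference: masks like 0x000000ff or 0xfffffff0 are expressible as ARM64
       logical immediates (a rotated run of contiguous ones, possibly replicated) and
       take the branch above, while a value like 0x12345 is not and falls back to
       loading the immediate into a temporary register. */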
567 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
568
569 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst, cOpBits >= 32 /*fNativeFlags*/);
570 RT_NOREF_PV(cImmBits);
571
572#else
573# error "Port me"
574#endif
575 iemNativeVarRegisterRelease(pReNative, idxVarDst);
576 return off;
577}
578
579
580/**
581 * The TEST instruction will clear OF, CF and AF (the latter is undefined) and
582 * set the other flags according to the result.
583 */
584DECL_INLINE_THROW(uint32_t)
585iemNativeEmit_test_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
586 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
587{
588 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
589 uint8_t const idxRegSrc = idxVarSrc == idxVarDst ? idxRegDst /* special case of 'test samereg,samereg' */
590 : iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
591#ifdef RT_ARCH_AMD64
592 /* On AMD64 we just use the correctly sized TEST instruction to harvest the EFLAGS. */
593 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
594 0x84, 0x85, cOpBits, idxRegSrc, idxRegDst);
595 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
596
597#elif defined(RT_ARCH_ARM64)
598 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones. We also
599 need to keep the result in order to calculate the flags. */
600 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
601 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
602 if (cOpBits >= 32)
603 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
604 else
605 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
607
608#else
609# error "Port me"
610#endif
611 if (idxVarSrc != idxVarDst)
612 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
613 iemNativeVarRegisterRelease(pReNative, idxVarDst);
614
615#ifdef RT_ARCH_AMD64
616 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
617#else
618 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
619 iemNativeRegFreeTmp(pReNative, idxRegResult);
620#endif
621 return off;
622}
623
624
625/**
626 * The TEST instruction with immediate value as right operand.
627 */
628DECL_INLINE_THROW(uint32_t)
629iemNativeEmit_test_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
630 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
631{
632 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
633#ifdef RT_ARCH_AMD64
634 /* On AMD64 we just use the correctly sized TEST instruction to harvest the EFLAGS. */
635 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
636 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0xf6, 0xcc, 0xf7, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
637 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
638 iemNativeVarRegisterRelease(pReNative, idxVarDst);
639
640 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
641
642#elif defined(RT_ARCH_ARM64)
643 /* On ARM64 we use 32-bit AND for the 8-bit and 16-bit ones, and of
644 course the immediate variant when possible to save a register load.
645 We also need to keep the result in order to calculate the flags. */
646 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
647 uint32_t uImmSizeLen, uImmRotations;
648 if ( cOpBits > 32
649 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
650 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
651 {
652 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
653 if (cOpBits >= 32)
654 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
655 else
656 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegResult, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
657 }
658 else
659 {
660 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
661 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
662 if (cOpBits >= 32)
663 pCodeBuf[off++] = Armv8A64MkInstrAnds(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
664 else
665 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
666 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
667 }
668 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
669 iemNativeVarRegisterRelease(pReNative, idxVarDst);
670
671 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegResult, cOpBits >= 32 /*fNativeFlags*/);
672
673 iemNativeRegFreeTmp(pReNative, idxRegResult);
674 RT_NOREF_PV(cImmBits);
675
676#else
677# error "Port me"
678#endif
679 return off;
680}
681
682
683/**
684 * The OR instruction will clear OF, CF and AF (the latter is undefined) and
685 * set the other flags according to the result.
686 */
687DECL_INLINE_THROW(uint32_t)
688iemNativeEmit_or_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
689 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
690{
691 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
692 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
693#ifdef RT_ARCH_AMD64
694 /* On AMD64 we just use the correctly sized OR instruction to harvest the EFLAGS. */
695 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
696 0x0a, 0x0b, cOpBits, idxRegDst, idxRegSrc);
697 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
698 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
699
700 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
701
702#elif defined(RT_ARCH_ARM64)
703 /* On ARM64 we use 32-bit OR for the 8-bit and 16-bit ones. */
704 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
705 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
706 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
707 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
708
709 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
710
711#else
712# error "Port me"
713#endif
714 iemNativeVarRegisterRelease(pReNative, idxVarDst);
715 return off;
716}
717
718
719/**
720 * The OR instruction with immediate value as right operand.
721 */
722DECL_INLINE_THROW(uint32_t)
723iemNativeEmit_or_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
724 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
725{
726 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
727#ifdef RT_ARCH_AMD64
728 /* On AMD64 we just use the correctly sized OR instruction to harvest the EFLAGS. */
729 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
730 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 1, idxRegDst, uImmOp);
731 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
732
733 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
734
735#elif defined(RT_ARCH_ARM64)
736 /* On ARM64 we use 32-bit OR for the 8-bit and 16-bit ones, and of
737 course the immediate variant when possible to save a register load. */
738 uint32_t uImmSizeLen, uImmRotations;
739 if ( cOpBits > 32
740 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
741 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
742 {
743 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
744 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
745 }
746 else
747 {
748 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
749 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
750 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
751 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
752 }
753 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
754
755 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
756 RT_NOREF_PV(cImmBits);
757
758#else
759# error "Port me"
760#endif
761 iemNativeVarRegisterRelease(pReNative, idxVarDst);
762 return off;
763}
764
765
766/**
767 * The XOR instruction will clear OF, CF and AF (the latter is undefined) and
768 * set the other flags according to the result.
769 */
770DECL_INLINE_THROW(uint32_t)
771iemNativeEmit_xor_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
772 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
773{
774 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
775 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
776#ifdef RT_ARCH_AMD64
777 /* On AMD64 we just use the correctly sized XOR instruction to harvest the EFLAGS. */
778 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
779 0x32, 0x33, cOpBits, idxRegDst, idxRegSrc);
780 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
781 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
782
783 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
784
785#elif defined(RT_ARCH_ARM64)
786 /* On ARM64 we use 32-bit EOR (XOR) for the 8-bit and 16-bit ones. */
787 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
788 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
789 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
790 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
791
792 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
793
794#else
795# error "Port me"
796#endif
797 iemNativeVarRegisterRelease(pReNative, idxVarDst);
798 return off;
799}
800
801
802/**
803 * The XOR instruction with immediate value as right operand.
804 */
805DECL_INLINE_THROW(uint32_t)
806iemNativeEmit_xor_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
807 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
808{
809 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
810#ifdef RT_ARCH_AMD64
811 /* On AMD64 we just use the correctly sized XOR instruction to harvest the EFLAGS. */
812 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
813 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 6, idxRegDst, uImmOp);
814 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
815
816 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl);
817
818#elif defined(RT_ARCH_ARM64)
819 /* On ARM64 we use 32-bit EOR (XOR) for the 8-bit and 16-bit ones, and of
820 course the immediate variant when possible to save a register load. */
821 uint32_t uImmSizeLen, uImmRotations;
822 if ( cOpBits > 32
823 ? Armv8A64ConvertMask64ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations)
824 : Armv8A64ConvertMask32ToImmRImmS(uImmOp, &uImmSizeLen, &uImmRotations))
825 {
826 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
827 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxRegDst, idxRegDst, uImmSizeLen, uImmRotations, cOpBits > 32 /*f64Bit*/);
828 }
829 else
830 {
831 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
832 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
833 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/);
834 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
835 }
836 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
837
838 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
839 RT_NOREF_PV(cImmBits);
840
841#else
842# error "Port me"
843#endif
844 iemNativeVarRegisterRelease(pReNative, idxVarDst);
845 return off;
846}
847
848
849
850/*********************************************************************************************************************************
851* ADD, ADC, SUB, SBB, CMP *
852*********************************************************************************************************************************/
853
854/**
855 * The ADD instruction will set all status flags.
856 */
857DECL_INLINE_THROW(uint32_t)
858iemNativeEmit_add_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
859 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
860{
861 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
862 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
863
864#ifdef RT_ARCH_AMD64
865 /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
866 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
867 0x02, 0x03, cOpBits, idxRegDst, idxRegSrc);
868 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
869
870 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
871 iemNativeVarRegisterRelease(pReNative, idxVarDst);
872
873 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
874
875#elif defined(RT_ARCH_ARM64)
876 /* On ARM64 we'll need the two input operands as well as the result in order
877 to calculate the right flags, even if we use ADDS and translate NZCV into
878 OF, CF, ZF and SF. */
879 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
880 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
881 if (cOpBits >= 32)
882 {
883 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
884 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
885 }
886 else
887 {
888 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
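            /* (Working at the top of the 32-bit register makes the narrow operation's
               carry-out and signed overflow show up in PSTATE.C/V and its sign bit in
               PSTATE.N; the saved input and the result are shifted back down afterwards
               for the AF/PF calculation.) */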
889 uint32_t const cShift = 32 - cOpBits;
890 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
891 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
892 true /*fSetFlags*/, cShift);
893 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
894 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
895 cOpBits = 32;
896 }
897 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
898
899 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
900 idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);
901
902 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
903 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
904 iemNativeVarRegisterRelease(pReNative, idxVarDst);
905
906#else
907# error "port me"
908#endif
909 return off;
910}
911
912
913/**
914 * The ADD instruction with immediate value as right operand.
915 */
916DECL_INLINE_THROW(uint32_t)
917iemNativeEmit_add_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
918 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
919{
920 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
921
922#ifdef RT_ARCH_AMD64
923 /* On AMD64 we just use the correctly sized ADD instruction to get the right EFLAGS values. */
924 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
925 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 0, idxRegDst, uImmOp);
926 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
927
928 iemNativeVarRegisterRelease(pReNative, idxVarDst);
929
930 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
931
932#elif defined(RT_ARCH_ARM64)
933 /* On ARM64 we'll need the two input operands as well as the result in order
934 to calculate the right flags, even if we use ADDS and translate NZCV into
935 OF, CF, ZF and SF. */
936 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
937 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
938 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
939 if (cOpBits >= 32)
940 {
941 if (uImmOp <= 0xfffU)
942 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
943 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
944 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
945 true /*fSetFlags*/, true /*fShift12*/);
946 else
947 {
948 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
949 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
950 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
951 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
952 }
953 }
954 else
955 {
956 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
957 uint32_t const cShift = 32 - cOpBits;
958 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp << cShift);
959 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
960 pCodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegDst, idxRegTmpImm, idxRegDstIn, false /*f64Bit*/, true /*fSetFlags*/, cShift);
961 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
962 cOpBits = 32;
963 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
964 }
965 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
966
967 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
968 idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);
969
970 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
971 iemNativeVarRegisterRelease(pReNative, idxVarDst);
972 RT_NOREF(cImmBits);
973
974#else
975# error "port me"
976#endif
977 return off;
978}
979
980
981/**
982 * The ADC instruction takes CF as input and will set all status flags.
983 */
984DECL_INLINE_THROW(uint32_t)
985iemNativeEmit_adc_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
986 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
987{
988 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
989 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
990 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
991
992#ifdef RT_ARCH_AMD64
993 /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
994 with matching size to get the correct flags. */
995 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
996
997 /* Use the BT instruction to set CF according to idxRegEfl. */
998 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
999 pCodeBuf[off++] = X86_EFL_CF_BIT;
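    /* Note: BT only defines CF (loaded here from bit 0, the CF bit, of the guest
       EFLAGS value in idxRegEfl); the other status flags are don't-care at this
       point since the ADC below rewrites all of them. */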
1000
1001 off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x12, 0x13, cOpBits, idxRegDst, idxRegSrc);
1002 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1003
1004 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1005 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1006
1007 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1008
1009#elif defined(RT_ARCH_ARM64)
1010 /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl and
1011 then ADCS for the calculation. We need all inputs and result for the two
1012 flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
1013 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1014 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1015
1016 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
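    /* The RMIF rotates the EFLAGS value right by 63, i.e. left by one, so that
       X86_EFL_CF (bit 0) lands in bit 1 of the rotated value; with the mask set to
       just C, only PSTATE.C is updated, loading the guest CF into the host carry. */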
1017 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1018 if (cOpBits >= 32)
1019 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
1020 else
1021 {
1022 /* Since we're also adding in the carry flag here, shifting operands up
1023 doesn't work. So, we have to calculate carry & overflow manually. */
1024 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
1025 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1026 }
1027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1028
1029 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1030 idxRegDstIn, idxRegSrc, false /*fInvertCarry*/, 0);
1031
1032 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1033 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1034 if (cOpBits < 32)
1035 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1036 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1037
1038#else
1039# error "port me"
1040#endif
1041 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1042 return off;
1043}
1044
1045
1046/**
1047 * The ADC instruction with immediate value as right operand.
1048 */
1049DECL_INLINE_THROW(uint32_t)
1050iemNativeEmit_adc_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1051 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1052{
1053 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1054 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1055
1056#ifdef RT_ARCH_AMD64
1057 /* On AMD64 we use BT to set EFLAGS.CF and then issue an ADC instruction
1058 with matching size to get the correct flags. */
1059 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1060
1061 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1062 pCodeBuf[off++] = X86_EFL_CF_BIT;
1063
1064 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 2, idxRegDst, uImmOp);
1065 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1066
1067 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1068
1069 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1070
1071#elif defined(RT_ARCH_ARM64)
1072 /* On ARM64 we use the RMIF instruction to load PSTATE.CF from idxRegEfl
1073 and then ADCS for the calculation. We need all inputs and result for
1074 the two flags (AF,PF) that can't be directly derived from PSTATE.NZCV. */
1075 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1076 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1077 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1078
1079 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1080 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1081 if (cOpBits >= 32)
1082 pCodeBuf[off++] = Armv8A64MkInstrAdcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
1083 else
1084 {
1085 /* Since we're also adding in the carry flag here, shifting operands up
1086 doesn't work. So, we have to calculate carry & overflow manually. */
1087 pCodeBuf[off++] = Armv8A64MkInstrAdc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
1088 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1089 }
1090 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1091
1092 iemNativeRegFreeTmp(pReNative, idxRegImm);
1093
1094 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1095 idxRegDstIn, UINT8_MAX, false /*fInvertCarry*/, uImmOp);
1096
1097 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1098 if (cOpBits < 32)
1099 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1100 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1101 RT_NOREF(cImmBits);
1102
1103#else
1104# error "port me"
1105#endif
1106 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1107 return off;
1108}
1109
1110
1111/**
1112 * The SUB instruction will set all status flags.
1113 */
1114DECL_INLINE_THROW(uint32_t)
1115iemNativeEmit_sub_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1116 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1117{
1118 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1119 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1120
1121#ifdef RT_ARCH_AMD64
1122 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS values. */
1123 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1124 0x2a, 0x2b, cOpBits, idxRegDst, idxRegSrc);
1125 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1126
1127 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1128 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1129
1130 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1131
1132#elif defined(RT_ARCH_ARM64)
1133 /* On ARM64 we'll need the two input operands as well as the result in order
1134 to calculate the right flags, even if we use SUBS and translate NZCV into
1135 OF, CF, ZF and SF. */
1136 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1137 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1138 if (cOpBits >= 32)
1139 {
1140 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1141 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1142 }
1143 else
1144 {
1145 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1146 uint32_t const cShift = 32 - cOpBits;
1147 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDstIn, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1148 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegSrc, false /*f64Bit*/,
1149 true /*fSetFlags*/, cShift);
1150 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1151 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1152 cOpBits = 32;
1153 }
1154 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1155
1156 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1157 idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);
1158
1159 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1160 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1161 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1162
1163#else
1164# error "port me"
1165#endif
1166 return off;
1167}
1168
1169
1170/**
1171 * The SUB instruction with immediate value as right operand.
1172 */
1173DECL_INLINE_THROW(uint32_t)
1174iemNativeEmit_sub_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1175 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1176{
1177 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1178
1179#ifdef RT_ARCH_AMD64
1180 /* On AMD64 we just use the correctly sized SUB instruction to get the right EFLAGS values. */
1181 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1182 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 5, idxRegDst, uImmOp);
1183 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1184
1185 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1186
1187 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1188
1189#elif defined(RT_ARCH_ARM64)
1190 /* On ARM64 we'll need the two input operands as well as the result in order
1191 to calculate the right flags, even if we use SUBS and translate NZCV into
1192 OF, CF, ZF and SF. */
1193 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1194 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1195 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1196 if (cOpBits >= 32)
1197 {
1198 if (uImmOp <= 0xfffU)
1199 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1200 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1201 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegDst, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
1202 true /*fSetFlags*/, true /*fShift12*/);
1203 else
1204 {
1205 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1206 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1207 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1208 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1209 }
1210 }
1211 else
1212 {
1213 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1214 uint32_t const cShift = 32 - cOpBits;
1215 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1216 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1217 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1218 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegDst, idxRegDstIn, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
1219 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDstIn, idxRegDstIn, cShift, false /*f64Bit*/);
1220 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegDst, idxRegDst, cShift, false /*f64Bit*/);
1221 cOpBits = 32;
1222 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1223 }
1224 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1225
1226 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegDst,
1227 idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1228
1229 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1230 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1231 RT_NOREF(cImmBits);
1232
1233#else
1234# error "port me"
1235#endif
1236 return off;
1237}
1238
1239
1240/**
1241 * The CMP instruction will set all status flags, but modifies no registers.
1242 */
1243DECL_INLINE_THROW(uint32_t)
1244iemNativeEmit_cmp_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1245 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1246{
1247 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1248 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1249
1250#ifdef RT_ARCH_AMD64
1251 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS values. */
1252 off = iemNativeEmitAmd64OneByteModRmInstrRREx(iemNativeInstrBufEnsure(pReNative, off, 4), off,
1253 0x3a, 0x3b, cOpBits, idxRegDst, idxRegSrc);
1254 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1255
1256 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1257 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1258
1259 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1260
1261#elif defined(RT_ARCH_ARM64)
1262 /* On ARM64 we'll need the actual result as well as both input operands in order
1263 to calculate the right flags, even if we use SUBS and translate NZCV into
1264 OF, CF, ZF and SF. */
1265 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1266 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1267 if (cOpBits >= 32)
1268 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1269 else
1270 {
1271 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1272 uint32_t const cShift = 32 - cOpBits;
1273 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegResult, ARMV8_A64_REG_XZR, idxRegDst, false /*f64Bit*/, cShift);
1274 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegSrc, false /*f64Bit*/,
1275 true /*fSetFlags*/, cShift);
1276 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
1277 cOpBits = 32;
1278 }
1279 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1280
1281 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1282 idxRegDst, idxRegSrc, true /*fInvertCarry*/, 0);
1283
1284 iemNativeRegFreeTmp(pReNative, idxRegResult);
1285 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1286 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1287
1288#else
1289# error "port me"
1290#endif
1291 return off;
1292}
1293
1294
1295/**
1296 * The CMP instruction with immediate value as right operand.
1297 */
1298DECL_INLINE_THROW(uint32_t)
1299iemNativeEmit_cmp_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1300 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1301{
1302 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1303
1304#ifdef RT_ARCH_AMD64
1305 /* On AMD64 we just use the correctly sized CMP instruction to get the right EFLAGS.SF value. */
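/* 0x80 (8-bit operand), 0x83 (sign-extended imm8) and 0x81 (imm16/32) with /7 are
   the CMP r/m,imm encoding family the helper below picks from. */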
1306 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1307 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 7, idxRegDst, uImmOp);
1308 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1309
1310 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1311
1312 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX);
1313
1314#elif defined(RT_ARCH_ARM64)
1315 /* On ARM64 we'll need the actual result as well as both input operands in order
1316 to calculate the right flags, even though we use SUBS and translate NZCV into
1317 OF, CF, ZF and SF. */
1318 uint8_t const idxRegResult = iemNativeRegAllocTmp(pReNative, &off);
1319 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1320 if (cOpBits >= 32)
1321 {
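/* ARM64 SUBS only encodes a 12-bit unsigned immediate, optionally shifted left by
   12 bits, so anything else has to go via a temporary register. */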
1322 if (uImmOp <= 0xfffU)
1323 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1324 else if (uImmOp <= 0xfff000U && !(uImmOp & 0xfff))
1325 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegResult, idxRegDst, uImmOp >> 12, cOpBits > 32 /*f64Bit*/,
1326 true /*fSetFlags*/, true /*fShift12*/);
1327 else
1328 {
1329 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1330 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1331 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegDst, idxRegTmpImm, cOpBits > 32 /*f64Bit*/, true /*fSetFlags*/);
1332 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1333 }
1334 }
1335 else
1336 {
1337 /* Shift the operands up so we can perform a 32-bit operation and get all four flags. */
1338 uint32_t const cShift = 32 - cOpBits;
1339 uint8_t const idxRegTmpImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1340 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1341 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegResult, idxRegDst, cShift, false /*f64Bit*/);
1342 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegResult, idxRegResult, idxRegTmpImm, false /*f64Bit*/, true /*fSetFlags*/, cShift);
1343 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegResult, idxRegResult, cShift, false /*f64Bit*/);
1344 cOpBits = 32;
1345 iemNativeRegFreeTmpImm(pReNative, idxRegTmpImm);
1346 }
1347 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1348
1349 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, idxVarEfl, UINT8_MAX, cOpBits, idxRegResult,
1350 idxRegDst, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1351
1352 iemNativeRegFreeTmp(pReNative, idxRegResult);
1353 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1354 RT_NOREF(cImmBits);
1355
1356#else
1357# error "port me"
1358#endif
1359 return off;
1360}
1361
1362
1363/**
1364 * The SBB instruction takes CF as input and will set all status flags.
1365 */
1366DECL_INLINE_THROW(uint32_t)
1367iemNativeEmit_sbb_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1368 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1369{
1370 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1371 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1372 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1373
1374#ifdef RT_ARCH_AMD64
1375 /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
1376 with matching size to get the correct flags. */
1377 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1378
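/* bt idxRegEfl, X86_EFL_CF_BIT: copies the guest CF into the host carry flag so
   the SBB below picks it up. */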
1379 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1380 pCodeBuf[off++] = X86_EFL_CF_BIT;
1381
1382 off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0x1a, 0x1b, cOpBits, idxRegDst, idxRegSrc);
1383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1384
1385 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1386 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1387
1388 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1389
1390#elif defined(RT_ARCH_ARM64)
1391 /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
1392 idxRegEfl and then SBCS for the calculation. We need all inputs and
1393 result for the two flags (AF,PF) that can't be directly derived from
1394 PSTATE.NZCV. */
1395 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1396 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1397
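/* RMIF rotates idxRegEfl so the x86 CF bit lands in PSTATE.C; CFINV then inverts
   it because the ARM carry is the complement of the x86 borrow (SBC subtracts !C),
   hence also the fInvertCarry argument for the flag calculation below. */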
1398 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1399 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
1400 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1401 if (cOpBits >= 32)
1402 pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegSrc, cOpBits > 32 /*f64Bit*/);
1403 else
1404 {
1405 /* Since the carry flag (borrow) also enters the calculation here, shifting the
1406 operands up doesn't work. So, we have to calculate carry & overflow manually. */
1407 pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegSrc, false /*f64Bit*/);
1408 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1409 }
1410 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1411
1412 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1413 idxRegDstIn, idxRegSrc, true /*fInvertCarry*/, 0);
1414
1415 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1416 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1417 if (cOpBits < 32)
1418 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1419 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1420
1421#else
1422# error "port me"
1423#endif
1424 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1425 return off;
1426}
1427
1428
1429/**
1430 * The SBB instruction with immediate value as right operand.
1431 */
1432DECL_INLINE_THROW(uint32_t)
1433iemNativeEmit_sbb_r_i_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1434 uint8_t idxVarDst, uint64_t uImmOp, uint8_t idxVarEfl, uint8_t cOpBits, uint8_t cImmBits)
1435{
1436 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1437 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1438
1439#ifdef RT_ARCH_AMD64
1440 /* On AMD64 we use BT to set EFLAGS.CF and then issue an SBB instruction
1441 with matching size to get the correct flags. */
1442 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1443
1444 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b, 0xba, 32 /*cOpBits*/, 4, idxRegEfl);
1445 pCodeBuf[off++] = X86_EFL_CF_BIT;
1446
1447 off = iemNativeEmitAmd64OneByteModRmInstrRIEx(pCodeBuf, off, 0x80, 0x83, 0x81, cOpBits, cImmBits, 3, idxRegDst, uImmOp);
1448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1449
1450 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1451
1452 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl);
1453
1454#elif defined(RT_ARCH_ARM64)
1455 /* On ARM64 we use the RMIF+CFINV instructions to load PSTATE.CF from
1456 idxRegEfl and then SBCS for the calculation. We need all inputs and
1457 result for the two flags (AF,PF) that can't be directly derived from
1458 PSTATE.NZCV. */
1459 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1460 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmOp);
1461 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1462
1463 pCodeBuf[off++] = Armv8A64MkInstrRmif(idxRegEfl, (X86_EFL_CF_BIT - 1) & 63, RT_BIT_32(1) /*fMask=C*/);
1464 pCodeBuf[off++] = ARMV8_A64_INSTR_CFINV;
1465 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1466 if (cOpBits >= 32)
1467 pCodeBuf[off++] = Armv8A64MkInstrSbcs(idxRegDst, idxRegDst, idxRegImm, cOpBits > 32 /*f64Bit*/);
1468 else
1469 {
1470 /* Since the carry flag (borrow) also enters the calculation here, shifting the
1471 operands up doesn't work. So, we have to calculate carry & overflow manually. */
1472 pCodeBuf[off++] = Armv8A64MkInstrSbc(idxRegDst, idxRegDst, idxRegImm, false /*f64Bit*/);
1473 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegDst, cOpBits > 8); /* NZ are okay, CV aren't.*/
1474 }
1475 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1476
1477 iemNativeRegFreeTmp(pReNative, idxRegImm);
1478
1479 off = iemNativeEmitEFlagsForArithmetic(pReNative, off, UINT8_MAX, idxRegEfl, cOpBits, idxRegDst,
1480 idxRegDstIn, UINT8_MAX, true /*fInvertCarry*/, uImmOp);
1481
1482 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1483 if (cOpBits < 32)
1484 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegDst, RT_BIT_32(cOpBits) - 1U);
1485 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1486 RT_NOREF(cImmBits);
1487
1488#else
1489# error "port me"
1490#endif
1491 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1492 return off;
1493}
1494
1495
1496DECL_INLINE_THROW(uint32_t)
1497iemNativeEmit_imul_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1498 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1499{
1500 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1501 AssertFailed();
1502 return iemNativeEmitBrk(pReNative, off, 0x666);
1503}
1504
1505
1506DECL_INLINE_THROW(uint32_t)
1507iemNativeEmit_popcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1508 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1509{
1510 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1511 AssertFailed();
1512 return iemNativeEmitBrk(pReNative, off, 0x666);
1513}
1514
1515
1516DECL_INLINE_THROW(uint32_t)
1517iemNativeEmit_tzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1518 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1519{
1520 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1521 AssertFailed();
1522 return iemNativeEmitBrk(pReNative, off, 0x666);
1523}
1524
1525
1526DECL_INLINE_THROW(uint32_t)
1527iemNativeEmit_lzcnt_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1528 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
1529{
1530 RT_NOREF(idxVarDst, idxVarSrc, idxVarEfl, cOpBits);
1531 AssertFailed();
1532 return iemNativeEmitBrk(pReNative, off, 0x666);
1533}
1534
1535
1536
1537/*********************************************************************************************************************************
1538* Shifting and Rotating. *
1539*********************************************************************************************************************************/
1540
1541
1542typedef enum
1543{
1544 kIemNativeEmitEFlagsForShiftType_Left,
1545 kIemNativeEmitEFlagsForShiftType_Right,
1546 kIemNativeEmitEFlagsForShiftType_SignedRight
1547} IEMNATIVEEMITEFLAGSFORSHIFTTYPE;
1548
1549/**
1550 * This is used by SHL, SHR and SAR emulation.
1551 *
1552 * It takes liveness stuff into account.
1553 */
1554DECL_INLINE_THROW(uint32_t)
1555iemNativeEmitEFlagsForShift(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegEfl, uint8_t idxRegResult,
1556 uint8_t idxRegSrc, uint8_t idxRegCount, uint8_t cOpBits, IEMNATIVEEMITEFLAGSFORSHIFTTYPE enmType,
1557 uint8_t idxRegTmp)
1558{
1559RT_NOREF(pReNative, off, idxRegEfl, idxRegResult, idxRegSrc, idxRegCount, cOpBits, enmType);
1560#if 0 //def IEMNATIVE_WITH_EFLAGS_SKIPPING
1561 /*
1562 * See if we can skip this wholesale.
1563 */
1564 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
1565 if (IEMLIVENESS_STATE_ARE_STATUS_EFL_TO_BE_CLOBBERED(pLivenessEntry))
1566 {
1567 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEflSkippedLogical);
1568# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
1569 off = iemNativeEmitOrImmIntoVCpuU32(pReNative, off, X86_EFL_STATUS_BITS, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
1570# endif
1571 }
1572 else
1573#endif
1574 {
1575 /*
1576 * The differences between the Intel and AMD flags for SHL are:
1577 * - Intel always clears AF while AMD always sets it.
1578 * - Intel sets OF for the first shift, while AMD for the last shift.
1579 *
1580 */
1581
1582#ifdef RT_ARCH_AMD64
1583 /*
1584 * We capture the host flags from the shift and do the additional OF and AF calculations as needed.
1585 */
1586 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1587 /** @todo kIemNativeEmitEFlagsForShiftType_SignedRight: we could alternatively
1588 * use SAHF here when host rax is free, since OF is cleared. */
1589 /* pushf */
1590 pCodeBuf[off++] = 0x9c;
1591 /* pop tmp */
1592 if (idxRegTmp >= 8)
1593 pCodeBuf[off++] = X86_OP_REX_B;
1594 pCodeBuf[off++] = 0x58 + (idxRegTmp & 7);
1595 /* Clear the status bits in EFLs. */
1596 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
1597 uint8_t const idxTargetCpuEflFlavour = pReNative->pVCpu->iem.s.aidxTargetCpuEflFlavour[1];
1598 if (idxTargetCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
1599 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegTmp, X86_EFL_STATUS_BITS);
1600 else
1601 {
1602 /* and tmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_CF */
1603 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegTmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_CF);
1604 if (idxTargetCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD)
1605 off = iemNativeEmitOrGpr32ByImmEx(pCodeBuf, off, idxRegTmp, X86_EFL_AF);
1606 /* OR in the flags we collected. */
1607 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxRegTmp);
1608
1609 /* Calculate OF */
1610 if (idxTargetCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD)
1611 {
1612 /* AMD last bit shifted: fEfl |= ((uResult >> (cOpBits - 1)) ^ fCarry) << X86_EFL_OF_BIT; */
1613 /* bt idxRegResult, (cOpBits - 1) => CF=result-sign-bit */
1614 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x0b /*ud2*/, 0xba,
1615 RT_MAX(cOpBits, 16), 4, idxRegResult);
1616 pCodeBuf[off++] = cOpBits - 1;
1617 /* setc idxRegTmp */
1618 off = iemNativeEmitAmd64TwoByteModRmInstrRREx(pCodeBuf, off, 0x0f, 0x92, 0x0b /*ud2*/, 8, 0, idxRegTmp);
1619 /* xor idxRegTmp, idxRegEfl */
1620 off = iemNativeEmitXorGpr32ByGpr32Ex(pCodeBuf, off, idxRegTmp, idxRegEfl);
1621 /* and idxRegTmp, 1 */
1622 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegTmp, 1);
1623 /* shl idxRegTmp, X86_EFL_OF_BIT */
1624 off = iemNativeEmitShiftGpr32LeftEx(pCodeBuf, off, idxRegTmp, X86_EFL_OF_BIT);
1625 }
1626 else
1627 {
1628 /* Intel first bit shifted: fEfl |= X86_EFL_GET_OF_ ## cOpBits(uDst ^ (uDst << 1)); */
1629 if (cOpBits <= 32)
1630 {
1631 /* mov idxRegTmp, idxRegSrc */
1632 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegTmp, idxRegSrc);
1633 /* shl idxRegTmp, 1 */
1634 off = iemNativeEmitShiftGpr32LeftEx(pCodeBuf, off, idxRegTmp, 1);
1635 /* xor idxRegTmp, idxRegSrc */
1636 off = iemNativeEmitXorGprByGprEx(pCodeBuf, off, idxRegTmp, idxRegSrc);
1637 /* shr idxRegTmp, cOpBits - X86_EFL_OF_BIT - 1 or shl idxRegTmp, X86_EFL_OF_BIT - cOpBits + 1 */
1638 if (cOpBits >= X86_EFL_OF_BIT)
1639 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, idxRegTmp, cOpBits - X86_EFL_OF_BIT - 1);
1640 else
1641 off = iemNativeEmitShiftGpr32LeftEx(pCodeBuf, off, idxRegTmp, X86_EFL_OF_BIT - cOpBits + 1);
1642 }
1643 else
1644 {
1645 /* Same as above but with 64-bit GPRs. */
1646 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegTmp, idxRegSrc);
1647 off = iemNativeEmitShiftGprLeftEx(pCodeBuf, off, idxRegTmp, 1);
1648 off = iemNativeEmitXorGprByGprEx(pCodeBuf, off, idxRegTmp, idxRegSrc);
1649 off = iemNativeEmitShiftGprRightEx(pCodeBuf, off, idxRegTmp, cOpBits - X86_EFL_OF_BIT - 1);
1650 }
1651 /* and idxRegTmp, X86_EFL_OF */
1652 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegTmp, X86_EFL_OF);
1653 }
1654 }
1655 /* Or in the collected flag(s) */
1656 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxRegTmp);
1657
1658#elif defined(RT_ARCH_ARM64)
1659 /*
1660 * Calculate flags.
1661 */
1662 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
1663
1664 /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be AND immediate, so use idxRegTmp for constant. */
1665 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxRegTmp, ~X86_EFL_STATUS_BITS);
1666 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxRegTmp);
1667
1668 /* N,Z -> SF,ZF */
1669 if (cOpBits < 32)
1670 pCodeBuf[off++] = Armv8A64MkInstrSetF8SetF16(idxRegResult, cOpBits > 8); /* sets NZ */
1671 else
1672 pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, idxRegResult, idxRegResult, cOpBits > 32 /*f64Bit*/);
1673 pCodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_NZCV); /* Bits: 31=N; 30=Z; 29=C; 28=V; */
1674 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, 30);
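/* After the shift Z sits in bit 0 and N in bit 1, so a single BFI writes both ZF
   and SF (adjacent EFLAGS bits, see the AssertCompile below). */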
1675 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_ZF_BIT, 2, false /*f64Bit*/);
1676 AssertCompile(X86_EFL_ZF_BIT + 1 == X86_EFL_SF_BIT);
1677
1678 /* Calculate 8-bit parity of the result. */
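/* Folding trick: tmp = res ^ (res >> 4); tmp ^= tmp >> 2; tmp ^= tmp >> 1; bit 0
   then holds the XOR of the eight low result bits, which is inverted (EOR with 1)
   because x86 PF is set for an even number of set bits. */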
1679 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegResult, idxRegResult, false /*f64Bit*/,
1680 4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
1681 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegTmp, idxRegTmp, false /*f64Bit*/,
1682 2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
1683 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegTmp, idxRegTmp, false /*f64Bit*/,
1684 1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
1685 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
1686 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxRegTmp, idxRegTmp, 0, 0, false /*f64Bit*/);
1687 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
1688
1689 /* Calculate carry - the last bit shifted out of the input value. */
1690 if (enmType == kIemNativeEmitEFlagsForShiftType_Left)
1691 {
1692 /* CF = (idxRegSrc >> (cOpBits - idxRegCount)) & 1 */
1693 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegTmp, cOpBits);
1694 pCodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegTmp, idxRegCount, false /*f64Bit*/, cOpBits < 32 /*fSetFlags*/);
1695 if (cOpBits < 32)
1696 pCodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Cc, 3); /* 16 or 8 bit: CF is clear if all shifted out */
1697 pCodeBuf[off++] = Armv8A64MkInstrLsrv(idxRegTmp, idxRegSrc, idxRegTmp, cOpBits > 32);
1698 }
1699 else
1700 {
1701 /* CF = (idxRegSrc >> (idxRegCount - 1)) & 1 */
1702 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegTmp, idxRegCount, 1, false /*f64Bit*/);
1703 pCodeBuf[off++] = Armv8A64MkInstrLsrv(idxRegTmp, idxRegSrc, idxRegTmp, cOpBits > 32);
1704 }
1705 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_CF_BIT, 1, false /*f64Bit*/);
1706
1707 uint8_t const idxTargetCpuEflFlavour = pReNative->pVCpu->iem.s.aidxTargetCpuEflFlavour[0];
1708 if (idxTargetCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_AMD)
1709 {
1710 /* Intel: OF = first bit shifted: fEfl |= X86_EFL_GET_OF_ ## cOpBits(uDst ^ (uDst << 1)); */
1711 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegSrc, idxRegSrc, cOpBits > 32, 1 /*left shift count*/);
1712 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, cOpBits - 1, cOpBits > 32);
1713 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_OF_BIT, 1, false /*f64Bit*/);
1714 }
1715 else
1716 {
1717 /* AMD: OF = last bit shifted: fEfl |= ((uResult >> (cOpBits - 1)) ^ fCarry) << X86_EFL_OF_BIT; */
1718 AssertCompile(X86_EFL_CF_BIT == 0);
1719 pCodeBuf[off++] = Armv8A64MkInstrEor(idxRegTmp, idxRegEfl, idxRegResult, cOpBits > 32, /* ASSUMES CF calculated! */
1720 cOpBits - 1, kArmv8A64InstrShift_Lsr);
1721 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxRegTmp, X86_EFL_OF_BIT, 1, false /*f64Bit*/);
1722
1723 /* AMD unconditionally sets AF. */
1724 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 32 - X86_EFL_AF_BIT) == X86_EFL_AF);
1725 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxRegEfl, idxRegEfl, 0, 32 - X86_EFL_AF_BIT, false /*f64Bit*/);
1726 }
1727#else
1728# error "port me"
1729#endif
1730 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1731
1732# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
1733 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
1734# endif
1735 }
1736 return off;
1737}
1738
1739
1740DECL_INLINE_THROW(uint32_t)
1741iemNativeEmit_shl_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1742 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1743{
1744 /* Note! Since we're doing some branching here, we need to allocate all
1745 registers we need before the jump or we may end up with invalid
1746 register state if the branch is taken. */
1747 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off); /* Do this first in hope we'll get EAX. */
1748 uint8_t const idxRegCount = iemNativeVarRegisterAcquire(pReNative, idxVarCount, &off, true /*fInitialized*/); /* modified on arm64 */
1749 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
1750 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
1751
1752#ifdef RT_ARCH_AMD64
1753 /* Make sure IEM_MC_NATIVE_AMD64_HOST_REG_FOR_LOCAL was used. */
1754 AssertStmt(idxRegCount == X86_GREG_xCX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_UNEXPECTED_VAR_REGISTER));
1755
1756 /* We only need a copy of the input value if the target CPU differs from the host CPU. */
1757 uint8_t const idxRegDstIn = pReNative->pVCpu->iem.s.aidxTargetCpuEflFlavour[1] == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
1758 ? UINT8_MAX : iemNativeRegAllocTmp(pReNative, &off);
1759 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4+2+3+4);
1760
1761 /* Check if it's NOP before we do anything. */
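/* A count of zero (after the 0x1f/0x3f masking) must leave both the destination
   and EFLAGS untouched, so we jump straight over the shift and the flag calc. */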
1762 off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, idxRegCount, cOpBits <= 32 ? 0x1f : 0x3f);
1763 uint32_t const offFixup = off;
1764 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit should be enough */, kIemNativeInstrCond_z);
1765
1766 if (idxRegDstIn != UINT8_MAX)
1767 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDstIn, idxRegDst);
1768 off = iemNativeEmitAmd64OneByteModRmInstrRREx(pCodeBuf, off, 0xd2, 0xd3, cOpBits, 4, idxRegDst);
1769
1770#elif defined(RT_ARCH_ARM64)
1771 /* We always need a copy of the input value (except when we can skip the EFLAGS calcs). */
1772 uint8_t const idxRegDstIn = iemNativeRegAllocTmp(pReNative, &off);
1773 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
1774
1775 /* Check if it's NOP before we do anything. We MODIFY idxRegCount here! */
1776 Assert(Armv8A64ConvertImmRImmS2Mask32(4, 0) == 0x1f);
1777 Assert(Armv8A64ConvertImmRImmS2Mask32(5, 0) == 0x3f);
1778 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(idxRegCount, idxRegCount, cOpBits > 32 ? 5 : 4, 0, false /*f64Bit*/);
1779 uint32_t const offFixup = off;
1780 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off, kArmv8InstrCond_Eq);
1781
1782 pCodeBuf[off++] = Armv8A64MkInstrMov(idxRegDstIn, idxRegDst);
1783 pCodeBuf[off++] = Armv8A64MkInstrLslv(idxRegDst, idxRegDst, idxRegCount, cOpBits > 32 /*f64Bit*/);
1784 if (cOpBits < 32)
1785 {
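/* Zero the bits shifted beyond the operand width so idxRegDst holds a properly
   truncated 8/16-bit result. */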
1786 Assert(Armv8A64ConvertImmRImmS2Mask32(7, 0) == 0xff);
1787 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1788 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegDst, idxRegDst, cOpBits - 1, 0, false /*f64Bit*/);
1789 }
1790
1791#else
1792# error "port me"
1793#endif
1794
1795 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1796 off = iemNativeEmitEFlagsForShift(pReNative, off, idxRegEfl, idxRegDst, idxRegDstIn, idxRegCount,
1797 cOpBits, kIemNativeEmitEFlagsForShiftType_Left, idxRegTmp);
1798
1799 /* fixup the jump */
1800 iemNativeFixupFixedJump(pReNative, offFixup, off);
1801
1802#ifdef RT_ARCH_AMD64
1803 if (idxRegDstIn != UINT8_MAX)
1804#endif
1805 iemNativeRegFreeTmp(pReNative, idxRegDstIn);
1806 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
1807 iemNativeVarRegisterRelease(pReNative, idxVarDst);
1808 iemNativeVarRegisterRelease(pReNative, idxVarCount);
1809 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1810 return off;
1811}
1812
1813
1814DECL_INLINE_THROW(uint32_t)
1815iemNativeEmit_shr_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1816 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1817{
1818 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1819 AssertFailed();
1820 return iemNativeEmitBrk(pReNative, off, 0x666);
1821}
1822
1823
1824DECL_INLINE_THROW(uint32_t)
1825iemNativeEmit_sar_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1826 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1827{
1828 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1829 AssertFailed();
1830 return iemNativeEmitBrk(pReNative, off, 0x666);
1831}
1832
1833
1834DECL_INLINE_THROW(uint32_t)
1835iemNativeEmit_rol_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1836 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1837{
1838 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1839 AssertFailed();
1840 return iemNativeEmitBrk(pReNative, off, 0x666);
1841}
1842
1843
1844DECL_INLINE_THROW(uint32_t)
1845iemNativeEmit_ror_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1846 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1847{
1848 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1849 AssertFailed();
1850 return iemNativeEmitBrk(pReNative, off, 0x666);
1851}
1852
1853
1854DECL_INLINE_THROW(uint32_t)
1855iemNativeEmit_rcl_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1856 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1857{
1858 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1859 AssertFailed();
1860 return iemNativeEmitBrk(pReNative, off, 0x666);
1861}
1862
1863
1864DECL_INLINE_THROW(uint32_t)
1865iemNativeEmit_rcr_r_CL_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1866 uint8_t idxVarDst, uint8_t idxVarCount, uint8_t idxVarEfl, uint8_t cOpBits)
1867{
1868 RT_NOREF(idxVarDst, idxVarCount, idxVarEfl, cOpBits);
1869 AssertFailed();
1870 return iemNativeEmitBrk(pReNative, off, 0x666);
1871}
1872
1873
1874
1875#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1876/*********************************************************************************************************************************
1877* SIMD emitters. *
1878*********************************************************************************************************************************/
1879
1880/**
1881 * Common emitter for the PXOR, XORPS, XORPD instructions - guest register / guest register variant.
1882 */
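/* Note: PXOR, XORPS and XORPD all produce the same 128-bit XOR result; they only
   differ in encoding/execution domain, so a single emitter covers all three. */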
1883DECL_INLINE_THROW(uint32_t)
1884iemNativeEmit_pxor_rr_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1885 uint8_t const idxSimdGstRegDst, uint8_t const idxSimdGstRegSrc)
1886{
1887 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
1888 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
1889 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegSrc),
1890 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
1891
1892#ifdef RT_ARCH_AMD64
1893 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1894
1895 /* pxor xmm, xmm */
1896 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1897 if (idxSimdRegDst >= 8 || idxSimdRegSrc >= 8)
1898 pCodeBuf[off++] = (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
1899 | (idxSimdRegDst >= 8 ? X86_OP_REX_R : 0);
1900 pCodeBuf[off++] = 0x0f;
1901 pCodeBuf[off++] = 0xef;
1902 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdRegDst & 7, idxSimdRegSrc & 7);
1903
1904#elif defined(RT_ARCH_ARM64)
1905 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1906
1907 pCodeBuf[off++] = Armv8A64MkVecInstrEor(idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc);
1908#else
1909# error "port me"
1910#endif
1911
1912 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
1913 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
1914
1915 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1916 return off;
1917}
1918
1919
1920/**
1921 * Common emitter for the PXOR, XORPS, XORPD instructions - guest register / recompiler variable variant.
1922 */
1923DECL_INLINE_THROW(uint32_t)
1924iemNativeEmit_pxor_rv_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1925 uint8_t const idxSimdGstRegDst, uint8_t const idxVarSrc)
1926{
1927 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
1928 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarSrc, sizeof(RTUINT128U));
1929
1930 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
1931 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
1932 uint8_t const idxSimdRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
1933
1934
1935#ifdef RT_ARCH_AMD64
1936 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1937
1938 /* pxor xmm, xmm */
1939 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1940 if (idxSimdRegDst >= 8 || idxSimdRegSrc >= 8)
1941 pCodeBuf[off++] = (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
1942 | (idxSimdRegDst >= 8 ? X86_OP_REX_R : 0);
1943 pCodeBuf[off++] = 0x0f;
1944 pCodeBuf[off++] = 0xef;
1945 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdRegDst & 7, idxSimdRegSrc & 7);
1946
1947#elif defined(RT_ARCH_ARM64)
1948 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1949
1950 pCodeBuf[off++] = Armv8A64MkVecInstrEor(idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc);
1951#else
1952# error "port me"
1953#endif
1954
1955 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
1956 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
1957
1958 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1959 return off;
1960}
1961
1962
1963/**
1964 * Common emitter for the PAND, ANDPS, ANDPD instructions - guest register / guest register variant.
1965 */
1966DECL_INLINE_THROW(uint32_t)
1967iemNativeEmit_pand_rr_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1968 uint8_t const idxSimdGstRegDst, uint8_t const idxSimdGstRegSrc)
1969{
1970 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
1971 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
1972 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegSrc),
1973 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
1974
1975#ifdef RT_ARCH_AMD64
1976 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1977
1978 /* pand xmm, xmm */
1979 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1980 if (idxSimdRegDst >= 8 || idxSimdRegSrc >= 8)
1981 pCodeBuf[off++] = (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
1982 | (idxSimdRegDst >= 8 ? X86_OP_REX_R : 0);
1983 pCodeBuf[off++] = 0x0f;
1984 pCodeBuf[off++] = 0xdb;
1985 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdRegDst & 7, idxSimdRegSrc & 7);
1986
1987#elif defined(RT_ARCH_ARM64)
1988 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1989
1990 pCodeBuf[off++] = Armv8A64MkVecInstrAnd(idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc);
1991#else
1992# error "port me"
1993#endif
1994
1995 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
1996 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
1997
1998 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1999 return off;
2000}
2001
2002
2003/**
2004 * Common emitter for the PAND, ANDPS, ANDPD instructions - guest register / recompiler variable variant.
2005 */
2006DECL_INLINE_THROW(uint32_t)
2007iemNativeEmit_pand_rv_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2008 uint8_t const idxSimdGstRegDst, uint8_t const idxVarSrc)
2009{
2010 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2011 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarSrc, sizeof(RTUINT128U));
2012
2013 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
2014 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
2015 uint8_t const idxSimdRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
2016
2017
2018#ifdef RT_ARCH_AMD64
2019 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
2020
2021 /* pand xmm, xmm */
2022 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2023 if (idxSimdRegDst >= 8 || idxSimdRegSrc >= 8)
2024 pCodeBuf[off++] = (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
2025 | (idxSimdRegDst >= 8 ? X86_OP_REX_R : 0);
2026 pCodeBuf[off++] = 0x0f;
2027 pCodeBuf[off++] = 0xdb;
2028 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdRegDst & 7, idxSimdRegSrc & 7);
2029
2030#elif defined(RT_ARCH_ARM64)
2031 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2032
2033 pCodeBuf[off++] = Armv8A64MkVecInstrAnd(idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc);
2034#else
2035# error "port me"
2036#endif
2037
2038 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
2039 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2040
2041 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2042 return off;
2043}
2044#endif
2045
2046#endif /* !VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h */