VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp@ 93792

Last change on this file since 93792 was 93792, checked in by vboxsync, 3 years ago

VMM/IEM: Working on adding missing C version of IEMAllAImpl.asm functions. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 87.8 KB
Line 
1/* $Id: IEMAllAImplC.cpp 93792 2022-02-16 13:30:16Z vboxsync $ */
2/** @file
3 * IEM - Instruction Implementation in Assembly, portable C variant.
4 */
5
6/*
7 * Copyright (C) 2011-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "IEMInternal.h"
23#include <VBox/vmm/vmcc.h>
24#include <VBox/err.h>
25#include <iprt/x86.h>
26#include <iprt/uint128.h>
27
28
29/*********************************************************************************************************************************
30* Defined Constants And Macros *
31*********************************************************************************************************************************/
#if defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
/* No IEMAllAImpl.asm counterpart on ARM yet, so force the portable C variants. */
# define IEM_WITHOUT_ASSEMBLY
#endif
35
/**
 * Calculates the signed flag value given a result and its bit width.
 *
 * The signed flag (SF) is a duplication of the most significant bit in the
 * result.
 *
 * @returns X86_EFL_SF or 0.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )

/**
 * Calculates the zero flag value given a result.
 *
 * The zero flag (ZF) indicates whether the result is zero or not.
 *
 * @returns X86_EFL_ZF or 0.
 * @param   a_uResult       Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )

/**
 * Extracts the OF flag from a OF calculation result.
 *
 * These are typically used by concating with a bitcount.  The problem is that
 * 8-bit values needs shifting in the other direction than the others: the
 * 8-bit sign position sits below X86_EFL_OF_BIT and must be shifted left,
 * while the wider sign positions sit above it and are shifted right.
 */
#define X86_EFL_GET_OF_8(a_uValue)  ((uint32_t)((a_uValue) << (X86_EFL_OF_BIT - 8)) & X86_EFL_OF)
#define X86_EFL_GET_OF_16(a_uValue) ((uint32_t)((a_uValue) >> (16 - X86_EFL_OF_BIT)) & X86_EFL_OF)
#define X86_EFL_GET_OF_32(a_uValue) ((uint32_t)((a_uValue) >> (32 - X86_EFL_OF_BIT)) & X86_EFL_OF)
#define X86_EFL_GET_OF_64(a_uValue) ((uint32_t)((a_uValue) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF)
70
/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after an arithmetic op.
 *
 * The carry flag differs per operation, so the caller supplies it as an
 * expression; the other flags are derived from result/destination/source.
 * (Statement macro: updates *a_pfEFlags in place, no value produced.)
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_uDst          The original destination value (for AF/OF calc).
 * @param   a_uSrc          The source value (for AF/OF calc).
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_CfExpr        Bool expression for the carry flag (CF).
 * @param   a_OfMethod      0 for ADD-style, 1 for SUB-style.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_OfMethod) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        /* AF: carry out of bit 3 = bit 4 of (result ^ src ^ dst). */ \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        /* OF: signed overflow, extracted from the top bit of the expression. */ \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth( ((a_uDst) ^ (a_uSrc) ^ (a_OfMethod == 0 ? RT_BIT_64(a_cBitsWidth - 1) : 0)) \
                                                   & ((a_uResult) ^ (a_uDst)) ); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
96
/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
 *
 * CF and OF are defined to be 0 by logical operations.  AF on the other hand
 * is undefined.  We do not set AF, as that seems to make the most sense (which
 * probably makes it the most wrong in real life).
 * (Statement macro: updates *a_pfEFlags in place, no value produced.)
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_fExtra        Additional bits to set (e.g. X86_EFL_CF for BT*).
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= (a_fExtra); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
120
121
122/*********************************************************************************************************************************
123* Global Variables *
124*********************************************************************************************************************************/
125#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
126/**
127 * Parity calculation table.
128 *
129 * The generator code:
130 * @code
131 * #include <stdio.h>
132 *
133 * int main()
134 * {
135 * unsigned b;
136 * for (b = 0; b < 256; b++)
137 * {
138 * int cOnes = ( b & 1)
139 * + ((b >> 1) & 1)
140 * + ((b >> 2) & 1)
141 * + ((b >> 3) & 1)
142 * + ((b >> 4) & 1)
143 * + ((b >> 5) & 1)
144 * + ((b >> 6) & 1)
145 * + ((b >> 7) & 1);
146 * printf(" /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
147 * b,
148 * (b >> 7) & 1,
149 * (b >> 6) & 1,
150 * (b >> 5) & 1,
151 * (b >> 4) & 1,
152 * (b >> 3) & 1,
153 * (b >> 2) & 1,
154 * (b >> 1) & 1,
155 * b & 1,
156 * cOnes & 1 ? "0" : "X86_EFL_PF");
157 * }
158 * return 0;
159 * }
160 * @endcode
161 */
162static uint8_t const g_afParity[256] =
163{
164 /* 0000 = 00000000b */ X86_EFL_PF,
165 /* 0x01 = 00000001b */ 0,
166 /* 0x02 = 00000010b */ 0,
167 /* 0x03 = 00000011b */ X86_EFL_PF,
168 /* 0x04 = 00000100b */ 0,
169 /* 0x05 = 00000101b */ X86_EFL_PF,
170 /* 0x06 = 00000110b */ X86_EFL_PF,
171 /* 0x07 = 00000111b */ 0,
172 /* 0x08 = 00001000b */ 0,
173 /* 0x09 = 00001001b */ X86_EFL_PF,
174 /* 0x0a = 00001010b */ X86_EFL_PF,
175 /* 0x0b = 00001011b */ 0,
176 /* 0x0c = 00001100b */ X86_EFL_PF,
177 /* 0x0d = 00001101b */ 0,
178 /* 0x0e = 00001110b */ 0,
179 /* 0x0f = 00001111b */ X86_EFL_PF,
180 /* 0x10 = 00010000b */ 0,
181 /* 0x11 = 00010001b */ X86_EFL_PF,
182 /* 0x12 = 00010010b */ X86_EFL_PF,
183 /* 0x13 = 00010011b */ 0,
184 /* 0x14 = 00010100b */ X86_EFL_PF,
185 /* 0x15 = 00010101b */ 0,
186 /* 0x16 = 00010110b */ 0,
187 /* 0x17 = 00010111b */ X86_EFL_PF,
188 /* 0x18 = 00011000b */ X86_EFL_PF,
189 /* 0x19 = 00011001b */ 0,
190 /* 0x1a = 00011010b */ 0,
191 /* 0x1b = 00011011b */ X86_EFL_PF,
192 /* 0x1c = 00011100b */ 0,
193 /* 0x1d = 00011101b */ X86_EFL_PF,
194 /* 0x1e = 00011110b */ X86_EFL_PF,
195 /* 0x1f = 00011111b */ 0,
196 /* 0x20 = 00100000b */ 0,
197 /* 0x21 = 00100001b */ X86_EFL_PF,
198 /* 0x22 = 00100010b */ X86_EFL_PF,
199 /* 0x23 = 00100011b */ 0,
200 /* 0x24 = 00100100b */ X86_EFL_PF,
201 /* 0x25 = 00100101b */ 0,
202 /* 0x26 = 00100110b */ 0,
203 /* 0x27 = 00100111b */ X86_EFL_PF,
204 /* 0x28 = 00101000b */ X86_EFL_PF,
205 /* 0x29 = 00101001b */ 0,
206 /* 0x2a = 00101010b */ 0,
207 /* 0x2b = 00101011b */ X86_EFL_PF,
208 /* 0x2c = 00101100b */ 0,
209 /* 0x2d = 00101101b */ X86_EFL_PF,
210 /* 0x2e = 00101110b */ X86_EFL_PF,
211 /* 0x2f = 00101111b */ 0,
212 /* 0x30 = 00110000b */ X86_EFL_PF,
213 /* 0x31 = 00110001b */ 0,
214 /* 0x32 = 00110010b */ 0,
215 /* 0x33 = 00110011b */ X86_EFL_PF,
216 /* 0x34 = 00110100b */ 0,
217 /* 0x35 = 00110101b */ X86_EFL_PF,
218 /* 0x36 = 00110110b */ X86_EFL_PF,
219 /* 0x37 = 00110111b */ 0,
220 /* 0x38 = 00111000b */ 0,
221 /* 0x39 = 00111001b */ X86_EFL_PF,
222 /* 0x3a = 00111010b */ X86_EFL_PF,
223 /* 0x3b = 00111011b */ 0,
224 /* 0x3c = 00111100b */ X86_EFL_PF,
225 /* 0x3d = 00111101b */ 0,
226 /* 0x3e = 00111110b */ 0,
227 /* 0x3f = 00111111b */ X86_EFL_PF,
228 /* 0x40 = 01000000b */ 0,
229 /* 0x41 = 01000001b */ X86_EFL_PF,
230 /* 0x42 = 01000010b */ X86_EFL_PF,
231 /* 0x43 = 01000011b */ 0,
232 /* 0x44 = 01000100b */ X86_EFL_PF,
233 /* 0x45 = 01000101b */ 0,
234 /* 0x46 = 01000110b */ 0,
235 /* 0x47 = 01000111b */ X86_EFL_PF,
236 /* 0x48 = 01001000b */ X86_EFL_PF,
237 /* 0x49 = 01001001b */ 0,
238 /* 0x4a = 01001010b */ 0,
239 /* 0x4b = 01001011b */ X86_EFL_PF,
240 /* 0x4c = 01001100b */ 0,
241 /* 0x4d = 01001101b */ X86_EFL_PF,
242 /* 0x4e = 01001110b */ X86_EFL_PF,
243 /* 0x4f = 01001111b */ 0,
244 /* 0x50 = 01010000b */ X86_EFL_PF,
245 /* 0x51 = 01010001b */ 0,
246 /* 0x52 = 01010010b */ 0,
247 /* 0x53 = 01010011b */ X86_EFL_PF,
248 /* 0x54 = 01010100b */ 0,
249 /* 0x55 = 01010101b */ X86_EFL_PF,
250 /* 0x56 = 01010110b */ X86_EFL_PF,
251 /* 0x57 = 01010111b */ 0,
252 /* 0x58 = 01011000b */ 0,
253 /* 0x59 = 01011001b */ X86_EFL_PF,
254 /* 0x5a = 01011010b */ X86_EFL_PF,
255 /* 0x5b = 01011011b */ 0,
256 /* 0x5c = 01011100b */ X86_EFL_PF,
257 /* 0x5d = 01011101b */ 0,
258 /* 0x5e = 01011110b */ 0,
259 /* 0x5f = 01011111b */ X86_EFL_PF,
260 /* 0x60 = 01100000b */ X86_EFL_PF,
261 /* 0x61 = 01100001b */ 0,
262 /* 0x62 = 01100010b */ 0,
263 /* 0x63 = 01100011b */ X86_EFL_PF,
264 /* 0x64 = 01100100b */ 0,
265 /* 0x65 = 01100101b */ X86_EFL_PF,
266 /* 0x66 = 01100110b */ X86_EFL_PF,
267 /* 0x67 = 01100111b */ 0,
268 /* 0x68 = 01101000b */ 0,
269 /* 0x69 = 01101001b */ X86_EFL_PF,
270 /* 0x6a = 01101010b */ X86_EFL_PF,
271 /* 0x6b = 01101011b */ 0,
272 /* 0x6c = 01101100b */ X86_EFL_PF,
273 /* 0x6d = 01101101b */ 0,
274 /* 0x6e = 01101110b */ 0,
275 /* 0x6f = 01101111b */ X86_EFL_PF,
276 /* 0x70 = 01110000b */ 0,
277 /* 0x71 = 01110001b */ X86_EFL_PF,
278 /* 0x72 = 01110010b */ X86_EFL_PF,
279 /* 0x73 = 01110011b */ 0,
280 /* 0x74 = 01110100b */ X86_EFL_PF,
281 /* 0x75 = 01110101b */ 0,
282 /* 0x76 = 01110110b */ 0,
283 /* 0x77 = 01110111b */ X86_EFL_PF,
284 /* 0x78 = 01111000b */ X86_EFL_PF,
285 /* 0x79 = 01111001b */ 0,
286 /* 0x7a = 01111010b */ 0,
287 /* 0x7b = 01111011b */ X86_EFL_PF,
288 /* 0x7c = 01111100b */ 0,
289 /* 0x7d = 01111101b */ X86_EFL_PF,
290 /* 0x7e = 01111110b */ X86_EFL_PF,
291 /* 0x7f = 01111111b */ 0,
292 /* 0x80 = 10000000b */ 0,
293 /* 0x81 = 10000001b */ X86_EFL_PF,
294 /* 0x82 = 10000010b */ X86_EFL_PF,
295 /* 0x83 = 10000011b */ 0,
296 /* 0x84 = 10000100b */ X86_EFL_PF,
297 /* 0x85 = 10000101b */ 0,
298 /* 0x86 = 10000110b */ 0,
299 /* 0x87 = 10000111b */ X86_EFL_PF,
300 /* 0x88 = 10001000b */ X86_EFL_PF,
301 /* 0x89 = 10001001b */ 0,
302 /* 0x8a = 10001010b */ 0,
303 /* 0x8b = 10001011b */ X86_EFL_PF,
304 /* 0x8c = 10001100b */ 0,
305 /* 0x8d = 10001101b */ X86_EFL_PF,
306 /* 0x8e = 10001110b */ X86_EFL_PF,
307 /* 0x8f = 10001111b */ 0,
308 /* 0x90 = 10010000b */ X86_EFL_PF,
309 /* 0x91 = 10010001b */ 0,
310 /* 0x92 = 10010010b */ 0,
311 /* 0x93 = 10010011b */ X86_EFL_PF,
312 /* 0x94 = 10010100b */ 0,
313 /* 0x95 = 10010101b */ X86_EFL_PF,
314 /* 0x96 = 10010110b */ X86_EFL_PF,
315 /* 0x97 = 10010111b */ 0,
316 /* 0x98 = 10011000b */ 0,
317 /* 0x99 = 10011001b */ X86_EFL_PF,
318 /* 0x9a = 10011010b */ X86_EFL_PF,
319 /* 0x9b = 10011011b */ 0,
320 /* 0x9c = 10011100b */ X86_EFL_PF,
321 /* 0x9d = 10011101b */ 0,
322 /* 0x9e = 10011110b */ 0,
323 /* 0x9f = 10011111b */ X86_EFL_PF,
324 /* 0xa0 = 10100000b */ X86_EFL_PF,
325 /* 0xa1 = 10100001b */ 0,
326 /* 0xa2 = 10100010b */ 0,
327 /* 0xa3 = 10100011b */ X86_EFL_PF,
328 /* 0xa4 = 10100100b */ 0,
329 /* 0xa5 = 10100101b */ X86_EFL_PF,
330 /* 0xa6 = 10100110b */ X86_EFL_PF,
331 /* 0xa7 = 10100111b */ 0,
332 /* 0xa8 = 10101000b */ 0,
333 /* 0xa9 = 10101001b */ X86_EFL_PF,
334 /* 0xaa = 10101010b */ X86_EFL_PF,
335 /* 0xab = 10101011b */ 0,
336 /* 0xac = 10101100b */ X86_EFL_PF,
337 /* 0xad = 10101101b */ 0,
338 /* 0xae = 10101110b */ 0,
339 /* 0xaf = 10101111b */ X86_EFL_PF,
340 /* 0xb0 = 10110000b */ 0,
341 /* 0xb1 = 10110001b */ X86_EFL_PF,
342 /* 0xb2 = 10110010b */ X86_EFL_PF,
343 /* 0xb3 = 10110011b */ 0,
344 /* 0xb4 = 10110100b */ X86_EFL_PF,
345 /* 0xb5 = 10110101b */ 0,
346 /* 0xb6 = 10110110b */ 0,
347 /* 0xb7 = 10110111b */ X86_EFL_PF,
348 /* 0xb8 = 10111000b */ X86_EFL_PF,
349 /* 0xb9 = 10111001b */ 0,
350 /* 0xba = 10111010b */ 0,
351 /* 0xbb = 10111011b */ X86_EFL_PF,
352 /* 0xbc = 10111100b */ 0,
353 /* 0xbd = 10111101b */ X86_EFL_PF,
354 /* 0xbe = 10111110b */ X86_EFL_PF,
355 /* 0xbf = 10111111b */ 0,
356 /* 0xc0 = 11000000b */ X86_EFL_PF,
357 /* 0xc1 = 11000001b */ 0,
358 /* 0xc2 = 11000010b */ 0,
359 /* 0xc3 = 11000011b */ X86_EFL_PF,
360 /* 0xc4 = 11000100b */ 0,
361 /* 0xc5 = 11000101b */ X86_EFL_PF,
362 /* 0xc6 = 11000110b */ X86_EFL_PF,
363 /* 0xc7 = 11000111b */ 0,
364 /* 0xc8 = 11001000b */ 0,
365 /* 0xc9 = 11001001b */ X86_EFL_PF,
366 /* 0xca = 11001010b */ X86_EFL_PF,
367 /* 0xcb = 11001011b */ 0,
368 /* 0xcc = 11001100b */ X86_EFL_PF,
369 /* 0xcd = 11001101b */ 0,
370 /* 0xce = 11001110b */ 0,
371 /* 0xcf = 11001111b */ X86_EFL_PF,
372 /* 0xd0 = 11010000b */ 0,
373 /* 0xd1 = 11010001b */ X86_EFL_PF,
374 /* 0xd2 = 11010010b */ X86_EFL_PF,
375 /* 0xd3 = 11010011b */ 0,
376 /* 0xd4 = 11010100b */ X86_EFL_PF,
377 /* 0xd5 = 11010101b */ 0,
378 /* 0xd6 = 11010110b */ 0,
379 /* 0xd7 = 11010111b */ X86_EFL_PF,
380 /* 0xd8 = 11011000b */ X86_EFL_PF,
381 /* 0xd9 = 11011001b */ 0,
382 /* 0xda = 11011010b */ 0,
383 /* 0xdb = 11011011b */ X86_EFL_PF,
384 /* 0xdc = 11011100b */ 0,
385 /* 0xdd = 11011101b */ X86_EFL_PF,
386 /* 0xde = 11011110b */ X86_EFL_PF,
387 /* 0xdf = 11011111b */ 0,
388 /* 0xe0 = 11100000b */ 0,
389 /* 0xe1 = 11100001b */ X86_EFL_PF,
390 /* 0xe2 = 11100010b */ X86_EFL_PF,
391 /* 0xe3 = 11100011b */ 0,
392 /* 0xe4 = 11100100b */ X86_EFL_PF,
393 /* 0xe5 = 11100101b */ 0,
394 /* 0xe6 = 11100110b */ 0,
395 /* 0xe7 = 11100111b */ X86_EFL_PF,
396 /* 0xe8 = 11101000b */ X86_EFL_PF,
397 /* 0xe9 = 11101001b */ 0,
398 /* 0xea = 11101010b */ 0,
399 /* 0xeb = 11101011b */ X86_EFL_PF,
400 /* 0xec = 11101100b */ 0,
401 /* 0xed = 11101101b */ X86_EFL_PF,
402 /* 0xee = 11101110b */ X86_EFL_PF,
403 /* 0xef = 11101111b */ 0,
404 /* 0xf0 = 11110000b */ X86_EFL_PF,
405 /* 0xf1 = 11110001b */ 0,
406 /* 0xf2 = 11110010b */ 0,
407 /* 0xf3 = 11110011b */ X86_EFL_PF,
408 /* 0xf4 = 11110100b */ 0,
409 /* 0xf5 = 11110101b */ X86_EFL_PF,
410 /* 0xf6 = 11110110b */ X86_EFL_PF,
411 /* 0xf7 = 11110111b */ 0,
412 /* 0xf8 = 11111000b */ 0,
413 /* 0xf9 = 11111001b */ X86_EFL_PF,
414 /* 0xfa = 11111010b */ X86_EFL_PF,
415 /* 0xfb = 11111011b */ 0,
416 /* 0xfc = 11111100b */ X86_EFL_PF,
417 /* 0xfd = 11111101b */ 0,
418 /* 0xfe = 11111110b */ 0,
419 /* 0xff = 11111111b */ X86_EFL_PF,
420};
421#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
422
423
424
425/*
426 * There are a few 64-bit on 32-bit things we'd rather do in C. Actually, doing
427 * it all in C is probably safer atm., optimize what's necessary later, maybe.
428 */
429#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
430
431
432/*********************************************************************************************************************************
433* Binary Operations *
434*********************************************************************************************************************************/
435
436/*
437 * ADD
438 */
439
440IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
441{
442 uint64_t uDst = *puDst;
443 uint64_t uResult = uDst + uSrc;
444 *puDst = uResult;
445 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, 0);
446}
447
448# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
449
450IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
451{
452 uint32_t uDst = *puDst;
453 uint32_t uResult = uDst + uSrc;
454 *puDst = uResult;
455 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, 0);
456}
457
458
459IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
460{
461 uint16_t uDst = *puDst;
462 uint16_t uResult = uDst + uSrc;
463 *puDst = uResult;
464 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, 0);
465}
466
467
468IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
469{
470 uint8_t uDst = *puDst;
471 uint8_t uResult = uDst + uSrc;
472 *puDst = uResult;
473 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, 0);
474}
475
476# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
477
478/*
479 * ADC
480 */
481
482IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
483{
484 if (!(*pfEFlags & X86_EFL_CF))
485 iemAImpl_add_u64(puDst, uSrc, pfEFlags);
486 else
487 {
488 uint64_t uDst = *puDst;
489 uint64_t uResult = uDst + uSrc + 1;
490 *puDst = uResult;
491 /** @todo verify AF and OF calculations. */
492 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, 0);
493 }
494}
495
496# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
497
498IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
499{
500 if (!(*pfEFlags & X86_EFL_CF))
501 iemAImpl_add_u32(puDst, uSrc, pfEFlags);
502 else
503 {
504 uint32_t uDst = *puDst;
505 uint32_t uResult = uDst + uSrc + 1;
506 *puDst = uResult;
507 /** @todo verify AF and OF calculations. */
508 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, 0);
509 }
510}
511
512
513IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
514{
515 if (!(*pfEFlags & X86_EFL_CF))
516 iemAImpl_add_u16(puDst, uSrc, pfEFlags);
517 else
518 {
519 uint16_t uDst = *puDst;
520 uint16_t uResult = uDst + uSrc + 1;
521 *puDst = uResult;
522 /** @todo verify AF and OF calculations. */
523 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, 0);
524 }
525}
526
527
528IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
529{
530 if (!(*pfEFlags & X86_EFL_CF))
531 iemAImpl_add_u8(puDst, uSrc, pfEFlags);
532 else
533 {
534 uint8_t uDst = *puDst;
535 uint8_t uResult = uDst + uSrc + 1;
536 *puDst = uResult;
537 /** @todo verify AF and OF calculations. */
538 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, 0);
539 }
540}
541
542# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
543
544/*
545 * SUB
546 */
547
548IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
549{
550 uint64_t uDst = *puDst;
551 uint64_t uResult = uDst - uSrc;
552 *puDst = uResult;
553 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, 1);
554}
555
556# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
557
558IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
559{
560 uint32_t uDst = *puDst;
561 uint32_t uResult = uDst - uSrc;
562 *puDst = uResult;
563 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, 1);
564}
565
566
567IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
568{
569 uint16_t uDst = *puDst;
570 uint16_t uResult = uDst - uSrc;
571 *puDst = uResult;
572 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, 1);
573}
574
575
576IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
577{
578 uint8_t uDst = *puDst;
579 uint8_t uResult = uDst - uSrc;
580 *puDst = uResult;
581 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, 1);
582}
583
584# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
585
586/*
587 * SBB
588 */
589
590IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
591{
592 if (!(*pfEFlags & X86_EFL_CF))
593 iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
594 else
595 {
596 uint64_t uDst = *puDst;
597 uint64_t uResult = uDst - uSrc - 1;
598 *puDst = uResult;
599 /** @todo verify AF and OF calculations. */
600 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, 1);
601 }
602}
603
604# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
605
606IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
607{
608 if (!(*pfEFlags & X86_EFL_CF))
609 iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
610 else
611 {
612 uint32_t uDst = *puDst;
613 uint32_t uResult = uDst - uSrc - 1;
614 *puDst = uResult;
615 /** @todo verify AF and OF calculations. */
616 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, 1);
617 }
618}
619
620
621IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
622{
623 if (!(*pfEFlags & X86_EFL_CF))
624 iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
625 else
626 {
627 uint16_t uDst = *puDst;
628 uint16_t uResult = uDst - uSrc - 1;
629 *puDst = uResult;
630 /** @todo verify AF and OF calculations. */
631 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, 1);
632 }
633}
634
635
636IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
637{
638 if (!(*pfEFlags & X86_EFL_CF))
639 iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
640 else
641 {
642 uint8_t uDst = *puDst;
643 uint8_t uResult = uDst - uSrc - 1;
644 *puDst = uResult;
645 /** @todo verify AF and OF calculations. */
646 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, 1);
647 }
648}
649
650# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
651
652
653/*
654 * OR
655 */
656
657IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
658{
659 uint64_t uResult = *puDst | uSrc;
660 *puDst = uResult;
661 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
662}
663
664# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
665
666IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
667{
668 uint32_t uResult = *puDst | uSrc;
669 *puDst = uResult;
670 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
671}
672
673
674IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
675{
676 uint16_t uResult = *puDst | uSrc;
677 *puDst = uResult;
678 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
679}
680
681
682IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
683{
684 uint8_t uResult = *puDst | uSrc;
685 *puDst = uResult;
686 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
687}
688
689# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
690
691/*
692 * XOR
693 */
694
695IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
696{
697 uint64_t uResult = *puDst ^ uSrc;
698 *puDst = uResult;
699 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
700}
701
702# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
703
704IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
705{
706 uint32_t uResult = *puDst ^ uSrc;
707 *puDst = uResult;
708 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
709}
710
711
712IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
713{
714 uint16_t uResult = *puDst ^ uSrc;
715 *puDst = uResult;
716 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
717}
718
719
720IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
721{
722 uint8_t uResult = *puDst ^ uSrc;
723 *puDst = uResult;
724 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
725}
726
727# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
728
729/*
730 * AND
731 */
732
733IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
734{
735 uint64_t uResult = *puDst & uSrc;
736 *puDst = uResult;
737 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
738}
739
740# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
741
742IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
743{
744 uint32_t uResult = *puDst & uSrc;
745 *puDst = uResult;
746 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
747}
748
749
750IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
751{
752 uint16_t uResult = *puDst & uSrc;
753 *puDst = uResult;
754 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
755}
756
757
758IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
759{
760 uint8_t uResult = *puDst & uSrc;
761 *puDst = uResult;
762 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
763}
764
765# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
766
767/*
768 * CMP
769 */
770
771IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
772{
773 uint64_t uDstTmp = *puDst;
774 iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
775}
776
777# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
778
779IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
780{
781 uint32_t uDstTmp = *puDst;
782 iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
783}
784
785
786IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
787{
788 uint16_t uDstTmp = *puDst;
789 iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
790}
791
792
793IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
794{
795 uint8_t uDstTmp = *puDst;
796 iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
797}
798
799# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
800
801/*
802 * TEST
803 */
804
805IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
806{
807 uint64_t uResult = *puDst & uSrc;
808 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
809}
810
811# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
812
813IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
814{
815 uint32_t uResult = *puDst & uSrc;
816 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
817}
818
819
820IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
821{
822 uint16_t uResult = *puDst & uSrc;
823 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
824}
825
826
827IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
828{
829 uint8_t uResult = *puDst & uSrc;
830 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
831}
832
833# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
834
835
836/*
837 * LOCK prefixed variants of the above
838 */
839
/** Width-generic locked binary operand operation body.
 *
 * Reads the destination, applies the plain C worker to a local copy, and
 * retries via compare-exchange until no other agent modified the destination
 * in between; the EFLAGS from the final (successful) iteration are kept.
 * Expects puDst, uSrc and pfEFlags in the enclosing scope. */
# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    do { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, uSrc, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)


/** Emits an iemAImpl_<op>_u<width>_locked function wrapping DO_LOCKED_BIN_OP. */
#define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint ## a_cBitsWidth ## _t uSrc, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
    }

EMIT_LOCKED_BIN_OP(add, 64)
EMIT_LOCKED_BIN_OP(adc, 64)
EMIT_LOCKED_BIN_OP(sub, 64)
EMIT_LOCKED_BIN_OP(sbb, 64)
EMIT_LOCKED_BIN_OP(or, 64)
EMIT_LOCKED_BIN_OP(xor, 64)
EMIT_LOCKED_BIN_OP(and, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(add, 32)
EMIT_LOCKED_BIN_OP(adc, 32)
EMIT_LOCKED_BIN_OP(sub, 32)
EMIT_LOCKED_BIN_OP(sbb, 32)
EMIT_LOCKED_BIN_OP(or, 32)
EMIT_LOCKED_BIN_OP(xor, 32)
EMIT_LOCKED_BIN_OP(and, 32)

EMIT_LOCKED_BIN_OP(add, 16)
EMIT_LOCKED_BIN_OP(adc, 16)
EMIT_LOCKED_BIN_OP(sub, 16)
EMIT_LOCKED_BIN_OP(sbb, 16)
EMIT_LOCKED_BIN_OP(or, 16)
EMIT_LOCKED_BIN_OP(xor, 16)
EMIT_LOCKED_BIN_OP(and, 16)

EMIT_LOCKED_BIN_OP(add, 8)
EMIT_LOCKED_BIN_OP(adc, 8)
EMIT_LOCKED_BIN_OP(sub, 8)
EMIT_LOCKED_BIN_OP(sbb, 8)
EMIT_LOCKED_BIN_OP(or, 8)
EMIT_LOCKED_BIN_OP(xor, 8)
EMIT_LOCKED_BIN_OP(and, 8)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
896
897
898/*
899 * Bit operations (same signature as above).
900 */
901
902/*
903 * BT
904 */
905
906IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
907{
908 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
909 logical operation (AND/OR/whatever). */
910 Assert(uSrc < 64);
911 uint64_t uDst = *puDst;
912 if (uDst & RT_BIT_64(uSrc))
913 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
914 else
915 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
916}
917
918# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
919
920IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
921{
922 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
923 logical operation (AND/OR/whatever). */
924 Assert(uSrc < 32);
925 uint32_t uDst = *puDst;
926 if (uDst & RT_BIT_32(uSrc))
927 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
928 else
929 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
930}
931
932IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
933{
934 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
935 logical operation (AND/OR/whatever). */
936 Assert(uSrc < 16);
937 uint16_t uDst = *puDst;
938 if (uDst & RT_BIT_32(uSrc))
939 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
940 else
941 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
942}
943
944# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
945
946/*
947 * BTC
948 */
949
950IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
951{
952 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
953 logical operation (AND/OR/whatever). */
954 Assert(uSrc < 64);
955 uint64_t fMask = RT_BIT_64(uSrc);
956 uint64_t uDst = *puDst;
957 if (uDst & fMask)
958 {
959 uDst &= ~fMask;
960 *puDst = uDst;
961 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
962 }
963 else
964 {
965 uDst |= fMask;
966 *puDst = uDst;
967 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
968 }
969}
970
971# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
972
973IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
974{
975 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
976 logical operation (AND/OR/whatever). */
977 Assert(uSrc < 32);
978 uint32_t fMask = RT_BIT_32(uSrc);
979 uint32_t uDst = *puDst;
980 if (uDst & fMask)
981 {
982 uDst &= ~fMask;
983 *puDst = uDst;
984 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
985 }
986 else
987 {
988 uDst |= fMask;
989 *puDst = uDst;
990 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
991 }
992}
993
994
995IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
996{
997 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
998 logical operation (AND/OR/whatever). */
999 Assert(uSrc < 16);
1000 uint16_t fMask = RT_BIT_32(uSrc);
1001 uint16_t uDst = *puDst;
1002 if (uDst & fMask)
1003 {
1004 uDst &= ~fMask;
1005 *puDst = uDst;
1006 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
1007 }
1008 else
1009 {
1010 uDst |= fMask;
1011 *puDst = uDst;
1012 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
1013 }
1014}
1015
1016# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1017
1018/*
1019 * BTR
1020 */
1021
1022IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1023{
1024 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1025 logical operation (AND/OR/whatever). */
1026 Assert(uSrc < 64);
1027 uint64_t fMask = RT_BIT_64(uSrc);
1028 uint64_t uDst = *puDst;
1029 if (uDst & fMask)
1030 {
1031 uDst &= ~fMask;
1032 *puDst = uDst;
1033 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
1034 }
1035 else
1036 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
1037}
1038
1039# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1040
1041IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1042{
1043 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1044 logical operation (AND/OR/whatever). */
1045 Assert(uSrc < 32);
1046 uint32_t fMask = RT_BIT_32(uSrc);
1047 uint32_t uDst = *puDst;
1048 if (uDst & fMask)
1049 {
1050 uDst &= ~fMask;
1051 *puDst = uDst;
1052 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
1053 }
1054 else
1055 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
1056}
1057
1058
1059IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1060{
1061 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1062 logical operation (AND/OR/whatever). */
1063 Assert(uSrc < 16);
1064 uint16_t fMask = RT_BIT_32(uSrc);
1065 uint16_t uDst = *puDst;
1066 if (uDst & fMask)
1067 {
1068 uDst &= ~fMask;
1069 *puDst = uDst;
1070 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
1071 }
1072 else
1073 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
1074}
1075
1076# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1077
1078/*
1079 * BTS
1080 */
1081
1082IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1083{
1084 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1085 logical operation (AND/OR/whatever). */
1086 Assert(uSrc < 64);
1087 uint64_t fMask = RT_BIT_64(uSrc);
1088 uint64_t uDst = *puDst;
1089 if (uDst & fMask)
1090 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
1091 else
1092 {
1093 uDst |= fMask;
1094 *puDst = uDst;
1095 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
1096 }
1097}
1098
1099# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1100
1101IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1102{
1103 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1104 logical operation (AND/OR/whatever). */
1105 Assert(uSrc < 32);
1106 uint32_t fMask = RT_BIT_32(uSrc);
1107 uint32_t uDst = *puDst;
1108 if (uDst & fMask)
1109 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
1110 else
1111 {
1112 uDst |= fMask;
1113 *puDst = uDst;
1114 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
1115 }
1116}
1117
1118
1119IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1120{
1121 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1122 logical operation (AND/OR/whatever). */
1123 Assert(uSrc < 16);
1124 uint16_t fMask = RT_BIT_32(uSrc);
1125 uint32_t uDst = *puDst;
1126 if (uDst & fMask)
1127 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
1128 else
1129 {
1130 uDst |= fMask;
1131 *puDst = uDst;
1132 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
1133 }
1134}
1135
1136# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1137
1138
/* Locked (atomic) variants of BTC/BTR/BTS for the LOCK-prefixed / memory-operand forms. */
EMIT_LOCKED_BIN_OP(btc, 64)
EMIT_LOCKED_BIN_OP(btr, 64)
EMIT_LOCKED_BIN_OP(bts, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(btc, 32)
EMIT_LOCKED_BIN_OP(btr, 32)
EMIT_LOCKED_BIN_OP(bts, 32)

EMIT_LOCKED_BIN_OP(btc, 16)
EMIT_LOCKED_BIN_OP(btr, 16)
EMIT_LOCKED_BIN_OP(bts, 16)
1150# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1151
1152
1153/*
1154 * BSF - first (least significant) bit set
1155 */
1156
1157IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1158{
1159 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1160 /** @todo check what real CPUs do. */
1161 unsigned iBit = ASMBitFirstSetU64(uSrc);
1162 if (iBit)
1163 {
1164 *puDst = iBit - 1;
1165 *pfEFlags &= ~X86_EFL_ZF;
1166 }
1167 else
1168 *pfEFlags |= X86_EFL_ZF;
1169}
1170
1171# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1172
1173IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1174{
1175 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1176 /** @todo check what real CPUs do. */
1177 unsigned iBit = ASMBitFirstSetU32(uSrc);
1178 if (iBit)
1179 {
1180 *puDst = iBit - 1;
1181 *pfEFlags &= ~X86_EFL_ZF;
1182 }
1183 else
1184 *pfEFlags |= X86_EFL_ZF;
1185}
1186
1187
1188IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1189{
1190 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1191 /** @todo check what real CPUs do. */
1192 unsigned iBit = ASMBitFirstSetU16(uSrc);
1193 if (iBit)
1194 {
1195 *puDst = iBit - 1;
1196 *pfEFlags &= ~X86_EFL_ZF;
1197 }
1198 else
1199 *pfEFlags |= X86_EFL_ZF;
1200}
1201
1202# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1203
1204/*
1205 * BSR - last (most significant) bit set
1206 */
1207
1208IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1209{
1210 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1211 /** @todo check what real CPUs do. */
1212 unsigned iBit = ASMBitLastSetU64(uSrc);
1213 if (uSrc)
1214 {
1215 *puDst = iBit - 1;
1216 *pfEFlags &= ~X86_EFL_ZF;
1217 }
1218 else
1219 *pfEFlags |= X86_EFL_ZF;
1220}
1221
1222# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1223
1224IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1225{
1226 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1227 /** @todo check what real CPUs do. */
1228 unsigned iBit = ASMBitLastSetU32(uSrc);
1229 if (uSrc)
1230 {
1231 *puDst = iBit - 1;
1232 *pfEFlags &= ~X86_EFL_ZF;
1233 }
1234 else
1235 *pfEFlags |= X86_EFL_ZF;
1236}
1237
1238
1239IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1240{
1241 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1242 /** @todo check what real CPUs do. */
1243 unsigned iBit = ASMBitLastSetU16(uSrc);
1244 if (uSrc)
1245 {
1246 *puDst = iBit - 1;
1247 *pfEFlags &= ~X86_EFL_ZF;
1248 }
1249 else
1250 *pfEFlags |= X86_EFL_ZF;
1251}
1252
1253# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1254
1255
1256/*
1257 * XCHG
1258 */
1259
/** XCHG (64-bit): atomically swaps *puMem and *puReg via a CAS loop. */
IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64,(uint64_t *puMem, uint64_t *puReg))
{
    /* XCHG implies LOCK. */
    uint64_t uOldMem = *puMem; /* initial guess for the compare-exchange */
    while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem)) /* on failure uOldMem is refreshed with the current value */
        ASMNopPause();
    *puReg = uOldMem; /* register receives the old memory value */
}
1268
1269# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1270
/** XCHG (32-bit): atomically swaps *puMem and *puReg via a CAS loop. */
IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32,(uint32_t *puMem, uint32_t *puReg))
{
    /* XCHG implies LOCK. */
    uint32_t uOldMem = *puMem; /* initial guess for the compare-exchange */
    while (!ASMAtomicCmpXchgExU32(puMem, *puReg, uOldMem, &uOldMem)) /* on failure uOldMem is refreshed with the current value */
        ASMNopPause();
    *puReg = uOldMem; /* register receives the old memory value */
}
1279
1280
/** XCHG (16-bit): atomically swaps *puMem and *puReg via a CAS loop. */
IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16,(uint16_t *puMem, uint16_t *puReg))
{
    /* XCHG implies LOCK. */
    uint16_t uOldMem = *puMem; /* initial guess for the compare-exchange */
    while (!ASMAtomicCmpXchgExU16(puMem, *puReg, uOldMem, &uOldMem)) /* on failure uOldMem is refreshed with the current value */
        ASMNopPause();
    *puReg = uOldMem; /* register receives the old memory value */
}
1289
1290
/** XCHG (8-bit): atomically swaps *puMem and *puReg via a CAS loop. */
IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8,(uint8_t *puMem, uint8_t *puReg))
{
    /* XCHG implies LOCK. */
    uint8_t uOldMem = *puMem; /* initial guess for the compare-exchange */
    while (!ASMAtomicCmpXchgExU8(puMem, *puReg, uOldMem, &uOldMem)) /* on failure uOldMem is refreshed with the current value */
        ASMNopPause();
    *puReg = uOldMem; /* register receives the old memory value */
}
1299
1300# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1301
1302
1303/*
1304 * XADD and LOCK XADD.
1305 */
1306
1307IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
1308{
1309 uint64_t uDst = *puDst;
1310 uint64_t uResult = uDst;
1311 iemAImpl_add_u64(&uResult, *puReg, pfEFlags);
1312 *puDst = uResult;
1313 *puReg = uDst;
1314}
1315
1316
1317IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64_locked,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
1318{
1319 uint64_t uOld = ASMAtomicUoReadU64(puDst);
1320 uint64_t uTmpDst;
1321 uint32_t fEflTmp;
1322 do
1323 {
1324 uTmpDst = uOld;
1325 fEflTmp = *pfEFlags;
1326 iemAImpl_add_u64(&uTmpDst, *puReg, pfEFlags);
1327 } while (!ASMAtomicCmpXchgExU64(puDst, uTmpDst, uOld, &uOld));
1328 *puReg = uOld;
1329 *pfEFlags = fEflTmp;
1330}
1331
1332# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1333
1334IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
1335{
1336 uint32_t uDst = *puDst;
1337 uint32_t uResult = uDst;
1338 iemAImpl_add_u32(&uResult, *puReg, pfEFlags);
1339 *puDst = uResult;
1340 *puReg = uDst;
1341}
1342
1343
1344IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32_locked,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
1345{
1346 uint32_t uOld = ASMAtomicUoReadU32(puDst);
1347 uint32_t uTmpDst;
1348 uint32_t fEflTmp;
1349 do
1350 {
1351 uTmpDst = uOld;
1352 fEflTmp = *pfEFlags;
1353 iemAImpl_add_u32(&uTmpDst, *puReg, pfEFlags);
1354 } while (!ASMAtomicCmpXchgExU32(puDst, uTmpDst, uOld, &uOld));
1355 *puReg = uOld;
1356 *pfEFlags = fEflTmp;
1357}
1358
1359
1360IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
1361{
1362 uint16_t uDst = *puDst;
1363 uint16_t uResult = uDst;
1364 iemAImpl_add_u16(&uResult, *puReg, pfEFlags);
1365 *puDst = uResult;
1366 *puReg = uDst;
1367}
1368
1369
1370IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16_locked,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
1371{
1372 uint16_t uOld = ASMAtomicUoReadU16(puDst);
1373 uint16_t uTmpDst;
1374 uint32_t fEflTmp;
1375 do
1376 {
1377 uTmpDst = uOld;
1378 fEflTmp = *pfEFlags;
1379 iemAImpl_add_u16(&uTmpDst, *puReg, pfEFlags);
1380 } while (!ASMAtomicCmpXchgExU16(puDst, uTmpDst, uOld, &uOld));
1381 *puReg = uOld;
1382 *pfEFlags = fEflTmp;
1383}
1384
1385
1386IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
1387{
1388 uint8_t uDst = *puDst;
1389 uint8_t uResult = uDst;
1390 iemAImpl_add_u8(&uResult, *puReg, pfEFlags);
1391 *puDst = uResult;
1392 *puReg = uDst;
1393}
1394
1395
1396IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8_locked,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
1397{
1398 uint8_t uOld = ASMAtomicUoReadU8(puDst);
1399 uint8_t uTmpDst;
1400 uint32_t fEflTmp;
1401 do
1402 {
1403 uTmpDst = uOld;
1404 fEflTmp = *pfEFlags;
1405 iemAImpl_add_u8(&uTmpDst, *puReg, pfEFlags);
1406 } while (!ASMAtomicCmpXchgExU8(puDst, uTmpDst, uOld, &uOld));
1407 *puReg = uOld;
1408 *pfEFlags = fEflTmp;
1409}
1410
1411# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1412
1413
1414/*
1415 * MUL
1416 */
1417
1418IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
1419{
1420 RTUINT128U Result;
1421 RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
1422 *pu64RAX = Result.s.Lo;
1423 *pu64RDX = Result.s.Hi;
1424
1425 /* MUL EFLAGS according to Skylake (similar to IMUL). */
1426 *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
1427 if (Result.s.Lo & RT_BIT_64(63))
1428 *pfEFlags |= X86_EFL_SF;
1429 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1430 if (Result.s.Hi != 0)
1431 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1432 return 0;
1433}
1434
1435# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1436
1437IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Factor, uint32_t *pfEFlags))
1438{
1439 RTUINT64U Result;
1440 Result.u = (uint64_t)*pu32RAX * u32Factor;
1441 *pu32RAX = Result.s.Lo;
1442 *pu32RDX = Result.s.Hi;
1443
1444 /* MUL EFLAGS according to Skylake (similar to IMUL). */
1445 *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
1446 if (Result.s.Lo & RT_BIT_32(31))
1447 *pfEFlags |= X86_EFL_SF;
1448 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1449 if (Result.s.Hi != 0)
1450 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1451 return 0;
1452}
1453
1454
1455IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Factor, uint32_t *pfEFlags))
1456{
1457 RTUINT32U Result;
1458 Result.u = (uint32_t)*pu16RAX * u16Factor;
1459 *pu16RAX = Result.s.Lo;
1460 *pu16RDX = Result.s.Hi;
1461
1462 /* MUL EFLAGS according to Skylake (similar to IMUL). */
1463 *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
1464 if (Result.s.Lo & RT_BIT_32(15))
1465 *pfEFlags |= X86_EFL_SF;
1466 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1467 if (Result.s.Hi != 0)
1468 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1469 return 0;
1470}
1471
1472# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1473
1474
1475/*
1476 * IMUL
1477 */
1478
/** IMUL (64-bit, one operand): RDX:RAX = RAX * u64Factor (signed).
 *
 * Implemented by multiplying the operand magnitudes unsigned and negating the
 * 128-bit result when exactly one operand is negative.  CF and OF are set
 * when the signed result does not fit in 64 bits; note the asymmetric bounds
 * below ('>=' vs '>') because a negative result may be exactly -2^63.
 * Always returns 0 (no \#DE possible).
 */
IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
{
    RTUINT128U Result;
    *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
                 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
                 /* Skylake may set: */ | X86_EFL_PF);

    if ((int64_t)*pu64RAX >= 0)
    {
        if ((int64_t)u64Factor >= 0)
        {
            /* positive * positive: overflow when result > INT64_MAX */
            RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
        else
        {
            /* positive * negative: magnitude may be up to 2^63 */
            RTUInt128MulU64ByU64(&Result, *pu64RAX, UINT64_C(0) - u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            RTUInt128AssignNeg(&Result);
        }
    }
    else
    {
        if ((int64_t)u64Factor >= 0)
        {
            /* negative * positive: magnitude may be up to 2^63 */
            RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            RTUInt128AssignNeg(&Result);
        }
        else
        {
            /* negative * negative: result is positive, overflow when > INT64_MAX */
            RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, UINT64_C(0) - u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
    }
    *pu64RAX = Result.s.Lo;
    if (Result.s.Lo & RT_BIT_64(63))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    *pu64RDX = Result.s.Hi;

    return 0;
}
1526
1527
1528IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1529{
1530/** @todo Testcase: IMUL 2 and 3 operands. */
1531 uint64_t u64Ign;
1532 iemAImpl_imul_u64(puDst, &u64Ign, uSrc, pfEFlags);
1533}
1534
1535# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1536
/** IMUL (32-bit, one operand): EDX:EAX = EAX * u32Factor (signed).
 *
 * Same magnitude-multiply-then-negate scheme as iemAImpl_imul_u64; CF/OF set
 * when the signed result does not fit in 32 bits (asymmetric bounds because a
 * negative result may be exactly -2^31).  Always returns 0.
 */
IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Factor, uint32_t *pfEFlags))
{
    RTUINT64U Result;
    *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
                 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
                 /* Skylake may set: */ | X86_EFL_PF);

    if ((int32_t)*pu32RAX >= 0)
    {
        if ((int32_t)u32Factor >= 0)
        {
            /* positive * positive: overflow when result > INT32_MAX */
            Result.u = (uint64_t)*pu32RAX * u32Factor;
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
        else
        {
            /* positive * negative: magnitude may be up to 2^31 */
            Result.u = (uint64_t)*pu32RAX * (UINT32_C(0) - u32Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT64_C(0) - Result.u;
        }
    }
    else
    {
        if ((int32_t)u32Factor >= 0)
        {
            /* negative * positive: magnitude may be up to 2^31 */
            Result.u = (uint64_t)(UINT32_C(0) - *pu32RAX) * u32Factor;
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT64_C(0) - Result.u;
        }
        else
        {
            /* negative * negative: result is positive, overflow when > INT32_MAX */
            Result.u = (uint64_t)(UINT32_C(0) - *pu32RAX) * (UINT32_C(0) - u32Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
    }
    *pu32RAX = Result.s.Lo;
    if (Result.s.Lo & RT_BIT_32(31))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    *pu32RDX = Result.s.Hi;

    return 0;
}
1584
1585
1586IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1587{
1588/** @todo Testcase: IMUL 2 and 3 operands. */
1589 uint32_t u32Ign;
1590 iemAImpl_imul_u32(puDst, &u32Ign, uSrc, pfEFlags);
1591}
1592
1593
/** IMUL (16-bit, one operand): DX:AX = AX * u16Factor (signed).
 *
 * Same magnitude-multiply-then-negate scheme as iemAImpl_imul_u64; CF/OF set
 * when the signed result does not fit in 16 bits (asymmetric bounds because a
 * negative result may be exactly -2^15).  Always returns 0.
 */
IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Factor, uint32_t *pfEFlags))
{
    RTUINT32U Result;
    *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
                 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
                 /* Skylake may set: */ | X86_EFL_PF);

    if ((int16_t)*pu16RAX >= 0)
    {
        if ((int16_t)u16Factor >= 0)
        {
            /* positive * positive: overflow when result > INT16_MAX */
            Result.u = (uint32_t)*pu16RAX * u16Factor;
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
        else
        {
            /* positive * negative: magnitude may be up to 2^15 */
            Result.u = (uint32_t)*pu16RAX * (UINT16_C(0) - u16Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT32_C(0) - Result.u;
        }
    }
    else
    {
        if ((int16_t)u16Factor >= 0)
        {
            /* negative * positive: magnitude may be up to 2^15 */
            Result.u = (uint32_t)(UINT16_C(0) - *pu16RAX) * u16Factor;
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT32_C(0) - Result.u;
        }
        else
        {
            /* negative * negative: result is positive, overflow when > INT16_MAX */
            Result.u = (uint32_t)(UINT16_C(0) - *pu16RAX) * (UINT16_C(0) - u16Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
    }
    *pu16RAX = Result.s.Lo;
    if (Result.s.Lo & RT_BIT_32(15))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    *pu16RDX = Result.s.Hi;

    return 0;
}
1641
1642
1643IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1644{
1645/** @todo Testcase: IMUL 2 and 3 operands. */
1646 uint16_t u16Ign;
1647 iemAImpl_imul_u16(puDst, &u16Ign, uSrc, pfEFlags);
1648}
1649
1650# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1651
1652
1653/*
1654 * DIV
1655 */
1656
/** DIV (64-bit, unsigned): divides RDX:RAX by u64Divisor; quotient to RAX,
 * remainder to RDX.  Returns non-zero to signal \#DE on division by zero or
 * quotient overflow. */
IEM_DECL_IMPL_DEF(int, iemAImpl_div_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    /* The quotient fits in 64 bits iff the high dividend half is smaller than
       the divisor; this test also rejects a zero divisor. */
    if (   u64Divisor != 0
        && *pu64RDX < u64Divisor)
    {
        RTUINT128U Dividend;
        Dividend.s.Lo = *pu64RAX;
        Dividend.s.Hi = *pu64RDX;

        RTUINT128U Divisor;
        Divisor.s.Lo = u64Divisor;
        Divisor.s.Hi = 0;

        RTUINT128U Remainder;
        RTUINT128U Quotient;
# ifdef __GNUC__ /* GCC maybe really annoying in function. */
        Quotient.s.Lo = 0;
        Quotient.s.Hi = 0;
# endif
        RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);
        Assert(Quotient.s.Hi == 0);   /* guaranteed by the range check above */
        Assert(Remainder.s.Hi == 0);  /* remainder < divisor <= UINT64_MAX */

        *pu64RAX = Quotient.s.Lo;
        *pu64RDX = Remainder.s.Lo;
        /** @todo research the undefined DIV flags. */
        return 0;

    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}
1692
1693# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1694
1695IEM_DECL_IMPL_DEF(int, iemAImpl_div_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Divisor, uint32_t *pfEFlags))
1696{
1697 /* Note! Skylake leaves all flags alone. */
1698 RT_NOREF_PV(pfEFlags);
1699
1700 if ( u32Divisor != 0
1701 && *pu32RDX < u32Divisor)
1702 {
1703 RTUINT64U Dividend;
1704 Dividend.s.Lo = *pu32RAX;
1705 Dividend.s.Hi = *pu32RDX;
1706
1707 RTUINT64U Remainder;
1708 RTUINT64U Quotient;
1709 Quotient.u = Dividend.u / u32Divisor;
1710 Remainder.u = Dividend.u % u32Divisor;
1711
1712 *pu32RAX = Quotient.s.Lo;
1713 *pu32RDX = Remainder.s.Lo;
1714 /** @todo research the undefined DIV flags. */
1715 return 0;
1716
1717 }
1718 /* #DE */
1719 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1720}
1721
1722
1723IEM_DECL_IMPL_DEF(int, iemAImpl_div_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Divisor, uint32_t *pfEFlags))
1724{
1725 /* Note! Skylake leaves all flags alone. */
1726 RT_NOREF_PV(pfEFlags);
1727
1728 if ( u16Divisor != 0
1729 && *pu16RDX < u16Divisor)
1730 {
1731 RTUINT32U Dividend;
1732 Dividend.s.Lo = *pu16RAX;
1733 Dividend.s.Hi = *pu16RDX;
1734
1735 RTUINT32U Remainder;
1736 RTUINT32U Quotient;
1737 Quotient.u = Dividend.u / u16Divisor;
1738 Remainder.u = Dividend.u % u16Divisor;
1739
1740 *pu16RAX = Quotient.s.Lo;
1741 *pu16RDX = Remainder.s.Lo;
1742 /** @todo research the undefined DIV flags. */
1743 return 0;
1744
1745 }
1746 /* #DE */
1747 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1748}
1749
1750# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1751
1752
1753/*
1754 * IDIV
1755 */
1756
/** IDIV (64-bit, signed): divides RDX:RAX by u64Divisor; quotient to RAX,
 * remainder to RDX (remainder takes the dividend's sign).  Performed as an
 * unsigned division of magnitudes with sign fix-up afterwards.  Returns
 * non-zero to signal \#DE on division by zero or quotient overflow. */
IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    /** @todo overflow checks */
    if (u64Divisor != 0)
    {
        /*
         * Convert to unsigned division.
         */
        RTUINT128U Dividend;
        Dividend.s.Lo = *pu64RAX;
        Dividend.s.Hi = *pu64RDX;
        if ((int64_t)*pu64RDX < 0)
            RTUInt128AssignNeg(&Dividend);

        RTUINT128U Divisor;
        Divisor.s.Hi = 0;
        if ((int64_t)u64Divisor >= 0)
            Divisor.s.Lo = u64Divisor;
        else
            Divisor.s.Lo = UINT64_C(0) - u64Divisor;

        RTUINT128U Remainder;
        RTUINT128U Quotient;
# ifdef __GNUC__ /* GCC maybe really annoying. */
        Quotient.s.Lo = 0;
        Quotient.s.Hi = 0;
# endif
        RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);

        /*
         * Setup the result, checking for overflows.
         */
        if ((int64_t)u64Divisor >= 0)
        {
            if ((int64_t)*pu64RDX >= 0)
            {
                /* Positive divisor, positive dividend => result positive. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
                {
                    *pu64RAX = Quotient.s.Lo;
                    *pu64RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Positive divisor, negative dividend => result negative. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
                {
                    *pu64RAX = UINT64_C(0) - Quotient.s.Lo;
                    *pu64RDX = UINT64_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
        else
        {
            if ((int64_t)*pu64RDX >= 0)
            {
                /* Negative divisor, positive dividend => negative quotient, positive remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
                {
                    *pu64RAX = UINT64_C(0) - Quotient.s.Lo;
                    *pu64RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Negative divisor, negative dividend => positive quotient, negative remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
                {
                    *pu64RAX = Quotient.s.Lo;
                    *pu64RDX = UINT64_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}
1842
1843# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1844
/** IDIV (32-bit, signed): divides EDX:EAX by u32Divisor; quotient to EAX,
 * remainder to EDX (remainder takes the dividend's sign).  Performed as an
 * unsigned division of magnitudes with sign fix-up afterwards.  Returns
 * non-zero to signal \#DE on division by zero or quotient overflow. */
IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    /** @todo overflow checks */
    if (u32Divisor != 0)
    {
        /*
         * Convert to unsigned division.
         */
        RTUINT64U Dividend;
        Dividend.s.Lo = *pu32RAX;
        Dividend.s.Hi = *pu32RDX;
        if ((int32_t)*pu32RDX < 0)
            Dividend.u = UINT64_C(0) - Dividend.u;

        uint32_t u32DivisorPositive;
        if ((int32_t)u32Divisor >= 0)
            u32DivisorPositive = u32Divisor;
        else
            u32DivisorPositive = UINT32_C(0) - u32Divisor;

        RTUINT64U Remainder;
        RTUINT64U Quotient;
        Quotient.u = Dividend.u / u32DivisorPositive;
        Remainder.u = Dividend.u % u32DivisorPositive;

        /*
         * Setup the result, checking for overflows.
         */
        if ((int32_t)u32Divisor >= 0)
        {
            if ((int32_t)*pu32RDX >= 0)
            {
                /* Positive divisor, positive dividend => result positive. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint32_t)INT32_MAX)
                {
                    *pu32RAX = Quotient.s.Lo;
                    *pu32RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Positive divisor, negative dividend => result negative. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(31))
                {
                    *pu32RAX = UINT32_C(0) - Quotient.s.Lo;
                    *pu32RDX = UINT32_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
        else
        {
            if ((int32_t)*pu32RDX >= 0)
            {
                /* Negative divisor, positive dividend => negative quotient, positive remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(31))
                {
                    *pu32RAX = UINT32_C(0) - Quotient.s.Lo;
                    *pu32RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Negative divisor, negative dividend => positive quotient, negative remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint32_t)INT32_MAX)
                {
                    *pu32RAX = Quotient.s.Lo;
                    *pu32RDX = UINT32_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}
1926
1927
/** IDIV (16-bit, signed): divides DX:AX by u16Divisor; quotient to AX,
 * remainder to DX (remainder takes the dividend's sign).  Performed as an
 * unsigned division of magnitudes with sign fix-up afterwards.  Returns
 * non-zero to signal \#DE on division by zero or quotient overflow. */
IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    if (u16Divisor != 0)
    {
        /*
         * Convert to unsigned division.
         */
        RTUINT32U Dividend;
        Dividend.s.Lo = *pu16RAX;
        Dividend.s.Hi = *pu16RDX;
        if ((int16_t)*pu16RDX < 0)
            Dividend.u = UINT32_C(0) - Dividend.u;

        uint16_t u16DivisorPositive;
        if ((int16_t)u16Divisor >= 0)
            u16DivisorPositive = u16Divisor;
        else
            u16DivisorPositive = UINT16_C(0) - u16Divisor;

        RTUINT32U Remainder;
        RTUINT32U Quotient;
        Quotient.u = Dividend.u / u16DivisorPositive;
        Remainder.u = Dividend.u % u16DivisorPositive;

        /*
         * Setup the result, checking for overflows.
         */
        if ((int16_t)u16Divisor >= 0)
        {
            if ((int16_t)*pu16RDX >= 0)
            {
                /* Positive divisor, positive dividend => result positive. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint16_t)INT16_MAX)
                {
                    *pu16RAX = Quotient.s.Lo;
                    *pu16RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Positive divisor, negative dividend => result negative. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(15))
                {
                    *pu16RAX = UINT16_C(0) - Quotient.s.Lo;
                    *pu16RDX = UINT16_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
        else
        {
            if ((int16_t)*pu16RDX >= 0)
            {
                /* Negative divisor, positive dividend => negative quotient, positive remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(15))
                {
                    *pu16RAX = UINT16_C(0) - Quotient.s.Lo;
                    *pu16RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Negative divisor, negative dividend => positive quotient, negative remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint16_t)INT16_MAX)
                {
                    *pu16RAX = Quotient.s.Lo;
                    *pu16RDX = UINT16_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}
2008
2009# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2010
2011
2012/*********************************************************************************************************************************
2013* Unary operations. *
2014*********************************************************************************************************************************/
2015
2016/**
2017 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for an INC or DEC instruction.
2018 *
2019 * CF is NOT modified for hysterical raisins (allegedly for carrying and
2020 * borrowing in arithmetic loops on intel 8008).
2021 *
2022 * @returns Status bits.
2023 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2024 * @param a_uResult Unsigned result value.
2025 * @param a_uDst The original destination value (for AF calc).
2026 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2027 * @param a_OfMethod 0 for INC-style, 1 for DEC-style.
2028 */
2029#define IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth, a_OfMethod) \
2030 do { \
2031 uint32_t fEflTmp = *(a_pfEFlags); \
2032 fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; \
2033 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2034 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2035 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2036 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2037 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(a_OfMethod == 0 ? (((a_uDst) ^ RT_BIT_64(63)) & (a_uResult)) \
2038 : ((a_uDst) & ((a_uResult) ^ RT_BIT_64(63))) ); \
2039 *(a_pfEFlags) = fEflTmp; \
2040 } while (0)
2041
2042/*
2043 * INC
2044 */
2045
2046IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2047{
2048 uint64_t uDst = *puDst;
2049 uint64_t uResult = uDst + 1;
2050 *puDst = uResult;
2051 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 0 /*INC*/);
2052}
2053
2054# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2055
2056IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2057{
2058 uint32_t uDst = *puDst;
2059 uint32_t uResult = uDst + 1;
2060 *puDst = uResult;
2061 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 0 /*INC*/);
2062}
2063
2064
2065IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2066{
2067 uint16_t uDst = *puDst;
2068 uint16_t uResult = uDst + 1;
2069 *puDst = uResult;
2070 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 0 /*INC*/);
2071}
2072
2073IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2074{
2075 uint8_t uDst = *puDst;
2076 uint8_t uResult = uDst + 1;
2077 *puDst = uResult;
2078 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 0 /*INC*/);
2079}
2080
2081# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2082
2083
2084/*
2085 * DEC
2086 */
2087
/** 64-bit DEC: subtracts one and updates status bits via the shared INC/DEC helper. */
IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
{
    uint64_t uDst = *puDst;
    uint64_t uResult = uDst - 1;
    *puDst = uResult;
    /* Method 1 selects the DEC-style OF calculation (comment previously said INC). */
    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 1 /*DEC*/);
}
2095
2096# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2097
/** 32-bit DEC: subtracts one and updates status bits via the shared INC/DEC helper. */
IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u32,(uint32_t *puDst, uint32_t *pfEFlags))
{
    uint32_t uDst = *puDst;
    uint32_t uResult = uDst - 1;
    *puDst = uResult;
    /* Method 1 selects the DEC-style OF calculation (comment previously said INC). */
    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 1 /*DEC*/);
}
2105
2106
/** 16-bit DEC: subtracts one and updates status bits via the shared INC/DEC helper. */
IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u16,(uint16_t *puDst, uint32_t *pfEFlags))
{
    uint16_t uDst = *puDst;
    uint16_t uResult = uDst - 1;
    *puDst = uResult;
    /* Method 1 selects the DEC-style OF calculation (comment previously said INC). */
    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 1 /*DEC*/);
}
2114
2115
/** 8-bit DEC: subtracts one and updates status bits via the shared INC/DEC helper. */
IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u8,(uint8_t *puDst, uint32_t *pfEFlags))
{
    uint8_t uDst = *puDst;
    uint8_t uResult = uDst - 1;
    *puDst = uResult;
    /* Method 1 selects the DEC-style OF calculation (comment previously said INC). */
    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 1 /*DEC*/);
}
2123
2124# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2125
2126
2127/*
2128 * NOT
2129 */
2130
2131IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2132{
2133 uint64_t uDst = *puDst;
2134 uint64_t uResult = ~uDst;
2135 *puDst = uResult;
2136 /* EFLAGS are not modified. */
2137 RT_NOREF_PV(pfEFlags);
2138}
2139
2140# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2141
2142IEM_DECL_IMPL_DEF(void, iemAImpl_not_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2143{
2144 uint32_t uDst = *puDst;
2145 uint32_t uResult = ~uDst;
2146 *puDst = uResult;
2147 /* EFLAGS are not modified. */
2148 RT_NOREF_PV(pfEFlags);
2149}
2150
2151IEM_DECL_IMPL_DEF(void, iemAImpl_not_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2152{
2153 uint16_t uDst = *puDst;
2154 uint16_t uResult = ~uDst;
2155 *puDst = uResult;
2156 /* EFLAGS are not modified. */
2157 RT_NOREF_PV(pfEFlags);
2158}
2159
2160IEM_DECL_IMPL_DEF(void, iemAImpl_not_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2161{
2162 uint8_t uDst = *puDst;
2163 uint8_t uResult = ~uDst;
2164 *puDst = uResult;
2165 /* EFLAGS are not modified. */
2166 RT_NOREF_PV(pfEFlags);
2167}
2168
2169# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2170
2171
2172/*
2173 * NEG
2174 */
2175
2176/**
2177 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for an NEG instruction.
2178 *
2179 * @returns Status bits.
2180 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2181 * @param a_uResult Unsigned result value.
2182 * @param a_uDst The original destination value (for AF calc).
2183 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2184 */
2185#define IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth) \
2186 do { \
2187 uint32_t fEflTmp = *(a_pfEFlags); \
2188 fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; \
2189 fEflTmp |= ((a_uDst) != 0) << X86_EFL_CF_BIT; \
2190 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2191 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2192 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2193 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2194 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_uDst) & (a_uResult)); \
2195 *(a_pfEFlags) = fEflTmp; \
2196 } while (0)
2197
2198IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2199{
2200 uint64_t uDst = *puDst;
2201 uint64_t uResult = (uint64_t)0 - uDst;
2202 *puDst = uResult;
2203 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 64);
2204}
2205
2206# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2207
2208IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2209{
2210 uint32_t uDst = *puDst;
2211 uint32_t uResult = (uint32_t)0 - uDst;
2212 *puDst = uResult;
2213 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 32);
2214}
2215
2216
2217IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2218{
2219 uint16_t uDst = *puDst;
2220 uint16_t uResult = (uint16_t)0 - uDst;
2221 *puDst = uResult;
2222 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 16);
2223}
2224
2225
2226IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2227{
2228 uint8_t uDst = *puDst;
2229 uint8_t uResult = (uint8_t)0 - uDst;
2230 *puDst = uResult;
2231 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 8);
2232}
2233
2234# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2235
2236/*
2237 * Locked variants.
2238 */
2239
/**
 * Emit a function for doing a locked unary operand operation.
 *
 * Implements the LOCK-prefixed variant as a compare-exchange retry loop: the
 * plain (unlocked) implementation is run on a snapshot of the operand and a
 * scratch EFLAGS copy, then the result is committed with a CAS.  On contention
 * the loop re-reads the operand (via the CAS out-parameter) and recomputes the
 * result and flags, so the published EFLAGS always match the value that won.
 */
# define EMIT_LOCKED_UNARY_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp = uOld; \
            fEflTmp = *pfEFlags; /* fresh copy each round; the unlocked worker may depend on input flags */ \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    }

EMIT_LOCKED_UNARY_OP(inc, 64);
EMIT_LOCKED_UNARY_OP(dec, 64);
EMIT_LOCKED_UNARY_OP(not, 64);
EMIT_LOCKED_UNARY_OP(neg, 64);
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_UNARY_OP(inc, 32);
EMIT_LOCKED_UNARY_OP(dec, 32);
EMIT_LOCKED_UNARY_OP(not, 32);
EMIT_LOCKED_UNARY_OP(neg, 32);

EMIT_LOCKED_UNARY_OP(inc, 16);
EMIT_LOCKED_UNARY_OP(dec, 16);
EMIT_LOCKED_UNARY_OP(not, 16);
EMIT_LOCKED_UNARY_OP(neg, 16);

EMIT_LOCKED_UNARY_OP(inc, 8);
EMIT_LOCKED_UNARY_OP(dec, 8);
EMIT_LOCKED_UNARY_OP(not, 8);
EMIT_LOCKED_UNARY_OP(neg, 8);
2276# endif
2277
2278
2279/*********************************************************************************************************************************
2280* Shifting and Rotating *
2281*********************************************************************************************************************************/
2282
2283/*
2284 * ROL
2285 */
2286
2287/**
2288 * Updates the status bits (OF and CF) for an ROL instruction.
2289 *
2290 * @returns Status bits.
2291 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2292 * @param a_uResult Unsigned result value.
2293 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2294 */
2295#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(a_pfEFlags, a_uResult, a_cBitsWidth) do { \
2296 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2297 it the same way as for 1 bit shifts. */ \
2298 AssertCompile(X86_EFL_CF_BIT == 0); \
2299 uint32_t fEflTmp = *(a_pfEFlags); \
2300 fEflTmp &= ~(X86_EFL_CF | X86_EFL_OF); \
2301 uint32_t const fCarry = ((a_uResult) & X86_EFL_CF); \
2302 fEflTmp |= fCarry; \
2303 fEflTmp |= (((a_uResult) >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2304 *(a_pfEFlags) = fEflTmp; \
2305 } while (0)
2306
2307IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2308{
2309 cShift &= 63;
2310 if (cShift)
2311 {
2312 uint64_t uResult = ASMRotateLeftU64(*puDst, cShift);
2313 *puDst = uResult;
2314 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 64);
2315 }
2316}
2317
2318# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2319
2320IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u32,(uint32_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2321{
2322 cShift &= 31;
2323 if (cShift)
2324 {
2325 uint32_t uResult = ASMRotateLeftU32(*puDst, cShift);
2326 *puDst = uResult;
2327 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 32);
2328 }
2329}
2330
2331
2332IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u16,(uint16_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2333{
2334 cShift &= 15;
2335 if (cShift)
2336 {
2337 uint16_t uDst = *puDst;
2338 uint16_t uResult = (uDst << cShift) | (uDst >> (16 - cShift));
2339 *puDst = uResult;
2340 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 16);
2341 }
2342}
2343
2344
2345IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u8,(uint8_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2346{
2347 cShift &= 7;
2348 if (cShift)
2349 {
2350 uint8_t uDst = *puDst;
2351 uint8_t uResult = (uDst << cShift) | (uDst >> (8 - cShift));
2352 *puDst = uResult;
2353 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 8);
2354 }
2355}
2356
2357# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2358
2359
2360/*
2361 * ROR
2362 */
2363
2364/**
2365 * Updates the status bits (OF and CF) for an ROL instruction.
2366 *
2367 * @returns Status bits.
2368 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2369 * @param a_uResult Unsigned result value.
2370 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2371 */
2372#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(a_pfEFlags, a_uResult, a_cBitsWidth) do { \
2373 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2374 it the same way as for 1 bit shifts. */ \
2375 AssertCompile(X86_EFL_CF_BIT == 0); \
2376 uint32_t fEflTmp = *(a_pfEFlags); \
2377 fEflTmp &= ~(X86_EFL_CF | X86_EFL_OF); \
2378 uint32_t const fCarry = ((a_uResult) >> ((a_cBitsWidth) - 1)) & X86_EFL_CF; \
2379 fEflTmp |= fCarry; \
2380 fEflTmp |= (((a_uResult) >> ((a_cBitsWidth) - 2)) ^ fCarry) << X86_EFL_OF_BIT; \
2381 *(a_pfEFlags) = fEflTmp; \
2382 } while (0)
2383
2384IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2385{
2386 cShift &= 63;
2387 if (cShift)
2388 {
2389 uint64_t const uResult = ASMRotateRightU64(*puDst, cShift);
2390 *puDst = uResult;
2391 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 64);
2392 }
2393}
2394
2395# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2396
2397IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u32,(uint32_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2398{
2399 cShift &= 31;
2400 if (cShift)
2401 {
2402 uint64_t const uResult = ASMRotateRightU32(*puDst, cShift);
2403 *puDst = uResult;
2404 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 32);
2405 }
2406}
2407
2408
2409IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u16,(uint16_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2410{
2411 cShift &= 15;
2412 if (cShift)
2413 {
2414 uint16_t uDst = *puDst;
2415 uint16_t uResult;
2416 uResult = uDst >> cShift;
2417 uResult |= uDst << (16 - cShift);
2418 *puDst = uResult;
2419 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 16);
2420 }
2421}
2422
2423
2424IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u8,(uint8_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2425{
2426 cShift &= 7;
2427 if (cShift)
2428 {
2429 uint8_t uDst = *puDst;
2430 uint8_t uResult;
2431 uResult = uDst >> cShift;
2432 uResult |= uDst << (8 - cShift);
2433 *puDst = uResult;
2434 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 8);
2435 }
2436}
2437
2438# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2439
2440
2441/*
2442 * RCL
2443 */
/**
 * Emit a rotate-through-carry-left implementation for the given width.
 *
 * The result is the destination shifted left with the old CF inserted at the
 * bit the rotation vacated; CF receives the last bit rotated out.  OF is
 * architecturally undefined for counts > 1 and is computed as for a 1-bit
 * rotate.  A masked count of zero leaves destination and flags untouched.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask the raw
 * count by 31 and rotate the 8/16-bit forms modulo 9/17 -- confirm this
 * simplification is intentional for the C fallback.
 */
#define EMIT_RCL(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_rcl_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = uDst << cShift; \
        if (cShift > 1) /* guard: a (width + 1 - 1)-bit shift would be undefined */ \
            uResult |= uDst >> (a_cBitsWidth + 1 - cShift); \
        \
        uint32_t fEfl = *pfEFlags; \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uResult |= (uint ## a_cBitsWidth ## _t)(fEfl & X86_EFL_CF) << (cShift - 1); \
        \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. */ \
        fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
        uint32_t const fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
        fEfl |= fCarry; \
        fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_RCL(64);
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_RCL(32);
EMIT_RCL(16);
EMIT_RCL(8);
# endif
2476
2477
2478/*
2479 * RCR
2480 */
/**
 * Emit a rotate-through-carry-right implementation for the given width.
 *
 * The result is the destination shifted right with the old CF inserted at the
 * top; CF receives the last bit rotated out (bit cShift-1 of the original).
 * OF is architecturally undefined for counts > 1 and is computed as for a
 * 1-bit rotate.  A masked count of zero leaves destination and flags alone.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask the raw
 * count by 31 and rotate the 8/16-bit forms modulo 9/17 -- confirm this
 * simplification is intentional for the C fallback.
 */
#define EMIT_RCR(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_rcr_u ## a_cBitsWidth,(uint ## a_cBitsWidth ##_t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
        if (cShift > 1) /* guard: a (width + 1 - 1)-bit shift would be undefined */ \
            uResult |= uDst << (a_cBitsWidth + 1 - cShift); \
        \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags; \
        uResult |= (uint ## a_cBitsWidth ## _t)(fEfl & X86_EFL_CF) << (a_cBitsWidth - cShift); \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. */ \
        fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
        uint32_t const fCarry = (uDst >> (cShift - 1)) & X86_EFL_CF; \
        fEfl |= fCarry; \
        fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_RCR(64);
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_RCR(32);
EMIT_RCR(16);
EMIT_RCR(8);
# endif
2512
2513
2514/*
2515 * SHL
2516 */
/**
 * Emit a shift-left implementation for the given width.
 *
 * CF receives the last bit shifted out; SF/ZF/PF are computed from the
 * result.  OF is architecturally undefined for counts > 1 and is computed
 * as for a 1-bit shift; AF is undefined and always cleared here.  A masked
 * count of zero leaves destination and flags untouched.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask 8/16-bit
 * shift counts by 31 (so e.g. 'shl al, 8' zeroes AL) -- confirm this
 * divergence is acceptable for the C fallback.
 */
#define EMIT_SHL(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_shl_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ##_t const uDst = *puDst; \
        uint ## a_cBitsWidth ##_t uResult = uDst << cShift; \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. The AF bit is undefined, we \
           always set it to zero atm. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        uint32_t fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
        fEfl |= fCarry; \
        fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_SHL(64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHL(32)
EMIT_SHL(16)
EMIT_SHL(8)
# endif
2547
2548
2549/*
2550 * SHR
2551 */
/**
 * Emit a logical shift-right implementation for the given width.
 *
 * CF receives the last bit shifted out; SF/ZF/PF are computed from the
 * result.  OF is architecturally undefined for counts > 1 and is taken from
 * the original sign bit (the 1-bit definition); AF is undefined and always
 * cleared here.  A masked count of zero leaves destination and flags alone.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask 8/16-bit
 * shift counts by 31 -- confirm this divergence is acceptable.
 */
#define EMIT_SHR(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_shr_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. The AF bit is undefined, we \
           always set it to zero atm. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
        fEfl |= (uDst >> (a_cBitsWidth - 1)) << X86_EFL_OF_BIT; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_SHR(64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHR(32)
EMIT_SHR(16)
EMIT_SHR(8)
# endif
2581
2582
2583/*
2584 * SAR
2585 */
/**
 * Emit an arithmetic shift-right implementation for the given width.
 *
 * The shift is performed on the value cast to the signed type, so the sign
 * bit is replicated (note: right-shift of negative values is
 * implementation-defined in C; all supported compilers do an arithmetic
 * shift).  CF receives the last bit shifted out; SF/ZF/PF come from the
 * result.  OF is always cleared (the architectural 1-bit value); AF is
 * undefined and always cleared here.  A masked count of zero is a no-op.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask 8/16-bit
 * shift counts by 31 -- confirm this divergence is acceptable.
 */
#define EMIT_SAR(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_sar_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = (int ## a_cBitsWidth ## _t)uDst >> cShift; \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts (0). The AF bit is undefined, \
           we always set it to zero atm. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_SAR(64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SAR(32)
EMIT_SAR(16)
EMIT_SAR(8)
# endif
2614
2615
2616/*
2617 * SHLD
2618 */
/**
 * Emit a double-precision shift-left (SHLD) implementation for the given width.
 *
 * The destination is shifted left with bits from uSrc shifted in from the
 * right.  CF receives the last bit shifted out of the destination; OF is
 * undefined for counts > 1 and is computed as the change of the sign bit
 * (1-bit definition); AF is undefined and always cleared here.  A masked
 * count of zero leaves destination and flags untouched.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask 16-bit
 * SHLD counts by 31, making counts 16..31 yield undefined-but-different
 * results -- confirm this simplification is intentional.
 */
#define EMIT_SHLD(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_shld_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, \
                                                         uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = uDst << cShift; \
        uResult |= uSrc >> (a_cBitsWidth - cShift); \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. The AF bit is undefined, \
           we always set it to zero atm. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
        fEfl |= (uint32_t)((uDst >> (a_cBitsWidth - 1)) ^ (uint32_t)(uResult >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_SHLD(64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHLD(32)
EMIT_SHLD(16)
EMIT_SHLD(8)
# endif
2650
2651
2652/*
2653 * SHRD
2654 */
/**
 * Emit a double-precision shift-right (SHRD) implementation for the given width.
 *
 * The destination is shifted right with bits from uSrc shifted in from the
 * left.  CF receives the last bit shifted out of the destination; OF is
 * undefined for counts > 1 and is computed as the change of the sign bit
 * (1-bit definition); AF is undefined and always cleared here.  A masked
 * count of zero leaves destination and flags untouched.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask 16-bit
 * SHRD counts by 31 -- confirm this simplification is intentional.
 */
#define EMIT_SHRD(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_shrd_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, \
                                                         uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
        uResult |= uSrc << (a_cBitsWidth - cShift); \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. The AF bit is undefined, \
           we always set it to zero atm. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
        fEfl |= (uint32_t)((uDst >> (a_cBitsWidth - 1)) ^ (uint32_t)(uResult >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_SHRD(64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHRD(32)
EMIT_SHRD(16)
EMIT_SHRD(8)
# endif
2686
2687
2688# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2689/*
2690 * BSWAP
2691 */
2692
2693IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u64,(uint64_t *puDst))
2694{
2695 *puDst = ASMByteSwapU64(*puDst);
2696}
2697
2698
2699IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u32,(uint32_t *puDst))
2700{
2701 *puDst = ASMByteSwapU32(*puDst);
2702}
2703
2704
/* Note! Undocumented instruction form, hence the 32-bit argument: only the
   low word is byte-swapped while the high word of the register is preserved. */
IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u16,(uint32_t *puDst))
{
    *puDst = ASMByteSwapU16((uint16_t)*puDst) | (*puDst & UINT32_C(0xffff0000));
}
2710
2711# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2712
2713#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
2714
2715
2716IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
2717{
2718 if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
2719 {
2720 *pu16Dst &= X86_SEL_MASK_OFF_RPL;
2721 *pu16Dst |= u16Src & X86_SEL_RPL;
2722
2723 *pfEFlags |= X86_EFL_ZF;
2724 }
2725 else
2726 *pfEFlags &= ~X86_EFL_ZF;
2727}
2728
2729
2730
2731IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
2732 PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
2733{
2734 RTUINT128U u128Tmp = *pu128Dst;
2735 if ( u128Tmp.s.Lo == pu128RaxRdx->s.Lo
2736 && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
2737 {
2738 *pu128Dst = *pu128RbxRcx;
2739 *pEFlags |= X86_EFL_ZF;
2740 }
2741 else
2742 {
2743 *pu128RaxRdx = u128Tmp;
2744 *pEFlags &= ~X86_EFL_ZF;
2745 }
2746}
2747
2748
2749IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
2750{
2751 RT_NOREF(pFpuState);
2752 puDst->au32[0] = puSrc->au32[0];
2753 puDst->au32[1] = puSrc->au32[0];
2754 puDst->au32[2] = puSrc->au32[2];
2755 puDst->au32[3] = puSrc->au32[2];
2756}
2757
2758#ifdef IEM_WITH_VEX
2759
2760IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
2761{
2762 pXState->x87.aXMM[iYRegDst].au32[0] = pXState->x87.aXMM[iYRegSrc].au32[0];
2763 pXState->x87.aXMM[iYRegDst].au32[1] = pXState->x87.aXMM[iYRegSrc].au32[0];
2764 pXState->x87.aXMM[iYRegDst].au32[2] = pXState->x87.aXMM[iYRegSrc].au32[2];
2765 pXState->x87.aXMM[iYRegDst].au32[3] = pXState->x87.aXMM[iYRegSrc].au32[2];
2766 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
2767 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
2768 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
2769 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
2770}
2771
2772
2773IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
2774{
2775 pXState->x87.aXMM[iYRegDst].au32[0] = pSrc->au32[0];
2776 pXState->x87.aXMM[iYRegDst].au32[1] = pSrc->au32[0];
2777 pXState->x87.aXMM[iYRegDst].au32[2] = pSrc->au32[2];
2778 pXState->x87.aXMM[iYRegDst].au32[3] = pSrc->au32[2];
2779 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
2780 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
2781 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
2782 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
2783}
2784
2785#endif /* IEM_WITH_VEX */
2786
2787
2788IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
2789{
2790 RT_NOREF(pFpuState);
2791 puDst->au32[0] = puSrc->au32[1];
2792 puDst->au32[1] = puSrc->au32[1];
2793 puDst->au32[2] = puSrc->au32[3];
2794 puDst->au32[3] = puSrc->au32[3];
2795}
2796
2797
2798IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
2799{
2800 RT_NOREF(pFpuState);
2801 puDst->au64[0] = uSrc;
2802 puDst->au64[1] = uSrc;
2803}
2804
2805#ifdef IEM_WITH_VEX
2806
2807IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
2808{
2809 pXState->x87.aXMM[iYRegDst].au64[0] = pXState->x87.aXMM[iYRegSrc].au64[0];
2810 pXState->x87.aXMM[iYRegDst].au64[1] = pXState->x87.aXMM[iYRegSrc].au64[0];
2811 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
2812 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
2813}
2814
2815IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
2816{
2817 pXState->x87.aXMM[iYRegDst].au64[0] = pSrc->au64[0];
2818 pXState->x87.aXMM[iYRegDst].au64[1] = pSrc->au64[0];
2819 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
2820 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
2821}
2822
2823#endif /* IEM_WITH_VEX */
2824
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette