IEMAllAImplC.cpp@ 66974

Last change on this file since 66974 was 66965, checked in by vboxsync, 8 years ago
IEM: Implemented vmovddup Vx,Wx (VEX.F2.0F 12).
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 44.7 KB

Line
1	/* $Id: IEMAllAImplC.cpp 66965 2017-05-19 09:38:05Z vboxsync $ */
2	/** @file
3	* IEM - Instruction Implementation in Assembly, portable C variant.
4	*/
5
6	/*
7	* Copyright (C) 2011-2016 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18
19	/*********************************************************************************************************************************
20	* Header Files *
21	*********************************************************************************************************************************/
22	#include "IEMInternal.h"
23	#include <VBox/vmm/vm.h>
24	#include <iprt/x86.h>
25	#include <iprt/uint128.h>
26
27
28	/*********************************************************************************************************************************
29	* Global Variables *
30	*********************************************************************************************************************************/
31	#ifdef RT_ARCH_X86
32	/**
33	* Parity calculation table.
34	*
35	* The generator code:
36	* @code
37	* #include <stdio.h>
38	*
39	* int main()
40	* {
41	* unsigned b;
42	* for (b = 0; b < 256; b++)
43	* {
44	* int cOnes = ( b & 1)
45	* + ((b >> 1) & 1)
46	* + ((b >> 2) & 1)
47	* + ((b >> 3) & 1)
48	* + ((b >> 4) & 1)
49	* + ((b >> 5) & 1)
50	* + ((b >> 6) & 1)
51	* + ((b >> 7) & 1);
52	* printf(" /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
53	* b,
54	* (b >> 7) & 1,
55	* (b >> 6) & 1,
56	* (b >> 5) & 1,
57	* (b >> 4) & 1,
58	* (b >> 3) & 1,
59	* (b >> 2) & 1,
60	* (b >> 1) & 1,
61	* b & 1,
62	* cOnes & 1 ? "0" : "X86_EFL_PF");
63	* }
64	* return 0;
65	* }
66	* @endcode
67	*/
68	static uint8_t const g_afParity[256] =
69	{
70	/* 0000 = 00000000b */ X86_EFL_PF,
71	/* 0x01 = 00000001b */ 0,
72	/* 0x02 = 00000010b */ 0,
73	/* 0x03 = 00000011b */ X86_EFL_PF,
74	/* 0x04 = 00000100b */ 0,
75	/* 0x05 = 00000101b */ X86_EFL_PF,
76	/* 0x06 = 00000110b */ X86_EFL_PF,
77	/* 0x07 = 00000111b */ 0,
78	/* 0x08 = 00001000b */ 0,
79	/* 0x09 = 00001001b */ X86_EFL_PF,
80	/* 0x0a = 00001010b */ X86_EFL_PF,
81	/* 0x0b = 00001011b */ 0,
82	/* 0x0c = 00001100b */ X86_EFL_PF,
83	/* 0x0d = 00001101b */ 0,
84	/* 0x0e = 00001110b */ 0,
85	/* 0x0f = 00001111b */ X86_EFL_PF,
86	/* 0x10 = 00010000b */ 0,
87	/* 0x11 = 00010001b */ X86_EFL_PF,
88	/* 0x12 = 00010010b */ X86_EFL_PF,
89	/* 0x13 = 00010011b */ 0,
90	/* 0x14 = 00010100b */ X86_EFL_PF,
91	/* 0x15 = 00010101b */ 0,
92	/* 0x16 = 00010110b */ 0,
93	/* 0x17 = 00010111b */ X86_EFL_PF,
94	/* 0x18 = 00011000b */ X86_EFL_PF,
95	/* 0x19 = 00011001b */ 0,
96	/* 0x1a = 00011010b */ 0,
97	/* 0x1b = 00011011b */ X86_EFL_PF,
98	/* 0x1c = 00011100b */ 0,
99	/* 0x1d = 00011101b */ X86_EFL_PF,
100	/* 0x1e = 00011110b */ X86_EFL_PF,
101	/* 0x1f = 00011111b */ 0,
102	/* 0x20 = 00100000b */ 0,
103	/* 0x21 = 00100001b */ X86_EFL_PF,
104	/* 0x22 = 00100010b */ X86_EFL_PF,
105	/* 0x23 = 00100011b */ 0,
106	/* 0x24 = 00100100b */ X86_EFL_PF,
107	/* 0x25 = 00100101b */ 0,
108	/* 0x26 = 00100110b */ 0,
109	/* 0x27 = 00100111b */ X86_EFL_PF,
110	/* 0x28 = 00101000b */ X86_EFL_PF,
111	/* 0x29 = 00101001b */ 0,
112	/* 0x2a = 00101010b */ 0,
113	/* 0x2b = 00101011b */ X86_EFL_PF,
114	/* 0x2c = 00101100b */ 0,
115	/* 0x2d = 00101101b */ X86_EFL_PF,
116	/* 0x2e = 00101110b */ X86_EFL_PF,
117	/* 0x2f = 00101111b */ 0,
118	/* 0x30 = 00110000b */ X86_EFL_PF,
119	/* 0x31 = 00110001b */ 0,
120	/* 0x32 = 00110010b */ 0,
121	/* 0x33 = 00110011b */ X86_EFL_PF,
122	/* 0x34 = 00110100b */ 0,
123	/* 0x35 = 00110101b */ X86_EFL_PF,
124	/* 0x36 = 00110110b */ X86_EFL_PF,
125	/* 0x37 = 00110111b */ 0,
126	/* 0x38 = 00111000b */ 0,
127	/* 0x39 = 00111001b */ X86_EFL_PF,
128	/* 0x3a = 00111010b */ X86_EFL_PF,
129	/* 0x3b = 00111011b */ 0,
130	/* 0x3c = 00111100b */ X86_EFL_PF,
131	/* 0x3d = 00111101b */ 0,
132	/* 0x3e = 00111110b */ 0,
133	/* 0x3f = 00111111b */ X86_EFL_PF,
134	/* 0x40 = 01000000b */ 0,
135	/* 0x41 = 01000001b */ X86_EFL_PF,
136	/* 0x42 = 01000010b */ X86_EFL_PF,
137	/* 0x43 = 01000011b */ 0,
138	/* 0x44 = 01000100b */ X86_EFL_PF,
139	/* 0x45 = 01000101b */ 0,
140	/* 0x46 = 01000110b */ 0,
141	/* 0x47 = 01000111b */ X86_EFL_PF,
142	/* 0x48 = 01001000b */ X86_EFL_PF,
143	/* 0x49 = 01001001b */ 0,
144	/* 0x4a = 01001010b */ 0,
145	/* 0x4b = 01001011b */ X86_EFL_PF,
146	/* 0x4c = 01001100b */ 0,
147	/* 0x4d = 01001101b */ X86_EFL_PF,
148	/* 0x4e = 01001110b */ X86_EFL_PF,
149	/* 0x4f = 01001111b */ 0,
150	/* 0x50 = 01010000b */ X86_EFL_PF,
151	/* 0x51 = 01010001b */ 0,
152	/* 0x52 = 01010010b */ 0,
153	/* 0x53 = 01010011b */ X86_EFL_PF,
154	/* 0x54 = 01010100b */ 0,
155	/* 0x55 = 01010101b */ X86_EFL_PF,
156	/* 0x56 = 01010110b */ X86_EFL_PF,
157	/* 0x57 = 01010111b */ 0,
158	/* 0x58 = 01011000b */ 0,
159	/* 0x59 = 01011001b */ X86_EFL_PF,
160	/* 0x5a = 01011010b */ X86_EFL_PF,
161	/* 0x5b = 01011011b */ 0,
162	/* 0x5c = 01011100b */ X86_EFL_PF,
163	/* 0x5d = 01011101b */ 0,
164	/* 0x5e = 01011110b */ 0,
165	/* 0x5f = 01011111b */ X86_EFL_PF,
166	/* 0x60 = 01100000b */ X86_EFL_PF,
167	/* 0x61 = 01100001b */ 0,
168	/* 0x62 = 01100010b */ 0,
169	/* 0x63 = 01100011b */ X86_EFL_PF,
170	/* 0x64 = 01100100b */ 0,
171	/* 0x65 = 01100101b */ X86_EFL_PF,
172	/* 0x66 = 01100110b */ X86_EFL_PF,
173	/* 0x67 = 01100111b */ 0,
174	/* 0x68 = 01101000b */ 0,
175	/* 0x69 = 01101001b */ X86_EFL_PF,
176	/* 0x6a = 01101010b */ X86_EFL_PF,
177	/* 0x6b = 01101011b */ 0,
178	/* 0x6c = 01101100b */ X86_EFL_PF,
179	/* 0x6d = 01101101b */ 0,
180	/* 0x6e = 01101110b */ 0,
181	/* 0x6f = 01101111b */ X86_EFL_PF,
182	/* 0x70 = 01110000b */ 0,
183	/* 0x71 = 01110001b */ X86_EFL_PF,
184	/* 0x72 = 01110010b */ X86_EFL_PF,
185	/* 0x73 = 01110011b */ 0,
186	/* 0x74 = 01110100b */ X86_EFL_PF,
187	/* 0x75 = 01110101b */ 0,
188	/* 0x76 = 01110110b */ 0,
189	/* 0x77 = 01110111b */ X86_EFL_PF,
190	/* 0x78 = 01111000b */ X86_EFL_PF,
191	/* 0x79 = 01111001b */ 0,
192	/* 0x7a = 01111010b */ 0,
193	/* 0x7b = 01111011b */ X86_EFL_PF,
194	/* 0x7c = 01111100b */ 0,
195	/* 0x7d = 01111101b */ X86_EFL_PF,
196	/* 0x7e = 01111110b */ X86_EFL_PF,
197	/* 0x7f = 01111111b */ 0,
198	/* 0x80 = 10000000b */ 0,
199	/* 0x81 = 10000001b */ X86_EFL_PF,
200	/* 0x82 = 10000010b */ X86_EFL_PF,
201	/* 0x83 = 10000011b */ 0,
202	/* 0x84 = 10000100b */ X86_EFL_PF,
203	/* 0x85 = 10000101b */ 0,
204	/* 0x86 = 10000110b */ 0,
205	/* 0x87 = 10000111b */ X86_EFL_PF,
206	/* 0x88 = 10001000b */ X86_EFL_PF,
207	/* 0x89 = 10001001b */ 0,
208	/* 0x8a = 10001010b */ 0,
209	/* 0x8b = 10001011b */ X86_EFL_PF,
210	/* 0x8c = 10001100b */ 0,
211	/* 0x8d = 10001101b */ X86_EFL_PF,
212	/* 0x8e = 10001110b */ X86_EFL_PF,
213	/* 0x8f = 10001111b */ 0,
214	/* 0x90 = 10010000b */ X86_EFL_PF,
215	/* 0x91 = 10010001b */ 0,
216	/* 0x92 = 10010010b */ 0,
217	/* 0x93 = 10010011b */ X86_EFL_PF,
218	/* 0x94 = 10010100b */ 0,
219	/* 0x95 = 10010101b */ X86_EFL_PF,
220	/* 0x96 = 10010110b */ X86_EFL_PF,
221	/* 0x97 = 10010111b */ 0,
222	/* 0x98 = 10011000b */ 0,
223	/* 0x99 = 10011001b */ X86_EFL_PF,
224	/* 0x9a = 10011010b */ X86_EFL_PF,
225	/* 0x9b = 10011011b */ 0,
226	/* 0x9c = 10011100b */ X86_EFL_PF,
227	/* 0x9d = 10011101b */ 0,
228	/* 0x9e = 10011110b */ 0,
229	/* 0x9f = 10011111b */ X86_EFL_PF,
230	/* 0xa0 = 10100000b */ X86_EFL_PF,
231	/* 0xa1 = 10100001b */ 0,
232	/* 0xa2 = 10100010b */ 0,
233	/* 0xa3 = 10100011b */ X86_EFL_PF,
234	/* 0xa4 = 10100100b */ 0,
235	/* 0xa5 = 10100101b */ X86_EFL_PF,
236	/* 0xa6 = 10100110b */ X86_EFL_PF,
237	/* 0xa7 = 10100111b */ 0,
238	/* 0xa8 = 10101000b */ 0,
239	/* 0xa9 = 10101001b */ X86_EFL_PF,
240	/* 0xaa = 10101010b */ X86_EFL_PF,
241	/* 0xab = 10101011b */ 0,
242	/* 0xac = 10101100b */ X86_EFL_PF,
243	/* 0xad = 10101101b */ 0,
244	/* 0xae = 10101110b */ 0,
245	/* 0xaf = 10101111b */ X86_EFL_PF,
246	/* 0xb0 = 10110000b */ 0,
247	/* 0xb1 = 10110001b */ X86_EFL_PF,
248	/* 0xb2 = 10110010b */ X86_EFL_PF,
249	/* 0xb3 = 10110011b */ 0,
250	/* 0xb4 = 10110100b */ X86_EFL_PF,
251	/* 0xb5 = 10110101b */ 0,
252	/* 0xb6 = 10110110b */ 0,
253	/* 0xb7 = 10110111b */ X86_EFL_PF,
254	/* 0xb8 = 10111000b */ X86_EFL_PF,
255	/* 0xb9 = 10111001b */ 0,
256	/* 0xba = 10111010b */ 0,
257	/* 0xbb = 10111011b */ X86_EFL_PF,
258	/* 0xbc = 10111100b */ 0,
259	/* 0xbd = 10111101b */ X86_EFL_PF,
260	/* 0xbe = 10111110b */ X86_EFL_PF,
261	/* 0xbf = 10111111b */ 0,
262	/* 0xc0 = 11000000b */ X86_EFL_PF,
263	/* 0xc1 = 11000001b */ 0,
264	/* 0xc2 = 11000010b */ 0,
265	/* 0xc3 = 11000011b */ X86_EFL_PF,
266	/* 0xc4 = 11000100b */ 0,
267	/* 0xc5 = 11000101b */ X86_EFL_PF,
268	/* 0xc6 = 11000110b */ X86_EFL_PF,
269	/* 0xc7 = 11000111b */ 0,
270	/* 0xc8 = 11001000b */ 0,
271	/* 0xc9 = 11001001b */ X86_EFL_PF,
272	/* 0xca = 11001010b */ X86_EFL_PF,
273	/* 0xcb = 11001011b */ 0,
274	/* 0xcc = 11001100b */ X86_EFL_PF,
275	/* 0xcd = 11001101b */ 0,
276	/* 0xce = 11001110b */ 0,
277	/* 0xcf = 11001111b */ X86_EFL_PF,
278	/* 0xd0 = 11010000b */ 0,
279	/* 0xd1 = 11010001b */ X86_EFL_PF,
280	/* 0xd2 = 11010010b */ X86_EFL_PF,
281	/* 0xd3 = 11010011b */ 0,
282	/* 0xd4 = 11010100b */ X86_EFL_PF,
283	/* 0xd5 = 11010101b */ 0,
284	/* 0xd6 = 11010110b */ 0,
285	/* 0xd7 = 11010111b */ X86_EFL_PF,
286	/* 0xd8 = 11011000b */ X86_EFL_PF,
287	/* 0xd9 = 11011001b */ 0,
288	/* 0xda = 11011010b */ 0,
289	/* 0xdb = 11011011b */ X86_EFL_PF,
290	/* 0xdc = 11011100b */ 0,
291	/* 0xdd = 11011101b */ X86_EFL_PF,
292	/* 0xde = 11011110b */ X86_EFL_PF,
293	/* 0xdf = 11011111b */ 0,
294	/* 0xe0 = 11100000b */ 0,
295	/* 0xe1 = 11100001b */ X86_EFL_PF,
296	/* 0xe2 = 11100010b */ X86_EFL_PF,
297	/* 0xe3 = 11100011b */ 0,
298	/* 0xe4 = 11100100b */ X86_EFL_PF,
299	/* 0xe5 = 11100101b */ 0,
300	/* 0xe6 = 11100110b */ 0,
301	/* 0xe7 = 11100111b */ X86_EFL_PF,
302	/* 0xe8 = 11101000b */ X86_EFL_PF,
303	/* 0xe9 = 11101001b */ 0,
304	/* 0xea = 11101010b */ 0,
305	/* 0xeb = 11101011b */ X86_EFL_PF,
306	/* 0xec = 11101100b */ 0,
307	/* 0xed = 11101101b */ X86_EFL_PF,
308	/* 0xee = 11101110b */ X86_EFL_PF,
309	/* 0xef = 11101111b */ 0,
310	/* 0xf0 = 11110000b */ X86_EFL_PF,
311	/* 0xf1 = 11110001b */ 0,
312	/* 0xf2 = 11110010b */ 0,
313	/* 0xf3 = 11110011b */ X86_EFL_PF,
314	/* 0xf4 = 11110100b */ 0,
315	/* 0xf5 = 11110101b */ X86_EFL_PF,
316	/* 0xf6 = 11110110b */ X86_EFL_PF,
317	/* 0xf7 = 11110111b */ 0,
318	/* 0xf8 = 11111000b */ 0,
319	/* 0xf9 = 11111001b */ X86_EFL_PF,
320	/* 0xfa = 11111010b */ X86_EFL_PF,
321	/* 0xfb = 11111011b */ 0,
322	/* 0xfc = 11111100b */ X86_EFL_PF,
323	/* 0xfd = 11111101b */ 0,
324	/* 0xfe = 11111110b */ 0,
325	/* 0xff = 11111111b */ X86_EFL_PF,
326	};
327	#endif /* RT_ARCH_X86 */
328
329
/**
 * Calculates the sign flag value given a result and its bit width.
 *
 * The sign flag (SF) is a copy of the most significant bit of the result, so
 * the result is shifted right until that bit lines up with the SF position
 * and everything else is masked away.
 *
 * @returns X86_EFL_SF or 0.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )
342
/**
 * Calculates the zero flag value given a result.
 *
 * The zero flag (ZF) is set when the result is zero and clear otherwise.
 *
 * @returns X86_EFL_ZF or 0.
 * @param   a_uResult       Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )
353
/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
 *
 * CF and OF are defined to be 0 by logical operations.  AF on the other hand is
 * undefined.  We do not set AF, as that seems to make the most sense (which
 * probably makes it the most wrong in real life).
 *
 * This is a statement macro: it yields no value, it rewrites the EFLAGS value
 * that @a a_pfEFlags points to.  Note that @a a_uResult is evaluated several
 * times, so it must be free of side effects.
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_fExtra        Additional bits to set.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= (a_fExtra); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
377
378
379	#ifdef RT_ARCH_X86
380	/*
381	* There are a few 64-bit on 32-bit things we'd rather do in C. Actually, doing
382	* it all in C is probably safer atm., optimize what's necessary later, maybe.
383	*/
384
385
386	/* Binary ops */
387
388	IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
389	{
390	uint64_t uDst = *puDst;
391	uint64_t uResult = uDst + uSrc;
392	*puDst = uResult;
393
394	/* Calc EFLAGS. */
395	uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
396	fEfl \|= (uResult < uDst) << X86_EFL_CF_BIT;
397	fEfl \|= g_afParity[uResult & 0xff];
398	fEfl \|= ((uint32_t)uResult ^ (uint32_t)uSrc ^ (uint32_t)uDst) & X86_EFL_AF;
399	fEfl \|= X86_EFL_CALC_ZF(uResult);
400	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
401	fEfl \|= (((uDst ^ uSrc ^ RT_BIT_64(63)) & (uResult ^ uDst)) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
402	*pfEFlags = fEfl;
403	}
404
405
406	IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
407	{
408	if (!(*pfEFlags & X86_EFL_CF))
409	iemAImpl_add_u64(puDst, uSrc, pfEFlags);
410	else
411	{
412	uint64_t uDst = *puDst;
413	uint64_t uResult = uDst + uSrc + 1;
414	*puDst = uResult;
415
416	/* Calc EFLAGS. */
417	/** @todo verify AF and OF calculations. */
418	uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
419	fEfl \|= (uResult <= uDst) << X86_EFL_CF_BIT;
420	fEfl \|= g_afParity[uResult & 0xff];
421	fEfl \|= ((uint32_t)uResult ^ (uint32_t)uSrc ^ (uint32_t)uDst) & X86_EFL_AF;
422	fEfl \|= X86_EFL_CALC_ZF(uResult);
423	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
424	fEfl \|= (((uDst ^ uSrc ^ RT_BIT_64(63)) & (uResult ^ uDst)) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
425	*pfEFlags = fEfl;
426	}
427	}
428
429
430	IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
431	{
432	uint64_t uDst = *puDst;
433	uint64_t uResult = uDst - uSrc;
434	*puDst = uResult;
435
436	/* Calc EFLAGS. */
437	uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
438	fEfl \|= (uDst < uSrc) << X86_EFL_CF_BIT;
439	fEfl \|= g_afParity[uResult & 0xff];
440	fEfl \|= ((uint32_t)uResult ^ (uint32_t)uSrc ^ (uint32_t)uDst) & X86_EFL_AF;
441	fEfl \|= X86_EFL_CALC_ZF(uResult);
442	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
443	fEfl \|= (((uDst ^ uSrc) & (uResult ^ uDst)) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
444	*pfEFlags = fEfl;
445	}
446
447
448	IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
449	{
450	if (!(*pfEFlags & X86_EFL_CF))
451	iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
452	else
453	{
454	uint64_t uDst = *puDst;
455	uint64_t uResult = uDst - uSrc - 1;
456	*puDst = uResult;
457
458	/* Calc EFLAGS. */
459	/** @todo verify AF and OF calculations. */
460	uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
461	fEfl \|= (uDst <= uSrc) << X86_EFL_CF_BIT;
462	fEfl \|= g_afParity[uResult & 0xff];
463	fEfl \|= ((uint32_t)uResult ^ (uint32_t)uSrc ^ (uint32_t)uDst) & X86_EFL_AF;
464	fEfl \|= X86_EFL_CALC_ZF(uResult);
465	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
466	fEfl \|= (((uDst ^ uSrc) & (uResult ^ uDst)) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
467	*pfEFlags = fEfl;
468	}
469	}
470
471
472	IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
473	{
474	uint64_t uResult = *puDst \| uSrc;
475	*puDst = uResult;
476	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
477	}
478
479
480	IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
481	{
482	uint64_t uResult = *puDst ^ uSrc;
483	*puDst = uResult;
484	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
485	}
486
487
488	IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
489	{
490	uint64_t uResult = *puDst & uSrc;
491	*puDst = uResult;
492	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
493	}
494
495
496	IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
497	{
498	uint64_t uDstTmp = *puDst;
499	iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
500	}
501
502
503	IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
504	{
505	uint64_t uResult = *puDst & uSrc;
506	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
507	}
508
509
/**
 * 64-bit locked binary operand operation.
 *
 * Must be expanded inside a function that has @c puDst, @c uSrc and
 * @c pfEFlags in scope.  The non-locked worker iemAImpl_<a_Mnemonic>_u64 is
 * run on private copies of the destination value and of EFLAGS, and the result
 * is then published with a compare-and-exchange; the whole thing is retried
 * for as long as the exchange reports failure.  EFLAGS is written back only
 * once, after the exchange has succeeded.
 *
 * NOTE(review): the retry relies on ASMAtomicCmpXchgExU64 storing the value it
 * found in memory into uOld when it fails - confirm against the IPRT docs.
 *
 * @param   a_Mnemonic      The instruction mnemonic used in the worker name.
 */
# define DO_LOCKED_BIN_OP_U64(a_Mnemonic) \
    do { \
        uint64_t uOld = ASMAtomicReadU64(puDst); \
        uint64_t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp    = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u64(&uTmp, uSrc, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU64(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)
524
525
526	IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64_locked,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
527	{
528	DO_LOCKED_BIN_OP_U64(add);
529	}
530
531
532	IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64_locked,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
533	{
534	DO_LOCKED_BIN_OP_U64(adc);
535	}
536
537
538	IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64_locked,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
539	{
540	DO_LOCKED_BIN_OP_U64(sub);
541	}
542
543
544	IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64_locked,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
545	{
546	DO_LOCKED_BIN_OP_U64(sbb);
547	}
548
549
550	IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64_locked,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
551	{
552	DO_LOCKED_BIN_OP_U64(or);
553	}
554
555
556	IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64_locked,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
557	{
558	DO_LOCKED_BIN_OP_U64(xor);
559	}
560
561
562	IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64_locked,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
563	{
564	DO_LOCKED_BIN_OP_U64(and);
565	}
566
567
568	IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64,(uint64_t puDst, uint64_t puReg, uint32_t *pfEFlags))
569	{
570	uint64_t uDst = *puDst;
571	uint64_t uResult = uDst;
572	iemAImpl_add_u64(&uResult, *puReg, pfEFlags);
573	*puDst = uResult;
574	*puReg = uDst;
575	}
576
577
578	IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64_locked,(uint64_t puDst, uint64_t puReg, uint32_t *pfEFlags))
579	{
580	uint64_t uOld = ASMAtomicReadU64(puDst);
581	uint64_t uTmpDst;
582	uint32_t fEflTmp;
583	do
584	{
585	uTmpDst = uOld;
586	fEflTmp = *pfEFlags;
587	iemAImpl_add_u64(&uTmpDst, *puReg, pfEFlags);
588	} while (!ASMAtomicCmpXchgExU64(puDst, uTmpDst, uOld, &uOld));
589	*puReg = uOld;
590	*pfEFlags = fEflTmp;
591	}
592
593
594	/* Bit operations (same signature as above). */
595
596	IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
597	{
598	/* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
599	logical operation (AND/OR/whatever). */
600	Assert(uSrc < 64);
601	uint64_t uDst = *puDst;
602	if (uDst & RT_BIT_64(uSrc))
603	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
604	else
605	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
606	}
607
608	IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
609	{
610	/* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
611	logical operation (AND/OR/whatever). */
612	Assert(uSrc < 64);
613	uint64_t fMask = RT_BIT_64(uSrc);
614	uint64_t uDst = *puDst;
615	if (uDst & fMask)
616	{
617	uDst &= ~fMask;
618	*puDst = uDst;
619	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
620	}
621	else
622	{
623	uDst \|= fMask;
624	*puDst = uDst;
625	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
626	}
627	}
628
629	IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
630	{
631	/* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
632	logical operation (AND/OR/whatever). */
633	Assert(uSrc < 64);
634	uint64_t fMask = RT_BIT_64(uSrc);
635	uint64_t uDst = *puDst;
636	if (uDst & fMask)
637	{
638	uDst &= ~fMask;
639	*puDst = uDst;
640	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
641	}
642	else
643	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
644	}
645
646	IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
647	{
648	/* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
649	logical operation (AND/OR/whatever). */
650	Assert(uSrc < 64);
651	uint64_t fMask = RT_BIT_64(uSrc);
652	uint64_t uDst = *puDst;
653	if (uDst & fMask)
654	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
655	else
656	{
657	uDst \|= fMask;
658	*puDst = uDst;
659	IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
660	}
661	}
662
663
664	IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64_locked,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
665	{
666	DO_LOCKED_BIN_OP_U64(btc);
667	}
668
669	IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64_locked,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
670	{
671	DO_LOCKED_BIN_OP_U64(btr);
672	}
673
674	IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64_locked,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
675	{
676	DO_LOCKED_BIN_OP_U64(bts);
677	}
678
679
680	/* bit scan */
681
682	IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
683	{
684	/* Note! "undefined" flags: OF, SF, AF, PF, CF. */
685	/** @todo check what real CPUs do. */
686	if (uSrc)
687	{
688	uint8_t iBit;
689	uint32_t u32Src;
690	if (uSrc & UINT32_MAX)
691	{
692	iBit = 0;
693	u32Src = uSrc;
694	}
695	else
696	{
697	iBit = 32;
698	u32Src = uSrc >> 32;
699	}
700	if (!(u32Src & UINT16_MAX))
701	{
702	iBit += 16;
703	u32Src >>= 16;
704	}
705	if (!(u32Src & UINT8_MAX))
706	{
707	iBit += 8;
708	u32Src >>= 8;
709	}
710	if (!(u32Src & 0xf))
711	{
712	iBit += 4;
713	u32Src >>= 4;
714	}
715	if (!(u32Src & 0x3))
716	{
717	iBit += 2;
718	u32Src >>= 2;
719	}
720	if (!(u32Src & 1))
721	{
722	iBit += 1;
723	Assert(u32Src & 2);
724	}
725
726	*puDst = iBit;
727	*pfEFlags &= ~X86_EFL_ZF;
728	}
729	else
730	*pfEFlags \|= X86_EFL_ZF;
731	}
732
733	IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
734	{
735	/* Note! "undefined" flags: OF, SF, AF, PF, CF. */
736	/** @todo check what real CPUs do. */
737	if (uSrc)
738	{
739	uint8_t iBit;
740	uint32_t u32Src;
741	if (uSrc & UINT64_C(0xffffffff00000000))
742	{
743	iBit = 63;
744	u32Src = uSrc >> 32;
745	}
746	else
747	{
748	iBit = 31;
749	u32Src = uSrc;
750	}
751	if (!(u32Src & UINT32_C(0xffff0000)))
752	{
753	iBit -= 16;
754	u32Src <<= 16;
755	}
756	if (!(u32Src & UINT32_C(0xff000000)))
757	{
758	iBit -= 8;
759	u32Src <<= 8;
760	}
761	if (!(u32Src & UINT32_C(0xf0000000)))
762	{
763	iBit -= 4;
764	u32Src <<= 4;
765	}
766	if (!(u32Src & UINT32_C(0xc0000000)))
767	{
768	iBit -= 2;
769	u32Src <<= 2;
770	}
771	if (!(u32Src & UINT32_C(0x80000000)))
772	{
773	iBit -= 1;
774	Assert(u32Src & RT_BIT(30));
775	}
776
777	*puDst = iBit;
778	*pfEFlags &= ~X86_EFL_ZF;
779	}
780	else
781	*pfEFlags \|= X86_EFL_ZF;
782	}
783
784
785	/* Unary operands. */
786
787	IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t puDst, uint32_t pfEFlags))
788	{
789	uint64_t uDst = *puDst;
790	uint64_t uResult = uDst + 1;
791	*puDst = uResult;
792
793	/*
794	* Calc EFLAGS.
795	* CF is NOT modified for hysterical raisins (allegedly for carrying and
796	* borrowing in arithmetic loops on intel 8008).
797	*/
798	uint32_t fEfl = *pfEFlags & ~(X86_EFL_STATUS_BITS & ~X86_EFL_CF);
799	fEfl \|= g_afParity[uResult & 0xff];
800	fEfl \|= ((uint32_t)uResult ^ (uint32_t)uDst) & X86_EFL_AF;
801	fEfl \|= X86_EFL_CALC_ZF(uResult);
802	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
803	fEfl \|= (((uDst ^ RT_BIT_64(63)) & uResult) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
804	*pfEFlags = fEfl;
805	}
806
807
808	IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t puDst, uint32_t pfEFlags))
809	{
810	uint64_t uDst = *puDst;
811	uint64_t uResult = uDst - 1;
812	*puDst = uResult;
813
814	/*
815	* Calc EFLAGS.
816	* CF is NOT modified for hysterical raisins (allegedly for carrying and
817	* borrowing in arithmetic loops on intel 8008).
818	*/
819	uint32_t fEfl = *pfEFlags & ~(X86_EFL_STATUS_BITS & ~X86_EFL_CF);
820	fEfl \|= g_afParity[uResult & 0xff];
821	fEfl \|= ((uint32_t)uResult ^ (uint32_t)uDst) & X86_EFL_AF;
822	fEfl \|= X86_EFL_CALC_ZF(uResult);
823	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
824	fEfl \|= ((uDst & (uResult ^ RT_BIT_64(63))) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
825	*pfEFlags = fEfl;
826	}
827
828
829	IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t puDst, uint32_t pfEFlags))
830	{
831	uint64_t uDst = *puDst;
832	uint64_t uResult = ~uDst;
833	*puDst = uResult;
834	/* EFLAGS are not modified. */
835	RT_NOREF_PV(pfEFlags);
836	}
837
838
839	IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t puDst, uint32_t pfEFlags))
840	{
841	uint64_t uDst = 0;
842	uint64_t uSrc = *puDst;
843	uint64_t uResult = uDst - uSrc;
844	*puDst = uResult;
845
846	/* Calc EFLAGS. */
847	uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
848	fEfl \|= (uSrc != 0) << X86_EFL_CF_BIT;
849	fEfl \|= g_afParity[uResult & 0xff];
850	fEfl \|= ((uint32_t)uResult ^ (uint32_t)uDst) & X86_EFL_AF;
851	fEfl \|= X86_EFL_CALC_ZF(uResult);
852	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
853	fEfl \|= ((uSrc & uResult) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
854	*pfEFlags = fEfl;
855	}
856
857
/**
 * 64-bit locked unary operand operation.
 *
 * Must be expanded inside a function that has @c puDst and @c pfEFlags in
 * scope.  The non-locked worker iemAImpl_<a_Mnemonic>_u64 is run on private
 * copies of the destination value and of EFLAGS, and the result is then
 * published with a compare-and-exchange; the whole thing is retried for as
 * long as the exchange reports failure.  EFLAGS is written back only once,
 * after the exchange has succeeded.
 *
 * NOTE(review): the retry relies on ASMAtomicCmpXchgExU64 storing the value it
 * found in memory into uOld when it fails - confirm against the IPRT docs.
 *
 * @param   a_Mnemonic      The instruction mnemonic used in the worker name.
 */
# define DO_LOCKED_UNARY_OP_U64(a_Mnemonic) \
    do { \
        uint64_t uOld = ASMAtomicReadU64(puDst); \
        uint64_t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp    = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u64(&uTmp, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU64(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)
872
873	IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64_locked,(uint64_t puDst, uint32_t pfEFlags))
874	{
875	DO_LOCKED_UNARY_OP_U64(inc);
876	}
877
878
879	IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64_locked,(uint64_t puDst, uint32_t pfEFlags))
880	{
881	DO_LOCKED_UNARY_OP_U64(dec);
882	}
883
884
885	IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64_locked,(uint64_t puDst, uint32_t pfEFlags))
886	{
887	DO_LOCKED_UNARY_OP_U64(not);
888	}
889
890
891	IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64_locked,(uint64_t puDst, uint32_t pfEFlags))
892	{
893	DO_LOCKED_UNARY_OP_U64(neg);
894	}
895
896
897	/* Shift and rotate. */
898
899	IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u64,(uint64_t puDst, uint8_t cShift, uint32_t pfEFlags))
900	{
901	cShift &= 63;
902	if (cShift)
903	{
904	uint64_t uDst = *puDst;
905	uint64_t uResult;
906	uResult = uDst << cShift;
907	uResult \|= uDst >> (64 - cShift);
908	*puDst = uResult;
909
910	/* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
911	it the same way as for 1 bit shifts. */
912	AssertCompile(X86_EFL_CF_BIT == 0);
913	uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF \| X86_EFL_OF);
914	uint32_t fCarry = (uResult & 1);
915	fEfl \|= fCarry;
916	fEfl \|= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
917	*pfEFlags = fEfl;
918	}
919	}
920
921
922	IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u64,(uint64_t puDst, uint8_t cShift, uint32_t pfEFlags))
923	{
924	cShift &= 63;
925	if (cShift)
926	{
927	uint64_t uDst = *puDst;
928	uint64_t uResult;
929	uResult = uDst >> cShift;
930	uResult \|= uDst << (64 - cShift);
931	*puDst = uResult;
932
933	/* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
934	it the same way as for 1 bit shifts (OF = OF XOR New-CF). */
935	AssertCompile(X86_EFL_CF_BIT == 0);
936	uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF \| X86_EFL_OF);
937	uint32_t fCarry = (uResult >> 63) & X86_EFL_CF;
938	fEfl \|= fCarry;
939	fEfl \|= (((uResult >> 62) ^ fCarry) << X86_EFL_OF_BIT) & X86_EFL_OF;
940	*pfEFlags = fEfl;
941	}
942	}
943
944
945	IEM_DECL_IMPL_DEF(void, iemAImpl_rcl_u64,(uint64_t puDst, uint8_t cShift, uint32_t pfEFlags))
946	{
947	cShift &= 63;
948	if (cShift)
949	{
950	uint32_t fEfl = *pfEFlags;
951	uint64_t uDst = *puDst;
952	uint64_t uResult;
953	uResult = uDst << cShift;
954	AssertCompile(X86_EFL_CF_BIT == 0);
955	if (cShift > 1)
956	uResult \|= uDst >> (65 - cShift);
957	uResult \|= (uint64_t)(fEfl & X86_EFL_CF) << (cShift - 1);
958	*puDst = uResult;
959
960	/* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
961	it the same way as for 1 bit shifts. */
962	uint32_t fCarry = (uDst >> (64 - cShift)) & X86_EFL_CF;
963	fEfl &= ~(X86_EFL_CF \| X86_EFL_OF);
964	fEfl \|= fCarry;
965	fEfl \|= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
966	*pfEFlags = fEfl;
967	}
968	}
969
970
971	IEM_DECL_IMPL_DEF(void, iemAImpl_rcr_u64,(uint64_t puDst, uint8_t cShift, uint32_t pfEFlags))
972	{
973	cShift &= 63;
974	if (cShift)
975	{
976	uint32_t fEfl = *pfEFlags;
977	uint64_t uDst = *puDst;
978	uint64_t uResult;
979	uResult = uDst >> cShift;
980	AssertCompile(X86_EFL_CF_BIT == 0);
981	if (cShift > 1)
982	uResult \|= uDst << (65 - cShift);
983	uResult \|= (uint64_t)(fEfl & X86_EFL_CF) << (64 - cShift);
984	*puDst = uResult;
985
986	/* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
987	it the same way as for 1 bit shifts. */
988	uint32_t fCarry = (uDst >> (cShift - 1)) & X86_EFL_CF;
989	fEfl &= ~(X86_EFL_CF \| X86_EFL_OF);
990	fEfl \|= fCarry;
991	fEfl \|= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
992	*pfEFlags = fEfl;
993	}
994	}
995
996
997	IEM_DECL_IMPL_DEF(void, iemAImpl_shl_u64,(uint64_t puDst, uint8_t cShift, uint32_t pfEFlags))
998	{
999	cShift &= 63;
1000	if (cShift)
1001	{
1002	uint64_t uDst = *puDst;
1003	uint64_t uResult = uDst << cShift;
1004	*puDst = uResult;
1005
1006	/* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
1007	it the same way as for 1 bit shifts. The AF bit is undefined, we
1008	always set it to zero atm. */
1009	AssertCompile(X86_EFL_CF_BIT == 0);
1010	uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
1011	uint32_t fCarry = (uDst >> (64 - cShift)) & X86_EFL_CF;
1012	fEfl \|= fCarry;
1013	fEfl \|= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
1014	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
1015	fEfl \|= X86_EFL_CALC_ZF(uResult);
1016	fEfl \|= g_afParity[uResult & 0xff];
1017	*pfEFlags = fEfl;
1018	}
1019	}
1020
1021
1022	IEM_DECL_IMPL_DEF(void, iemAImpl_shr_u64,(uint64_t puDst, uint8_t cShift, uint32_t pfEFlags))
1023	{
1024	cShift &= 63;
1025	if (cShift)
1026	{
1027	uint64_t uDst = *puDst;
1028	uint64_t uResult = uDst >> cShift;
1029	*puDst = uResult;
1030
1031	/* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
1032	it the same way as for 1 bit shifts. The AF bit is undefined, we
1033	always set it to zero atm. */
1034	AssertCompile(X86_EFL_CF_BIT == 0);
1035	uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
1036	fEfl \|= (uDst >> (cShift - 1)) & X86_EFL_CF;
1037	fEfl \|= (uDst >> 63) << X86_EFL_OF_BIT;
1038	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
1039	fEfl \|= X86_EFL_CALC_ZF(uResult);
1040	fEfl \|= g_afParity[uResult & 0xff];
1041	*pfEFlags = fEfl;
1042	}
1043	}
1044
1045
1046	IEM_DECL_IMPL_DEF(void, iemAImpl_sar_u64,(uint64_t puDst, uint8_t cShift, uint32_t pfEFlags))
1047	{
1048	cShift &= 63;
1049	if (cShift)
1050	{
1051	uint64_t uDst = *puDst;
1052	uint64_t uResult = (int64_t)uDst >> cShift;
1053	*puDst = uResult;
1054
1055	/* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
1056	it the same way as for 1 bit shifts (0). The AF bit is undefined,
1057	we always set it to zero atm. */
1058	AssertCompile(X86_EFL_CF_BIT == 0);
1059	uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
1060	fEfl \|= (uDst >> (cShift - 1)) & X86_EFL_CF;
1061	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
1062	fEfl \|= X86_EFL_CALC_ZF(uResult);
1063	fEfl \|= g_afParity[uResult & 0xff];
1064	*pfEFlags = fEfl;
1065	}
1066	}
1067
1068
1069	IEM_DECL_IMPL_DEF(void, iemAImpl_shld_u64,(uint64_t puDst, uint64_t uSrc, uint8_t cShift, uint32_t pfEFlags))
1070	{
1071	cShift &= 63;
1072	if (cShift)
1073	{
1074	uint64_t uDst = *puDst;
1075	uint64_t uResult;
1076	uResult = uDst << cShift;
1077	uResult \|= uSrc >> (64 - cShift);
1078	*puDst = uResult;
1079
1080	/* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
1081	it the same way as for 1 bit shifts. The AF bit is undefined,
1082	we always set it to zero atm. */
1083	AssertCompile(X86_EFL_CF_BIT == 0);
1084	uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
1085	fEfl \|= (uDst >> (64 - cShift)) & X86_EFL_CF;
1086	fEfl \|= (uint32_t)((uDst >> 63) ^ (uint32_t)(uResult >> 63)) << X86_EFL_OF_BIT;
1087	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
1088	fEfl \|= X86_EFL_CALC_ZF(uResult);
1089	fEfl \|= g_afParity[uResult & 0xff];
1090	*pfEFlags = fEfl;
1091	}
1092	}
1093
1094
1095	IEM_DECL_IMPL_DEF(void, iemAImpl_shrd_u64,(uint64_t puDst, uint64_t uSrc, uint8_t cShift, uint32_t pfEFlags))
1096	{
1097	cShift &= 63;
1098	if (cShift)
1099	{
1100	uint64_t uDst = *puDst;
1101	uint64_t uResult;
1102	uResult = uDst >> cShift;
1103	uResult \|= uSrc << (64 - cShift);
1104	*puDst = uResult;
1105
1106	/* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
1107	it the same way as for 1 bit shifts. The AF bit is undefined,
1108	we always set it to zero atm. */
1109	AssertCompile(X86_EFL_CF_BIT == 0);
1110	uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
1111	fEfl \|= (uDst >> (cShift - 1)) & X86_EFL_CF;
1112	fEfl \|= (uint32_t)((uDst >> 63) ^ (uint32_t)(uResult >> 63)) << X86_EFL_OF_BIT;
1113	fEfl \|= X86_EFL_CALC_SF(uResult, 64);
1114	fEfl \|= X86_EFL_CALC_ZF(uResult);
1115	fEfl \|= g_afParity[uResult & 0xff];
1116	*pfEFlags = fEfl;
1117	}
1118	}
1119
1120
1121	/* misc */
1122
1123	IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64,(uint64_t puMem, uint64_t puReg))
1124	{
1125	/* XCHG implies LOCK. */
1126	uint64_t uOldMem = *puMem;
1127	while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
1128	ASMNopPause();
1129	*puReg = uOldMem;
1130	}
1131
1132
1133	#endif /* RT_ARCH_X86 */
1134	#ifdef RT_ARCH_X86
1135
1136	/* multiplication and division */
1137
1138
1139	IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u64,(uint64_t pu64RAX, uint64_t pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
1140	{
1141	RTUINT128U Result;
1142	RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
1143	*pu64RAX = Result.s.Lo;
1144	*pu64RDX = Result.s.Hi;
1145
1146	/* MUL EFLAGS according to Skylake (similar to IMUL). */
1147	*pfEFlags &= ~(X86_EFL_SF \| X86_EFL_CF \| X86_EFL_OF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_PF);
1148	if (Result.s.Lo & RT_BIT_64(63))
1149	*pfEFlags \|= X86_EFL_SF;
1150	pfEFlags \|= g_afParity[Result.s.Lo & 0xff]; / (Skylake behaviour) */
1151	if (Result.s.Hi != 0)
1152	*pfEFlags \|= X86_EFL_CF \| X86_EFL_OF;
1153	return 0;
1154	}
1155
1156
1157	IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u64,(uint64_t pu64RAX, uint64_t pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
1158	{
1159	RTUINT128U Result;
1160	*pfEFlags &= ~( X86_EFL_SF \| X86_EFL_CF \| X86_EFL_OF
1161	/* Skylake always clears: */ \| X86_EFL_AF \| X86_EFL_ZF
1162	/* Skylake may set: */ \| X86_EFL_PF);
1163
1164	if ((int64_t)*pu64RAX >= 0)
1165	{
1166	if ((int64_t)u64Factor >= 0)
1167	{
1168	RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
1169	if (Result.s.Hi != 0 \|\| Result.s.Lo >= UINT64_C(0x8000000000000000))
1170	*pfEFlags \|= X86_EFL_CF \| X86_EFL_OF;
1171	}
1172	else
1173	{
1174	RTUInt128MulU64ByU64(&Result, *pu64RAX, UINT64_C(0) - u64Factor);
1175	if (Result.s.Hi != 0 \|\| Result.s.Lo > UINT64_C(0x8000000000000000))
1176	*pfEFlags \|= X86_EFL_CF \| X86_EFL_OF;
1177	RTUInt128AssignNeg(&Result);
1178	}
1179	}
1180	else
1181	{
1182	if ((int64_t)u64Factor >= 0)
1183	{
1184	RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, u64Factor);
1185	if (Result.s.Hi != 0 \|\| Result.s.Lo > UINT64_C(0x8000000000000000))
1186	*pfEFlags \|= X86_EFL_CF \| X86_EFL_OF;
1187	RTUInt128AssignNeg(&Result);
1188	}
1189	else
1190	{
1191	RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, UINT64_C(0) - u64Factor);
1192	if (Result.s.Hi != 0 \|\| Result.s.Lo >= UINT64_C(0x8000000000000000))
1193	*pfEFlags \|= X86_EFL_CF \| X86_EFL_OF;
1194	}
1195	}
1196	*pu64RAX = Result.s.Lo;
1197	if (Result.s.Lo & RT_BIT_64(63))
1198	*pfEFlags \|= X86_EFL_SF;
1199	pfEFlags \|= g_afParity[Result.s.Lo & 0xff]; / (Skylake behaviour) */
1200	*pu64RDX = Result.s.Hi;
1201
1202	return 0;
1203	}
1204
1205
1206	IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u64,(uint64_t puDst, uint64_t uSrc, uint32_t pfEFlags))
1207	{
1208	/** @todo Testcase: IMUL 2 and 3 operands. */
1209	uint64_t u64Ign;
1210	iemAImpl_imul_u64(puDst, &u64Ign, uSrc, pfEFlags);
1211	}
1212
1213
1214
1215	IEM_DECL_IMPL_DEF(int, iemAImpl_div_u64,(uint64_t pu64RAX, uint64_t pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
1216	{
1217	/* Note! Skylake leaves all flags alone. */
1218	RT_NOREF_PV(pfEFlags);
1219
1220	if ( u64Divisor != 0
1221	&& *pu64RDX < u64Divisor)
1222	{
1223	RTUINT128U Dividend;
1224	Dividend.s.Lo = *pu64RAX;
1225	Dividend.s.Hi = *pu64RDX;
1226
1227	RTUINT128U Divisor;
1228	Divisor.s.Lo = u64Divisor;
1229	Divisor.s.Hi = 0;
1230
1231	RTUINT128U Remainder;
1232	RTUINT128U Quotient;
1233	# ifdef __GNUC__ /* GCC maybe really annoying in function. */
1234	Quotient.s.Lo = 0;
1235	Quotient.s.Hi = 0;
1236	# endif
1237	RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);
1238	Assert(Quotient.s.Hi == 0);
1239	Assert(Remainder.s.Hi == 0);
1240
1241	*pu64RAX = Quotient.s.Lo;
1242	*pu64RDX = Remainder.s.Lo;
1243	/** @todo research the undefined DIV flags. */
1244	return 0;
1245
1246	}
1247	/* #DE */
1248	return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1249	}
1250
1251
1252	IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u64,(uint64_t pu64RAX, uint64_t pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
1253	{
1254	/* Note! Skylake leaves all flags alone. */
1255	RT_NOREF_PV(pfEFlags);
1256
1257	if (u64Divisor != 0)
1258	{
1259	/*
1260	* Convert to unsigned division.
1261	*/
1262	RTUINT128U Dividend;
1263	Dividend.s.Lo = *pu64RAX;
1264	Dividend.s.Hi = *pu64RDX;
1265	if ((int64_t)*pu64RDX < 0)
1266	RTUInt128AssignNeg(&Dividend);
1267
1268	RTUINT128U Divisor;
1269	Divisor.s.Hi = 0;
1270	if ((int64_t)u64Divisor >= 0)
1271	Divisor.s.Lo = u64Divisor;
1272	else
1273	Divisor.s.Lo = UINT64_C(0) - u64Divisor;
1274
1275	RTUINT128U Remainder;
1276	RTUINT128U Quotient;
1277	# ifdef __GNUC__ /* GCC maybe really annoying in function. */
1278	Quotient.s.Lo = 0;
1279	Quotient.s.Hi = 0;
1280	# endif
1281	RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);
1282
1283	/*
1284	* Setup the result, checking for overflows.
1285	*/
1286	if ((int64_t)u64Divisor >= 0)
1287	{
1288	if ((int64_t)*pu64RDX >= 0)
1289	{
1290	/* Positive divisor, positive dividend => result positive. */
1291	if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
1292	{
1293	*pu64RAX = Quotient.s.Lo;
1294	*pu64RDX = Remainder.s.Lo;
1295	return 0;
1296	}
1297	}
1298	else
1299	{
1300	/* Positive divisor, positive dividend => result negative. */
1301	if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
1302	{
1303	*pu64RAX = UINT64_C(0) - Quotient.s.Lo;
1304	*pu64RDX = UINT64_C(0) - Remainder.s.Lo;
1305	return 0;
1306	}
1307	}
1308	}
1309	else
1310	{
1311	if ((int64_t)*pu64RDX >= 0)
1312	{
1313	/* Negative divisor, positive dividend => negative quotient, positive remainder. */
1314	if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
1315	{
1316	*pu64RAX = UINT64_C(0) - Quotient.s.Lo;
1317	*pu64RDX = Remainder.s.Lo;
1318	return 0;
1319	}
1320	}
1321	else
1322	{
1323	/* Negative divisor, negative dividend => positive quotient, negative remainder. */
1324	if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
1325	{
1326	*pu64RAX = Quotient.s.Lo;
1327	*pu64RDX = UINT64_C(0) - Remainder.s.Lo;
1328	return 0;
1329	}
1330	}
1331	}
1332	}
1333	/* #DE */
1334	return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1335	}
1336
1337
1338	#endif /* RT_ARCH_X86 */
1339
1340
1341	IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t pu16Dst, uint16_t u16Src, uint32_t pfEFlags))
1342	{
1343	if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
1344	{
1345	*pu16Dst &= X86_SEL_MASK_OFF_RPL;
1346	*pu16Dst \|= u16Src & X86_SEL_RPL;
1347
1348	*pfEFlags \|= X86_EFL_ZF;
1349	}
1350	else
1351	*pfEFlags &= ~X86_EFL_ZF;
1352	}
1353
1354
1355
1356	IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
1357	PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
1358	{
1359	RTUINT128U u128Tmp = *pu128Dst;
1360	if ( u128Tmp.s.Lo == pu128RaxRdx->s.Lo
1361	&& u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
1362	{
1363	pu128Dst = pu128RbxRcx;
1364	*pEFlags \|= X86_EFL_ZF;
1365	}
1366	else
1367	{
1368	*pu128RaxRdx = u128Tmp;
1369	*pEFlags &= ~X86_EFL_ZF;
1370	}
1371	}
1372
1373
1374	IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
1375	{
1376	RT_NOREF(pFpuState);
1377	puDst->au32[0] = puSrc->au32[0];
1378	puDst->au32[1] = puSrc->au32[0];
1379	puDst->au32[2] = puSrc->au32[2];
1380	puDst->au32[3] = puSrc->au32[2];
1381	}
1382
1383	#ifdef IEM_WITH_VEX
1384
1385	IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
1386	{
1387	pXState->x87.aXMM[iYRegDst].au32[0] = pXState->x87.aXMM[iYRegSrc].au32[0];
1388	pXState->x87.aXMM[iYRegDst].au32[1] = pXState->x87.aXMM[iYRegSrc].au32[0];
1389	pXState->x87.aXMM[iYRegDst].au32[2] = pXState->x87.aXMM[iYRegSrc].au32[2];
1390	pXState->x87.aXMM[iYRegDst].au32[3] = pXState->x87.aXMM[iYRegSrc].au32[2];
1391	pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
1392	pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
1393	pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
1394	pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
1395	}
1396
1397
1398	IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
1399	{
1400	pXState->x87.aXMM[iYRegDst].au32[0] = pSrc->au32[0];
1401	pXState->x87.aXMM[iYRegDst].au32[1] = pSrc->au32[0];
1402	pXState->x87.aXMM[iYRegDst].au32[2] = pSrc->au32[2];
1403	pXState->x87.aXMM[iYRegDst].au32[3] = pSrc->au32[2];
1404	pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
1405	pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
1406	pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
1407	pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
1408	}
1409
1410	#endif /* IEM_WITH_VEX */
1411
1412
1413	IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
1414	{
1415	RT_NOREF(pFpuState);
1416	puDst->au32[0] = puSrc->au32[1];
1417	puDst->au32[1] = puSrc->au32[1];
1418	puDst->au32[2] = puSrc->au32[3];
1419	puDst->au32[3] = puSrc->au32[3];
1420	}
1421
1422
1423	IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
1424	{
1425	RT_NOREF(pFpuState);
1426	puDst->au64[0] = uSrc;
1427	puDst->au64[1] = uSrc;
1428	}
1429
1430	#ifdef IEM_WITH_VEX
1431
1432	IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
1433	{
1434	pXState->x87.aXMM[iYRegDst].au64[0] = pXState->x87.aXMM[iYRegSrc].au64[0];
1435	pXState->x87.aXMM[iYRegDst].au64[1] = pXState->x87.aXMM[iYRegSrc].au64[0];
1436	pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
1437	pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
1438	}
1439
1440	IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
1441	{
1442	pXState->x87.aXMM[iYRegDst].au64[0] = pSrc->au64[0];
1443	pXState->x87.aXMM[iYRegDst].au64[1] = pSrc->au64[0];
1444	pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
1445	pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
1446	}
1447
1448	#endif /* IEM_WITH_VEX */
1449

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp@ 66974

Download in other formats: