VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp@ 66974

Last change on this file since 66974 was 66965, checked in by vboxsync, 8 years ago

IEM: Implemented vmovddup Vx,Wx (VEX.F2.0F 12).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 44.7 KB
Line 
1/* $Id: IEMAllAImplC.cpp 66965 2017-05-19 09:38:05Z vboxsync $ */
2/** @file
3 * IEM - Instruction Implementation in Assembly, portable C variant.
4 */
5
6/*
7 * Copyright (C) 2011-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "IEMInternal.h"
23#include <VBox/vmm/vm.h>
24#include <iprt/x86.h>
25#include <iprt/uint128.h>
26
27
28/*********************************************************************************************************************************
29* Global Variables *
30*********************************************************************************************************************************/
31#ifdef RT_ARCH_X86
32/**
33 * Parity calculation table.
34 *
35 * The generator code:
36 * @code
37 * #include <stdio.h>
38 *
39 * int main()
40 * {
41 * unsigned b;
42 * for (b = 0; b < 256; b++)
43 * {
44 * int cOnes = ( b & 1)
45 * + ((b >> 1) & 1)
46 * + ((b >> 2) & 1)
47 * + ((b >> 3) & 1)
48 * + ((b >> 4) & 1)
49 * + ((b >> 5) & 1)
50 * + ((b >> 6) & 1)
51 * + ((b >> 7) & 1);
52 * printf(" /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
53 * b,
54 * (b >> 7) & 1,
55 * (b >> 6) & 1,
56 * (b >> 5) & 1,
57 * (b >> 4) & 1,
58 * (b >> 3) & 1,
59 * (b >> 2) & 1,
60 * (b >> 1) & 1,
61 * b & 1,
62 * cOnes & 1 ? "0" : "X86_EFL_PF");
63 * }
64 * return 0;
65 * }
66 * @endcode
67 */
68static uint8_t const g_afParity[256] =
69{
70 /* 0000 = 00000000b */ X86_EFL_PF,
71 /* 0x01 = 00000001b */ 0,
72 /* 0x02 = 00000010b */ 0,
73 /* 0x03 = 00000011b */ X86_EFL_PF,
74 /* 0x04 = 00000100b */ 0,
75 /* 0x05 = 00000101b */ X86_EFL_PF,
76 /* 0x06 = 00000110b */ X86_EFL_PF,
77 /* 0x07 = 00000111b */ 0,
78 /* 0x08 = 00001000b */ 0,
79 /* 0x09 = 00001001b */ X86_EFL_PF,
80 /* 0x0a = 00001010b */ X86_EFL_PF,
81 /* 0x0b = 00001011b */ 0,
82 /* 0x0c = 00001100b */ X86_EFL_PF,
83 /* 0x0d = 00001101b */ 0,
84 /* 0x0e = 00001110b */ 0,
85 /* 0x0f = 00001111b */ X86_EFL_PF,
86 /* 0x10 = 00010000b */ 0,
87 /* 0x11 = 00010001b */ X86_EFL_PF,
88 /* 0x12 = 00010010b */ X86_EFL_PF,
89 /* 0x13 = 00010011b */ 0,
90 /* 0x14 = 00010100b */ X86_EFL_PF,
91 /* 0x15 = 00010101b */ 0,
92 /* 0x16 = 00010110b */ 0,
93 /* 0x17 = 00010111b */ X86_EFL_PF,
94 /* 0x18 = 00011000b */ X86_EFL_PF,
95 /* 0x19 = 00011001b */ 0,
96 /* 0x1a = 00011010b */ 0,
97 /* 0x1b = 00011011b */ X86_EFL_PF,
98 /* 0x1c = 00011100b */ 0,
99 /* 0x1d = 00011101b */ X86_EFL_PF,
100 /* 0x1e = 00011110b */ X86_EFL_PF,
101 /* 0x1f = 00011111b */ 0,
102 /* 0x20 = 00100000b */ 0,
103 /* 0x21 = 00100001b */ X86_EFL_PF,
104 /* 0x22 = 00100010b */ X86_EFL_PF,
105 /* 0x23 = 00100011b */ 0,
106 /* 0x24 = 00100100b */ X86_EFL_PF,
107 /* 0x25 = 00100101b */ 0,
108 /* 0x26 = 00100110b */ 0,
109 /* 0x27 = 00100111b */ X86_EFL_PF,
110 /* 0x28 = 00101000b */ X86_EFL_PF,
111 /* 0x29 = 00101001b */ 0,
112 /* 0x2a = 00101010b */ 0,
113 /* 0x2b = 00101011b */ X86_EFL_PF,
114 /* 0x2c = 00101100b */ 0,
115 /* 0x2d = 00101101b */ X86_EFL_PF,
116 /* 0x2e = 00101110b */ X86_EFL_PF,
117 /* 0x2f = 00101111b */ 0,
118 /* 0x30 = 00110000b */ X86_EFL_PF,
119 /* 0x31 = 00110001b */ 0,
120 /* 0x32 = 00110010b */ 0,
121 /* 0x33 = 00110011b */ X86_EFL_PF,
122 /* 0x34 = 00110100b */ 0,
123 /* 0x35 = 00110101b */ X86_EFL_PF,
124 /* 0x36 = 00110110b */ X86_EFL_PF,
125 /* 0x37 = 00110111b */ 0,
126 /* 0x38 = 00111000b */ 0,
127 /* 0x39 = 00111001b */ X86_EFL_PF,
128 /* 0x3a = 00111010b */ X86_EFL_PF,
129 /* 0x3b = 00111011b */ 0,
130 /* 0x3c = 00111100b */ X86_EFL_PF,
131 /* 0x3d = 00111101b */ 0,
132 /* 0x3e = 00111110b */ 0,
133 /* 0x3f = 00111111b */ X86_EFL_PF,
134 /* 0x40 = 01000000b */ 0,
135 /* 0x41 = 01000001b */ X86_EFL_PF,
136 /* 0x42 = 01000010b */ X86_EFL_PF,
137 /* 0x43 = 01000011b */ 0,
138 /* 0x44 = 01000100b */ X86_EFL_PF,
139 /* 0x45 = 01000101b */ 0,
140 /* 0x46 = 01000110b */ 0,
141 /* 0x47 = 01000111b */ X86_EFL_PF,
142 /* 0x48 = 01001000b */ X86_EFL_PF,
143 /* 0x49 = 01001001b */ 0,
144 /* 0x4a = 01001010b */ 0,
145 /* 0x4b = 01001011b */ X86_EFL_PF,
146 /* 0x4c = 01001100b */ 0,
147 /* 0x4d = 01001101b */ X86_EFL_PF,
148 /* 0x4e = 01001110b */ X86_EFL_PF,
149 /* 0x4f = 01001111b */ 0,
150 /* 0x50 = 01010000b */ X86_EFL_PF,
151 /* 0x51 = 01010001b */ 0,
152 /* 0x52 = 01010010b */ 0,
153 /* 0x53 = 01010011b */ X86_EFL_PF,
154 /* 0x54 = 01010100b */ 0,
155 /* 0x55 = 01010101b */ X86_EFL_PF,
156 /* 0x56 = 01010110b */ X86_EFL_PF,
157 /* 0x57 = 01010111b */ 0,
158 /* 0x58 = 01011000b */ 0,
159 /* 0x59 = 01011001b */ X86_EFL_PF,
160 /* 0x5a = 01011010b */ X86_EFL_PF,
161 /* 0x5b = 01011011b */ 0,
162 /* 0x5c = 01011100b */ X86_EFL_PF,
163 /* 0x5d = 01011101b */ 0,
164 /* 0x5e = 01011110b */ 0,
165 /* 0x5f = 01011111b */ X86_EFL_PF,
166 /* 0x60 = 01100000b */ X86_EFL_PF,
167 /* 0x61 = 01100001b */ 0,
168 /* 0x62 = 01100010b */ 0,
169 /* 0x63 = 01100011b */ X86_EFL_PF,
170 /* 0x64 = 01100100b */ 0,
171 /* 0x65 = 01100101b */ X86_EFL_PF,
172 /* 0x66 = 01100110b */ X86_EFL_PF,
173 /* 0x67 = 01100111b */ 0,
174 /* 0x68 = 01101000b */ 0,
175 /* 0x69 = 01101001b */ X86_EFL_PF,
176 /* 0x6a = 01101010b */ X86_EFL_PF,
177 /* 0x6b = 01101011b */ 0,
178 /* 0x6c = 01101100b */ X86_EFL_PF,
179 /* 0x6d = 01101101b */ 0,
180 /* 0x6e = 01101110b */ 0,
181 /* 0x6f = 01101111b */ X86_EFL_PF,
182 /* 0x70 = 01110000b */ 0,
183 /* 0x71 = 01110001b */ X86_EFL_PF,
184 /* 0x72 = 01110010b */ X86_EFL_PF,
185 /* 0x73 = 01110011b */ 0,
186 /* 0x74 = 01110100b */ X86_EFL_PF,
187 /* 0x75 = 01110101b */ 0,
188 /* 0x76 = 01110110b */ 0,
189 /* 0x77 = 01110111b */ X86_EFL_PF,
190 /* 0x78 = 01111000b */ X86_EFL_PF,
191 /* 0x79 = 01111001b */ 0,
192 /* 0x7a = 01111010b */ 0,
193 /* 0x7b = 01111011b */ X86_EFL_PF,
194 /* 0x7c = 01111100b */ 0,
195 /* 0x7d = 01111101b */ X86_EFL_PF,
196 /* 0x7e = 01111110b */ X86_EFL_PF,
197 /* 0x7f = 01111111b */ 0,
198 /* 0x80 = 10000000b */ 0,
199 /* 0x81 = 10000001b */ X86_EFL_PF,
200 /* 0x82 = 10000010b */ X86_EFL_PF,
201 /* 0x83 = 10000011b */ 0,
202 /* 0x84 = 10000100b */ X86_EFL_PF,
203 /* 0x85 = 10000101b */ 0,
204 /* 0x86 = 10000110b */ 0,
205 /* 0x87 = 10000111b */ X86_EFL_PF,
206 /* 0x88 = 10001000b */ X86_EFL_PF,
207 /* 0x89 = 10001001b */ 0,
208 /* 0x8a = 10001010b */ 0,
209 /* 0x8b = 10001011b */ X86_EFL_PF,
210 /* 0x8c = 10001100b */ 0,
211 /* 0x8d = 10001101b */ X86_EFL_PF,
212 /* 0x8e = 10001110b */ X86_EFL_PF,
213 /* 0x8f = 10001111b */ 0,
214 /* 0x90 = 10010000b */ X86_EFL_PF,
215 /* 0x91 = 10010001b */ 0,
216 /* 0x92 = 10010010b */ 0,
217 /* 0x93 = 10010011b */ X86_EFL_PF,
218 /* 0x94 = 10010100b */ 0,
219 /* 0x95 = 10010101b */ X86_EFL_PF,
220 /* 0x96 = 10010110b */ X86_EFL_PF,
221 /* 0x97 = 10010111b */ 0,
222 /* 0x98 = 10011000b */ 0,
223 /* 0x99 = 10011001b */ X86_EFL_PF,
224 /* 0x9a = 10011010b */ X86_EFL_PF,
225 /* 0x9b = 10011011b */ 0,
226 /* 0x9c = 10011100b */ X86_EFL_PF,
227 /* 0x9d = 10011101b */ 0,
228 /* 0x9e = 10011110b */ 0,
229 /* 0x9f = 10011111b */ X86_EFL_PF,
230 /* 0xa0 = 10100000b */ X86_EFL_PF,
231 /* 0xa1 = 10100001b */ 0,
232 /* 0xa2 = 10100010b */ 0,
233 /* 0xa3 = 10100011b */ X86_EFL_PF,
234 /* 0xa4 = 10100100b */ 0,
235 /* 0xa5 = 10100101b */ X86_EFL_PF,
236 /* 0xa6 = 10100110b */ X86_EFL_PF,
237 /* 0xa7 = 10100111b */ 0,
238 /* 0xa8 = 10101000b */ 0,
239 /* 0xa9 = 10101001b */ X86_EFL_PF,
240 /* 0xaa = 10101010b */ X86_EFL_PF,
241 /* 0xab = 10101011b */ 0,
242 /* 0xac = 10101100b */ X86_EFL_PF,
243 /* 0xad = 10101101b */ 0,
244 /* 0xae = 10101110b */ 0,
245 /* 0xaf = 10101111b */ X86_EFL_PF,
246 /* 0xb0 = 10110000b */ 0,
247 /* 0xb1 = 10110001b */ X86_EFL_PF,
248 /* 0xb2 = 10110010b */ X86_EFL_PF,
249 /* 0xb3 = 10110011b */ 0,
250 /* 0xb4 = 10110100b */ X86_EFL_PF,
251 /* 0xb5 = 10110101b */ 0,
252 /* 0xb6 = 10110110b */ 0,
253 /* 0xb7 = 10110111b */ X86_EFL_PF,
254 /* 0xb8 = 10111000b */ X86_EFL_PF,
255 /* 0xb9 = 10111001b */ 0,
256 /* 0xba = 10111010b */ 0,
257 /* 0xbb = 10111011b */ X86_EFL_PF,
258 /* 0xbc = 10111100b */ 0,
259 /* 0xbd = 10111101b */ X86_EFL_PF,
260 /* 0xbe = 10111110b */ X86_EFL_PF,
261 /* 0xbf = 10111111b */ 0,
262 /* 0xc0 = 11000000b */ X86_EFL_PF,
263 /* 0xc1 = 11000001b */ 0,
264 /* 0xc2 = 11000010b */ 0,
265 /* 0xc3 = 11000011b */ X86_EFL_PF,
266 /* 0xc4 = 11000100b */ 0,
267 /* 0xc5 = 11000101b */ X86_EFL_PF,
268 /* 0xc6 = 11000110b */ X86_EFL_PF,
269 /* 0xc7 = 11000111b */ 0,
270 /* 0xc8 = 11001000b */ 0,
271 /* 0xc9 = 11001001b */ X86_EFL_PF,
272 /* 0xca = 11001010b */ X86_EFL_PF,
273 /* 0xcb = 11001011b */ 0,
274 /* 0xcc = 11001100b */ X86_EFL_PF,
275 /* 0xcd = 11001101b */ 0,
276 /* 0xce = 11001110b */ 0,
277 /* 0xcf = 11001111b */ X86_EFL_PF,
278 /* 0xd0 = 11010000b */ 0,
279 /* 0xd1 = 11010001b */ X86_EFL_PF,
280 /* 0xd2 = 11010010b */ X86_EFL_PF,
281 /* 0xd3 = 11010011b */ 0,
282 /* 0xd4 = 11010100b */ X86_EFL_PF,
283 /* 0xd5 = 11010101b */ 0,
284 /* 0xd6 = 11010110b */ 0,
285 /* 0xd7 = 11010111b */ X86_EFL_PF,
286 /* 0xd8 = 11011000b */ X86_EFL_PF,
287 /* 0xd9 = 11011001b */ 0,
288 /* 0xda = 11011010b */ 0,
289 /* 0xdb = 11011011b */ X86_EFL_PF,
290 /* 0xdc = 11011100b */ 0,
291 /* 0xdd = 11011101b */ X86_EFL_PF,
292 /* 0xde = 11011110b */ X86_EFL_PF,
293 /* 0xdf = 11011111b */ 0,
294 /* 0xe0 = 11100000b */ 0,
295 /* 0xe1 = 11100001b */ X86_EFL_PF,
296 /* 0xe2 = 11100010b */ X86_EFL_PF,
297 /* 0xe3 = 11100011b */ 0,
298 /* 0xe4 = 11100100b */ X86_EFL_PF,
299 /* 0xe5 = 11100101b */ 0,
300 /* 0xe6 = 11100110b */ 0,
301 /* 0xe7 = 11100111b */ X86_EFL_PF,
302 /* 0xe8 = 11101000b */ X86_EFL_PF,
303 /* 0xe9 = 11101001b */ 0,
304 /* 0xea = 11101010b */ 0,
305 /* 0xeb = 11101011b */ X86_EFL_PF,
306 /* 0xec = 11101100b */ 0,
307 /* 0xed = 11101101b */ X86_EFL_PF,
308 /* 0xee = 11101110b */ X86_EFL_PF,
309 /* 0xef = 11101111b */ 0,
310 /* 0xf0 = 11110000b */ X86_EFL_PF,
311 /* 0xf1 = 11110001b */ 0,
312 /* 0xf2 = 11110010b */ 0,
313 /* 0xf3 = 11110011b */ X86_EFL_PF,
314 /* 0xf4 = 11110100b */ 0,
315 /* 0xf5 = 11110101b */ X86_EFL_PF,
316 /* 0xf6 = 11110110b */ X86_EFL_PF,
317 /* 0xf7 = 11110111b */ 0,
318 /* 0xf8 = 11111000b */ 0,
319 /* 0xf9 = 11111001b */ X86_EFL_PF,
320 /* 0xfa = 11111010b */ X86_EFL_PF,
321 /* 0xfb = 11111011b */ 0,
322 /* 0xfc = 11111100b */ X86_EFL_PF,
323 /* 0xfd = 11111101b */ 0,
324 /* 0xfe = 11111110b */ 0,
325 /* 0xff = 11111111b */ X86_EFL_PF,
326};
327#endif /* RT_ARCH_X86 */
328
329
/**
 * Derives the sign flag value from a result and its bit width.
 *
 * The sign flag (SF) is a copy of the most significant bit of the result.
 *
 * @returns X86_EFL_SF or 0.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result in bits (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( (((a_uResult) >> ((a_cBitsWidth) - 1)) & 1) ? UINT32_C(1) << X86_EFL_SF_BIT : UINT32_C(0) )
342
/**
 * Derives the zero flag value from a result.
 *
 * The zero flag (ZF) is set exactly when the result is zero.
 *
 * @returns X86_EFL_ZF or 0.
 * @param   a_uResult       Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (a_uResult) != 0 ? UINT32_C(0) : UINT32_C(1) << X86_EFL_ZF_BIT )
353
/**
 * Rewrites the status bits of an EFLAGS value after a logical operation.
 *
 * All bits in X86_EFL_STATUS_BITS are cleared first; PF, ZF and SF are then
 * derived from the result and @a a_fExtra is OR'ed in.  CF and OF are defined
 * to be 0 by logical operations, so they stay clear unless the caller passes
 * them via @a a_fExtra.  AF is architecturally undefined here and is simply
 * left clear.
 *
 * This is a statement macro; it does not yield a value.  Note that
 * @a a_uResult is evaluated more than once.
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result in bits (8, 16, 32, 64).
 * @param   a_fExtra        Additional bits to set.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t const fEflLogicKept = *(a_pfEFlags) & ~X86_EFL_STATUS_BITS; \
        *(a_pfEFlags) = fEflLogicKept \
                      | g_afParity[(a_uResult) & 0xff] \
                      | X86_EFL_CALC_ZF(a_uResult) \
                      | X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
                      | (a_fExtra); \
    } while (0)
377
378
379#ifdef RT_ARCH_X86
380/*
381 * There are a few 64-bit on 32-bit things we'd rather do in C. Actually, doing
382 * it all in C is probably safer atm., optimize what's necessary later, maybe.
383 */
384
385
386/* Binary ops */
387
388IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
389{
390 uint64_t uDst = *puDst;
391 uint64_t uResult = uDst + uSrc;
392 *puDst = uResult;
393
394 /* Calc EFLAGS. */
395 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
396 fEfl |= (uResult < uDst) << X86_EFL_CF_BIT;
397 fEfl |= g_afParity[uResult & 0xff];
398 fEfl |= ((uint32_t)uResult ^ (uint32_t)uSrc ^ (uint32_t)uDst) & X86_EFL_AF;
399 fEfl |= X86_EFL_CALC_ZF(uResult);
400 fEfl |= X86_EFL_CALC_SF(uResult, 64);
401 fEfl |= (((uDst ^ uSrc ^ RT_BIT_64(63)) & (uResult ^ uDst)) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
402 *pfEFlags = fEfl;
403}
404
405
406IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
407{
408 if (!(*pfEFlags & X86_EFL_CF))
409 iemAImpl_add_u64(puDst, uSrc, pfEFlags);
410 else
411 {
412 uint64_t uDst = *puDst;
413 uint64_t uResult = uDst + uSrc + 1;
414 *puDst = uResult;
415
416 /* Calc EFLAGS. */
417 /** @todo verify AF and OF calculations. */
418 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
419 fEfl |= (uResult <= uDst) << X86_EFL_CF_BIT;
420 fEfl |= g_afParity[uResult & 0xff];
421 fEfl |= ((uint32_t)uResult ^ (uint32_t)uSrc ^ (uint32_t)uDst) & X86_EFL_AF;
422 fEfl |= X86_EFL_CALC_ZF(uResult);
423 fEfl |= X86_EFL_CALC_SF(uResult, 64);
424 fEfl |= (((uDst ^ uSrc ^ RT_BIT_64(63)) & (uResult ^ uDst)) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
425 *pfEFlags = fEfl;
426 }
427}
428
429
430IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
431{
432 uint64_t uDst = *puDst;
433 uint64_t uResult = uDst - uSrc;
434 *puDst = uResult;
435
436 /* Calc EFLAGS. */
437 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
438 fEfl |= (uDst < uSrc) << X86_EFL_CF_BIT;
439 fEfl |= g_afParity[uResult & 0xff];
440 fEfl |= ((uint32_t)uResult ^ (uint32_t)uSrc ^ (uint32_t)uDst) & X86_EFL_AF;
441 fEfl |= X86_EFL_CALC_ZF(uResult);
442 fEfl |= X86_EFL_CALC_SF(uResult, 64);
443 fEfl |= (((uDst ^ uSrc) & (uResult ^ uDst)) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
444 *pfEFlags = fEfl;
445}
446
447
448IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
449{
450 if (!(*pfEFlags & X86_EFL_CF))
451 iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
452 else
453 {
454 uint64_t uDst = *puDst;
455 uint64_t uResult = uDst - uSrc - 1;
456 *puDst = uResult;
457
458 /* Calc EFLAGS. */
459 /** @todo verify AF and OF calculations. */
460 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
461 fEfl |= (uDst <= uSrc) << X86_EFL_CF_BIT;
462 fEfl |= g_afParity[uResult & 0xff];
463 fEfl |= ((uint32_t)uResult ^ (uint32_t)uSrc ^ (uint32_t)uDst) & X86_EFL_AF;
464 fEfl |= X86_EFL_CALC_ZF(uResult);
465 fEfl |= X86_EFL_CALC_SF(uResult, 64);
466 fEfl |= (((uDst ^ uSrc) & (uResult ^ uDst)) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
467 *pfEFlags = fEfl;
468 }
469}
470
471
472IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
473{
474 uint64_t uResult = *puDst | uSrc;
475 *puDst = uResult;
476 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
477}
478
479
480IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
481{
482 uint64_t uResult = *puDst ^ uSrc;
483 *puDst = uResult;
484 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
485}
486
487
488IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
489{
490 uint64_t uResult = *puDst & uSrc;
491 *puDst = uResult;
492 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
493}
494
495
496IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
497{
498 uint64_t uDstTmp = *puDst;
499 iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
500}
501
502
503IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
504{
505 uint64_t uResult = *puDst & uSrc;
506 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
507}
508
509
/**
 * 64-bit locked binary operand operation.
 *
 * Expects @c puDst, @c uSrc and @c pfEFlags to be in scope.  The non-locked
 * worker iemAImpl_<a_Mnemonic>_u64 is run on a private copy of the
 * destination and of EFLAGS; the result is then published with a
 * compare-and-exchange, and the whole attempt is repeated for as long as the
 * exchange reports failure.  EFLAGS is only written back once the exchange
 * has succeeded.
 */
# define DO_LOCKED_BIN_OP_U64(a_Mnemonic) \
    do { \
        uint64_t uExpected = ASMAtomicReadU64(puDst); \
        for (;;) \
        { \
            uint64_t uNew    = uExpected; \
            uint32_t fEflNew = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u64(&uNew, uSrc, &fEflNew); \
            if (ASMAtomicCmpXchgExU64(puDst, uNew, uExpected, &uExpected)) \
            { \
                *pfEFlags = fEflNew; \
                break; \
            } \
        } \
    } while (0)
524
525
/**
 * Locked 64-bit ADD: runs iemAImpl_add_u64 on *puDst through the
 * DO_LOCKED_BIN_OP_U64 compare-exchange retry loop.
 */
526IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
527{
528 DO_LOCKED_BIN_OP_U64(add);
529}
530
531
/**
 * Locked 64-bit ADC: runs iemAImpl_adc_u64 on *puDst through the
 * DO_LOCKED_BIN_OP_U64 compare-exchange retry loop.
 */
532IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
533{
534 DO_LOCKED_BIN_OP_U64(adc);
535}
536
537
/**
 * Locked 64-bit SUB: runs iemAImpl_sub_u64 on *puDst through the
 * DO_LOCKED_BIN_OP_U64 compare-exchange retry loop.
 */
538IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
539{
540 DO_LOCKED_BIN_OP_U64(sub);
541}
542
543
/**
 * Locked 64-bit SBB: runs iemAImpl_sbb_u64 on *puDst through the
 * DO_LOCKED_BIN_OP_U64 compare-exchange retry loop.
 */
544IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
545{
546 DO_LOCKED_BIN_OP_U64(sbb);
547}
548
549
/**
 * Locked 64-bit OR: runs iemAImpl_or_u64 on *puDst through the
 * DO_LOCKED_BIN_OP_U64 compare-exchange retry loop.
 */
550IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
551{
552 DO_LOCKED_BIN_OP_U64(or);
553}
554
555
/**
 * Locked 64-bit XOR: runs iemAImpl_xor_u64 on *puDst through the
 * DO_LOCKED_BIN_OP_U64 compare-exchange retry loop.
 */
556IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
557{
558 DO_LOCKED_BIN_OP_U64(xor);
559}
560
561
/**
 * Locked 64-bit AND: runs iemAImpl_and_u64 on *puDst through the
 * DO_LOCKED_BIN_OP_U64 compare-exchange retry loop.
 */
562IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
563{
564 DO_LOCKED_BIN_OP_U64(and);
565}
566
567
568IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
569{
570 uint64_t uDst = *puDst;
571 uint64_t uResult = uDst;
572 iemAImpl_add_u64(&uResult, *puReg, pfEFlags);
573 *puDst = uResult;
574 *puReg = uDst;
575}
576
577
578IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64_locked,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
579{
580 uint64_t uOld = ASMAtomicReadU64(puDst);
581 uint64_t uTmpDst;
582 uint32_t fEflTmp;
583 do
584 {
585 uTmpDst = uOld;
586 fEflTmp = *pfEFlags;
587 iemAImpl_add_u64(&uTmpDst, *puReg, pfEFlags);
588 } while (!ASMAtomicCmpXchgExU64(puDst, uTmpDst, uOld, &uOld));
589 *puReg = uOld;
590 *pfEFlags = fEflTmp;
591}
592
593
594/* Bit operations (same signature as above). */
595
596IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
597{
598 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
599 logical operation (AND/OR/whatever). */
600 Assert(uSrc < 64);
601 uint64_t uDst = *puDst;
602 if (uDst & RT_BIT_64(uSrc))
603 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
604 else
605 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
606}
607
608IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
609{
610 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
611 logical operation (AND/OR/whatever). */
612 Assert(uSrc < 64);
613 uint64_t fMask = RT_BIT_64(uSrc);
614 uint64_t uDst = *puDst;
615 if (uDst & fMask)
616 {
617 uDst &= ~fMask;
618 *puDst = uDst;
619 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
620 }
621 else
622 {
623 uDst |= fMask;
624 *puDst = uDst;
625 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
626 }
627}
628
629IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
630{
631 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
632 logical operation (AND/OR/whatever). */
633 Assert(uSrc < 64);
634 uint64_t fMask = RT_BIT_64(uSrc);
635 uint64_t uDst = *puDst;
636 if (uDst & fMask)
637 {
638 uDst &= ~fMask;
639 *puDst = uDst;
640 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
641 }
642 else
643 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
644}
645
646IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
647{
648 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
649 logical operation (AND/OR/whatever). */
650 Assert(uSrc < 64);
651 uint64_t fMask = RT_BIT_64(uSrc);
652 uint64_t uDst = *puDst;
653 if (uDst & fMask)
654 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
655 else
656 {
657 uDst |= fMask;
658 *puDst = uDst;
659 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
660 }
661}
662
663
/**
 * Locked 64-bit BTC: runs iemAImpl_btc_u64 on *puDst through the
 * DO_LOCKED_BIN_OP_U64 compare-exchange retry loop.
 */
664IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
665{
666 DO_LOCKED_BIN_OP_U64(btc);
667}
668
/**
 * Locked 64-bit BTR: runs iemAImpl_btr_u64 on *puDst through the
 * DO_LOCKED_BIN_OP_U64 compare-exchange retry loop.
 */
669IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
670{
671 DO_LOCKED_BIN_OP_U64(btr);
672}
673
/**
 * Locked 64-bit BTS: runs iemAImpl_bts_u64 on *puDst through the
 * DO_LOCKED_BIN_OP_U64 compare-exchange retry loop.
 */
674IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
675{
676 DO_LOCKED_BIN_OP_U64(bts);
677}
678
679
680/* bit scan */
681
682IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
683{
684 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
685 /** @todo check what real CPUs do. */
686 if (uSrc)
687 {
688 uint8_t iBit;
689 uint32_t u32Src;
690 if (uSrc & UINT32_MAX)
691 {
692 iBit = 0;
693 u32Src = uSrc;
694 }
695 else
696 {
697 iBit = 32;
698 u32Src = uSrc >> 32;
699 }
700 if (!(u32Src & UINT16_MAX))
701 {
702 iBit += 16;
703 u32Src >>= 16;
704 }
705 if (!(u32Src & UINT8_MAX))
706 {
707 iBit += 8;
708 u32Src >>= 8;
709 }
710 if (!(u32Src & 0xf))
711 {
712 iBit += 4;
713 u32Src >>= 4;
714 }
715 if (!(u32Src & 0x3))
716 {
717 iBit += 2;
718 u32Src >>= 2;
719 }
720 if (!(u32Src & 1))
721 {
722 iBit += 1;
723 Assert(u32Src & 2);
724 }
725
726 *puDst = iBit;
727 *pfEFlags &= ~X86_EFL_ZF;
728 }
729 else
730 *pfEFlags |= X86_EFL_ZF;
731}
732
733IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
734{
735 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
736 /** @todo check what real CPUs do. */
737 if (uSrc)
738 {
739 uint8_t iBit;
740 uint32_t u32Src;
741 if (uSrc & UINT64_C(0xffffffff00000000))
742 {
743 iBit = 63;
744 u32Src = uSrc >> 32;
745 }
746 else
747 {
748 iBit = 31;
749 u32Src = uSrc;
750 }
751 if (!(u32Src & UINT32_C(0xffff0000)))
752 {
753 iBit -= 16;
754 u32Src <<= 16;
755 }
756 if (!(u32Src & UINT32_C(0xff000000)))
757 {
758 iBit -= 8;
759 u32Src <<= 8;
760 }
761 if (!(u32Src & UINT32_C(0xf0000000)))
762 {
763 iBit -= 4;
764 u32Src <<= 4;
765 }
766 if (!(u32Src & UINT32_C(0xc0000000)))
767 {
768 iBit -= 2;
769 u32Src <<= 2;
770 }
771 if (!(u32Src & UINT32_C(0x80000000)))
772 {
773 iBit -= 1;
774 Assert(u32Src & RT_BIT(30));
775 }
776
777 *puDst = iBit;
778 *pfEFlags &= ~X86_EFL_ZF;
779 }
780 else
781 *pfEFlags |= X86_EFL_ZF;
782}
783
784
785/* Unary operands. */
786
787IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
788{
789 uint64_t uDst = *puDst;
790 uint64_t uResult = uDst + 1;
791 *puDst = uResult;
792
793 /*
794 * Calc EFLAGS.
795 * CF is NOT modified for hysterical raisins (allegedly for carrying and
796 * borrowing in arithmetic loops on intel 8008).
797 */
798 uint32_t fEfl = *pfEFlags & ~(X86_EFL_STATUS_BITS & ~X86_EFL_CF);
799 fEfl |= g_afParity[uResult & 0xff];
800 fEfl |= ((uint32_t)uResult ^ (uint32_t)uDst) & X86_EFL_AF;
801 fEfl |= X86_EFL_CALC_ZF(uResult);
802 fEfl |= X86_EFL_CALC_SF(uResult, 64);
803 fEfl |= (((uDst ^ RT_BIT_64(63)) & uResult) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
804 *pfEFlags = fEfl;
805}
806
807
808IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
809{
810 uint64_t uDst = *puDst;
811 uint64_t uResult = uDst - 1;
812 *puDst = uResult;
813
814 /*
815 * Calc EFLAGS.
816 * CF is NOT modified for hysterical raisins (allegedly for carrying and
817 * borrowing in arithmetic loops on intel 8008).
818 */
819 uint32_t fEfl = *pfEFlags & ~(X86_EFL_STATUS_BITS & ~X86_EFL_CF);
820 fEfl |= g_afParity[uResult & 0xff];
821 fEfl |= ((uint32_t)uResult ^ (uint32_t)uDst) & X86_EFL_AF;
822 fEfl |= X86_EFL_CALC_ZF(uResult);
823 fEfl |= X86_EFL_CALC_SF(uResult, 64);
824 fEfl |= ((uDst & (uResult ^ RT_BIT_64(63))) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
825 *pfEFlags = fEfl;
826}
827
828
829IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
830{
831 uint64_t uDst = *puDst;
832 uint64_t uResult = ~uDst;
833 *puDst = uResult;
834 /* EFLAGS are not modified. */
835 RT_NOREF_PV(pfEFlags);
836}
837
838
839IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
840{
841 uint64_t uDst = 0;
842 uint64_t uSrc = *puDst;
843 uint64_t uResult = uDst - uSrc;
844 *puDst = uResult;
845
846 /* Calc EFLAGS. */
847 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
848 fEfl |= (uSrc != 0) << X86_EFL_CF_BIT;
849 fEfl |= g_afParity[uResult & 0xff];
850 fEfl |= ((uint32_t)uResult ^ (uint32_t)uDst) & X86_EFL_AF;
851 fEfl |= X86_EFL_CALC_ZF(uResult);
852 fEfl |= X86_EFL_CALC_SF(uResult, 64);
853 fEfl |= ((uSrc & uResult) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF;
854 *pfEFlags = fEfl;
855}
856
857
/**
 * 64-bit locked unary operand operation.
 *
 * Expects @c puDst and @c pfEFlags to be in scope.  The non-locked worker
 * iemAImpl_<a_Mnemonic>_u64 is run on a private copy of the destination and
 * of EFLAGS; the result is then published with a compare-and-exchange, and
 * the whole attempt is repeated for as long as the exchange reports failure.
 * EFLAGS is only written back once the exchange has succeeded.
 */
# define DO_LOCKED_UNARY_OP_U64(a_Mnemonic) \
    do { \
        uint64_t uExpected = ASMAtomicReadU64(puDst); \
        for (;;) \
        { \
            uint64_t uNew    = uExpected; \
            uint32_t fEflNew = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u64(&uNew, &fEflNew); \
            if (ASMAtomicCmpXchgExU64(puDst, uNew, uExpected, &uExpected)) \
            { \
                *pfEFlags = fEflNew; \
                break; \
            } \
        } \
    } while (0)
872
/**
 * Locked 64-bit INC: runs iemAImpl_inc_u64 on *puDst through the
 * DO_LOCKED_UNARY_OP_U64 compare-exchange retry loop.
 */
873IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64_locked,(uint64_t *puDst, uint32_t *pfEFlags))
874{
875 DO_LOCKED_UNARY_OP_U64(inc);
876}
877
878
/**
 * Locked 64-bit DEC: runs iemAImpl_dec_u64 on *puDst through the
 * DO_LOCKED_UNARY_OP_U64 compare-exchange retry loop.
 */
879IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64_locked,(uint64_t *puDst, uint32_t *pfEFlags))
880{
881 DO_LOCKED_UNARY_OP_U64(dec);
882}
883
884
/**
 * Locked 64-bit NOT: runs iemAImpl_not_u64 on *puDst through the
 * DO_LOCKED_UNARY_OP_U64 compare-exchange retry loop.
 */
885IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64_locked,(uint64_t *puDst, uint32_t *pfEFlags))
886{
887 DO_LOCKED_UNARY_OP_U64(not);
888}
889
890
/**
 * Locked 64-bit NEG: runs iemAImpl_neg_u64 on *puDst through the
 * DO_LOCKED_UNARY_OP_U64 compare-exchange retry loop.
 */
891IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64_locked,(uint64_t *puDst, uint32_t *pfEFlags))
892{
893 DO_LOCKED_UNARY_OP_U64(neg);
894}
895
896
897/* Shift and rotate. */
898
899IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
900{
901 cShift &= 63;
902 if (cShift)
903 {
904 uint64_t uDst = *puDst;
905 uint64_t uResult;
906 uResult = uDst << cShift;
907 uResult |= uDst >> (64 - cShift);
908 *puDst = uResult;
909
910 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
911 it the same way as for 1 bit shifts. */
912 AssertCompile(X86_EFL_CF_BIT == 0);
913 uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF | X86_EFL_OF);
914 uint32_t fCarry = (uResult & 1);
915 fEfl |= fCarry;
916 fEfl |= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
917 *pfEFlags = fEfl;
918 }
919}
920
921
922IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
923{
924 cShift &= 63;
925 if (cShift)
926 {
927 uint64_t uDst = *puDst;
928 uint64_t uResult;
929 uResult = uDst >> cShift;
930 uResult |= uDst << (64 - cShift);
931 *puDst = uResult;
932
933 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
934 it the same way as for 1 bit shifts (OF = OF XOR New-CF). */
935 AssertCompile(X86_EFL_CF_BIT == 0);
936 uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF | X86_EFL_OF);
937 uint32_t fCarry = (uResult >> 63) & X86_EFL_CF;
938 fEfl |= fCarry;
939 fEfl |= (((uResult >> 62) ^ fCarry) << X86_EFL_OF_BIT) & X86_EFL_OF;
940 *pfEFlags = fEfl;
941 }
942}
943
944
945IEM_DECL_IMPL_DEF(void, iemAImpl_rcl_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
946{
947 cShift &= 63;
948 if (cShift)
949 {
950 uint32_t fEfl = *pfEFlags;
951 uint64_t uDst = *puDst;
952 uint64_t uResult;
953 uResult = uDst << cShift;
954 AssertCompile(X86_EFL_CF_BIT == 0);
955 if (cShift > 1)
956 uResult |= uDst >> (65 - cShift);
957 uResult |= (uint64_t)(fEfl & X86_EFL_CF) << (cShift - 1);
958 *puDst = uResult;
959
960 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
961 it the same way as for 1 bit shifts. */
962 uint32_t fCarry = (uDst >> (64 - cShift)) & X86_EFL_CF;
963 fEfl &= ~(X86_EFL_CF | X86_EFL_OF);
964 fEfl |= fCarry;
965 fEfl |= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
966 *pfEFlags = fEfl;
967 }
968}
969
970
971IEM_DECL_IMPL_DEF(void, iemAImpl_rcr_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
972{
973 cShift &= 63;
974 if (cShift)
975 {
976 uint32_t fEfl = *pfEFlags;
977 uint64_t uDst = *puDst;
978 uint64_t uResult;
979 uResult = uDst >> cShift;
980 AssertCompile(X86_EFL_CF_BIT == 0);
981 if (cShift > 1)
982 uResult |= uDst << (65 - cShift);
983 uResult |= (uint64_t)(fEfl & X86_EFL_CF) << (64 - cShift);
984 *puDst = uResult;
985
986 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
987 it the same way as for 1 bit shifts. */
988 uint32_t fCarry = (uDst >> (cShift - 1)) & X86_EFL_CF;
989 fEfl &= ~(X86_EFL_CF | X86_EFL_OF);
990 fEfl |= fCarry;
991 fEfl |= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
992 *pfEFlags = fEfl;
993 }
994}
995
996
997IEM_DECL_IMPL_DEF(void, iemAImpl_shl_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
998{
999 cShift &= 63;
1000 if (cShift)
1001 {
1002 uint64_t uDst = *puDst;
1003 uint64_t uResult = uDst << cShift;
1004 *puDst = uResult;
1005
1006 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
1007 it the same way as for 1 bit shifts. The AF bit is undefined, we
1008 always set it to zero atm. */
1009 AssertCompile(X86_EFL_CF_BIT == 0);
1010 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
1011 uint32_t fCarry = (uDst >> (64 - cShift)) & X86_EFL_CF;
1012 fEfl |= fCarry;
1013 fEfl |= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
1014 fEfl |= X86_EFL_CALC_SF(uResult, 64);
1015 fEfl |= X86_EFL_CALC_ZF(uResult);
1016 fEfl |= g_afParity[uResult & 0xff];
1017 *pfEFlags = fEfl;
1018 }
1019}
1020
1021
1022IEM_DECL_IMPL_DEF(void, iemAImpl_shr_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
1023{
1024 cShift &= 63;
1025 if (cShift)
1026 {
1027 uint64_t uDst = *puDst;
1028 uint64_t uResult = uDst >> cShift;
1029 *puDst = uResult;
1030
1031 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
1032 it the same way as for 1 bit shifts. The AF bit is undefined, we
1033 always set it to zero atm. */
1034 AssertCompile(X86_EFL_CF_BIT == 0);
1035 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
1036 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF;
1037 fEfl |= (uDst >> 63) << X86_EFL_OF_BIT;
1038 fEfl |= X86_EFL_CALC_SF(uResult, 64);
1039 fEfl |= X86_EFL_CALC_ZF(uResult);
1040 fEfl |= g_afParity[uResult & 0xff];
1041 *pfEFlags = fEfl;
1042 }
1043}
1044
1045
1046IEM_DECL_IMPL_DEF(void, iemAImpl_sar_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
1047{
1048 cShift &= 63;
1049 if (cShift)
1050 {
1051 uint64_t uDst = *puDst;
1052 uint64_t uResult = (int64_t)uDst >> cShift;
1053 *puDst = uResult;
1054
1055 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
1056 it the same way as for 1 bit shifts (0). The AF bit is undefined,
1057 we always set it to zero atm. */
1058 AssertCompile(X86_EFL_CF_BIT == 0);
1059 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
1060 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF;
1061 fEfl |= X86_EFL_CALC_SF(uResult, 64);
1062 fEfl |= X86_EFL_CALC_ZF(uResult);
1063 fEfl |= g_afParity[uResult & 0xff];
1064 *pfEFlags = fEfl;
1065 }
1066}
1067
1068
1069IEM_DECL_IMPL_DEF(void, iemAImpl_shld_u64,(uint64_t *puDst, uint64_t uSrc, uint8_t cShift, uint32_t *pfEFlags))
1070{
1071 cShift &= 63;
1072 if (cShift)
1073 {
1074 uint64_t uDst = *puDst;
1075 uint64_t uResult;
1076 uResult = uDst << cShift;
1077 uResult |= uSrc >> (64 - cShift);
1078 *puDst = uResult;
1079
1080 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
1081 it the same way as for 1 bit shifts. The AF bit is undefined,
1082 we always set it to zero atm. */
1083 AssertCompile(X86_EFL_CF_BIT == 0);
1084 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
1085 fEfl |= (uDst >> (64 - cShift)) & X86_EFL_CF;
1086 fEfl |= (uint32_t)((uDst >> 63) ^ (uint32_t)(uResult >> 63)) << X86_EFL_OF_BIT;
1087 fEfl |= X86_EFL_CALC_SF(uResult, 64);
1088 fEfl |= X86_EFL_CALC_ZF(uResult);
1089 fEfl |= g_afParity[uResult & 0xff];
1090 *pfEFlags = fEfl;
1091 }
1092}
1093
1094
1095IEM_DECL_IMPL_DEF(void, iemAImpl_shrd_u64,(uint64_t *puDst, uint64_t uSrc, uint8_t cShift, uint32_t *pfEFlags))
1096{
1097 cShift &= 63;
1098 if (cShift)
1099 {
1100 uint64_t uDst = *puDst;
1101 uint64_t uResult;
1102 uResult = uDst >> cShift;
1103 uResult |= uSrc << (64 - cShift);
1104 *puDst = uResult;
1105
1106 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement
1107 it the same way as for 1 bit shifts. The AF bit is undefined,
1108 we always set it to zero atm. */
1109 AssertCompile(X86_EFL_CF_BIT == 0);
1110 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
1111 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF;
1112 fEfl |= (uint32_t)((uDst >> 63) ^ (uint32_t)(uResult >> 63)) << X86_EFL_OF_BIT;
1113 fEfl |= X86_EFL_CALC_SF(uResult, 64);
1114 fEfl |= X86_EFL_CALC_ZF(uResult);
1115 fEfl |= g_afParity[uResult & 0xff];
1116 *pfEFlags = fEfl;
1117 }
1118}
1119
1120
1121/* misc */
1122
1123IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64,(uint64_t *puMem, uint64_t *puReg))
1124{
1125 /* XCHG implies LOCK. */
1126 uint64_t uOldMem = *puMem;
1127 while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
1128 ASMNopPause();
1129 *puReg = uOldMem;
1130}
1131
1132
1133#endif /* RT_ARCH_X86 */
1134#ifdef RT_ARCH_X86
1135
1136/* multiplication and division */
1137
1138
1139IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
1140{
1141 RTUINT128U Result;
1142 RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
1143 *pu64RAX = Result.s.Lo;
1144 *pu64RDX = Result.s.Hi;
1145
1146 /* MUL EFLAGS according to Skylake (similar to IMUL). */
1147 *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
1148 if (Result.s.Lo & RT_BIT_64(63))
1149 *pfEFlags |= X86_EFL_SF;
1150 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1151 if (Result.s.Hi != 0)
1152 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1153 return 0;
1154}
1155
1156
1157IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
1158{
1159 RTUINT128U Result;
1160 *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
1161 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
1162 /* Skylake may set: */ | X86_EFL_PF);
1163
1164 if ((int64_t)*pu64RAX >= 0)
1165 {
1166 if ((int64_t)u64Factor >= 0)
1167 {
1168 RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
1169 if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
1170 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1171 }
1172 else
1173 {
1174 RTUInt128MulU64ByU64(&Result, *pu64RAX, UINT64_C(0) - u64Factor);
1175 if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
1176 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1177 RTUInt128AssignNeg(&Result);
1178 }
1179 }
1180 else
1181 {
1182 if ((int64_t)u64Factor >= 0)
1183 {
1184 RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, u64Factor);
1185 if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
1186 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1187 RTUInt128AssignNeg(&Result);
1188 }
1189 else
1190 {
1191 RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, UINT64_C(0) - u64Factor);
1192 if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
1193 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1194 }
1195 }
1196 *pu64RAX = Result.s.Lo;
1197 if (Result.s.Lo & RT_BIT_64(63))
1198 *pfEFlags |= X86_EFL_SF;
1199 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1200 *pu64RDX = Result.s.Hi;
1201
1202 return 0;
1203}
1204
1205
1206IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1207{
1208/** @todo Testcase: IMUL 2 and 3 operands. */
1209 uint64_t u64Ign;
1210 iemAImpl_imul_u64(puDst, &u64Ign, uSrc, pfEFlags);
1211}
1212
1213
1214
1215IEM_DECL_IMPL_DEF(int, iemAImpl_div_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
1216{
1217 /* Note! Skylake leaves all flags alone. */
1218 RT_NOREF_PV(pfEFlags);
1219
1220 if ( u64Divisor != 0
1221 && *pu64RDX < u64Divisor)
1222 {
1223 RTUINT128U Dividend;
1224 Dividend.s.Lo = *pu64RAX;
1225 Dividend.s.Hi = *pu64RDX;
1226
1227 RTUINT128U Divisor;
1228 Divisor.s.Lo = u64Divisor;
1229 Divisor.s.Hi = 0;
1230
1231 RTUINT128U Remainder;
1232 RTUINT128U Quotient;
1233# ifdef __GNUC__ /* GCC maybe really annoying in function. */
1234 Quotient.s.Lo = 0;
1235 Quotient.s.Hi = 0;
1236# endif
1237 RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);
1238 Assert(Quotient.s.Hi == 0);
1239 Assert(Remainder.s.Hi == 0);
1240
1241 *pu64RAX = Quotient.s.Lo;
1242 *pu64RDX = Remainder.s.Lo;
1243 /** @todo research the undefined DIV flags. */
1244 return 0;
1245
1246 }
1247 /* #DE */
1248 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1249}
1250
1251
1252IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
1253{
1254 /* Note! Skylake leaves all flags alone. */
1255 RT_NOREF_PV(pfEFlags);
1256
1257 if (u64Divisor != 0)
1258 {
1259 /*
1260 * Convert to unsigned division.
1261 */
1262 RTUINT128U Dividend;
1263 Dividend.s.Lo = *pu64RAX;
1264 Dividend.s.Hi = *pu64RDX;
1265 if ((int64_t)*pu64RDX < 0)
1266 RTUInt128AssignNeg(&Dividend);
1267
1268 RTUINT128U Divisor;
1269 Divisor.s.Hi = 0;
1270 if ((int64_t)u64Divisor >= 0)
1271 Divisor.s.Lo = u64Divisor;
1272 else
1273 Divisor.s.Lo = UINT64_C(0) - u64Divisor;
1274
1275 RTUINT128U Remainder;
1276 RTUINT128U Quotient;
1277# ifdef __GNUC__ /* GCC maybe really annoying in function. */
1278 Quotient.s.Lo = 0;
1279 Quotient.s.Hi = 0;
1280# endif
1281 RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);
1282
1283 /*
1284 * Setup the result, checking for overflows.
1285 */
1286 if ((int64_t)u64Divisor >= 0)
1287 {
1288 if ((int64_t)*pu64RDX >= 0)
1289 {
1290 /* Positive divisor, positive dividend => result positive. */
1291 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
1292 {
1293 *pu64RAX = Quotient.s.Lo;
1294 *pu64RDX = Remainder.s.Lo;
1295 return 0;
1296 }
1297 }
1298 else
1299 {
1300 /* Positive divisor, positive dividend => result negative. */
1301 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
1302 {
1303 *pu64RAX = UINT64_C(0) - Quotient.s.Lo;
1304 *pu64RDX = UINT64_C(0) - Remainder.s.Lo;
1305 return 0;
1306 }
1307 }
1308 }
1309 else
1310 {
1311 if ((int64_t)*pu64RDX >= 0)
1312 {
1313 /* Negative divisor, positive dividend => negative quotient, positive remainder. */
1314 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
1315 {
1316 *pu64RAX = UINT64_C(0) - Quotient.s.Lo;
1317 *pu64RDX = Remainder.s.Lo;
1318 return 0;
1319 }
1320 }
1321 else
1322 {
1323 /* Negative divisor, negative dividend => positive quotient, negative remainder. */
1324 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
1325 {
1326 *pu64RAX = Quotient.s.Lo;
1327 *pu64RDX = UINT64_C(0) - Remainder.s.Lo;
1328 return 0;
1329 }
1330 }
1331 }
1332 }
1333 /* #DE */
1334 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1335}
1336
1337
1338#endif /* RT_ARCH_X86 */
1339
1340
1341IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
1342{
1343 if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
1344 {
1345 *pu16Dst &= X86_SEL_MASK_OFF_RPL;
1346 *pu16Dst |= u16Src & X86_SEL_RPL;
1347
1348 *pfEFlags |= X86_EFL_ZF;
1349 }
1350 else
1351 *pfEFlags &= ~X86_EFL_ZF;
1352}
1353
1354
1355
1356IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
1357 PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
1358{
1359 RTUINT128U u128Tmp = *pu128Dst;
1360 if ( u128Tmp.s.Lo == pu128RaxRdx->s.Lo
1361 && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
1362 {
1363 *pu128Dst = *pu128RbxRcx;
1364 *pEFlags |= X86_EFL_ZF;
1365 }
1366 else
1367 {
1368 *pu128RaxRdx = u128Tmp;
1369 *pEFlags &= ~X86_EFL_ZF;
1370 }
1371}
1372
1373
1374IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
1375{
1376 RT_NOREF(pFpuState);
1377 puDst->au32[0] = puSrc->au32[0];
1378 puDst->au32[1] = puSrc->au32[0];
1379 puDst->au32[2] = puSrc->au32[2];
1380 puDst->au32[3] = puSrc->au32[2];
1381}
1382
1383#ifdef IEM_WITH_VEX
1384
1385IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
1386{
1387 pXState->x87.aXMM[iYRegDst].au32[0] = pXState->x87.aXMM[iYRegSrc].au32[0];
1388 pXState->x87.aXMM[iYRegDst].au32[1] = pXState->x87.aXMM[iYRegSrc].au32[0];
1389 pXState->x87.aXMM[iYRegDst].au32[2] = pXState->x87.aXMM[iYRegSrc].au32[2];
1390 pXState->x87.aXMM[iYRegDst].au32[3] = pXState->x87.aXMM[iYRegSrc].au32[2];
1391 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
1392 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
1393 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
1394 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
1395}
1396
1397
1398IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
1399{
1400 pXState->x87.aXMM[iYRegDst].au32[0] = pSrc->au32[0];
1401 pXState->x87.aXMM[iYRegDst].au32[1] = pSrc->au32[0];
1402 pXState->x87.aXMM[iYRegDst].au32[2] = pSrc->au32[2];
1403 pXState->x87.aXMM[iYRegDst].au32[3] = pSrc->au32[2];
1404 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
1405 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
1406 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
1407 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
1408}
1409
1410#endif /* IEM_WITH_VEX */
1411
1412
1413IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
1414{
1415 RT_NOREF(pFpuState);
1416 puDst->au32[0] = puSrc->au32[1];
1417 puDst->au32[1] = puSrc->au32[1];
1418 puDst->au32[2] = puSrc->au32[3];
1419 puDst->au32[3] = puSrc->au32[3];
1420}
1421
1422
1423IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
1424{
1425 RT_NOREF(pFpuState);
1426 puDst->au64[0] = uSrc;
1427 puDst->au64[1] = uSrc;
1428}
1429
1430#ifdef IEM_WITH_VEX
1431
1432IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
1433{
1434 pXState->x87.aXMM[iYRegDst].au64[0] = pXState->x87.aXMM[iYRegSrc].au64[0];
1435 pXState->x87.aXMM[iYRegDst].au64[1] = pXState->x87.aXMM[iYRegSrc].au64[0];
1436 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
1437 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
1438}
1439
1440IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
1441{
1442 pXState->x87.aXMM[iYRegDst].au64[0] = pSrc->au64[0];
1443 pXState->x87.aXMM[iYRegDst].au64[1] = pSrc->au64[0];
1444 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
1445 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
1446}
1447
1448#endif /* IEM_WITH_VEX */
1449
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette