VirtualBox
source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp @ 93783

Last change on this file since 93783 was 93783, checked in by vboxsync, 3 years ago:
VMM/IEM: Working on adding missing C version of IEMAllAImpl.asm functions. bugref:9898

/* $Id: IEMAllAImplC.cpp 93783 2022-02-16 09:47:34Z vboxsync $ */
/** @file
 * IEM - Instruction Implementation in Assembly, portable C variant.
 */

/*
 * Copyright (C) 2011-2022 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include "IEMInternal.h"
#include <VBox/vmm/vmcc.h>
#include <VBox/err.h>
#include <iprt/x86.h>
#include <iprt/uint128.h>


/*********************************************************************************************************************************
*   Defined Constants And Macros                                                                                                 *
*********************************************************************************************************************************/
#if defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
# define IEM_WITHOUT_ASSEMBLY
#endif

/**
 * Calculates the signed flag value given a result and its bit width.
 *
 * The signed flag (SF) is a duplication of the most significant bit in the
 * result.
 *
 * @returns X86_EFL_SF or 0.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )

/**
 * Calculates the zero flag value given a result.
 *
 * The zero flag (ZF) indicates whether the result is zero or not.
 *
 * @returns X86_EFL_ZF or 0.
 * @param   a_uResult       Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )

60/**
61 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after arithmetic op.
62 *
63 * CF and OF are defined to be 0 by logical operations. AF on the other hand is
64 * undefined. We do not set AF, as that seems to make the most sense (which
65 * probably makes it the most wrong in real life).
66 *
67 * @returns Status bits.
68 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
69 * @param a_uResult Unsigned result value.
70 * @param a_uSrc The source value (for AF calc).
71 * @param a_uDst The original destination value (for AF calc).
72 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
73 * @param a_CfExpr Bool expression for the carry flag (CF).
74 * @param a_OfMethod 0 for ADD-style, 1 for SUB-style.
75 */
76#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_OfMethod) \
77 do { \
78 uint32_t fEflTmp = *(a_pfEFlags); \
79 fEflTmp &= ~X86_EFL_STATUS_BITS; \
80 fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
81 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
82 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
83 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
84 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
85 fEflTmp |= ( (((a_uDst) ^ (a_uSrc) ^ (a_OfMethod == 0 ? RT_BIT_64(a_cBitsWidth - 1) : 0)) & ((a_uResult) ^ (a_uDst))) \
86 >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF; \
87 *(a_pfEFlags) = fEflTmp; \
88 } while (0)
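
/* Worked example for the macro above, 8-bit ADD 0x7f + 0x01 = 0x80:
 *   CF = 0 (no unsigned wrap-around), PF = 0 (0x80 has an odd number of set bits),
 *   AF = 1 (bit 4 of 0x80 ^ 0x01 ^ 0x7f: carry out of the low nibble),
 *   ZF = 0, SF = 1 (result bit 7 set),
 *   OF = 1 (two positive operands yielded a negative result). */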

/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
 *
 * CF and OF are defined to be 0 by logical operations. AF on the other hand is
 * undefined. We do not set AF, as that seems to make the most sense (which
 * probably makes it the most wrong in real life).
 *
 * @returns Status bits.
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_fExtra        Additional bits to set.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= (a_fExtra); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
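
/* Note: CF, OF and AF end up zero here simply because all status bits are
 *       masked off up front and only PF, ZF, SF and a_fExtra are ORed back in. */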


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
/**
 * Parity calculation table.
 *
 * The generator code:
 * @code
 * #include <stdio.h>
 *
 * int main()
 * {
 *     unsigned b;
 *     for (b = 0; b < 256; b++)
 *     {
 *         int cOnes = ( b       & 1)
 *                   + ((b >> 1) & 1)
 *                   + ((b >> 2) & 1)
 *                   + ((b >> 3) & 1)
 *                   + ((b >> 4) & 1)
 *                   + ((b >> 5) & 1)
 *                   + ((b >> 6) & 1)
 *                   + ((b >> 7) & 1);
 *         printf("    /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
 *                b,
 *                (b >> 7) & 1,
 *                (b >> 6) & 1,
 *                (b >> 5) & 1,
 *                (b >> 4) & 1,
 *                (b >> 3) & 1,
 *                (b >> 2) & 1,
 *                (b >> 1) & 1,
 *                 b       & 1,
 *                cOnes & 1 ? "0" : "X86_EFL_PF");
 *     }
 *     return 0;
 * }
 * @endcode
 */
static uint8_t const g_afParity[256] =
{
    /* 0x00 = 00000000b */ X86_EFL_PF,
    /* 0x01 = 00000001b */ 0,
    /* 0x02 = 00000010b */ 0,
    /* 0x03 = 00000011b */ X86_EFL_PF,
    /* 0x04 = 00000100b */ 0,
    /* 0x05 = 00000101b */ X86_EFL_PF,
    /* 0x06 = 00000110b */ X86_EFL_PF,
    /* 0x07 = 00000111b */ 0,
    /* 0x08 = 00001000b */ 0,
    /* 0x09 = 00001001b */ X86_EFL_PF,
    /* 0x0a = 00001010b */ X86_EFL_PF,
    /* 0x0b = 00001011b */ 0,
    /* 0x0c = 00001100b */ X86_EFL_PF,
    /* 0x0d = 00001101b */ 0,
    /* 0x0e = 00001110b */ 0,
    /* 0x0f = 00001111b */ X86_EFL_PF,
    /* 0x10 = 00010000b */ 0,
    /* 0x11 = 00010001b */ X86_EFL_PF,
    /* 0x12 = 00010010b */ X86_EFL_PF,
    /* 0x13 = 00010011b */ 0,
    /* 0x14 = 00010100b */ X86_EFL_PF,
    /* 0x15 = 00010101b */ 0,
    /* 0x16 = 00010110b */ 0,
    /* 0x17 = 00010111b */ X86_EFL_PF,
    /* 0x18 = 00011000b */ X86_EFL_PF,
    /* 0x19 = 00011001b */ 0,
    /* 0x1a = 00011010b */ 0,
    /* 0x1b = 00011011b */ X86_EFL_PF,
    /* 0x1c = 00011100b */ 0,
    /* 0x1d = 00011101b */ X86_EFL_PF,
    /* 0x1e = 00011110b */ X86_EFL_PF,
    /* 0x1f = 00011111b */ 0,
    /* 0x20 = 00100000b */ 0,
    /* 0x21 = 00100001b */ X86_EFL_PF,
    /* 0x22 = 00100010b */ X86_EFL_PF,
    /* 0x23 = 00100011b */ 0,
    /* 0x24 = 00100100b */ X86_EFL_PF,
    /* 0x25 = 00100101b */ 0,
    /* 0x26 = 00100110b */ 0,
    /* 0x27 = 00100111b */ X86_EFL_PF,
    /* 0x28 = 00101000b */ X86_EFL_PF,
    /* 0x29 = 00101001b */ 0,
    /* 0x2a = 00101010b */ 0,
    /* 0x2b = 00101011b */ X86_EFL_PF,
    /* 0x2c = 00101100b */ 0,
    /* 0x2d = 00101101b */ X86_EFL_PF,
    /* 0x2e = 00101110b */ X86_EFL_PF,
    /* 0x2f = 00101111b */ 0,
    /* 0x30 = 00110000b */ X86_EFL_PF,
    /* 0x31 = 00110001b */ 0,
    /* 0x32 = 00110010b */ 0,
    /* 0x33 = 00110011b */ X86_EFL_PF,
    /* 0x34 = 00110100b */ 0,
    /* 0x35 = 00110101b */ X86_EFL_PF,
    /* 0x36 = 00110110b */ X86_EFL_PF,
    /* 0x37 = 00110111b */ 0,
    /* 0x38 = 00111000b */ 0,
    /* 0x39 = 00111001b */ X86_EFL_PF,
    /* 0x3a = 00111010b */ X86_EFL_PF,
    /* 0x3b = 00111011b */ 0,
    /* 0x3c = 00111100b */ X86_EFL_PF,
    /* 0x3d = 00111101b */ 0,
    /* 0x3e = 00111110b */ 0,
    /* 0x3f = 00111111b */ X86_EFL_PF,
    /* 0x40 = 01000000b */ 0,
    /* 0x41 = 01000001b */ X86_EFL_PF,
    /* 0x42 = 01000010b */ X86_EFL_PF,
    /* 0x43 = 01000011b */ 0,
    /* 0x44 = 01000100b */ X86_EFL_PF,
    /* 0x45 = 01000101b */ 0,
    /* 0x46 = 01000110b */ 0,
    /* 0x47 = 01000111b */ X86_EFL_PF,
    /* 0x48 = 01001000b */ X86_EFL_PF,
    /* 0x49 = 01001001b */ 0,
    /* 0x4a = 01001010b */ 0,
    /* 0x4b = 01001011b */ X86_EFL_PF,
    /* 0x4c = 01001100b */ 0,
    /* 0x4d = 01001101b */ X86_EFL_PF,
    /* 0x4e = 01001110b */ X86_EFL_PF,
    /* 0x4f = 01001111b */ 0,
    /* 0x50 = 01010000b */ X86_EFL_PF,
    /* 0x51 = 01010001b */ 0,
    /* 0x52 = 01010010b */ 0,
    /* 0x53 = 01010011b */ X86_EFL_PF,
    /* 0x54 = 01010100b */ 0,
    /* 0x55 = 01010101b */ X86_EFL_PF,
    /* 0x56 = 01010110b */ X86_EFL_PF,
    /* 0x57 = 01010111b */ 0,
    /* 0x58 = 01011000b */ 0,
    /* 0x59 = 01011001b */ X86_EFL_PF,
    /* 0x5a = 01011010b */ X86_EFL_PF,
    /* 0x5b = 01011011b */ 0,
    /* 0x5c = 01011100b */ X86_EFL_PF,
    /* 0x5d = 01011101b */ 0,
    /* 0x5e = 01011110b */ 0,
    /* 0x5f = 01011111b */ X86_EFL_PF,
    /* 0x60 = 01100000b */ X86_EFL_PF,
    /* 0x61 = 01100001b */ 0,
    /* 0x62 = 01100010b */ 0,
    /* 0x63 = 01100011b */ X86_EFL_PF,
    /* 0x64 = 01100100b */ 0,
    /* 0x65 = 01100101b */ X86_EFL_PF,
    /* 0x66 = 01100110b */ X86_EFL_PF,
    /* 0x67 = 01100111b */ 0,
    /* 0x68 = 01101000b */ 0,
    /* 0x69 = 01101001b */ X86_EFL_PF,
    /* 0x6a = 01101010b */ X86_EFL_PF,
    /* 0x6b = 01101011b */ 0,
    /* 0x6c = 01101100b */ X86_EFL_PF,
    /* 0x6d = 01101101b */ 0,
    /* 0x6e = 01101110b */ 0,
    /* 0x6f = 01101111b */ X86_EFL_PF,
    /* 0x70 = 01110000b */ 0,
    /* 0x71 = 01110001b */ X86_EFL_PF,
    /* 0x72 = 01110010b */ X86_EFL_PF,
    /* 0x73 = 01110011b */ 0,
    /* 0x74 = 01110100b */ X86_EFL_PF,
    /* 0x75 = 01110101b */ 0,
    /* 0x76 = 01110110b */ 0,
    /* 0x77 = 01110111b */ X86_EFL_PF,
    /* 0x78 = 01111000b */ X86_EFL_PF,
    /* 0x79 = 01111001b */ 0,
    /* 0x7a = 01111010b */ 0,
    /* 0x7b = 01111011b */ X86_EFL_PF,
    /* 0x7c = 01111100b */ 0,
    /* 0x7d = 01111101b */ X86_EFL_PF,
    /* 0x7e = 01111110b */ X86_EFL_PF,
    /* 0x7f = 01111111b */ 0,
    /* 0x80 = 10000000b */ 0,
    /* 0x81 = 10000001b */ X86_EFL_PF,
    /* 0x82 = 10000010b */ X86_EFL_PF,
    /* 0x83 = 10000011b */ 0,
    /* 0x84 = 10000100b */ X86_EFL_PF,
    /* 0x85 = 10000101b */ 0,
    /* 0x86 = 10000110b */ 0,
    /* 0x87 = 10000111b */ X86_EFL_PF,
    /* 0x88 = 10001000b */ X86_EFL_PF,
    /* 0x89 = 10001001b */ 0,
    /* 0x8a = 10001010b */ 0,
    /* 0x8b = 10001011b */ X86_EFL_PF,
    /* 0x8c = 10001100b */ 0,
    /* 0x8d = 10001101b */ X86_EFL_PF,
    /* 0x8e = 10001110b */ X86_EFL_PF,
    /* 0x8f = 10001111b */ 0,
    /* 0x90 = 10010000b */ X86_EFL_PF,
    /* 0x91 = 10010001b */ 0,
    /* 0x92 = 10010010b */ 0,
    /* 0x93 = 10010011b */ X86_EFL_PF,
    /* 0x94 = 10010100b */ 0,
    /* 0x95 = 10010101b */ X86_EFL_PF,
    /* 0x96 = 10010110b */ X86_EFL_PF,
    /* 0x97 = 10010111b */ 0,
    /* 0x98 = 10011000b */ 0,
    /* 0x99 = 10011001b */ X86_EFL_PF,
    /* 0x9a = 10011010b */ X86_EFL_PF,
    /* 0x9b = 10011011b */ 0,
    /* 0x9c = 10011100b */ X86_EFL_PF,
    /* 0x9d = 10011101b */ 0,
    /* 0x9e = 10011110b */ 0,
    /* 0x9f = 10011111b */ X86_EFL_PF,
    /* 0xa0 = 10100000b */ X86_EFL_PF,
    /* 0xa1 = 10100001b */ 0,
    /* 0xa2 = 10100010b */ 0,
    /* 0xa3 = 10100011b */ X86_EFL_PF,
    /* 0xa4 = 10100100b */ 0,
    /* 0xa5 = 10100101b */ X86_EFL_PF,
    /* 0xa6 = 10100110b */ X86_EFL_PF,
    /* 0xa7 = 10100111b */ 0,
    /* 0xa8 = 10101000b */ 0,
    /* 0xa9 = 10101001b */ X86_EFL_PF,
    /* 0xaa = 10101010b */ X86_EFL_PF,
    /* 0xab = 10101011b */ 0,
    /* 0xac = 10101100b */ X86_EFL_PF,
    /* 0xad = 10101101b */ 0,
    /* 0xae = 10101110b */ 0,
    /* 0xaf = 10101111b */ X86_EFL_PF,
    /* 0xb0 = 10110000b */ 0,
    /* 0xb1 = 10110001b */ X86_EFL_PF,
    /* 0xb2 = 10110010b */ X86_EFL_PF,
    /* 0xb3 = 10110011b */ 0,
    /* 0xb4 = 10110100b */ X86_EFL_PF,
    /* 0xb5 = 10110101b */ 0,
    /* 0xb6 = 10110110b */ 0,
    /* 0xb7 = 10110111b */ X86_EFL_PF,
    /* 0xb8 = 10111000b */ X86_EFL_PF,
    /* 0xb9 = 10111001b */ 0,
    /* 0xba = 10111010b */ 0,
    /* 0xbb = 10111011b */ X86_EFL_PF,
    /* 0xbc = 10111100b */ 0,
    /* 0xbd = 10111101b */ X86_EFL_PF,
    /* 0xbe = 10111110b */ X86_EFL_PF,
    /* 0xbf = 10111111b */ 0,
    /* 0xc0 = 11000000b */ X86_EFL_PF,
    /* 0xc1 = 11000001b */ 0,
    /* 0xc2 = 11000010b */ 0,
    /* 0xc3 = 11000011b */ X86_EFL_PF,
    /* 0xc4 = 11000100b */ 0,
    /* 0xc5 = 11000101b */ X86_EFL_PF,
    /* 0xc6 = 11000110b */ X86_EFL_PF,
    /* 0xc7 = 11000111b */ 0,
    /* 0xc8 = 11001000b */ 0,
    /* 0xc9 = 11001001b */ X86_EFL_PF,
    /* 0xca = 11001010b */ X86_EFL_PF,
    /* 0xcb = 11001011b */ 0,
    /* 0xcc = 11001100b */ X86_EFL_PF,
    /* 0xcd = 11001101b */ 0,
    /* 0xce = 11001110b */ 0,
    /* 0xcf = 11001111b */ X86_EFL_PF,
    /* 0xd0 = 11010000b */ 0,
    /* 0xd1 = 11010001b */ X86_EFL_PF,
    /* 0xd2 = 11010010b */ X86_EFL_PF,
    /* 0xd3 = 11010011b */ 0,
    /* 0xd4 = 11010100b */ X86_EFL_PF,
    /* 0xd5 = 11010101b */ 0,
    /* 0xd6 = 11010110b */ 0,
    /* 0xd7 = 11010111b */ X86_EFL_PF,
    /* 0xd8 = 11011000b */ X86_EFL_PF,
    /* 0xd9 = 11011001b */ 0,
    /* 0xda = 11011010b */ 0,
    /* 0xdb = 11011011b */ X86_EFL_PF,
    /* 0xdc = 11011100b */ 0,
    /* 0xdd = 11011101b */ X86_EFL_PF,
    /* 0xde = 11011110b */ X86_EFL_PF,
    /* 0xdf = 11011111b */ 0,
    /* 0xe0 = 11100000b */ 0,
    /* 0xe1 = 11100001b */ X86_EFL_PF,
    /* 0xe2 = 11100010b */ X86_EFL_PF,
    /* 0xe3 = 11100011b */ 0,
    /* 0xe4 = 11100100b */ X86_EFL_PF,
    /* 0xe5 = 11100101b */ 0,
    /* 0xe6 = 11100110b */ 0,
    /* 0xe7 = 11100111b */ X86_EFL_PF,
    /* 0xe8 = 11101000b */ X86_EFL_PF,
    /* 0xe9 = 11101001b */ 0,
    /* 0xea = 11101010b */ 0,
    /* 0xeb = 11101011b */ X86_EFL_PF,
    /* 0xec = 11101100b */ 0,
    /* 0xed = 11101101b */ X86_EFL_PF,
    /* 0xee = 11101110b */ X86_EFL_PF,
    /* 0xef = 11101111b */ 0,
    /* 0xf0 = 11110000b */ X86_EFL_PF,
    /* 0xf1 = 11110001b */ 0,
    /* 0xf2 = 11110010b */ 0,
    /* 0xf3 = 11110011b */ X86_EFL_PF,
    /* 0xf4 = 11110100b */ 0,
    /* 0xf5 = 11110101b */ X86_EFL_PF,
    /* 0xf6 = 11110110b */ X86_EFL_PF,
    /* 0xf7 = 11110111b */ 0,
    /* 0xf8 = 11111000b */ 0,
    /* 0xf9 = 11111001b */ X86_EFL_PF,
    /* 0xfa = 11111010b */ X86_EFL_PF,
    /* 0xfb = 11111011b */ 0,
    /* 0xfc = 11111100b */ X86_EFL_PF,
    /* 0xfd = 11111101b */ 0,
    /* 0xfe = 11111110b */ 0,
    /* 0xff = 11111111b */ X86_EFL_PF,
};
#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */



/*
 * There are a few 64-bit on 32-bit things we'd rather do in C. Actually, doing
 * it all in C is probably safer atm., optimize what's necessary later, maybe.
 */
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)


/*********************************************************************************************************************************
*   Binary Operations                                                                                                            *
*********************************************************************************************************************************/

/*
 * ADD
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, 0);
}
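
/* The CF expression above relies on unsigned wrap-around: the sum is smaller
 * than the original destination exactly when a carry out of the top bit
 * occurred, e.g. 0xffffffffffffffff + 2 wraps to 1. */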

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * ADC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, 0);
    }
}
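
/* With CF fed in, the carry test must be 'uResult <= uDst' rather than '<':
 * for uSrc = 0xffffffffffffffff the sum uDst + uSrc + 1 wraps to exactly
 * uDst, and a carry has still been produced. */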

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, 0);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SUB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst < uSrc, 1);
}
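
/* Borrow (CF) on subtraction occurs exactly when the subtrahend is larger
 * than the minuend, hence the 'uDst < uSrc' expression above. */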

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst < uSrc, 1);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst < uSrc, 1);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst < uSrc, 1);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SBB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst <= uSrc, 1);
    }
}
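
/* With the borrow fed in, 'uDst <= uSrc' is the right test: uSrc equal to
 * uDst gives uResult = -1 via wrap-around, which also borrows. */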

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst <= uSrc, 1);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst <= uSrc, 1);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst <= uSrc, 1);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * OR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * XOR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * AND
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * CMP
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDstTmp = *puDst;
    iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDstTmp = *puDst;
    iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDstTmp = *puDst;
    iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDstTmp = *puDst;
    iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * TEST
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * LOCK prefixed variants of the above
 */

/** Locked binary operand operation. */
# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    do { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp    = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, uSrc, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)
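
/* The loop above is a classic compare-and-swap retry: the operation is done
 * on a local copy, and ASMAtomicCmpXchgEx only commits it if *puDst still
 * holds the value the copy was made from; otherwise uOld is refreshed with
 * the current memory value and the operation is redone. */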


# define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint ## a_cBitsWidth ## _t uSrc, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
    }

EMIT_LOCKED_BIN_OP(add, 64)
EMIT_LOCKED_BIN_OP(adc, 64)
EMIT_LOCKED_BIN_OP(sub, 64)
EMIT_LOCKED_BIN_OP(sbb, 64)
EMIT_LOCKED_BIN_OP(or, 64)
EMIT_LOCKED_BIN_OP(xor, 64)
EMIT_LOCKED_BIN_OP(and, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(add, 32)
EMIT_LOCKED_BIN_OP(adc, 32)
EMIT_LOCKED_BIN_OP(sub, 32)
EMIT_LOCKED_BIN_OP(sbb, 32)
EMIT_LOCKED_BIN_OP(or, 32)
EMIT_LOCKED_BIN_OP(xor, 32)
EMIT_LOCKED_BIN_OP(and, 32)

EMIT_LOCKED_BIN_OP(add, 16)
EMIT_LOCKED_BIN_OP(adc, 16)
EMIT_LOCKED_BIN_OP(sub, 16)
EMIT_LOCKED_BIN_OP(sbb, 16)
EMIT_LOCKED_BIN_OP(or, 16)
EMIT_LOCKED_BIN_OP(xor, 16)
EMIT_LOCKED_BIN_OP(and, 16)

EMIT_LOCKED_BIN_OP(add, 8)
EMIT_LOCKED_BIN_OP(adc, 8)
EMIT_LOCKED_BIN_OP(sub, 8)
EMIT_LOCKED_BIN_OP(sbb, 8)
EMIT_LOCKED_BIN_OP(or, 8)
EMIT_LOCKED_BIN_OP(xor, 8)
EMIT_LOCKED_BIN_OP(and, 8)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * Bit operations (same signature as above).
 */

/*
 * BT
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t uDst = *puDst;
    if (uDst & RT_BIT_64(uSrc))
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
}
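
/* BT only reads the selected bit into CF; the destination is never written.
 * The remaining status flags are architecturally undefined, so they are
 * simply filled in as after a logical operation, per the note above. */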

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    }
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    }
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    }
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTS
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst  = *puDst;
    if (uDst & fMask)
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


EMIT_LOCKED_BIN_OP(btc, 64)
EMIT_LOCKED_BIN_OP(btr, 64)
EMIT_LOCKED_BIN_OP(bts, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(btc, 32)
EMIT_LOCKED_BIN_OP(btr, 32)
EMIT_LOCKED_BIN_OP(bts, 32)

EMIT_LOCKED_BIN_OP(btc, 16)
EMIT_LOCKED_BIN_OP(btr, 16)
EMIT_LOCKED_BIN_OP(bts, 16)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * BSF - first (least significant) bit set
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitFirstSetU64(uSrc);
    if (iBit)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}
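
/* ASMBitFirstSetU64 returns the 1-based index of the least significant set
 * bit and 0 when the input is zero, hence the 'iBit - 1' adjustment. When
 * uSrc is zero only ZF is set and the destination is left untouched. */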

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitFirstSetU32(uSrc);
    if (iBit)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitFirstSetU16(uSrc);
    if (iBit)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BSR - last (most significant) bit set
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitLastSetU64(uSrc);
    if (uSrc)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitLastSetU32(uSrc);
    if (uSrc)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitLastSetU16(uSrc);
    if (uSrc)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XCHG
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64,(uint64_t *puMem, uint64_t *puReg))
{
    /* XCHG implies LOCK. */
    uint64_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
}
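
/* A memory-destination XCHG is implicitly locked on x86, so the exchange is
 * emulated with a compare-and-swap loop instead of a plain load/store pair. */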

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32,(uint32_t *puMem, uint32_t *puReg))
{
    /* XCHG implies LOCK. */
    uint32_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU32(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16,(uint16_t *puMem, uint16_t *puReg))
{
    /* XCHG implies LOCK. */
    uint16_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU16(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8,(uint8_t *puMem, uint8_t *puReg))
{
    /* XCHG implies LOCK. */
    uint8_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU8(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XADD and LOCK XADD.
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst;
    iemAImpl_add_u64(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64_locked,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
{
    uint64_t uOld = ASMAtomicUoReadU64(puDst);
    uint64_t uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u64(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU64(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}
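
/* The flags are computed into the local fEflTmp on every attempt and only
 * committed to *pfEFlags once the compare-and-swap has succeeded, so a
 * failed attempt cannot leak a stale flag state to the guest. */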

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst;
    iemAImpl_add_u32(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32_locked,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
{
    uint32_t uOld = ASMAtomicUoReadU32(puDst);
    uint32_t uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u32(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU32(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst;
    iemAImpl_add_u16(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16_locked,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
{
    uint16_t uOld = ASMAtomicUoReadU16(puDst);
    uint16_t uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u16(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU16(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst;
    iemAImpl_add_u8(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8_locked,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
{
    uint8_t  uOld = ASMAtomicUoReadU8(puDst);
    uint8_t  uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u8(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU8(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * MUL
 */

IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
{
    RTUINT128U Result;
    RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
    *pu64RAX = Result.s.Lo;
    *pu64RDX = Result.s.Hi;

    /* MUL EFLAGS according to Skylake (similar to IMUL). */
    *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
    if (Result.s.Lo & RT_BIT_64(63))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    if (Result.s.Hi != 0)
        *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
    return 0;
}
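
/* Example: 0xffffffffffffffff * 2 = 0x1fffffffffffffffe, so RDX:RAX becomes
 * 0x0000000000000001:0xfffffffffffffffe and CF/OF are set because the high
 * half of the product is non-zero. */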

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Factor, uint32_t *pfEFlags))
{
    RTUINT64U Result;
    Result.u = (uint64_t)*pu32RAX * u32Factor;
    *pu32RAX = Result.s.Lo;
    *pu32RDX = Result.s.Hi;

    /* MUL EFLAGS according to Skylake (similar to IMUL). */
    *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
    if (Result.s.Lo & RT_BIT_32(31))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    if (Result.s.Hi != 0)
        *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
    return 0;
}


IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Factor, uint32_t *pfEFlags))
{
    RTUINT32U Result;
    Result.u = (uint32_t)*pu16RAX * u16Factor;
    *pu16RAX = Result.s.Lo;
    *pu16RDX = Result.s.Hi;

    /* MUL EFLAGS according to Skylake (similar to IMUL). */
    *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
    if (Result.s.Lo & RT_BIT_32(15))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    if (Result.s.Hi != 0)
        *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
    return 0;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * IMUL
 */

IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
{
    RTUINT128U Result;
    *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
                   /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
                   /* Skylake may set: */       | X86_EFL_PF);

    if ((int64_t)*pu64RAX >= 0)
    {
        if ((int64_t)u64Factor >= 0)
        {
            RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
        else
        {
            RTUInt128MulU64ByU64(&Result, *pu64RAX, UINT64_C(0) - u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            RTUInt128AssignNeg(&Result);
        }
    }
    else
    {
        if ((int64_t)u64Factor >= 0)
        {
            RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            RTUInt128AssignNeg(&Result);
        }
        else
        {
            RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, UINT64_C(0) - u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
    }
    *pu64RAX = Result.s.Lo;
    if (Result.s.Lo & RT_BIT_64(63))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    *pu64RDX = Result.s.Hi;

    return 0;
}
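
/* The signed multiply is reduced to an unsigned one on the operand
 * magnitudes, negating the 128-bit product afterwards if the signs differ.
 * Note the asymmetric overflow bounds: a positive result may only reach
 * INT64_MAX (hence '>= 0x8000000000000000' overflows), while a negative
 * result may reach -2^63 (hence only '> 0x8000000000000000' overflows). */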


IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
/** @todo Testcase: IMUL 2 and 3 operands. */
    uint64_t u64Ign;
    iemAImpl_imul_u64(puDst, &u64Ign, uSrc, pfEFlags);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Factor, uint32_t *pfEFlags))
{
    RTUINT64U Result;
    *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
                   /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
                   /* Skylake may set: */       | X86_EFL_PF);

    if ((int32_t)*pu32RAX >= 0)
    {
        if ((int32_t)u32Factor >= 0)
        {
            Result.u = (uint64_t)*pu32RAX * u32Factor;
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
        else
        {
            Result.u = (uint64_t)*pu32RAX * (UINT32_C(0) - u32Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT64_C(0) - Result.u;
        }
    }
    else
    {
        if ((int32_t)u32Factor >= 0)
        {
            Result.u = (uint64_t)(UINT32_C(0) - *pu32RAX) * u32Factor;
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT64_C(0) - Result.u;
        }
        else
        {
            Result.u = (uint64_t)(UINT32_C(0) - *pu32RAX) * (UINT32_C(0) - u32Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
    }
    *pu32RAX = Result.s.Lo;
    if (Result.s.Lo & RT_BIT_32(31))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    *pu32RDX = Result.s.Hi;

    return 0;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
/** @todo Testcase: IMUL 2 and 3 operands. */
    uint32_t u32Ign;
    iemAImpl_imul_u32(puDst, &u32Ign, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Factor, uint32_t *pfEFlags))
{
    RTUINT32U Result;
    *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
                   /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
                   /* Skylake may set: */       | X86_EFL_PF);

    if ((int16_t)*pu16RAX >= 0)
    {
        if ((int16_t)u16Factor >= 0)
        {
            Result.u = (uint32_t)*pu16RAX * u16Factor;
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
        else
        {
            Result.u = (uint32_t)*pu16RAX * (UINT16_C(0) - u16Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT32_C(0) - Result.u;
        }
    }
    else
    {
        if ((int16_t)u16Factor >= 0)
        {
            Result.u = (uint32_t)(UINT16_C(0) - *pu16RAX) * u16Factor;
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT32_C(0) - Result.u;
        }
        else
        {
            Result.u = (uint32_t)(UINT16_C(0) - *pu16RAX) * (UINT16_C(0) - u16Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
    }
    *pu16RAX = Result.s.Lo;
    if (Result.s.Lo & RT_BIT_32(15))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    *pu16RDX = Result.s.Hi;

    return 0;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
/** @todo Testcase: IMUL 2 and 3 operands. */
    uint16_t u16Ign;
    iemAImpl_imul_u16(puDst, &u16Ign, uSrc, pfEFlags);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * DIV
 */

IEM_DECL_IMPL_DEF(int, iemAImpl_div_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    if (   u64Divisor != 0
        && *pu64RDX < u64Divisor)
    {
        RTUINT128U Dividend;
        Dividend.s.Lo = *pu64RAX;
        Dividend.s.Hi = *pu64RDX;

        RTUINT128U Divisor;
        Divisor.s.Lo = u64Divisor;
        Divisor.s.Hi = 0;

        RTUINT128U Remainder;
        RTUINT128U Quotient;
# ifdef __GNUC__ /* GCC may be really annoying in function. */
        Quotient.s.Lo = 0;
        Quotient.s.Hi = 0;
# endif
        RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);
        Assert(Quotient.s.Hi == 0);
        Assert(Remainder.s.Hi == 0);

        *pu64RAX = Quotient.s.Lo;
        *pu64RDX = Remainder.s.Lo;
        /** @todo research the undefined DIV flags. */
        return 0;
    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}
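
/* The '*pu64RDX < u64Divisor' guard is the exact condition for the quotient
 * to fit in 64 bits. Example: RDX:RAX = 0x1:0x0 (2^64) divided by 2 gives
 * the quotient 0x8000000000000000 with remainder 0, and 1 < 2 lets it pass. */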

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(int, iemAImpl_div_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    if (   u32Divisor != 0
        && *pu32RDX < u32Divisor)
    {
        RTUINT64U Dividend;
        Dividend.s.Lo = *pu32RAX;
        Dividend.s.Hi = *pu32RDX;

        RTUINT64U Remainder;
        RTUINT64U Quotient;
        Quotient.u  = Dividend.u / u32Divisor;
        Remainder.u = Dividend.u % u32Divisor;

        *pu32RAX = Quotient.s.Lo;
        *pu32RDX = Remainder.s.Lo;
        /** @todo research the undefined DIV flags. */
        return 0;
    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}


IEM_DECL_IMPL_DEF(int, iemAImpl_div_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    if (   u16Divisor != 0
        && *pu16RDX < u16Divisor)
    {
        RTUINT32U Dividend;
        Dividend.s.Lo = *pu16RAX;
        Dividend.s.Hi = *pu16RDX;

        RTUINT32U Remainder;
        RTUINT32U Quotient;
        Quotient.u  = Dividend.u / u16Divisor;
        Remainder.u = Dividend.u % u16Divisor;

        *pu16RAX = Quotient.s.Lo;
        *pu16RDX = Remainder.s.Lo;
        /** @todo research the undefined DIV flags. */
        return 0;
    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1744
1745
1746/*
1747 * IDIV
1748 */
1749
1750IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
1751{
1752 /* Note! Skylake leaves all flags alone. */
1753 RT_NOREF_PV(pfEFlags);
1754
1755 /** @todo overflow checks */
1756 if (u64Divisor != 0)
1757 {
1758 /*
1759 * Convert to unsigned division.
1760 */
1761 RTUINT128U Dividend;
1762 Dividend.s.Lo = *pu64RAX;
1763 Dividend.s.Hi = *pu64RDX;
1764 if ((int64_t)*pu64RDX < 0)
1765 RTUInt128AssignNeg(&Dividend);
1766
1767 RTUINT128U Divisor;
1768 Divisor.s.Hi = 0;
1769 if ((int64_t)u64Divisor >= 0)
1770 Divisor.s.Lo = u64Divisor;
1771 else
1772 Divisor.s.Lo = UINT64_C(0) - u64Divisor;
1773
1774 RTUINT128U Remainder;
1775 RTUINT128U Quotient;
1776# ifdef __GNUC__ /* GCC maybe really annoying. */
1777 Quotient.s.Lo = 0;
1778 Quotient.s.Hi = 0;
1779# endif
1780 RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);
1781
1782 /*
1783 * Setup the result, checking for overflows.
1784 */
1785 if ((int64_t)u64Divisor >= 0)
1786 {
1787 if ((int64_t)*pu64RDX >= 0)
1788 {
1789 /* Positive divisor, positive dividend => result positive. */
1790 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
1791 {
1792 *pu64RAX = Quotient.s.Lo;
1793 *pu64RDX = Remainder.s.Lo;
1794 return 0;
1795 }
1796 }
1797 else
1798 {
1799 /* Positive divisor, positive dividend => result negative. */
1800 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
1801 {
1802 *pu64RAX = UINT64_C(0) - Quotient.s.Lo;
1803 *pu64RDX = UINT64_C(0) - Remainder.s.Lo;
1804 return 0;
1805 }
1806 }
1807 }
1808 else
1809 {
1810 if ((int64_t)*pu64RDX >= 0)
1811 {
1812 /* Negative divisor, positive dividend => negative quotient, positive remainder. */
1813 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
1814 {
1815 *pu64RAX = UINT64_C(0) - Quotient.s.Lo;
1816 *pu64RDX = Remainder.s.Lo;
1817 return 0;
1818 }
1819 }
1820 else
1821 {
1822 /* Negative divisor, negative dividend => positive quotient, negative remainder. */
1823 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
1824 {
1825 *pu64RAX = Quotient.s.Lo;
1826 *pu64RDX = UINT64_C(0) - Remainder.s.Lo;
1827 return 0;
1828 }
1829 }
1830 }
1831 }
1832 /* #DE */
1833 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1834}
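
/* All four sign cases above apply the same overflow rule: the quotient must
   fit the signed 64-bit range, i.e. its magnitude may be at most 2^63 - 1
   when the result is positive and at most 2^63 when it is negative (so
   INT64_MIN is representable while +2^63 is not).  Everything else is #DE. */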

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    /** @todo overflow checks */
    if (u32Divisor != 0)
    {
        /*
         * Convert to unsigned division.
         */
        RTUINT64U Dividend;
        Dividend.s.Lo = *pu32RAX;
        Dividend.s.Hi = *pu32RDX;
        if ((int32_t)*pu32RDX < 0)
            Dividend.u = UINT64_C(0) - Dividend.u;

        uint32_t u32DivisorPositive;
        if ((int32_t)u32Divisor >= 0)
            u32DivisorPositive = u32Divisor;
        else
            u32DivisorPositive = UINT32_C(0) - u32Divisor;

        RTUINT64U Remainder;
        RTUINT64U Quotient;
        Quotient.u  = Dividend.u / u32DivisorPositive;
        Remainder.u = Dividend.u % u32DivisorPositive;

        /*
         * Set up the result, checking for overflows.
         */
        if ((int32_t)u32Divisor >= 0)
        {
            if ((int32_t)*pu32RDX >= 0)
            {
                /* Positive divisor, positive dividend => result positive. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint32_t)INT32_MAX)
                {
                    *pu32RAX = Quotient.s.Lo;
                    *pu32RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Positive divisor, negative dividend => result negative. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(31))
                {
                    *pu32RAX = UINT32_C(0) - Quotient.s.Lo;
                    *pu32RDX = UINT32_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
        else
        {
            if ((int32_t)*pu32RDX >= 0)
            {
                /* Negative divisor, positive dividend => negative quotient, positive remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(31))
                {
                    *pu32RAX = UINT32_C(0) - Quotient.s.Lo;
                    *pu32RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Negative divisor, negative dividend => positive quotient, negative remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint32_t)INT32_MAX)
                {
                    *pu32RAX = Quotient.s.Lo;
                    *pu32RDX = UINT32_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}


IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    if (u16Divisor != 0)
    {
        /*
         * Convert to unsigned division.
         */
        RTUINT32U Dividend;
        Dividend.s.Lo = *pu16RAX;
        Dividend.s.Hi = *pu16RDX;
        if ((int16_t)*pu16RDX < 0)
            Dividend.u = UINT32_C(0) - Dividend.u;

        uint16_t u16DivisorPositive;
        if ((int16_t)u16Divisor >= 0)
            u16DivisorPositive = u16Divisor;
        else
            u16DivisorPositive = UINT16_C(0) - u16Divisor;

        RTUINT32U Remainder;
        RTUINT32U Quotient;
        Quotient.u  = Dividend.u / u16DivisorPositive;
        Remainder.u = Dividend.u % u16DivisorPositive;

        /*
         * Set up the result, checking for overflows.
         */
        if ((int16_t)u16Divisor >= 0)
        {
            if ((int16_t)*pu16RDX >= 0)
            {
                /* Positive divisor, positive dividend => result positive. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint16_t)INT16_MAX)
                {
                    *pu16RAX = Quotient.s.Lo;
                    *pu16RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Positive divisor, negative dividend => result negative. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(15))
                {
                    *pu16RAX = UINT16_C(0) - Quotient.s.Lo;
                    *pu16RDX = UINT16_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
        else
        {
            if ((int16_t)*pu16RDX >= 0)
            {
                /* Negative divisor, positive dividend => negative quotient, positive remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(15))
                {
                    *pu16RAX = UINT16_C(0) - Quotient.s.Lo;
                    *pu16RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Negative divisor, negative dividend => positive quotient, negative remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint16_t)INT16_MAX)
                {
                    *pu16RAX = Quotient.s.Lo;
                    *pu16RDX = UINT16_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*********************************************************************************************************************************
*   Unary operations.                                                                                                            *
*********************************************************************************************************************************/

IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst + 1;
    *puDst = uResult;

    /*
     * Calc EFLAGS.
     * CF is NOT modified for hysterical raisins (allegedly for carrying and
     * borrowing in arithmetic loops on the Intel 8008).
     */
    uint32_t fEfl = *pfEFlags & ~(X86_EFL_STATUS_BITS & ~X86_EFL_CF);
    fEfl |= g_afParity[uResult & 0xff];
    fEfl |= ((uint32_t)uResult ^ (uint32_t)uDst) & X86_EFL_AF;
    fEfl |= X86_EFL_CALC_ZF(uResult);
    fEfl |= X86_EFL_CALC_SF(uResult, 64);
    fEfl |= (((uDst ^ RT_BIT_64(63)) & uResult) >> (64 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF;
    *pfEFlags = fEfl;
}
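
/* AF is the carry out of bit 3; with an addend of 1 it is simply bit 4 of
   uResult ^ uDst (the general three-operand form would XOR in the source as
   well).  OF can only fire for 0x7FFF...F + 1, i.e. when the destination
   sign bit is clear and the result sign bit is set. */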


IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst - 1;
    *puDst = uResult;

    /*
     * Calc EFLAGS.
     * CF is NOT modified for hysterical raisins (allegedly for carrying and
     * borrowing in arithmetic loops on the Intel 8008).
     */
    uint32_t fEfl = *pfEFlags & ~(X86_EFL_STATUS_BITS & ~X86_EFL_CF);
    fEfl |= g_afParity[uResult & 0xff];
    fEfl |= ((uint32_t)uResult ^ (uint32_t)uDst) & X86_EFL_AF;
    fEfl |= X86_EFL_CALC_ZF(uResult);
    fEfl |= X86_EFL_CALC_SF(uResult, 64);
    fEfl |= ((uDst & (uResult ^ RT_BIT_64(63))) >> (64 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF;
    *pfEFlags = fEfl;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = ~uDst;
    *puDst = uResult;
    /* EFLAGS are not modified. */
    RT_NOREF_PV(pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
{
    uint64_t uDst    = 0;
    uint64_t uSrc    = *puDst;
    uint64_t uResult = uDst - uSrc;
    *puDst = uResult;

    /* Calc EFLAGS. */
    uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
    fEfl |= (uSrc != 0) << X86_EFL_CF_BIT;
    fEfl |= g_afParity[uResult & 0xff];
    fEfl |= ((uint32_t)uResult ^ (uint32_t)uDst) & X86_EFL_AF;
    fEfl |= X86_EFL_CALC_ZF(uResult);
    fEfl |= X86_EFL_CALC_SF(uResult, 64);
    fEfl |= ((uSrc & uResult) >> (64 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF;
    *pfEFlags = fEfl;
}
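
/* For NEG (0 - src): CF is set whenever the source is non-zero, and OF only
   for INT64_MIN, the one value whose negation does not fit -- the only case
   in which source and result have the sign bit set at the same time. */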


/** 64-bit locked unary operand operation. */
# define DO_LOCKED_UNARY_OP_U64(a_Mnemonic) \
    do { \
        uint64_t uOld = ASMAtomicReadU64(puDst); \
        uint64_t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp    = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u64(&uTmp, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU64(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)
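
/* This is the usual compare-and-swap retry loop: the unary operation runs on
   a private copy, which is only published if *puDst still holds the value the
   copy was taken from; on a race ASMAtomicCmpXchgExU64 hands back the fresh
   value in uOld and the operation is simply redone.  EFLAGS come from the
   iteration that finally succeeds. */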

IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64_locked,(uint64_t *puDst, uint32_t *pfEFlags))
{
    DO_LOCKED_UNARY_OP_U64(inc);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64_locked,(uint64_t *puDst, uint32_t *pfEFlags))
{
    DO_LOCKED_UNARY_OP_U64(dec);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64_locked,(uint64_t *puDst, uint32_t *pfEFlags))
{
    DO_LOCKED_UNARY_OP_U64(not);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64_locked,(uint64_t *puDst, uint32_t *pfEFlags))
{
    DO_LOCKED_UNARY_OP_U64(neg);
}


/* Shift and rotate. */
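
/* Note: each helper masks the count with 63 up front, matching 64-bit
   operands on real CPUs where only the low six bits of CL are used; a
   masked count of zero leaves both the operand and EFLAGS untouched. */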

IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
{
    cShift &= 63;
    if (cShift)
    {
        uint64_t uDst = *puDst;
        uint64_t uResult;
        uResult  = uDst << cShift;
        uResult |= uDst >> (64 - cShift);
        *puDst = uResult;

        /* Calc EFLAGS.  The OF bit is undefined if cShift > 1, we implement
           it the same way as for 1 bit shifts. */
        AssertCompile(X86_EFL_CF_BIT == 0);
        uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF | X86_EFL_OF);
        uint32_t fCarry = (uResult & 1);
        fEfl |= fCarry;
        fEfl |= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
        *pfEFlags = fEfl;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
{
    cShift &= 63;
    if (cShift)
    {
        uint64_t uDst = *puDst;
        uint64_t uResult;
        uResult  = uDst >> cShift;
        uResult |= uDst << (64 - cShift);
        *puDst = uResult;

        /* Calc EFLAGS.  The OF bit is undefined if cShift > 1, we implement
           it the same way as for 1 bit shifts (OF = XOR of the two most
           significant result bits). */
        AssertCompile(X86_EFL_CF_BIT == 0);
        uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF | X86_EFL_OF);
        uint32_t fCarry = (uResult >> 63) & X86_EFL_CF;
        fEfl |= fCarry;
        fEfl |= (((uResult >> 62) ^ fCarry) << X86_EFL_OF_BIT) & X86_EFL_OF;
        *pfEFlags = fEfl;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_rcl_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
{
    cShift &= 63;
    if (cShift)
    {
        uint32_t fEfl = *pfEFlags;
        uint64_t uDst = *puDst;
        uint64_t uResult;
        uResult = uDst << cShift;
        AssertCompile(X86_EFL_CF_BIT == 0);
        if (cShift > 1)
            uResult |= uDst >> (65 - cShift);
        uResult |= (uint64_t)(fEfl & X86_EFL_CF) << (cShift - 1);
        *puDst = uResult;

        /* Calc EFLAGS.  The OF bit is undefined if cShift > 1, we implement
           it the same way as for 1 bit shifts. */
        uint32_t fCarry = (uDst >> (64 - cShift)) & X86_EFL_CF;
        fEfl &= ~(X86_EFL_CF | X86_EFL_OF);
        fEfl |= fCarry;
        fEfl |= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
        *pfEFlags = fEfl;
    }
}
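
/* RCL rotates through 65 bits: CF joined with the operand.  The three pieces
   ORed together above are the low bits shifted up, the bits rotated out past
   the top (only when cShift > 1, hence the 65 - cShift), and the incoming CF
   dropped in at bit cShift - 1. */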


IEM_DECL_IMPL_DEF(void, iemAImpl_rcr_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
{
    cShift &= 63;
    if (cShift)
    {
        uint32_t fEfl = *pfEFlags;
        uint64_t uDst = *puDst;
        uint64_t uResult;
        uResult = uDst >> cShift;
        AssertCompile(X86_EFL_CF_BIT == 0);
        if (cShift > 1)
            uResult |= uDst << (65 - cShift);
        uResult |= (uint64_t)(fEfl & X86_EFL_CF) << (64 - cShift);
        *puDst = uResult;

        /* Calc EFLAGS.  The OF bit is undefined if cShift > 1, we implement
           it the same way as for 1 bit shifts. */
        uint32_t fCarry = (uDst >> (cShift - 1)) & X86_EFL_CF;
        fEfl &= ~(X86_EFL_CF | X86_EFL_OF);
        fEfl |= fCarry;
        fEfl |= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
        *pfEFlags = fEfl;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_shl_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
{
    cShift &= 63;
    if (cShift)
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst << cShift;
        *puDst = uResult;

        /* Calc EFLAGS.  The OF bit is undefined if cShift > 1, we implement
           it the same way as for 1 bit shifts.  The AF bit is undefined, we
           always set it to zero atm. */
        AssertCompile(X86_EFL_CF_BIT == 0);
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
        uint32_t fCarry = (uDst >> (64 - cShift)) & X86_EFL_CF;
        fEfl |= fCarry;
        fEfl |= ((uResult >> 63) ^ fCarry) << X86_EFL_OF_BIT;
        fEfl |= X86_EFL_CALC_SF(uResult, 64);
        fEfl |= X86_EFL_CALC_ZF(uResult);
        fEfl |= g_afParity[uResult & 0xff];
        *pfEFlags = fEfl;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_shr_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
{
    cShift &= 63;
    if (cShift)
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst >> cShift;
        *puDst = uResult;

        /* Calc EFLAGS.  The OF bit is undefined if cShift > 1, we implement
           it the same way as for 1 bit shifts.  The AF bit is undefined, we
           always set it to zero atm. */
        AssertCompile(X86_EFL_CF_BIT == 0);
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF;
        fEfl |= (uDst >> 63) << X86_EFL_OF_BIT;
        fEfl |= X86_EFL_CALC_SF(uResult, 64);
        fEfl |= X86_EFL_CALC_ZF(uResult);
        fEfl |= g_afParity[uResult & 0xff];
        *pfEFlags = fEfl;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sar_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
{
    cShift &= 63;
    if (cShift)
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = (int64_t)uDst >> cShift;
        *puDst = uResult;

        /* Calc EFLAGS.  The OF bit is undefined if cShift > 1, we implement
           it the same way as for 1 bit shifts (0).  The AF bit is undefined,
           we always set it to zero atm. */
        AssertCompile(X86_EFL_CF_BIT == 0);
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF;
        fEfl |= X86_EFL_CALC_SF(uResult, 64);
        fEfl |= X86_EFL_CALC_ZF(uResult);
        fEfl |= g_afParity[uResult & 0xff];
        *pfEFlags = fEfl;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_shld_u64,(uint64_t *puDst, uint64_t uSrc, uint8_t cShift, uint32_t *pfEFlags))
{
    cShift &= 63;
    if (cShift)
    {
        uint64_t uDst = *puDst;
        uint64_t uResult;
        uResult  = uDst << cShift;
        uResult |= uSrc >> (64 - cShift);
        *puDst = uResult;

        /* Calc EFLAGS.  The OF bit is undefined if cShift > 1, we implement
           it the same way as for 1 bit shifts.  The AF bit is undefined,
           we always set it to zero atm. */
        AssertCompile(X86_EFL_CF_BIT == 0);
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
        fEfl |= (uDst >> (64 - cShift)) & X86_EFL_CF;
        fEfl |= (uint32_t)((uDst >> 63) ^ (uint32_t)(uResult >> 63)) << X86_EFL_OF_BIT;
        fEfl |= X86_EFL_CALC_SF(uResult, 64);
        fEfl |= X86_EFL_CALC_ZF(uResult);
        fEfl |= g_afParity[uResult & 0xff];
        *pfEFlags = fEfl;
    }
}
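
/* SHLD fills the bits vacated at the low end from the top of uSrc.
   Illustration: uDst = 0x123456789ABCDEF0, uSrc = 0xF000000000000000 and
   cShift = 4 gives (uDst << 4) | (uSrc >> 60) = 0x23456789ABCDEF0F. */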


IEM_DECL_IMPL_DEF(void, iemAImpl_shrd_u64,(uint64_t *puDst, uint64_t uSrc, uint8_t cShift, uint32_t *pfEFlags))
{
    cShift &= 63;
    if (cShift)
    {
        uint64_t uDst = *puDst;
        uint64_t uResult;
        uResult  = uDst >> cShift;
        uResult |= uSrc << (64 - cShift);
        *puDst = uResult;

        /* Calc EFLAGS.  The OF bit is undefined if cShift > 1, we implement
           it the same way as for 1 bit shifts.  The AF bit is undefined,
           we always set it to zero atm. */
        AssertCompile(X86_EFL_CF_BIT == 0);
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS;
        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF;
        fEfl |= (uint32_t)((uDst >> 63) ^ (uint32_t)(uResult >> 63)) << X86_EFL_OF_BIT;
        fEfl |= X86_EFL_CALC_SF(uResult, 64);
        fEfl |= X86_EFL_CALC_ZF(uResult);
        fEfl |= g_afParity[uResult & 0xff];
        *pfEFlags = fEfl;
    }
}


#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */


IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
{
    if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
    {
        *pu16Dst &= X86_SEL_MASK_OFF_RPL;
        *pu16Dst |= u16Src & X86_SEL_RPL;

        *pfEFlags |= X86_EFL_ZF;
    }
    else
        *pfEFlags &= ~X86_EFL_ZF;
}
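
/* ARPL raises the RPL field (bits 1:0) of the destination selector to that of
   the source when the destination's is lower, signalling the adjustment via
   ZF; otherwise the selector is left alone and ZF is cleared. */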


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
                                                      PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
{
    RTUINT128U u128Tmp = *pu128Dst;
    if (   u128Tmp.s.Lo == pu128RaxRdx->s.Lo
        && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
    {
        *pu128Dst = *pu128RbxRcx;
        *pEFlags |= X86_EFL_ZF;
    }
    else
    {
        *pu128RaxRdx = u128Tmp;
        *pEFlags &= ~X86_EFL_ZF;
    }
}
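
/* Note: unlike the real (locked) CMPXCHG16B this C fallback is not atomic;
   it is presumably only used when the host cannot do the 16-byte compare-
   exchange natively, with any required serialization left to the caller. */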
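
/*
 * SSE3 duplicating moves: MOVSLDUP copies the even 32-bit lanes into both
 * lanes of each pair, MOVSHDUP does the same with the odd lanes, and
 * MOVDDUP does it with 64-bit lanes (the low one of each 128-bit half).
 */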
IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
{
    RT_NOREF(pFpuState);
    puDst->au32[0] = puSrc->au32[0];
    puDst->au32[1] = puSrc->au32[0];
    puDst->au32[2] = puSrc->au32[2];
    puDst->au32[3] = puSrc->au32[2];
}

#ifdef IEM_WITH_VEX

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
{
    pXState->x87.aXMM[iYRegDst].au32[0]       = pXState->x87.aXMM[iYRegSrc].au32[0];
    pXState->x87.aXMM[iYRegDst].au32[1]       = pXState->x87.aXMM[iYRegSrc].au32[0];
    pXState->x87.aXMM[iYRegDst].au32[2]       = pXState->x87.aXMM[iYRegSrc].au32[2];
    pXState->x87.aXMM[iYRegDst].au32[3]       = pXState->x87.aXMM[iYRegSrc].au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
}


IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
{
    pXState->x87.aXMM[iYRegDst].au32[0]       = pSrc->au32[0];
    pXState->x87.aXMM[iYRegDst].au32[1]       = pSrc->au32[0];
    pXState->x87.aXMM[iYRegDst].au32[2]       = pSrc->au32[2];
    pXState->x87.aXMM[iYRegDst].au32[3]       = pSrc->au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
}

#endif /* IEM_WITH_VEX */


IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
{
    RT_NOREF(pFpuState);
    puDst->au32[0] = puSrc->au32[1];
    puDst->au32[1] = puSrc->au32[1];
    puDst->au32[2] = puSrc->au32[3];
    puDst->au32[3] = puSrc->au32[3];
}


IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
{
    RT_NOREF(pFpuState);
    puDst->au64[0] = uSrc;
    puDst->au64[1] = uSrc;
}

#ifdef IEM_WITH_VEX

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
{
    pXState->x87.aXMM[iYRegDst].au64[0]       = pXState->x87.aXMM[iYRegSrc].au64[0];
    pXState->x87.aXMM[iYRegDst].au64[1]       = pXState->x87.aXMM[iYRegSrc].au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
}

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
{
    pXState->x87.aXMM[iYRegDst].au64[0]       = pSrc->au64[0];
    pXState->x87.aXMM[iYRegDst].au64[1]       = pSrc->au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
}

#endif /* IEM_WITH_VEX */