VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp@ 93792

Last change on this file since 93792 was 93792, checked in by vboxsync, 3 years ago

VMM/IEM: Working on adding missing C version of IEMAllAImpl.asm functions. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 87.8 KB
Line 
1/* $Id: IEMAllAImplC.cpp 93792 2022-02-16 13:30:16Z vboxsync $ */
2/** @file
3 * IEM - Instruction Implementation in Assembly, portable C variant.
4 */
5
6/*
7 * Copyright (C) 2011-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "IEMInternal.h"
23#include <VBox/vmm/vmcc.h>
24#include <VBox/err.h>
25#include <iprt/x86.h>
26#include <iprt/uint128.h>
27
28
29/*********************************************************************************************************************************
30* Defined Constants And Macros *
31*********************************************************************************************************************************/
#if defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
/* No IEMAllAImpl.asm counterpart on ARM yet, so force the portable C variants. */
# define IEM_WITHOUT_ASSEMBLY
#endif
35
/**
 * Calculates the signed flag value given a result and its bit width.
 *
 * The signed flag (SF) is a duplication of the most significant bit in the
 * result.
 *
 * @returns X86_EFL_SF or 0.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )

/**
 * Calculates the zero flag value given a result.
 *
 * The zero flag (ZF) indicates whether the result is zero or not.
 *
 * @returns X86_EFL_ZF or 0.
 * @param   a_uResult       Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )

/**
 * Extracts the OF flag from a OF calculation result.
 *
 * These are typically used by concating with a bitcount.  The problem is that
 * 8-bit values needs shifting in the other direction than the others: the
 * 8-bit sign position sits below X86_EFL_OF_BIT and must be shifted left,
 * while the wider sign positions sit above it and are shifted right.
 */
#define X86_EFL_GET_OF_8(a_uValue)  ((uint32_t)((a_uValue) << (X86_EFL_OF_BIT - 8)) & X86_EFL_OF)
#define X86_EFL_GET_OF_16(a_uValue) ((uint32_t)((a_uValue) >> (16 - X86_EFL_OF_BIT)) & X86_EFL_OF)
#define X86_EFL_GET_OF_32(a_uValue) ((uint32_t)((a_uValue) >> (32 - X86_EFL_OF_BIT)) & X86_EFL_OF)
#define X86_EFL_GET_OF_64(a_uValue) ((uint32_t)((a_uValue) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF)
70
/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after an arithmetic op.
 *
 * The carry flag differs per operation, so the caller supplies it as an
 * expression; the other flags are derived from result/destination/source.
 * (Statement macro: updates *a_pfEFlags in place, no value produced.)
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_uDst          The original destination value (for AF/OF calc).
 * @param   a_uSrc          The source value (for AF/OF calc).
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_CfExpr        Bool expression for the carry flag (CF).
 * @param   a_OfMethod      0 for ADD-style, 1 for SUB-style.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_OfMethod) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        /* AF: carry out of bit 3 = bit 4 of (result ^ src ^ dst). */ \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        /* OF: signed overflow, extracted from the top bit of the expression. */ \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth( ((a_uDst) ^ (a_uSrc) ^ (a_OfMethod == 0 ? RT_BIT_64(a_cBitsWidth - 1) : 0)) \
                                                   & ((a_uResult) ^ (a_uDst)) ); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
96
/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
 *
 * CF and OF are defined to be 0 by logical operations.  AF on the other hand
 * is undefined.  We do not set AF, as that seems to make the most sense (which
 * probably makes it the most wrong in real life).
 * (Statement macro: updates *a_pfEFlags in place, no value produced.)
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_fExtra        Additional bits to set (e.g. X86_EFL_CF for BT*).
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= (a_fExtra); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
120
121
122/*********************************************************************************************************************************
123* Global Variables *
124*********************************************************************************************************************************/
125#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
126/**
127 * Parity calculation table.
128 *
129 * The generator code:
130 * @code
131 * #include <stdio.h>
132 *
133 * int main()
134 * {
135 * unsigned b;
136 * for (b = 0; b < 256; b++)
137 * {
138 * int cOnes = ( b & 1)
139 * + ((b >> 1) & 1)
140 * + ((b >> 2) & 1)
141 * + ((b >> 3) & 1)
142 * + ((b >> 4) & 1)
143 * + ((b >> 5) & 1)
144 * + ((b >> 6) & 1)
145 * + ((b >> 7) & 1);
146 * printf(" /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
147 * b,
148 * (b >> 7) & 1,
149 * (b >> 6) & 1,
150 * (b >> 5) & 1,
151 * (b >> 4) & 1,
152 * (b >> 3) & 1,
153 * (b >> 2) & 1,
154 * (b >> 1) & 1,
155 * b & 1,
156 * cOnes & 1 ? "0" : "X86_EFL_PF");
157 * }
158 * return 0;
159 * }
160 * @endcode
161 */
162static uint8_t const g_afParity[256] =
163{
164 /* 0000 = 00000000b */ X86_EFL_PF,
165 /* 0x01 = 00000001b */ 0,
166 /* 0x02 = 00000010b */ 0,
167 /* 0x03 = 00000011b */ X86_EFL_PF,
168 /* 0x04 = 00000100b */ 0,
169 /* 0x05 = 00000101b */ X86_EFL_PF,
170 /* 0x06 = 00000110b */ X86_EFL_PF,
171 /* 0x07 = 00000111b */ 0,
172 /* 0x08 = 00001000b */ 0,
173 /* 0x09 = 00001001b */ X86_EFL_PF,
174 /* 0x0a = 00001010b */ X86_EFL_PF,
175 /* 0x0b = 00001011b */ 0,
176 /* 0x0c = 00001100b */ X86_EFL_PF,
177 /* 0x0d = 00001101b */ 0,
178 /* 0x0e = 00001110b */ 0,
179 /* 0x0f = 00001111b */ X86_EFL_PF,
180 /* 0x10 = 00010000b */ 0,
181 /* 0x11 = 00010001b */ X86_EFL_PF,
182 /* 0x12 = 00010010b */ X86_EFL_PF,
183 /* 0x13 = 00010011b */ 0,
184 /* 0x14 = 00010100b */ X86_EFL_PF,
185 /* 0x15 = 00010101b */ 0,
186 /* 0x16 = 00010110b */ 0,
187 /* 0x17 = 00010111b */ X86_EFL_PF,
188 /* 0x18 = 00011000b */ X86_EFL_PF,
189 /* 0x19 = 00011001b */ 0,
190 /* 0x1a = 00011010b */ 0,
191 /* 0x1b = 00011011b */ X86_EFL_PF,
192 /* 0x1c = 00011100b */ 0,
193 /* 0x1d = 00011101b */ X86_EFL_PF,
194 /* 0x1e = 00011110b */ X86_EFL_PF,
195 /* 0x1f = 00011111b */ 0,
196 /* 0x20 = 00100000b */ 0,
197 /* 0x21 = 00100001b */ X86_EFL_PF,
198 /* 0x22 = 00100010b */ X86_EFL_PF,
199 /* 0x23 = 00100011b */ 0,
200 /* 0x24 = 00100100b */ X86_EFL_PF,
201 /* 0x25 = 00100101b */ 0,
202 /* 0x26 = 00100110b */ 0,
203 /* 0x27 = 00100111b */ X86_EFL_PF,
204 /* 0x28 = 00101000b */ X86_EFL_PF,
205 /* 0x29 = 00101001b */ 0,
206 /* 0x2a = 00101010b */ 0,
207 /* 0x2b = 00101011b */ X86_EFL_PF,
208 /* 0x2c = 00101100b */ 0,
209 /* 0x2d = 00101101b */ X86_EFL_PF,
210 /* 0x2e = 00101110b */ X86_EFL_PF,
211 /* 0x2f = 00101111b */ 0,
212 /* 0x30 = 00110000b */ X86_EFL_PF,
213 /* 0x31 = 00110001b */ 0,
214 /* 0x32 = 00110010b */ 0,
215 /* 0x33 = 00110011b */ X86_EFL_PF,
216 /* 0x34 = 00110100b */ 0,
217 /* 0x35 = 00110101b */ X86_EFL_PF,
218 /* 0x36 = 00110110b */ X86_EFL_PF,
219 /* 0x37 = 00110111b */ 0,
220 /* 0x38 = 00111000b */ 0,
221 /* 0x39 = 00111001b */ X86_EFL_PF,
222 /* 0x3a = 00111010b */ X86_EFL_PF,
223 /* 0x3b = 00111011b */ 0,
224 /* 0x3c = 00111100b */ X86_EFL_PF,
225 /* 0x3d = 00111101b */ 0,
226 /* 0x3e = 00111110b */ 0,
227 /* 0x3f = 00111111b */ X86_EFL_PF,
228 /* 0x40 = 01000000b */ 0,
229 /* 0x41 = 01000001b */ X86_EFL_PF,
230 /* 0x42 = 01000010b */ X86_EFL_PF,
231 /* 0x43 = 01000011b */ 0,
232 /* 0x44 = 01000100b */ X86_EFL_PF,
233 /* 0x45 = 01000101b */ 0,
234 /* 0x46 = 01000110b */ 0,
235 /* 0x47 = 01000111b */ X86_EFL_PF,
236 /* 0x48 = 01001000b */ X86_EFL_PF,
237 /* 0x49 = 01001001b */ 0,
238 /* 0x4a = 01001010b */ 0,
239 /* 0x4b = 01001011b */ X86_EFL_PF,
240 /* 0x4c = 01001100b */ 0,
241 /* 0x4d = 01001101b */ X86_EFL_PF,
242 /* 0x4e = 01001110b */ X86_EFL_PF,
243 /* 0x4f = 01001111b */ 0,
244 /* 0x50 = 01010000b */ X86_EFL_PF,
245 /* 0x51 = 01010001b */ 0,
246 /* 0x52 = 01010010b */ 0,
247 /* 0x53 = 01010011b */ X86_EFL_PF,
248 /* 0x54 = 01010100b */ 0,
249 /* 0x55 = 01010101b */ X86_EFL_PF,
250 /* 0x56 = 01010110b */ X86_EFL_PF,
251 /* 0x57 = 01010111b */ 0,
252 /* 0x58 = 01011000b */ 0,
253 /* 0x59 = 01011001b */ X86_EFL_PF,
254 /* 0x5a = 01011010b */ X86_EFL_PF,
255 /* 0x5b = 01011011b */ 0,
256 /* 0x5c = 01011100b */ X86_EFL_PF,
257 /* 0x5d = 01011101b */ 0,
258 /* 0x5e = 01011110b */ 0,
259 /* 0x5f = 01011111b */ X86_EFL_PF,
260 /* 0x60 = 01100000b */ X86_EFL_PF,
261 /* 0x61 = 01100001b */ 0,
262 /* 0x62 = 01100010b */ 0,
263 /* 0x63 = 01100011b */ X86_EFL_PF,
264 /* 0x64 = 01100100b */ 0,
265 /* 0x65 = 01100101b */ X86_EFL_PF,
266 /* 0x66 = 01100110b */ X86_EFL_PF,
267 /* 0x67 = 01100111b */ 0,
268 /* 0x68 = 01101000b */ 0,
269 /* 0x69 = 01101001b */ X86_EFL_PF,
270 /* 0x6a = 01101010b */ X86_EFL_PF,
271 /* 0x6b = 01101011b */ 0,
272 /* 0x6c = 01101100b */ X86_EFL_PF,
273 /* 0x6d = 01101101b */ 0,
274 /* 0x6e = 01101110b */ 0,
275 /* 0x6f = 01101111b */ X86_EFL_PF,
276 /* 0x70 = 01110000b */ 0,
277 /* 0x71 = 01110001b */ X86_EFL_PF,
278 /* 0x72 = 01110010b */ X86_EFL_PF,
279 /* 0x73 = 01110011b */ 0,
280 /* 0x74 = 01110100b */ X86_EFL_PF,
281 /* 0x75 = 01110101b */ 0,
282 /* 0x76 = 01110110b */ 0,
283 /* 0x77 = 01110111b */ X86_EFL_PF,
284 /* 0x78 = 01111000b */ X86_EFL_PF,
285 /* 0x79 = 01111001b */ 0,
286 /* 0x7a = 01111010b */ 0,
287 /* 0x7b = 01111011b */ X86_EFL_PF,
288 /* 0x7c = 01111100b */ 0,
289 /* 0x7d = 01111101b */ X86_EFL_PF,
290 /* 0x7e = 01111110b */ X86_EFL_PF,
291 /* 0x7f = 01111111b */ 0,
292 /* 0x80 = 10000000b */ 0,
293 /* 0x81 = 10000001b */ X86_EFL_PF,
294 /* 0x82 = 10000010b */ X86_EFL_PF,
295 /* 0x83 = 10000011b */ 0,
296 /* 0x84 = 10000100b */ X86_EFL_PF,
297 /* 0x85 = 10000101b */ 0,
298 /* 0x86 = 10000110b */ 0,
299 /* 0x87 = 10000111b */ X86_EFL_PF,
300 /* 0x88 = 10001000b */ X86_EFL_PF,
301 /* 0x89 = 10001001b */ 0,
302 /* 0x8a = 10001010b */ 0,
303 /* 0x8b = 10001011b */ X86_EFL_PF,
304 /* 0x8c = 10001100b */ 0,
305 /* 0x8d = 10001101b */ X86_EFL_PF,
306 /* 0x8e = 10001110b */ X86_EFL_PF,
307 /* 0x8f = 10001111b */ 0,
308 /* 0x90 = 10010000b */ X86_EFL_PF,
309 /* 0x91 = 10010001b */ 0,
310 /* 0x92 = 10010010b */ 0,
311 /* 0x93 = 10010011b */ X86_EFL_PF,
312 /* 0x94 = 10010100b */ 0,
313 /* 0x95 = 10010101b */ X86_EFL_PF,
314 /* 0x96 = 10010110b */ X86_EFL_PF,
315 /* 0x97 = 10010111b */ 0,
316 /* 0x98 = 10011000b */ 0,
317 /* 0x99 = 10011001b */ X86_EFL_PF,
318 /* 0x9a = 10011010b */ X86_EFL_PF,
319 /* 0x9b = 10011011b */ 0,
320 /* 0x9c = 10011100b */ X86_EFL_PF,
321 /* 0x9d = 10011101b */ 0,
322 /* 0x9e = 10011110b */ 0,
323 /* 0x9f = 10011111b */ X86_EFL_PF,
324 /* 0xa0 = 10100000b */ X86_EFL_PF,
325 /* 0xa1 = 10100001b */ 0,
326 /* 0xa2 = 10100010b */ 0,
327 /* 0xa3 = 10100011b */ X86_EFL_PF,
328 /* 0xa4 = 10100100b */ 0,
329 /* 0xa5 = 10100101b */ X86_EFL_PF,
330 /* 0xa6 = 10100110b */ X86_EFL_PF,
331 /* 0xa7 = 10100111b */ 0,
332 /* 0xa8 = 10101000b */ 0,
333 /* 0xa9 = 10101001b */ X86_EFL_PF,
334 /* 0xaa = 10101010b */ X86_EFL_PF,
335 /* 0xab = 10101011b */ 0,
336 /* 0xac = 10101100b */ X86_EFL_PF,
337 /* 0xad = 10101101b */ 0,
338 /* 0xae = 10101110b */ 0,
339 /* 0xaf = 10101111b */ X86_EFL_PF,
340 /* 0xb0 = 10110000b */ 0,
341 /* 0xb1 = 10110001b */ X86_EFL_PF,
342 /* 0xb2 = 10110010b */ X86_EFL_PF,
343 /* 0xb3 = 10110011b */ 0,
344 /* 0xb4 = 10110100b */ X86_EFL_PF,
345 /* 0xb5 = 10110101b */ 0,
346 /* 0xb6 = 10110110b */ 0,
347 /* 0xb7 = 10110111b */ X86_EFL_PF,
348 /* 0xb8 = 10111000b */ X86_EFL_PF,
349 /* 0xb9 = 10111001b */ 0,
350 /* 0xba = 10111010b */ 0,
351 /* 0xbb = 10111011b */ X86_EFL_PF,
352 /* 0xbc = 10111100b */ 0,
353 /* 0xbd = 10111101b */ X86_EFL_PF,
354 /* 0xbe = 10111110b */ X86_EFL_PF,
355 /* 0xbf = 10111111b */ 0,
356 /* 0xc0 = 11000000b */ X86_EFL_PF,
357 /* 0xc1 = 11000001b */ 0,
358 /* 0xc2 = 11000010b */ 0,
359 /* 0xc3 = 11000011b */ X86_EFL_PF,
360 /* 0xc4 = 11000100b */ 0,
361 /* 0xc5 = 11000101b */ X86_EFL_PF,
362 /* 0xc6 = 11000110b */ X86_EFL_PF,
363 /* 0xc7 = 11000111b */ 0,
364 /* 0xc8 = 11001000b */ 0,
365 /* 0xc9 = 11001001b */ X86_EFL_PF,
366 /* 0xca = 11001010b */ X86_EFL_PF,
367 /* 0xcb = 11001011b */ 0,
368 /* 0xcc = 11001100b */ X86_EFL_PF,
369 /* 0xcd = 11001101b */ 0,
370 /* 0xce = 11001110b */ 0,
371 /* 0xcf = 11001111b */ X86_EFL_PF,
372 /* 0xd0 = 11010000b */ 0,
373 /* 0xd1 = 11010001b */ X86_EFL_PF,
374 /* 0xd2 = 11010010b */ X86_EFL_PF,
375 /* 0xd3 = 11010011b */ 0,
376 /* 0xd4 = 11010100b */ X86_EFL_PF,
377 /* 0xd5 = 11010101b */ 0,
378 /* 0xd6 = 11010110b */ 0,
379 /* 0xd7 = 11010111b */ X86_EFL_PF,
380 /* 0xd8 = 11011000b */ X86_EFL_PF,
381 /* 0xd9 = 11011001b */ 0,
382 /* 0xda = 11011010b */ 0,
383 /* 0xdb = 11011011b */ X86_EFL_PF,
384 /* 0xdc = 11011100b */ 0,
385 /* 0xdd = 11011101b */ X86_EFL_PF,
386 /* 0xde = 11011110b */ X86_EFL_PF,
387 /* 0xdf = 11011111b */ 0,
388 /* 0xe0 = 11100000b */ 0,
389 /* 0xe1 = 11100001b */ X86_EFL_PF,
390 /* 0xe2 = 11100010b */ X86_EFL_PF,
391 /* 0xe3 = 11100011b */ 0,
392 /* 0xe4 = 11100100b */ X86_EFL_PF,
393 /* 0xe5 = 11100101b */ 0,
394 /* 0xe6 = 11100110b */ 0,
395 /* 0xe7 = 11100111b */ X86_EFL_PF,
396 /* 0xe8 = 11101000b */ X86_EFL_PF,
397 /* 0xe9 = 11101001b */ 0,
398 /* 0xea = 11101010b */ 0,
399 /* 0xeb = 11101011b */ X86_EFL_PF,
400 /* 0xec = 11101100b */ 0,
401 /* 0xed = 11101101b */ X86_EFL_PF,
402 /* 0xee = 11101110b */ X86_EFL_PF,
403 /* 0xef = 11101111b */ 0,
404 /* 0xf0 = 11110000b */ X86_EFL_PF,
405 /* 0xf1 = 11110001b */ 0,
406 /* 0xf2 = 11110010b */ 0,
407 /* 0xf3 = 11110011b */ X86_EFL_PF,
408 /* 0xf4 = 11110100b */ 0,
409 /* 0xf5 = 11110101b */ X86_EFL_PF,
410 /* 0xf6 = 11110110b */ X86_EFL_PF,
411 /* 0xf7 = 11110111b */ 0,
412 /* 0xf8 = 11111000b */ 0,
413 /* 0xf9 = 11111001b */ X86_EFL_PF,
414 /* 0xfa = 11111010b */ X86_EFL_PF,
415 /* 0xfb = 11111011b */ 0,
416 /* 0xfc = 11111100b */ X86_EFL_PF,
417 /* 0xfd = 11111101b */ 0,
418 /* 0xfe = 11111110b */ 0,
419 /* 0xff = 11111111b */ X86_EFL_PF,
420};
421#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
422
423
424
425/*
426 * There are a few 64-bit on 32-bit things we'd rather do in C. Actually, doing
427 * it all in C is probably safer atm., optimize what's necessary later, maybe.
428 */
429#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
430
431
432/*********************************************************************************************************************************
433* Binary Operations *
434*********************************************************************************************************************************/
435
436/*
437 * ADD
438 */
439
440IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
441{
442 uint64_t uDst = *puDst;
443 uint64_t uResult = uDst + uSrc;
444 *puDst = uResult;
445 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, 0);
446}
447
448# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
449
450IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
451{
452 uint32_t uDst = *puDst;
453 uint32_t uResult = uDst + uSrc;
454 *puDst = uResult;
455 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, 0);
456}
457
458
459IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
460{
461 uint16_t uDst = *puDst;
462 uint16_t uResult = uDst + uSrc;
463 *puDst = uResult;
464 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, 0);
465}
466
467
468IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
469{
470 uint8_t uDst = *puDst;
471 uint8_t uResult = uDst + uSrc;
472 *puDst = uResult;
473 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, 0);
474}
475
476# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
477
478/*
479 * ADC
480 */
481
482IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
483{
484 if (!(*pfEFlags & X86_EFL_CF))
485 iemAImpl_add_u64(puDst, uSrc, pfEFlags);
486 else
487 {
488 uint64_t uDst = *puDst;
489 uint64_t uResult = uDst + uSrc + 1;
490 *puDst = uResult;
491 /** @todo verify AF and OF calculations. */
492 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, 0);
493 }
494}
495
496# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
497
498IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
499{
500 if (!(*pfEFlags & X86_EFL_CF))
501 iemAImpl_add_u32(puDst, uSrc, pfEFlags);
502 else
503 {
504 uint32_t uDst = *puDst;
505 uint32_t uResult = uDst + uSrc + 1;
506 *puDst = uResult;
507 /** @todo verify AF and OF calculations. */
508 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, 0);
509 }
510}
511
512
513IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
514{
515 if (!(*pfEFlags & X86_EFL_CF))
516 iemAImpl_add_u16(puDst, uSrc, pfEFlags);
517 else
518 {
519 uint16_t uDst = *puDst;
520 uint16_t uResult = uDst + uSrc + 1;
521 *puDst = uResult;
522 /** @todo verify AF and OF calculations. */
523 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, 0);
524 }
525}
526
527
528IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
529{
530 if (!(*pfEFlags & X86_EFL_CF))
531 iemAImpl_add_u8(puDst, uSrc, pfEFlags);
532 else
533 {
534 uint8_t uDst = *puDst;
535 uint8_t uResult = uDst + uSrc + 1;
536 *puDst = uResult;
537 /** @todo verify AF and OF calculations. */
538 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, 0);
539 }
540}
541
542# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
543
544/*
545 * SUB
546 */
547
548IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
549{
550 uint64_t uDst = *puDst;
551 uint64_t uResult = uDst - uSrc;
552 *puDst = uResult;
553 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, 1);
554}
555
556# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
557
558IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
559{
560 uint32_t uDst = *puDst;
561 uint32_t uResult = uDst - uSrc;
562 *puDst = uResult;
563 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, 1);
564}
565
566
567IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
568{
569 uint16_t uDst = *puDst;
570 uint16_t uResult = uDst - uSrc;
571 *puDst = uResult;
572 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, 1);
573}
574
575
576IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
577{
578 uint8_t uDst = *puDst;
579 uint8_t uResult = uDst - uSrc;
580 *puDst = uResult;
581 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, 1);
582}
583
584# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
585
586/*
587 * SBB
588 */
589
590IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
591{
592 if (!(*pfEFlags & X86_EFL_CF))
593 iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
594 else
595 {
596 uint64_t uDst = *puDst;
597 uint64_t uResult = uDst - uSrc - 1;
598 *puDst = uResult;
599 /** @todo verify AF and OF calculations. */
600 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, 1);
601 }
602}
603
604# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
605
606IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
607{
608 if (!(*pfEFlags & X86_EFL_CF))
609 iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
610 else
611 {
612 uint32_t uDst = *puDst;
613 uint32_t uResult = uDst - uSrc - 1;
614 *puDst = uResult;
615 /** @todo verify AF and OF calculations. */
616 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, 1);
617 }
618}
619
620
621IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
622{
623 if (!(*pfEFlags & X86_EFL_CF))
624 iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
625 else
626 {
627 uint16_t uDst = *puDst;
628 uint16_t uResult = uDst - uSrc - 1;
629 *puDst = uResult;
630 /** @todo verify AF and OF calculations. */
631 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, 1);
632 }
633}
634
635
636IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
637{
638 if (!(*pfEFlags & X86_EFL_CF))
639 iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
640 else
641 {
642 uint8_t uDst = *puDst;
643 uint8_t uResult = uDst - uSrc - 1;
644 *puDst = uResult;
645 /** @todo verify AF and OF calculations. */
646 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, 1);
647 }
648}
649
650# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
651
652
653/*
654 * OR
655 */
656
657IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
658{
659 uint64_t uResult = *puDst | uSrc;
660 *puDst = uResult;
661 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
662}
663
664# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
665
666IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
667{
668 uint32_t uResult = *puDst | uSrc;
669 *puDst = uResult;
670 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
671}
672
673
674IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
675{
676 uint16_t uResult = *puDst | uSrc;
677 *puDst = uResult;
678 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
679}
680
681
682IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
683{
684 uint8_t uResult = *puDst | uSrc;
685 *puDst = uResult;
686 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
687}
688
689# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
690
691/*
692 * XOR
693 */
694
695IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
696{
697 uint64_t uResult = *puDst ^ uSrc;
698 *puDst = uResult;
699 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
700}
701
702# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
703
704IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
705{
706 uint32_t uResult = *puDst ^ uSrc;
707 *puDst = uResult;
708 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
709}
710
711
712IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
713{
714 uint16_t uResult = *puDst ^ uSrc;
715 *puDst = uResult;
716 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
717}
718
719
720IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
721{
722 uint8_t uResult = *puDst ^ uSrc;
723 *puDst = uResult;
724 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
725}
726
727# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
728
729/*
730 * AND
731 */
732
733IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
734{
735 uint64_t uResult = *puDst & uSrc;
736 *puDst = uResult;
737 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
738}
739
740# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
741
742IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
743{
744 uint32_t uResult = *puDst & uSrc;
745 *puDst = uResult;
746 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
747}
748
749
750IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
751{
752 uint16_t uResult = *puDst & uSrc;
753 *puDst = uResult;
754 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
755}
756
757
758IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
759{
760 uint8_t uResult = *puDst & uSrc;
761 *puDst = uResult;
762 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
763}
764
765# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
766
767/*
768 * CMP
769 */
770
771IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
772{
773 uint64_t uDstTmp = *puDst;
774 iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
775}
776
777# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
778
779IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
780{
781 uint32_t uDstTmp = *puDst;
782 iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
783}
784
785
786IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
787{
788 uint16_t uDstTmp = *puDst;
789 iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
790}
791
792
793IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
794{
795 uint8_t uDstTmp = *puDst;
796 iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
797}
798
799# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
800
801/*
802 * TEST
803 */
804
805IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
806{
807 uint64_t uResult = *puDst & uSrc;
808 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
809}
810
811# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
812
813IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
814{
815 uint32_t uResult = *puDst & uSrc;
816 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
817}
818
819
820IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
821{
822 uint16_t uResult = *puDst & uSrc;
823 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
824}
825
826
827IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
828{
829 uint8_t uResult = *puDst & uSrc;
830 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
831}
832
833# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
834
835
836/*
837 * LOCK prefixed variants of the above
838 */
839
/** Width-generic locked binary operand operation body.
 *
 * Reads the destination, applies the plain C worker to a local copy, and
 * retries via compare-exchange until no other agent modified the destination
 * in between; the EFLAGS from the final (successful) iteration are kept.
 * Expects puDst, uSrc and pfEFlags in the enclosing scope. */
# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    do { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, uSrc, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)


/** Emits an iemAImpl_<op>_u<width>_locked function wrapping DO_LOCKED_BIN_OP. */
#define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint ## a_cBitsWidth ## _t uSrc, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
    }

EMIT_LOCKED_BIN_OP(add, 64)
EMIT_LOCKED_BIN_OP(adc, 64)
EMIT_LOCKED_BIN_OP(sub, 64)
EMIT_LOCKED_BIN_OP(sbb, 64)
EMIT_LOCKED_BIN_OP(or, 64)
EMIT_LOCKED_BIN_OP(xor, 64)
EMIT_LOCKED_BIN_OP(and, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(add, 32)
EMIT_LOCKED_BIN_OP(adc, 32)
EMIT_LOCKED_BIN_OP(sub, 32)
EMIT_LOCKED_BIN_OP(sbb, 32)
EMIT_LOCKED_BIN_OP(or, 32)
EMIT_LOCKED_BIN_OP(xor, 32)
EMIT_LOCKED_BIN_OP(and, 32)

EMIT_LOCKED_BIN_OP(add, 16)
EMIT_LOCKED_BIN_OP(adc, 16)
EMIT_LOCKED_BIN_OP(sub, 16)
EMIT_LOCKED_BIN_OP(sbb, 16)
EMIT_LOCKED_BIN_OP(or, 16)
EMIT_LOCKED_BIN_OP(xor, 16)
EMIT_LOCKED_BIN_OP(and, 16)

EMIT_LOCKED_BIN_OP(add, 8)
EMIT_LOCKED_BIN_OP(adc, 8)
EMIT_LOCKED_BIN_OP(sub, 8)
EMIT_LOCKED_BIN_OP(sbb, 8)
EMIT_LOCKED_BIN_OP(or, 8)
EMIT_LOCKED_BIN_OP(xor, 8)
EMIT_LOCKED_BIN_OP(and, 8)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
896
897
898/*
899 * Bit operations (same signature as above).
900 */
901
902/*
903 * BT
904 */
905
906IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
907{
908 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
909 logical operation (AND/OR/whatever). */
910 Assert(uSrc < 64);
911 uint64_t uDst = *puDst;
912 if (uDst & RT_BIT_64(uSrc))
913 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
914 else
915 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
916}
917
918# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
919
920IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
921{
922 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
923 logical operation (AND/OR/whatever). */
924 Assert(uSrc < 32);
925 uint32_t uDst = *puDst;
926 if (uDst & RT_BIT_32(uSrc))
927 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
928 else
929 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
930}
931
932IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
933{
934 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
935 logical operation (AND/OR/whatever). */
936 Assert(uSrc < 16);
937 uint16_t uDst = *puDst;
938 if (uDst & RT_BIT_32(uSrc))
939 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
940 else
941 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
942}
943
944# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
945
946/*
947 * BTC
948 */
949
950IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
951{
952 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
953 logical operation (AND/OR/whatever). */
954 Assert(uSrc < 64);
955 uint64_t fMask = RT_BIT_64(uSrc);
956 uint64_t uDst = *puDst;
957 if (uDst & fMask)
958 {
959 uDst &= ~fMask;
960 *puDst = uDst;
961 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
962 }
963 else
964 {
965 uDst |= fMask;
966 *puDst = uDst;
967 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
968 }
969}
970
971# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
972
973IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
974{
975 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
976 logical operation (AND/OR/whatever). */
977 Assert(uSrc < 32);
978 uint32_t fMask = RT_BIT_32(uSrc);
979 uint32_t uDst = *puDst;
980 if (uDst & fMask)
981 {
982 uDst &= ~fMask;
983 *puDst = uDst;
984 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
985 }
986 else
987 {
988 uDst |= fMask;
989 *puDst = uDst;
990 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
991 }
992}
993
994
995IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
996{
997 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
998 logical operation (AND/OR/whatever). */
999 Assert(uSrc < 16);
1000 uint16_t fMask = RT_BIT_32(uSrc);
1001 uint16_t uDst = *puDst;
1002 if (uDst & fMask)
1003 {
1004 uDst &= ~fMask;
1005 *puDst = uDst;
1006 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
1007 }
1008 else
1009 {
1010 uDst |= fMask;
1011 *puDst = uDst;
1012 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
1013 }
1014}
1015
1016# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1017
1018/*
1019 * BTR
1020 */
1021
1022IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1023{
1024 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1025 logical operation (AND/OR/whatever). */
1026 Assert(uSrc < 64);
1027 uint64_t fMask = RT_BIT_64(uSrc);
1028 uint64_t uDst = *puDst;
1029 if (uDst & fMask)
1030 {
1031 uDst &= ~fMask;
1032 *puDst = uDst;
1033 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
1034 }
1035 else
1036 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
1037}
1038
1039# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1040
1041IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1042{
1043 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1044 logical operation (AND/OR/whatever). */
1045 Assert(uSrc < 32);
1046 uint32_t fMask = RT_BIT_32(uSrc);
1047 uint32_t uDst = *puDst;
1048 if (uDst & fMask)
1049 {
1050 uDst &= ~fMask;
1051 *puDst = uDst;
1052 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
1053 }
1054 else
1055 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
1056}
1057
1058
1059IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1060{
1061 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1062 logical operation (AND/OR/whatever). */
1063 Assert(uSrc < 16);
1064 uint16_t fMask = RT_BIT_32(uSrc);
1065 uint16_t uDst = *puDst;
1066 if (uDst & fMask)
1067 {
1068 uDst &= ~fMask;
1069 *puDst = uDst;
1070 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
1071 }
1072 else
1073 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
1074}
1075
1076# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1077
1078/*
1079 * BTS
1080 */
1081
1082IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1083{
1084 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1085 logical operation (AND/OR/whatever). */
1086 Assert(uSrc < 64);
1087 uint64_t fMask = RT_BIT_64(uSrc);
1088 uint64_t uDst = *puDst;
1089 if (uDst & fMask)
1090 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
1091 else
1092 {
1093 uDst |= fMask;
1094 *puDst = uDst;
1095 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
1096 }
1097}
1098
1099# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1100
1101IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1102{
1103 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1104 logical operation (AND/OR/whatever). */
1105 Assert(uSrc < 32);
1106 uint32_t fMask = RT_BIT_32(uSrc);
1107 uint32_t uDst = *puDst;
1108 if (uDst & fMask)
1109 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
1110 else
1111 {
1112 uDst |= fMask;
1113 *puDst = uDst;
1114 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
1115 }
1116}
1117
1118
1119IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1120{
1121 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1122 logical operation (AND/OR/whatever). */
1123 Assert(uSrc < 16);
1124 uint16_t fMask = RT_BIT_32(uSrc);
1125 uint32_t uDst = *puDst;
1126 if (uDst & fMask)
1127 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
1128 else
1129 {
1130 uDst |= fMask;
1131 *puDst = uDst;
1132 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
1133 }
1134}
1135
1136# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1137
1138
/* Locked (atomic) variants of BTC/BTR/BTS for the LOCK-prefixed / memory-operand forms. */
EMIT_LOCKED_BIN_OP(btc, 64)
EMIT_LOCKED_BIN_OP(btr, 64)
EMIT_LOCKED_BIN_OP(bts, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(btc, 32)
EMIT_LOCKED_BIN_OP(btr, 32)
EMIT_LOCKED_BIN_OP(bts, 32)

EMIT_LOCKED_BIN_OP(btc, 16)
EMIT_LOCKED_BIN_OP(btr, 16)
EMIT_LOCKED_BIN_OP(bts, 16)
1150# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1151
1152
1153/*
1154 * BSF - first (least significant) bit set
1155 */
1156
1157IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1158{
1159 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1160 /** @todo check what real CPUs do. */
1161 unsigned iBit = ASMBitFirstSetU64(uSrc);
1162 if (iBit)
1163 {
1164 *puDst = iBit - 1;
1165 *pfEFlags &= ~X86_EFL_ZF;
1166 }
1167 else
1168 *pfEFlags |= X86_EFL_ZF;
1169}
1170
1171# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1172
1173IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1174{
1175 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1176 /** @todo check what real CPUs do. */
1177 unsigned iBit = ASMBitFirstSetU32(uSrc);
1178 if (iBit)
1179 {
1180 *puDst = iBit - 1;
1181 *pfEFlags &= ~X86_EFL_ZF;
1182 }
1183 else
1184 *pfEFlags |= X86_EFL_ZF;
1185}
1186
1187
1188IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1189{
1190 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1191 /** @todo check what real CPUs do. */
1192 unsigned iBit = ASMBitFirstSetU16(uSrc);
1193 if (iBit)
1194 {
1195 *puDst = iBit - 1;
1196 *pfEFlags &= ~X86_EFL_ZF;
1197 }
1198 else
1199 *pfEFlags |= X86_EFL_ZF;
1200}
1201
1202# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1203
1204/*
1205 * BSR - last (most significant) bit set
1206 */
1207
1208IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1209{
1210 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1211 /** @todo check what real CPUs do. */
1212 unsigned iBit = ASMBitLastSetU64(uSrc);
1213 if (uSrc)
1214 {
1215 *puDst = iBit - 1;
1216 *pfEFlags &= ~X86_EFL_ZF;
1217 }
1218 else
1219 *pfEFlags |= X86_EFL_ZF;
1220}
1221
1222# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1223
1224IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1225{
1226 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1227 /** @todo check what real CPUs do. */
1228 unsigned iBit = ASMBitLastSetU32(uSrc);
1229 if (uSrc)
1230 {
1231 *puDst = iBit - 1;
1232 *pfEFlags &= ~X86_EFL_ZF;
1233 }
1234 else
1235 *pfEFlags |= X86_EFL_ZF;
1236}
1237
1238
1239IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1240{
1241 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1242 /** @todo check what real CPUs do. */
1243 unsigned iBit = ASMBitLastSetU16(uSrc);
1244 if (uSrc)
1245 {
1246 *puDst = iBit - 1;
1247 *pfEFlags &= ~X86_EFL_ZF;
1248 }
1249 else
1250 *pfEFlags |= X86_EFL_ZF;
1251}
1252
1253# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1254
1255
1256/*
1257 * XCHG
1258 */
1259
/** XCHG (64-bit): atomically swaps *puMem and *puReg via a CAS loop. */
IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64,(uint64_t *puMem, uint64_t *puReg))
{
    /* XCHG implies LOCK. */
    uint64_t uOldMem = *puMem; /* initial guess for the compare-exchange */
    while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem)) /* on failure uOldMem is refreshed with the current value */
        ASMNopPause();
    *puReg = uOldMem; /* register receives the old memory value */
}
1268
1269# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1270
/** XCHG (32-bit): atomically swaps *puMem and *puReg via a CAS loop. */
IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32,(uint32_t *puMem, uint32_t *puReg))
{
    /* XCHG implies LOCK. */
    uint32_t uOldMem = *puMem; /* initial guess for the compare-exchange */
    while (!ASMAtomicCmpXchgExU32(puMem, *puReg, uOldMem, &uOldMem)) /* on failure uOldMem is refreshed with the current value */
        ASMNopPause();
    *puReg = uOldMem; /* register receives the old memory value */
}
1279
1280
/** XCHG (16-bit): atomically swaps *puMem and *puReg via a CAS loop. */
IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16,(uint16_t *puMem, uint16_t *puReg))
{
    /* XCHG implies LOCK. */
    uint16_t uOldMem = *puMem; /* initial guess for the compare-exchange */
    while (!ASMAtomicCmpXchgExU16(puMem, *puReg, uOldMem, &uOldMem)) /* on failure uOldMem is refreshed with the current value */
        ASMNopPause();
    *puReg = uOldMem; /* register receives the old memory value */
}
1289
1290
/** XCHG (8-bit): atomically swaps *puMem and *puReg via a CAS loop. */
IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8,(uint8_t *puMem, uint8_t *puReg))
{
    /* XCHG implies LOCK. */
    uint8_t uOldMem = *puMem; /* initial guess for the compare-exchange */
    while (!ASMAtomicCmpXchgExU8(puMem, *puReg, uOldMem, &uOldMem)) /* on failure uOldMem is refreshed with the current value */
        ASMNopPause();
    *puReg = uOldMem; /* register receives the old memory value */
}
1299
1300# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1301
1302
1303/*
1304 * XADD and LOCK XADD.
1305 */
1306
1307IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
1308{
1309 uint64_t uDst = *puDst;
1310 uint64_t uResult = uDst;
1311 iemAImpl_add_u64(&uResult, *puReg, pfEFlags);
1312 *puDst = uResult;
1313 *puReg = uDst;
1314}
1315
1316
1317IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64_locked,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
1318{
1319 uint64_t uOld = ASMAtomicUoReadU64(puDst);
1320 uint64_t uTmpDst;
1321 uint32_t fEflTmp;
1322 do
1323 {
1324 uTmpDst = uOld;
1325 fEflTmp = *pfEFlags;
1326 iemAImpl_add_u64(&uTmpDst, *puReg, pfEFlags);
1327 } while (!ASMAtomicCmpXchgExU64(puDst, uTmpDst, uOld, &uOld));
1328 *puReg = uOld;
1329 *pfEFlags = fEflTmp;
1330}
1331
1332# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1333
1334IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
1335{
1336 uint32_t uDst = *puDst;
1337 uint32_t uResult = uDst;
1338 iemAImpl_add_u32(&uResult, *puReg, pfEFlags);
1339 *puDst = uResult;
1340 *puReg = uDst;
1341}
1342
1343
1344IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32_locked,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
1345{
1346 uint32_t uOld = ASMAtomicUoReadU32(puDst);
1347 uint32_t uTmpDst;
1348 uint32_t fEflTmp;
1349 do
1350 {
1351 uTmpDst = uOld;
1352 fEflTmp = *pfEFlags;
1353 iemAImpl_add_u32(&uTmpDst, *puReg, pfEFlags);
1354 } while (!ASMAtomicCmpXchgExU32(puDst, uTmpDst, uOld, &uOld));
1355 *puReg = uOld;
1356 *pfEFlags = fEflTmp;
1357}
1358
1359
1360IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
1361{
1362 uint16_t uDst = *puDst;
1363 uint16_t uResult = uDst;
1364 iemAImpl_add_u16(&uResult, *puReg, pfEFlags);
1365 *puDst = uResult;
1366 *puReg = uDst;
1367}
1368
1369
1370IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16_locked,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
1371{
1372 uint16_t uOld = ASMAtomicUoReadU16(puDst);
1373 uint16_t uTmpDst;
1374 uint32_t fEflTmp;
1375 do
1376 {
1377 uTmpDst = uOld;
1378 fEflTmp = *pfEFlags;
1379 iemAImpl_add_u16(&uTmpDst, *puReg, pfEFlags);
1380 } while (!ASMAtomicCmpXchgExU16(puDst, uTmpDst, uOld, &uOld));
1381 *puReg = uOld;
1382 *pfEFlags = fEflTmp;
1383}
1384
1385
1386IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
1387{
1388 uint8_t uDst = *puDst;
1389 uint8_t uResult = uDst;
1390 iemAImpl_add_u8(&uResult, *puReg, pfEFlags);
1391 *puDst = uResult;
1392 *puReg = uDst;
1393}
1394
1395
1396IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8_locked,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
1397{
1398 uint8_t uOld = ASMAtomicUoReadU8(puDst);
1399 uint8_t uTmpDst;
1400 uint32_t fEflTmp;
1401 do
1402 {
1403 uTmpDst = uOld;
1404 fEflTmp = *pfEFlags;
1405 iemAImpl_add_u8(&uTmpDst, *puReg, pfEFlags);
1406 } while (!ASMAtomicCmpXchgExU8(puDst, uTmpDst, uOld, &uOld));
1407 *puReg = uOld;
1408 *pfEFlags = fEflTmp;
1409}
1410
1411# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1412
1413
1414/*
1415 * MUL
1416 */
1417
1418IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
1419{
1420 RTUINT128U Result;
1421 RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
1422 *pu64RAX = Result.s.Lo;
1423 *pu64RDX = Result.s.Hi;
1424
1425 /* MUL EFLAGS according to Skylake (similar to IMUL). */
1426 *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
1427 if (Result.s.Lo & RT_BIT_64(63))
1428 *pfEFlags |= X86_EFL_SF;
1429 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1430 if (Result.s.Hi != 0)
1431 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1432 return 0;
1433}
1434
1435# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1436
1437IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Factor, uint32_t *pfEFlags))
1438{
1439 RTUINT64U Result;
1440 Result.u = (uint64_t)*pu32RAX * u32Factor;
1441 *pu32RAX = Result.s.Lo;
1442 *pu32RDX = Result.s.Hi;
1443
1444 /* MUL EFLAGS according to Skylake (similar to IMUL). */
1445 *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
1446 if (Result.s.Lo & RT_BIT_32(31))
1447 *pfEFlags |= X86_EFL_SF;
1448 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1449 if (Result.s.Hi != 0)
1450 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1451 return 0;
1452}
1453
1454
1455IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Factor, uint32_t *pfEFlags))
1456{
1457 RTUINT32U Result;
1458 Result.u = (uint32_t)*pu16RAX * u16Factor;
1459 *pu16RAX = Result.s.Lo;
1460 *pu16RDX = Result.s.Hi;
1461
1462 /* MUL EFLAGS according to Skylake (similar to IMUL). */
1463 *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
1464 if (Result.s.Lo & RT_BIT_32(15))
1465 *pfEFlags |= X86_EFL_SF;
1466 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1467 if (Result.s.Hi != 0)
1468 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1469 return 0;
1470}
1471
1472# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1473
1474
1475/*
1476 * IMUL
1477 */
1478
/** IMUL (64-bit, one operand): RDX:RAX = RAX * u64Factor (signed).
 *
 * Implemented by multiplying the operand magnitudes unsigned and negating the
 * 128-bit result when exactly one operand is negative.  CF and OF are set
 * when the signed result does not fit in 64 bits; note the asymmetric bounds
 * below ('>=' vs '>') because a negative result may be exactly -2^63.
 * Always returns 0 (no \#DE possible).
 */
IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
{
    RTUINT128U Result;
    *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
                 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
                 /* Skylake may set: */ | X86_EFL_PF);

    if ((int64_t)*pu64RAX >= 0)
    {
        if ((int64_t)u64Factor >= 0)
        {
            /* positive * positive: overflow when result > INT64_MAX */
            RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
        else
        {
            /* positive * negative: magnitude may be up to 2^63 */
            RTUInt128MulU64ByU64(&Result, *pu64RAX, UINT64_C(0) - u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            RTUInt128AssignNeg(&Result);
        }
    }
    else
    {
        if ((int64_t)u64Factor >= 0)
        {
            /* negative * positive: magnitude may be up to 2^63 */
            RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            RTUInt128AssignNeg(&Result);
        }
        else
        {
            /* negative * negative: result is positive, overflow when > INT64_MAX */
            RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, UINT64_C(0) - u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
    }
    *pu64RAX = Result.s.Lo;
    if (Result.s.Lo & RT_BIT_64(63))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    *pu64RDX = Result.s.Hi;

    return 0;
}
1526
1527
1528IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1529{
1530/** @todo Testcase: IMUL 2 and 3 operands. */
1531 uint64_t u64Ign;
1532 iemAImpl_imul_u64(puDst, &u64Ign, uSrc, pfEFlags);
1533}
1534
1535# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1536
/** IMUL (32-bit, one operand): EDX:EAX = EAX * u32Factor (signed).
 *
 * Same magnitude-multiply-then-negate scheme as iemAImpl_imul_u64; CF/OF set
 * when the signed result does not fit in 32 bits (asymmetric bounds because a
 * negative result may be exactly -2^31).  Always returns 0.
 */
IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Factor, uint32_t *pfEFlags))
{
    RTUINT64U Result;
    *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
                 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
                 /* Skylake may set: */ | X86_EFL_PF);

    if ((int32_t)*pu32RAX >= 0)
    {
        if ((int32_t)u32Factor >= 0)
        {
            /* positive * positive: overflow when result > INT32_MAX */
            Result.u = (uint64_t)*pu32RAX * u32Factor;
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
        else
        {
            /* positive * negative: magnitude may be up to 2^31 */
            Result.u = (uint64_t)*pu32RAX * (UINT32_C(0) - u32Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT64_C(0) - Result.u;
        }
    }
    else
    {
        if ((int32_t)u32Factor >= 0)
        {
            /* negative * positive: magnitude may be up to 2^31 */
            Result.u = (uint64_t)(UINT32_C(0) - *pu32RAX) * u32Factor;
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT64_C(0) - Result.u;
        }
        else
        {
            /* negative * negative: result is positive, overflow when > INT32_MAX */
            Result.u = (uint64_t)(UINT32_C(0) - *pu32RAX) * (UINT32_C(0) - u32Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(31))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
    }
    *pu32RAX = Result.s.Lo;
    if (Result.s.Lo & RT_BIT_32(31))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    *pu32RDX = Result.s.Hi;

    return 0;
}
1584
1585
1586IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1587{
1588/** @todo Testcase: IMUL 2 and 3 operands. */
1589 uint32_t u32Ign;
1590 iemAImpl_imul_u32(puDst, &u32Ign, uSrc, pfEFlags);
1591}
1592
1593
/** IMUL (16-bit, one operand): DX:AX = AX * u16Factor (signed).
 *
 * Same magnitude-multiply-then-negate scheme as iemAImpl_imul_u64; CF/OF set
 * when the signed result does not fit in 16 bits (asymmetric bounds because a
 * negative result may be exactly -2^15).  Always returns 0.
 */
IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Factor, uint32_t *pfEFlags))
{
    RTUINT32U Result;
    *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
                 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
                 /* Skylake may set: */ | X86_EFL_PF);

    if ((int16_t)*pu16RAX >= 0)
    {
        if ((int16_t)u16Factor >= 0)
        {
            /* positive * positive: overflow when result > INT16_MAX */
            Result.u = (uint32_t)*pu16RAX * u16Factor;
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
        else
        {
            /* positive * negative: magnitude may be up to 2^15 */
            Result.u = (uint32_t)*pu16RAX * (UINT16_C(0) - u16Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT32_C(0) - Result.u;
        }
    }
    else
    {
        if ((int16_t)u16Factor >= 0)
        {
            /* negative * positive: magnitude may be up to 2^15 */
            Result.u = (uint32_t)(UINT16_C(0) - *pu16RAX) * u16Factor;
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            Result.u = UINT32_C(0) - Result.u;
        }
        else
        {
            /* negative * negative: result is positive, overflow when > INT16_MAX */
            Result.u = (uint32_t)(UINT16_C(0) - *pu16RAX) * (UINT16_C(0) - u16Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(15))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
    }
    *pu16RAX = Result.s.Lo;
    if (Result.s.Lo & RT_BIT_32(15))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    *pu16RDX = Result.s.Hi;

    return 0;
}
1641
1642
1643IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1644{
1645/** @todo Testcase: IMUL 2 and 3 operands. */
1646 uint16_t u16Ign;
1647 iemAImpl_imul_u16(puDst, &u16Ign, uSrc, pfEFlags);
1648}
1649
1650# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1651
1652
1653/*
1654 * DIV
1655 */
1656
/** DIV (64-bit, unsigned): divides RDX:RAX by u64Divisor; quotient to RAX,
 * remainder to RDX.  Returns non-zero to signal \#DE on division by zero or
 * quotient overflow. */
IEM_DECL_IMPL_DEF(int, iemAImpl_div_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    /* The quotient fits in 64 bits iff the high dividend half is smaller than
       the divisor; this test also rejects a zero divisor. */
    if (   u64Divisor != 0
        && *pu64RDX < u64Divisor)
    {
        RTUINT128U Dividend;
        Dividend.s.Lo = *pu64RAX;
        Dividend.s.Hi = *pu64RDX;

        RTUINT128U Divisor;
        Divisor.s.Lo = u64Divisor;
        Divisor.s.Hi = 0;

        RTUINT128U Remainder;
        RTUINT128U Quotient;
# ifdef __GNUC__ /* GCC maybe really annoying in function. */
        Quotient.s.Lo = 0;
        Quotient.s.Hi = 0;
# endif
        RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);
        Assert(Quotient.s.Hi == 0);   /* guaranteed by the range check above */
        Assert(Remainder.s.Hi == 0);  /* remainder < divisor <= UINT64_MAX */

        *pu64RAX = Quotient.s.Lo;
        *pu64RDX = Remainder.s.Lo;
        /** @todo research the undefined DIV flags. */
        return 0;

    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}
1692
1693# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1694
1695IEM_DECL_IMPL_DEF(int, iemAImpl_div_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Divisor, uint32_t *pfEFlags))
1696{
1697 /* Note! Skylake leaves all flags alone. */
1698 RT_NOREF_PV(pfEFlags);
1699
1700 if ( u32Divisor != 0
1701 && *pu32RDX < u32Divisor)
1702 {
1703 RTUINT64U Dividend;
1704 Dividend.s.Lo = *pu32RAX;
1705 Dividend.s.Hi = *pu32RDX;
1706
1707 RTUINT64U Remainder;
1708 RTUINT64U Quotient;
1709 Quotient.u = Dividend.u / u32Divisor;
1710 Remainder.u = Dividend.u % u32Divisor;
1711
1712 *pu32RAX = Quotient.s.Lo;
1713 *pu32RDX = Remainder.s.Lo;
1714 /** @todo research the undefined DIV flags. */
1715 return 0;
1716
1717 }
1718 /* #DE */
1719 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1720}
1721
1722
1723IEM_DECL_IMPL_DEF(int, iemAImpl_div_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Divisor, uint32_t *pfEFlags))
1724{
1725 /* Note! Skylake leaves all flags alone. */
1726 RT_NOREF_PV(pfEFlags);
1727
1728 if ( u16Divisor != 0
1729 && *pu16RDX < u16Divisor)
1730 {
1731 RTUINT32U Dividend;
1732 Dividend.s.Lo = *pu16RAX;
1733 Dividend.s.Hi = *pu16RDX;
1734
1735 RTUINT32U Remainder;
1736 RTUINT32U Quotient;
1737 Quotient.u = Dividend.u / u16Divisor;
1738 Remainder.u = Dividend.u % u16Divisor;
1739
1740 *pu16RAX = Quotient.s.Lo;
1741 *pu16RDX = Remainder.s.Lo;
1742 /** @todo research the undefined DIV flags. */
1743 return 0;
1744
1745 }
1746 /* #DE */
1747 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1748}
1749
1750# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1751
1752
1753/*
1754 * IDIV
1755 */
1756
/** IDIV (64-bit, signed): divides RDX:RAX by u64Divisor; quotient to RAX,
 * remainder to RDX (remainder takes the dividend's sign).  Performed as an
 * unsigned division of magnitudes with sign fix-up afterwards.  Returns
 * non-zero to signal \#DE on division by zero or quotient overflow. */
IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    /** @todo overflow checks */
    if (u64Divisor != 0)
    {
        /*
         * Convert to unsigned division.
         */
        RTUINT128U Dividend;
        Dividend.s.Lo = *pu64RAX;
        Dividend.s.Hi = *pu64RDX;
        if ((int64_t)*pu64RDX < 0)
            RTUInt128AssignNeg(&Dividend);

        RTUINT128U Divisor;
        Divisor.s.Hi = 0;
        if ((int64_t)u64Divisor >= 0)
            Divisor.s.Lo = u64Divisor;
        else
            Divisor.s.Lo = UINT64_C(0) - u64Divisor;

        RTUINT128U Remainder;
        RTUINT128U Quotient;
# ifdef __GNUC__ /* GCC maybe really annoying. */
        Quotient.s.Lo = 0;
        Quotient.s.Hi = 0;
# endif
        RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);

        /*
         * Setup the result, checking for overflows.
         */
        if ((int64_t)u64Divisor >= 0)
        {
            if ((int64_t)*pu64RDX >= 0)
            {
                /* Positive divisor, positive dividend => result positive. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
                {
                    *pu64RAX = Quotient.s.Lo;
                    *pu64RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Positive divisor, negative dividend => result negative. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
                {
                    *pu64RAX = UINT64_C(0) - Quotient.s.Lo;
                    *pu64RDX = UINT64_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
        else
        {
            if ((int64_t)*pu64RDX >= 0)
            {
                /* Negative divisor, positive dividend => negative quotient, positive remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
                {
                    *pu64RAX = UINT64_C(0) - Quotient.s.Lo;
                    *pu64RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Negative divisor, negative dividend => positive quotient, negative remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
                {
                    *pu64RAX = Quotient.s.Lo;
                    *pu64RDX = UINT64_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}
1842
1843# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1844
/** IDIV (32-bit, signed): divides EDX:EAX by u32Divisor; quotient to EAX,
 * remainder to EDX (remainder takes the dividend's sign).  Performed as an
 * unsigned division of magnitudes with sign fix-up afterwards.  Returns
 * non-zero to signal \#DE on division by zero or quotient overflow. */
IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    /** @todo overflow checks */
    if (u32Divisor != 0)
    {
        /*
         * Convert to unsigned division.
         */
        RTUINT64U Dividend;
        Dividend.s.Lo = *pu32RAX;
        Dividend.s.Hi = *pu32RDX;
        if ((int32_t)*pu32RDX < 0)
            Dividend.u = UINT64_C(0) - Dividend.u;

        uint32_t u32DivisorPositive;
        if ((int32_t)u32Divisor >= 0)
            u32DivisorPositive = u32Divisor;
        else
            u32DivisorPositive = UINT32_C(0) - u32Divisor;

        RTUINT64U Remainder;
        RTUINT64U Quotient;
        Quotient.u = Dividend.u / u32DivisorPositive;
        Remainder.u = Dividend.u % u32DivisorPositive;

        /*
         * Setup the result, checking for overflows.
         */
        if ((int32_t)u32Divisor >= 0)
        {
            if ((int32_t)*pu32RDX >= 0)
            {
                /* Positive divisor, positive dividend => result positive. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint32_t)INT32_MAX)
                {
                    *pu32RAX = Quotient.s.Lo;
                    *pu32RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Positive divisor, negative dividend => result negative. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(31))
                {
                    *pu32RAX = UINT32_C(0) - Quotient.s.Lo;
                    *pu32RDX = UINT32_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
        else
        {
            if ((int32_t)*pu32RDX >= 0)
            {
                /* Negative divisor, positive dividend => negative quotient, positive remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(31))
                {
                    *pu32RAX = UINT32_C(0) - Quotient.s.Lo;
                    *pu32RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Negative divisor, negative dividend => positive quotient, negative remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint32_t)INT32_MAX)
                {
                    *pu32RAX = Quotient.s.Lo;
                    *pu32RDX = UINT32_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}
1926
1927
/** IDIV (16-bit, signed): divides DX:AX by u16Divisor; quotient to AX,
 * remainder to DX (remainder takes the dividend's sign).  Performed as an
 * unsigned division of magnitudes with sign fix-up afterwards.  Returns
 * non-zero to signal \#DE on division by zero or quotient overflow. */
IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Divisor, uint32_t *pfEFlags))
{
    /* Note! Skylake leaves all flags alone. */
    RT_NOREF_PV(pfEFlags);

    if (u16Divisor != 0)
    {
        /*
         * Convert to unsigned division.
         */
        RTUINT32U Dividend;
        Dividend.s.Lo = *pu16RAX;
        Dividend.s.Hi = *pu16RDX;
        if ((int16_t)*pu16RDX < 0)
            Dividend.u = UINT32_C(0) - Dividend.u;

        uint16_t u16DivisorPositive;
        if ((int16_t)u16Divisor >= 0)
            u16DivisorPositive = u16Divisor;
        else
            u16DivisorPositive = UINT16_C(0) - u16Divisor;

        RTUINT32U Remainder;
        RTUINT32U Quotient;
        Quotient.u = Dividend.u / u16DivisorPositive;
        Remainder.u = Dividend.u % u16DivisorPositive;

        /*
         * Setup the result, checking for overflows.
         */
        if ((int16_t)u16Divisor >= 0)
        {
            if ((int16_t)*pu16RDX >= 0)
            {
                /* Positive divisor, positive dividend => result positive. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint16_t)INT16_MAX)
                {
                    *pu16RAX = Quotient.s.Lo;
                    *pu16RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Positive divisor, negative dividend => result negative. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(15))
                {
                    *pu16RAX = UINT16_C(0) - Quotient.s.Lo;
                    *pu16RDX = UINT16_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
        else
        {
            if ((int16_t)*pu16RDX >= 0)
            {
                /* Negative divisor, positive dividend => negative quotient, positive remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(15))
                {
                    *pu16RAX = UINT16_C(0) - Quotient.s.Lo;
                    *pu16RDX = Remainder.s.Lo;
                    return 0;
                }
            }
            else
            {
                /* Negative divisor, negative dividend => positive quotient, negative remainder. */
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint16_t)INT16_MAX)
                {
                    *pu16RAX = Quotient.s.Lo;
                    *pu16RDX = UINT16_C(0) - Remainder.s.Lo;
                    return 0;
                }
            }
        }
    }
    /* #DE */
    return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
}
2008
2009# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2010
2011
2012/*********************************************************************************************************************************
2013* Unary operations. *
2014*********************************************************************************************************************************/
2015
2016/**
2017 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for an INC or DEC instruction.
2018 *
2019 * CF is NOT modified for hysterical raisins (allegedly for carrying and
2020 * borrowing in arithmetic loops on intel 8008).
2021 *
2022 * @returns Status bits.
2023 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2024 * @param a_uResult Unsigned result value.
2025 * @param a_uDst The original destination value (for AF calc).
2026 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2027 * @param a_OfMethod 0 for INC-style, 1 for DEC-style.
2028 */
2029#define IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth, a_OfMethod) \
2030 do { \
2031 uint32_t fEflTmp = *(a_pfEFlags); \
2032 fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; \
2033 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2034 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2035 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2036 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2037 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(a_OfMethod == 0 ? (((a_uDst) ^ RT_BIT_64(63)) & (a_uResult)) \
2038 : ((a_uDst) & ((a_uResult) ^ RT_BIT_64(63))) ); \
2039 *(a_pfEFlags) = fEflTmp; \
2040 } while (0)
2041
2042/*
2043 * INC
2044 */
2045
2046IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2047{
2048 uint64_t uDst = *puDst;
2049 uint64_t uResult = uDst + 1;
2050 *puDst = uResult;
2051 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 0 /*INC*/);
2052}
2053
2054# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2055
2056IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2057{
2058 uint32_t uDst = *puDst;
2059 uint32_t uResult = uDst + 1;
2060 *puDst = uResult;
2061 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 0 /*INC*/);
2062}
2063
2064
2065IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2066{
2067 uint16_t uDst = *puDst;
2068 uint16_t uResult = uDst + 1;
2069 *puDst = uResult;
2070 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 0 /*INC*/);
2071}
2072
2073IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2074{
2075 uint8_t uDst = *puDst;
2076 uint8_t uResult = uDst + 1;
2077 *puDst = uResult;
2078 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 0 /*INC*/);
2079}
2080
2081# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2082
2083
2084/*
2085 * DEC
2086 */
2087
/** 64-bit DEC: subtracts one and updates status bits via the shared INC/DEC helper. */
IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
{
    uint64_t uDst = *puDst;
    uint64_t uResult = uDst - 1;
    *puDst = uResult;
    /* Method 1 selects the DEC-style OF calculation (comment previously said INC). */
    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 1 /*DEC*/);
}
2095
2096# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2097
/** 32-bit DEC: subtracts one and updates status bits via the shared INC/DEC helper. */
IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u32,(uint32_t *puDst, uint32_t *pfEFlags))
{
    uint32_t uDst = *puDst;
    uint32_t uResult = uDst - 1;
    *puDst = uResult;
    /* Method 1 selects the DEC-style OF calculation (comment previously said INC). */
    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 1 /*DEC*/);
}
2105
2106
/** 16-bit DEC: subtracts one and updates status bits via the shared INC/DEC helper. */
IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u16,(uint16_t *puDst, uint32_t *pfEFlags))
{
    uint16_t uDst = *puDst;
    uint16_t uResult = uDst - 1;
    *puDst = uResult;
    /* Method 1 selects the DEC-style OF calculation (comment previously said INC). */
    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 1 /*DEC*/);
}
2114
2115
/** 8-bit DEC: subtracts one and updates status bits via the shared INC/DEC helper. */
IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u8,(uint8_t *puDst, uint32_t *pfEFlags))
{
    uint8_t uDst = *puDst;
    uint8_t uResult = uDst - 1;
    *puDst = uResult;
    /* Method 1 selects the DEC-style OF calculation (comment previously said INC). */
    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 1 /*DEC*/);
}
2123
2124# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2125
2126
2127/*
2128 * NOT
2129 */
2130
2131IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2132{
2133 uint64_t uDst = *puDst;
2134 uint64_t uResult = ~uDst;
2135 *puDst = uResult;
2136 /* EFLAGS are not modified. */
2137 RT_NOREF_PV(pfEFlags);
2138}
2139
2140# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2141
2142IEM_DECL_IMPL_DEF(void, iemAImpl_not_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2143{
2144 uint32_t uDst = *puDst;
2145 uint32_t uResult = ~uDst;
2146 *puDst = uResult;
2147 /* EFLAGS are not modified. */
2148 RT_NOREF_PV(pfEFlags);
2149}
2150
2151IEM_DECL_IMPL_DEF(void, iemAImpl_not_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2152{
2153 uint16_t uDst = *puDst;
2154 uint16_t uResult = ~uDst;
2155 *puDst = uResult;
2156 /* EFLAGS are not modified. */
2157 RT_NOREF_PV(pfEFlags);
2158}
2159
2160IEM_DECL_IMPL_DEF(void, iemAImpl_not_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2161{
2162 uint8_t uDst = *puDst;
2163 uint8_t uResult = ~uDst;
2164 *puDst = uResult;
2165 /* EFLAGS are not modified. */
2166 RT_NOREF_PV(pfEFlags);
2167}
2168
2169# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2170
2171
2172/*
2173 * NEG
2174 */
2175
2176/**
2177 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for an NEG instruction.
2178 *
2179 * @returns Status bits.
2180 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2181 * @param a_uResult Unsigned result value.
2182 * @param a_uDst The original destination value (for AF calc).
2183 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2184 */
2185#define IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth) \
2186 do { \
2187 uint32_t fEflTmp = *(a_pfEFlags); \
2188 fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; \
2189 fEflTmp |= ((a_uDst) != 0) << X86_EFL_CF_BIT; \
2190 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2191 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2192 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2193 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2194 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_uDst) & (a_uResult)); \
2195 *(a_pfEFlags) = fEflTmp; \
2196 } while (0)
2197
2198IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2199{
2200 uint64_t uDst = *puDst;
2201 uint64_t uResult = (uint64_t)0 - uDst;
2202 *puDst = uResult;
2203 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 64);
2204}
2205
2206# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2207
2208IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2209{
2210 uint32_t uDst = *puDst;
2211 uint32_t uResult = (uint32_t)0 - uDst;
2212 *puDst = uResult;
2213 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 32);
2214}
2215
2216
2217IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2218{
2219 uint16_t uDst = *puDst;
2220 uint16_t uResult = (uint16_t)0 - uDst;
2221 *puDst = uResult;
2222 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 16);
2223}
2224
2225
2226IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2227{
2228 uint8_t uDst = *puDst;
2229 uint8_t uResult = (uint8_t)0 - uDst;
2230 *puDst = uResult;
2231 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 8);
2232}
2233
2234# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2235
2236/*
2237 * Locked variants.
2238 */
2239
/**
 * Emit a function for doing a locked unary operand operation.
 *
 * Implements the LOCK-prefixed variant as a compare-exchange retry loop: the
 * plain (unlocked) implementation is run on a snapshot of the operand and a
 * scratch EFLAGS copy, then the result is committed with a CAS.  On contention
 * the loop re-reads the operand (via the CAS out-parameter) and recomputes the
 * result and flags, so the published EFLAGS always match the value that won.
 */
# define EMIT_LOCKED_UNARY_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp = uOld; \
            fEflTmp = *pfEFlags; /* fresh copy each round; the unlocked worker may depend on input flags */ \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    }

EMIT_LOCKED_UNARY_OP(inc, 64);
EMIT_LOCKED_UNARY_OP(dec, 64);
EMIT_LOCKED_UNARY_OP(not, 64);
EMIT_LOCKED_UNARY_OP(neg, 64);
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_UNARY_OP(inc, 32);
EMIT_LOCKED_UNARY_OP(dec, 32);
EMIT_LOCKED_UNARY_OP(not, 32);
EMIT_LOCKED_UNARY_OP(neg, 32);

EMIT_LOCKED_UNARY_OP(inc, 16);
EMIT_LOCKED_UNARY_OP(dec, 16);
EMIT_LOCKED_UNARY_OP(not, 16);
EMIT_LOCKED_UNARY_OP(neg, 16);

EMIT_LOCKED_UNARY_OP(inc, 8);
EMIT_LOCKED_UNARY_OP(dec, 8);
EMIT_LOCKED_UNARY_OP(not, 8);
EMIT_LOCKED_UNARY_OP(neg, 8);
2276# endif
2277
2278
2279/*********************************************************************************************************************************
2280* Shifting and Rotating *
2281*********************************************************************************************************************************/
2282
2283/*
2284 * ROL
2285 */
2286
2287/**
2288 * Updates the status bits (OF and CF) for an ROL instruction.
2289 *
2290 * @returns Status bits.
2291 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2292 * @param a_uResult Unsigned result value.
2293 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2294 */
2295#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(a_pfEFlags, a_uResult, a_cBitsWidth) do { \
2296 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2297 it the same way as for 1 bit shifts. */ \
2298 AssertCompile(X86_EFL_CF_BIT == 0); \
2299 uint32_t fEflTmp = *(a_pfEFlags); \
2300 fEflTmp &= ~(X86_EFL_CF | X86_EFL_OF); \
2301 uint32_t const fCarry = ((a_uResult) & X86_EFL_CF); \
2302 fEflTmp |= fCarry; \
2303 fEflTmp |= (((a_uResult) >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2304 *(a_pfEFlags) = fEflTmp; \
2305 } while (0)
2306
2307IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2308{
2309 cShift &= 63;
2310 if (cShift)
2311 {
2312 uint64_t uResult = ASMRotateLeftU64(*puDst, cShift);
2313 *puDst = uResult;
2314 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 64);
2315 }
2316}
2317
2318# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2319
2320IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u32,(uint32_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2321{
2322 cShift &= 31;
2323 if (cShift)
2324 {
2325 uint32_t uResult = ASMRotateLeftU32(*puDst, cShift);
2326 *puDst = uResult;
2327 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 32);
2328 }
2329}
2330
2331
2332IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u16,(uint16_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2333{
2334 cShift &= 15;
2335 if (cShift)
2336 {
2337 uint16_t uDst = *puDst;
2338 uint16_t uResult = (uDst << cShift) | (uDst >> (16 - cShift));
2339 *puDst = uResult;
2340 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 16);
2341 }
2342}
2343
2344
2345IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u8,(uint8_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2346{
2347 cShift &= 7;
2348 if (cShift)
2349 {
2350 uint8_t uDst = *puDst;
2351 uint8_t uResult = (uDst << cShift) | (uDst >> (8 - cShift));
2352 *puDst = uResult;
2353 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 8);
2354 }
2355}
2356
2357# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2358
2359
2360/*
2361 * ROR
2362 */
2363
2364/**
2365 * Updates the status bits (OF and CF) for an ROL instruction.
2366 *
2367 * @returns Status bits.
2368 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2369 * @param a_uResult Unsigned result value.
2370 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2371 */
2372#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(a_pfEFlags, a_uResult, a_cBitsWidth) do { \
2373 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2374 it the same way as for 1 bit shifts. */ \
2375 AssertCompile(X86_EFL_CF_BIT == 0); \
2376 uint32_t fEflTmp = *(a_pfEFlags); \
2377 fEflTmp &= ~(X86_EFL_CF | X86_EFL_OF); \
2378 uint32_t const fCarry = ((a_uResult) >> ((a_cBitsWidth) - 1)) & X86_EFL_CF; \
2379 fEflTmp |= fCarry; \
2380 fEflTmp |= (((a_uResult) >> ((a_cBitsWidth) - 2)) ^ fCarry) << X86_EFL_OF_BIT; \
2381 *(a_pfEFlags) = fEflTmp; \
2382 } while (0)
2383
2384IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2385{
2386 cShift &= 63;
2387 if (cShift)
2388 {
2389 uint64_t const uResult = ASMRotateRightU64(*puDst, cShift);
2390 *puDst = uResult;
2391 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 64);
2392 }
2393}
2394
2395# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2396
2397IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u32,(uint32_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2398{
2399 cShift &= 31;
2400 if (cShift)
2401 {
2402 uint64_t const uResult = ASMRotateRightU32(*puDst, cShift);
2403 *puDst = uResult;
2404 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 32);
2405 }
2406}
2407
2408
2409IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u16,(uint16_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2410{
2411 cShift &= 15;
2412 if (cShift)
2413 {
2414 uint16_t uDst = *puDst;
2415 uint16_t uResult;
2416 uResult = uDst >> cShift;
2417 uResult |= uDst << (16 - cShift);
2418 *puDst = uResult;
2419 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 16);
2420 }
2421}
2422
2423
2424IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u8,(uint8_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2425{
2426 cShift &= 7;
2427 if (cShift)
2428 {
2429 uint8_t uDst = *puDst;
2430 uint8_t uResult;
2431 uResult = uDst >> cShift;
2432 uResult |= uDst << (8 - cShift);
2433 *puDst = uResult;
2434 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 8);
2435 }
2436}
2437
2438# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2439
2440
2441/*
2442 * RCL
2443 */
/**
 * Emit a rotate-through-carry-left implementation for the given width.
 *
 * The result is the destination shifted left with the old CF inserted at the
 * bit the rotation vacated; CF receives the last bit rotated out.  OF is
 * architecturally undefined for counts > 1 and is computed as for a 1-bit
 * rotate.  A masked count of zero leaves destination and flags untouched.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask the raw
 * count by 31 and rotate the 8/16-bit forms modulo 9/17 -- confirm this
 * simplification is intentional for the C fallback.
 */
#define EMIT_RCL(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_rcl_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = uDst << cShift; \
        if (cShift > 1) /* guard: a (width + 1 - 1)-bit shift would be undefined */ \
            uResult |= uDst >> (a_cBitsWidth + 1 - cShift); \
        \
        uint32_t fEfl = *pfEFlags; \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uResult |= (uint ## a_cBitsWidth ## _t)(fEfl & X86_EFL_CF) << (cShift - 1); \
        \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. */ \
        fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
        uint32_t const fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
        fEfl |= fCarry; \
        fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_RCL(64);
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_RCL(32);
EMIT_RCL(16);
EMIT_RCL(8);
# endif
2476
2477
2478/*
2479 * RCR
2480 */
/**
 * Emit a rotate-through-carry-right implementation for the given width.
 *
 * The result is the destination shifted right with the old CF inserted at the
 * top; CF receives the last bit rotated out (bit cShift-1 of the original).
 * OF is architecturally undefined for counts > 1 and is computed as for a
 * 1-bit rotate.  A masked count of zero leaves destination and flags alone.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask the raw
 * count by 31 and rotate the 8/16-bit forms modulo 9/17 -- confirm this
 * simplification is intentional for the C fallback.
 */
#define EMIT_RCR(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_rcr_u ## a_cBitsWidth,(uint ## a_cBitsWidth ##_t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
        if (cShift > 1) /* guard: a (width + 1 - 1)-bit shift would be undefined */ \
            uResult |= uDst << (a_cBitsWidth + 1 - cShift); \
        \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags; \
        uResult |= (uint ## a_cBitsWidth ## _t)(fEfl & X86_EFL_CF) << (a_cBitsWidth - cShift); \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. */ \
        fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
        uint32_t const fCarry = (uDst >> (cShift - 1)) & X86_EFL_CF; \
        fEfl |= fCarry; \
        fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_RCR(64);
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_RCR(32);
EMIT_RCR(16);
EMIT_RCR(8);
# endif
2512
2513
2514/*
2515 * SHL
2516 */
/**
 * Emit a shift-left implementation for the given width.
 *
 * CF receives the last bit shifted out; SF/ZF/PF are computed from the
 * result.  OF is architecturally undefined for counts > 1 and is computed
 * as for a 1-bit shift; AF is undefined and always cleared here.  A masked
 * count of zero leaves destination and flags untouched.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask 8/16-bit
 * shift counts by 31 (so e.g. 'shl al, 8' zeroes AL) -- confirm this
 * divergence is acceptable for the C fallback.
 */
#define EMIT_SHL(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_shl_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ##_t const uDst = *puDst; \
        uint ## a_cBitsWidth ##_t uResult = uDst << cShift; \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. The AF bit is undefined, we \
           always set it to zero atm. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        uint32_t fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
        fEfl |= fCarry; \
        fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_SHL(64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHL(32)
EMIT_SHL(16)
EMIT_SHL(8)
# endif
2547
2548
2549/*
2550 * SHR
2551 */
/**
 * Emit a logical shift-right implementation for the given width.
 *
 * CF receives the last bit shifted out; SF/ZF/PF are computed from the
 * result.  OF is architecturally undefined for counts > 1 and is taken from
 * the original sign bit (the 1-bit definition); AF is undefined and always
 * cleared here.  A masked count of zero leaves destination and flags alone.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask 8/16-bit
 * shift counts by 31 -- confirm this divergence is acceptable.
 */
#define EMIT_SHR(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_shr_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. The AF bit is undefined, we \
           always set it to zero atm. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
        fEfl |= (uDst >> (a_cBitsWidth - 1)) << X86_EFL_OF_BIT; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_SHR(64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHR(32)
EMIT_SHR(16)
EMIT_SHR(8)
# endif
2581
2582
2583/*
2584 * SAR
2585 */
/**
 * Emit an arithmetic shift-right implementation for the given width.
 *
 * The shift is performed on the value cast to the signed type, so the sign
 * bit is replicated (note: right-shift of negative values is
 * implementation-defined in C; all supported compilers do an arithmetic
 * shift).  CF receives the last bit shifted out; SF/ZF/PF come from the
 * result.  OF is always cleared (the architectural 1-bit value); AF is
 * undefined and always cleared here.  A masked count of zero is a no-op.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask 8/16-bit
 * shift counts by 31 -- confirm this divergence is acceptable.
 */
#define EMIT_SAR(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_sar_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = (int ## a_cBitsWidth ## _t)uDst >> cShift; \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts (0). The AF bit is undefined, \
           we always set it to zero atm. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_SAR(64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SAR(32)
EMIT_SAR(16)
EMIT_SAR(8)
# endif
2614
2615
2616/*
2617 * SHLD
2618 */
/**
 * Emit a double-precision shift-left (SHLD) implementation for the given width.
 *
 * The destination is shifted left with bits from uSrc shifted in from the
 * right.  CF receives the last bit shifted out of the destination; OF is
 * undefined for counts > 1 and is computed as the change of the sign bit
 * (1-bit definition); AF is undefined and always cleared here.  A masked
 * count of zero leaves destination and flags untouched.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask 16-bit
 * SHLD counts by 31, making counts 16..31 yield undefined-but-different
 * results -- confirm this simplification is intentional.
 */
#define EMIT_SHLD(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_shld_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, \
                                                         uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = uDst << cShift; \
        uResult |= uSrc >> (a_cBitsWidth - cShift); \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. The AF bit is undefined, \
           we always set it to zero atm. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
        fEfl |= (uint32_t)((uDst >> (a_cBitsWidth - 1)) ^ (uint32_t)(uResult >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_SHLD(64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHLD(32)
EMIT_SHLD(16)
EMIT_SHLD(8)
# endif
2650
2651
2652/*
2653 * SHRD
2654 */
/**
 * Emit a double-precision shift-right (SHRD) implementation for the given width.
 *
 * The destination is shifted right with bits from uSrc shifted in from the
 * left.  CF receives the last bit shifted out of the destination; OF is
 * undefined for counts > 1 and is computed as the change of the sign bit
 * (1-bit definition); AF is undefined and always cleared here.  A masked
 * count of zero leaves destination and flags untouched.
 *
 * NOTE(review): the count is masked by (width - 1); real CPUs mask 16-bit
 * SHRD counts by 31 -- confirm this simplification is intentional.
 */
#define EMIT_SHRD(a_cBitsWidth) \
IEM_DECL_IMPL_DEF(void, iemAImpl_shrd_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, \
                                                         uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        uint ## a_cBitsWidth ## _t const uDst = *puDst; \
        uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
        uResult |= uSrc << (a_cBitsWidth - cShift); \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. The AF bit is undefined, \
           we always set it to zero atm. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
        fEfl |= (uint32_t)((uDst >> (a_cBitsWidth - 1)) ^ (uint32_t)(uResult >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}
EMIT_SHRD(64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHRD(32)
EMIT_SHRD(16)
EMIT_SHRD(8)
# endif
2686
2687
2688# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2689/*
2690 * BSWAP
2691 */
2692
2693IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u64,(uint64_t *puDst))
2694{
2695 *puDst = ASMByteSwapU64(*puDst);
2696}
2697
2698
2699IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u32,(uint32_t *puDst))
2700{
2701 *puDst = ASMByteSwapU32(*puDst);
2702}
2703
2704
/* Note! Undocumented instruction form, hence the 32-bit argument: only the
   low word is byte-swapped while the high word of the register is preserved. */
IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u16,(uint32_t *puDst))
{
    *puDst = ASMByteSwapU16((uint16_t)*puDst) | (*puDst & UINT32_C(0xffff0000));
}
2710
2711# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2712
2713#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
2714
2715
2716IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
2717{
2718 if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
2719 {
2720 *pu16Dst &= X86_SEL_MASK_OFF_RPL;
2721 *pu16Dst |= u16Src & X86_SEL_RPL;
2722
2723 *pfEFlags |= X86_EFL_ZF;
2724 }
2725 else
2726 *pfEFlags &= ~X86_EFL_ZF;
2727}
2728
2729
2730
2731IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
2732 PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
2733{
2734 RTUINT128U u128Tmp = *pu128Dst;
2735 if ( u128Tmp.s.Lo == pu128RaxRdx->s.Lo
2736 && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
2737 {
2738 *pu128Dst = *pu128RbxRcx;
2739 *pEFlags |= X86_EFL_ZF;
2740 }
2741 else
2742 {
2743 *pu128RaxRdx = u128Tmp;
2744 *pEFlags &= ~X86_EFL_ZF;
2745 }
2746}
2747
2748
2749IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
2750{
2751 RT_NOREF(pFpuState);
2752 puDst->au32[0] = puSrc->au32[0];
2753 puDst->au32[1] = puSrc->au32[0];
2754 puDst->au32[2] = puSrc->au32[2];
2755 puDst->au32[3] = puSrc->au32[2];
2756}
2757
2758#ifdef IEM_WITH_VEX
2759
2760IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
2761{
2762 pXState->x87.aXMM[iYRegDst].au32[0] = pXState->x87.aXMM[iYRegSrc].au32[0];
2763 pXState->x87.aXMM[iYRegDst].au32[1] = pXState->x87.aXMM[iYRegSrc].au32[0];
2764 pXState->x87.aXMM[iYRegDst].au32[2] = pXState->x87.aXMM[iYRegSrc].au32[2];
2765 pXState->x87.aXMM[iYRegDst].au32[3] = pXState->x87.aXMM[iYRegSrc].au32[2];
2766 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
2767 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
2768 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
2769 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
2770}
2771
2772
2773IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
2774{
2775 pXState->x87.aXMM[iYRegDst].au32[0] = pSrc->au32[0];
2776 pXState->x87.aXMM[iYRegDst].au32[1] = pSrc->au32[0];
2777 pXState->x87.aXMM[iYRegDst].au32[2] = pSrc->au32[2];
2778 pXState->x87.aXMM[iYRegDst].au32[3] = pSrc->au32[2];
2779 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
2780 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
2781 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
2782 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
2783}
2784
2785#endif /* IEM_WITH_VEX */
2786
2787
2788IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
2789{
2790 RT_NOREF(pFpuState);
2791 puDst->au32[0] = puSrc->au32[1];
2792 puDst->au32[1] = puSrc->au32[1];
2793 puDst->au32[2] = puSrc->au32[3];
2794 puDst->au32[3] = puSrc->au32[3];
2795}
2796
2797
2798IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
2799{
2800 RT_NOREF(pFpuState);
2801 puDst->au64[0] = uSrc;
2802 puDst->au64[1] = uSrc;
2803}
2804
2805#ifdef IEM_WITH_VEX
2806
2807IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
2808{
2809 pXState->x87.aXMM[iYRegDst].au64[0] = pXState->x87.aXMM[iYRegSrc].au64[0];
2810 pXState->x87.aXMM[iYRegDst].au64[1] = pXState->x87.aXMM[iYRegSrc].au64[0];
2811 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
2812 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
2813}
2814
2815IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
2816{
2817 pXState->x87.aXMM[iYRegDst].au64[0] = pSrc->au64[0];
2818 pXState->x87.aXMM[iYRegDst].au64[1] = pSrc->au64[0];
2819 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
2820 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
2821}
2822
2823#endif /* IEM_WITH_VEX */
2824
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette