VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp @ 93851

Last change on this file since 93851 was 93851, checked in by vboxsync, 3 years ago:

VMM/IEM: Reduced the code duplication for the DIV and IDIV C workers. [build fixes] bugref:9898

/* $Id: IEMAllAImplC.cpp 93851 2022-02-19 13:15:33Z vboxsync $ */
/** @file
 * IEM - Instruction Implementation in Assembly, portable C variant.
 */

/*
 * Copyright (C) 2011-2022 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include "IEMInternal.h"
#include <VBox/vmm/vmcc.h>
#include <VBox/err.h>
#include <iprt/x86.h>
#include <iprt/uint128.h>


/*********************************************************************************************************************************
*   Defined Constants And Macros                                                                                                 *
*********************************************************************************************************************************/
/** @def IEM_WITHOUT_ASSEMBLY
 * Enables all the code in this file.
 */
#if !defined(IEM_WITHOUT_ASSEMBLY)
# if defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
#  define IEM_WITHOUT_ASSEMBLY
# endif
#endif

/**
 * Calculates the signed flag value given a result and its bit width.
 *
 * The signed flag (SF) is a duplication of the most significant bit in the
 * result.
 *
 * @returns X86_EFL_SF or 0.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )
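/** @remarks Illustration (not from the original source): X86_EFL_SF_BIT is 7,
 * so for an 8-bit result the shift count is 8 - 7 - 1 = 0 and bit 7 is masked
 * in place, while for 64 bits the shift is 56, moving bit 63 down to the SF
 * position.
 * @code
 *     X86_EFL_CALC_SF(0x80, 8);                           // -> X86_EFL_SF
 *     X86_EFL_CALC_SF(UINT64_C(0x8000000000000000), 64);  // -> X86_EFL_SF
 * @endcode
 */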

/**
 * Calculates the zero flag value given a result.
 *
 * The zero flag (ZF) indicates whether the result is zero or not.
 *
 * @returns X86_EFL_ZF or 0.
 * @param   a_uResult       Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )

/**
 * Extracts the OF flag from an OF calculation result.
 *
 * These are typically used by concatenating with a bit count. The problem is
 * that 8-bit values need shifting in the other direction than the others.
 */
#define X86_EFL_GET_OF_8(a_uValue)  ((uint32_t)((a_uValue) << (X86_EFL_OF_BIT - 8 + 1))  & X86_EFL_OF)
#define X86_EFL_GET_OF_16(a_uValue) ((uint32_t)((a_uValue) >> (16 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_32(a_uValue) ((uint32_t)((a_uValue) >> (32 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_64(a_uValue) ((uint32_t)((a_uValue) >> (64 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
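/** @remarks Illustration (not from the original source): X86_EFL_OF_BIT is 11,
 * and the OF indicator sits in the input's most significant bit, so it must be
 * moved to bit 11: 8-bit values shift left by 11 - 7 = 4, wider ones shift
 * right (15 - 11 = 4 for 16 bits, 63 - 11 = 52 for 64 bits).
 * @code
 *     X86_EFL_GET_OF_8(0x80);     // 0x80   << 4 == 0x800 -> X86_EFL_OF
 *     X86_EFL_GET_OF_16(0x8000);  // 0x8000 >> 4 == 0x800 -> X86_EFL_OF
 * @endcode
 */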

/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after arithmetic op.
 *
 * @returns Status bits.
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_uDst          The original destination value (for AF calc).
 * @param   a_uSrc          The source value (for AF calc).
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_CfExpr        Bool expression for the carry flag (CF).
 * @param   a_OfMethod      0 for ADD-style, 1 for SUB-style.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_OfMethod) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth( ((a_uDst) ^ (a_uSrc) ^ (a_OfMethod == 0 ? RT_BIT_64(a_cBitsWidth - 1) : 0)) \
                                                    & ((a_uResult) ^ (a_uDst)) ); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
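/** @remarks Worked example (not from the original source), 8-bit ADD 0x7f + 0x01:
 * the result is 0x80, so CF stays clear (no unsigned wrap-around), while the OF
 * input ((0x7f ^ 0x01 ^ 0x80) & (0x80 ^ 0x7f)) has bit 7 set, i.e. signed
 * overflow: two positive operands produced a negative result.
 * @code
 *     uint8_t  uDst    = 0x7f;
 *     uint8_t  uResult = uDst + 0x01;             // 0x80
 *     uint32_t fEfl    = 0;
 *     IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(&fEfl, uResult, uDst, 0x01, 8,
 *                                               uResult < uDst, 0);
 *     // fEfl now has OF, SF and AF set; CF, ZF and PF are clear.
 * @endcode
 */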

/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
 *
 * CF and OF are defined to be 0 by logical operations. AF on the other hand is
 * undefined. We do not set AF, as that seems to make the most sense (which
 * probably makes it the most wrong in real life).
 *
 * @returns Status bits.
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_fExtra        Additional bits to set.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= (a_fExtra); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
/**
 * Parity calculation table.
 *
 * The generator code:
 * @code
 * #include <stdio.h>
 *
 * int main()
 * {
 *     unsigned b;
 *     for (b = 0; b < 256; b++)
 *     {
 *         int cOnes = ( b       & 1)
 *                   + ((b >> 1) & 1)
 *                   + ((b >> 2) & 1)
 *                   + ((b >> 3) & 1)
 *                   + ((b >> 4) & 1)
 *                   + ((b >> 5) & 1)
 *                   + ((b >> 6) & 1)
 *                   + ((b >> 7) & 1);
 *         printf("    /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
 *                b,
 *                (b >> 7) & 1,
 *                (b >> 6) & 1,
 *                (b >> 5) & 1,
 *                (b >> 4) & 1,
 *                (b >> 3) & 1,
 *                (b >> 2) & 1,
 *                (b >> 1) & 1,
 *                b & 1,
 *                cOnes & 1 ? "0" : "X86_EFL_PF");
 *     }
 *     return 0;
 * }
 * @endcode
 */
static uint8_t const g_afParity[256] =
{
    /* 0x00 = 00000000b */ X86_EFL_PF,
    /* 0x01 = 00000001b */ 0,
    /* 0x02 = 00000010b */ 0,
    /* 0x03 = 00000011b */ X86_EFL_PF,
    /* 0x04 = 00000100b */ 0,
    /* 0x05 = 00000101b */ X86_EFL_PF,
    /* 0x06 = 00000110b */ X86_EFL_PF,
    /* 0x07 = 00000111b */ 0,
    /* 0x08 = 00001000b */ 0,
    /* 0x09 = 00001001b */ X86_EFL_PF,
    /* 0x0a = 00001010b */ X86_EFL_PF,
    /* 0x0b = 00001011b */ 0,
    /* 0x0c = 00001100b */ X86_EFL_PF,
    /* 0x0d = 00001101b */ 0,
    /* 0x0e = 00001110b */ 0,
    /* 0x0f = 00001111b */ X86_EFL_PF,
    /* 0x10 = 00010000b */ 0,
    /* 0x11 = 00010001b */ X86_EFL_PF,
    /* 0x12 = 00010010b */ X86_EFL_PF,
    /* 0x13 = 00010011b */ 0,
    /* 0x14 = 00010100b */ X86_EFL_PF,
    /* 0x15 = 00010101b */ 0,
    /* 0x16 = 00010110b */ 0,
    /* 0x17 = 00010111b */ X86_EFL_PF,
    /* 0x18 = 00011000b */ X86_EFL_PF,
    /* 0x19 = 00011001b */ 0,
    /* 0x1a = 00011010b */ 0,
    /* 0x1b = 00011011b */ X86_EFL_PF,
    /* 0x1c = 00011100b */ 0,
    /* 0x1d = 00011101b */ X86_EFL_PF,
    /* 0x1e = 00011110b */ X86_EFL_PF,
    /* 0x1f = 00011111b */ 0,
    /* 0x20 = 00100000b */ 0,
    /* 0x21 = 00100001b */ X86_EFL_PF,
    /* 0x22 = 00100010b */ X86_EFL_PF,
    /* 0x23 = 00100011b */ 0,
    /* 0x24 = 00100100b */ X86_EFL_PF,
    /* 0x25 = 00100101b */ 0,
    /* 0x26 = 00100110b */ 0,
    /* 0x27 = 00100111b */ X86_EFL_PF,
    /* 0x28 = 00101000b */ X86_EFL_PF,
    /* 0x29 = 00101001b */ 0,
    /* 0x2a = 00101010b */ 0,
    /* 0x2b = 00101011b */ X86_EFL_PF,
    /* 0x2c = 00101100b */ 0,
    /* 0x2d = 00101101b */ X86_EFL_PF,
    /* 0x2e = 00101110b */ X86_EFL_PF,
    /* 0x2f = 00101111b */ 0,
    /* 0x30 = 00110000b */ X86_EFL_PF,
    /* 0x31 = 00110001b */ 0,
    /* 0x32 = 00110010b */ 0,
    /* 0x33 = 00110011b */ X86_EFL_PF,
    /* 0x34 = 00110100b */ 0,
    /* 0x35 = 00110101b */ X86_EFL_PF,
    /* 0x36 = 00110110b */ X86_EFL_PF,
    /* 0x37 = 00110111b */ 0,
    /* 0x38 = 00111000b */ 0,
    /* 0x39 = 00111001b */ X86_EFL_PF,
    /* 0x3a = 00111010b */ X86_EFL_PF,
    /* 0x3b = 00111011b */ 0,
    /* 0x3c = 00111100b */ X86_EFL_PF,
    /* 0x3d = 00111101b */ 0,
    /* 0x3e = 00111110b */ 0,
    /* 0x3f = 00111111b */ X86_EFL_PF,
    /* 0x40 = 01000000b */ 0,
    /* 0x41 = 01000001b */ X86_EFL_PF,
    /* 0x42 = 01000010b */ X86_EFL_PF,
    /* 0x43 = 01000011b */ 0,
    /* 0x44 = 01000100b */ X86_EFL_PF,
    /* 0x45 = 01000101b */ 0,
    /* 0x46 = 01000110b */ 0,
    /* 0x47 = 01000111b */ X86_EFL_PF,
    /* 0x48 = 01001000b */ X86_EFL_PF,
    /* 0x49 = 01001001b */ 0,
    /* 0x4a = 01001010b */ 0,
    /* 0x4b = 01001011b */ X86_EFL_PF,
    /* 0x4c = 01001100b */ 0,
    /* 0x4d = 01001101b */ X86_EFL_PF,
    /* 0x4e = 01001110b */ X86_EFL_PF,
    /* 0x4f = 01001111b */ 0,
    /* 0x50 = 01010000b */ X86_EFL_PF,
    /* 0x51 = 01010001b */ 0,
    /* 0x52 = 01010010b */ 0,
    /* 0x53 = 01010011b */ X86_EFL_PF,
    /* 0x54 = 01010100b */ 0,
    /* 0x55 = 01010101b */ X86_EFL_PF,
    /* 0x56 = 01010110b */ X86_EFL_PF,
    /* 0x57 = 01010111b */ 0,
    /* 0x58 = 01011000b */ 0,
    /* 0x59 = 01011001b */ X86_EFL_PF,
    /* 0x5a = 01011010b */ X86_EFL_PF,
    /* 0x5b = 01011011b */ 0,
    /* 0x5c = 01011100b */ X86_EFL_PF,
    /* 0x5d = 01011101b */ 0,
    /* 0x5e = 01011110b */ 0,
    /* 0x5f = 01011111b */ X86_EFL_PF,
    /* 0x60 = 01100000b */ X86_EFL_PF,
    /* 0x61 = 01100001b */ 0,
    /* 0x62 = 01100010b */ 0,
    /* 0x63 = 01100011b */ X86_EFL_PF,
    /* 0x64 = 01100100b */ 0,
    /* 0x65 = 01100101b */ X86_EFL_PF,
    /* 0x66 = 01100110b */ X86_EFL_PF,
    /* 0x67 = 01100111b */ 0,
    /* 0x68 = 01101000b */ 0,
    /* 0x69 = 01101001b */ X86_EFL_PF,
    /* 0x6a = 01101010b */ X86_EFL_PF,
    /* 0x6b = 01101011b */ 0,
    /* 0x6c = 01101100b */ X86_EFL_PF,
    /* 0x6d = 01101101b */ 0,
    /* 0x6e = 01101110b */ 0,
    /* 0x6f = 01101111b */ X86_EFL_PF,
    /* 0x70 = 01110000b */ 0,
    /* 0x71 = 01110001b */ X86_EFL_PF,
    /* 0x72 = 01110010b */ X86_EFL_PF,
    /* 0x73 = 01110011b */ 0,
    /* 0x74 = 01110100b */ X86_EFL_PF,
    /* 0x75 = 01110101b */ 0,
    /* 0x76 = 01110110b */ 0,
    /* 0x77 = 01110111b */ X86_EFL_PF,
    /* 0x78 = 01111000b */ X86_EFL_PF,
    /* 0x79 = 01111001b */ 0,
    /* 0x7a = 01111010b */ 0,
    /* 0x7b = 01111011b */ X86_EFL_PF,
    /* 0x7c = 01111100b */ 0,
    /* 0x7d = 01111101b */ X86_EFL_PF,
    /* 0x7e = 01111110b */ X86_EFL_PF,
    /* 0x7f = 01111111b */ 0,
    /* 0x80 = 10000000b */ 0,
    /* 0x81 = 10000001b */ X86_EFL_PF,
    /* 0x82 = 10000010b */ X86_EFL_PF,
    /* 0x83 = 10000011b */ 0,
    /* 0x84 = 10000100b */ X86_EFL_PF,
    /* 0x85 = 10000101b */ 0,
    /* 0x86 = 10000110b */ 0,
    /* 0x87 = 10000111b */ X86_EFL_PF,
    /* 0x88 = 10001000b */ X86_EFL_PF,
    /* 0x89 = 10001001b */ 0,
    /* 0x8a = 10001010b */ 0,
    /* 0x8b = 10001011b */ X86_EFL_PF,
    /* 0x8c = 10001100b */ 0,
    /* 0x8d = 10001101b */ X86_EFL_PF,
    /* 0x8e = 10001110b */ X86_EFL_PF,
    /* 0x8f = 10001111b */ 0,
    /* 0x90 = 10010000b */ X86_EFL_PF,
    /* 0x91 = 10010001b */ 0,
    /* 0x92 = 10010010b */ 0,
    /* 0x93 = 10010011b */ X86_EFL_PF,
    /* 0x94 = 10010100b */ 0,
    /* 0x95 = 10010101b */ X86_EFL_PF,
    /* 0x96 = 10010110b */ X86_EFL_PF,
    /* 0x97 = 10010111b */ 0,
    /* 0x98 = 10011000b */ 0,
    /* 0x99 = 10011001b */ X86_EFL_PF,
    /* 0x9a = 10011010b */ X86_EFL_PF,
    /* 0x9b = 10011011b */ 0,
    /* 0x9c = 10011100b */ X86_EFL_PF,
    /* 0x9d = 10011101b */ 0,
    /* 0x9e = 10011110b */ 0,
    /* 0x9f = 10011111b */ X86_EFL_PF,
    /* 0xa0 = 10100000b */ X86_EFL_PF,
    /* 0xa1 = 10100001b */ 0,
    /* 0xa2 = 10100010b */ 0,
    /* 0xa3 = 10100011b */ X86_EFL_PF,
    /* 0xa4 = 10100100b */ 0,
    /* 0xa5 = 10100101b */ X86_EFL_PF,
    /* 0xa6 = 10100110b */ X86_EFL_PF,
    /* 0xa7 = 10100111b */ 0,
    /* 0xa8 = 10101000b */ 0,
    /* 0xa9 = 10101001b */ X86_EFL_PF,
    /* 0xaa = 10101010b */ X86_EFL_PF,
    /* 0xab = 10101011b */ 0,
    /* 0xac = 10101100b */ X86_EFL_PF,
    /* 0xad = 10101101b */ 0,
    /* 0xae = 10101110b */ 0,
    /* 0xaf = 10101111b */ X86_EFL_PF,
    /* 0xb0 = 10110000b */ 0,
    /* 0xb1 = 10110001b */ X86_EFL_PF,
    /* 0xb2 = 10110010b */ X86_EFL_PF,
    /* 0xb3 = 10110011b */ 0,
    /* 0xb4 = 10110100b */ X86_EFL_PF,
    /* 0xb5 = 10110101b */ 0,
    /* 0xb6 = 10110110b */ 0,
    /* 0xb7 = 10110111b */ X86_EFL_PF,
    /* 0xb8 = 10111000b */ X86_EFL_PF,
    /* 0xb9 = 10111001b */ 0,
    /* 0xba = 10111010b */ 0,
    /* 0xbb = 10111011b */ X86_EFL_PF,
    /* 0xbc = 10111100b */ 0,
    /* 0xbd = 10111101b */ X86_EFL_PF,
    /* 0xbe = 10111110b */ X86_EFL_PF,
    /* 0xbf = 10111111b */ 0,
    /* 0xc0 = 11000000b */ X86_EFL_PF,
    /* 0xc1 = 11000001b */ 0,
    /* 0xc2 = 11000010b */ 0,
    /* 0xc3 = 11000011b */ X86_EFL_PF,
    /* 0xc4 = 11000100b */ 0,
    /* 0xc5 = 11000101b */ X86_EFL_PF,
    /* 0xc6 = 11000110b */ X86_EFL_PF,
    /* 0xc7 = 11000111b */ 0,
    /* 0xc8 = 11001000b */ 0,
    /* 0xc9 = 11001001b */ X86_EFL_PF,
    /* 0xca = 11001010b */ X86_EFL_PF,
    /* 0xcb = 11001011b */ 0,
    /* 0xcc = 11001100b */ X86_EFL_PF,
    /* 0xcd = 11001101b */ 0,
    /* 0xce = 11001110b */ 0,
    /* 0xcf = 11001111b */ X86_EFL_PF,
    /* 0xd0 = 11010000b */ 0,
    /* 0xd1 = 11010001b */ X86_EFL_PF,
    /* 0xd2 = 11010010b */ X86_EFL_PF,
    /* 0xd3 = 11010011b */ 0,
    /* 0xd4 = 11010100b */ X86_EFL_PF,
    /* 0xd5 = 11010101b */ 0,
    /* 0xd6 = 11010110b */ 0,
    /* 0xd7 = 11010111b */ X86_EFL_PF,
    /* 0xd8 = 11011000b */ X86_EFL_PF,
    /* 0xd9 = 11011001b */ 0,
    /* 0xda = 11011010b */ 0,
    /* 0xdb = 11011011b */ X86_EFL_PF,
    /* 0xdc = 11011100b */ 0,
    /* 0xdd = 11011101b */ X86_EFL_PF,
    /* 0xde = 11011110b */ X86_EFL_PF,
    /* 0xdf = 11011111b */ 0,
    /* 0xe0 = 11100000b */ 0,
    /* 0xe1 = 11100001b */ X86_EFL_PF,
    /* 0xe2 = 11100010b */ X86_EFL_PF,
    /* 0xe3 = 11100011b */ 0,
    /* 0xe4 = 11100100b */ X86_EFL_PF,
    /* 0xe5 = 11100101b */ 0,
    /* 0xe6 = 11100110b */ 0,
    /* 0xe7 = 11100111b */ X86_EFL_PF,
    /* 0xe8 = 11101000b */ X86_EFL_PF,
    /* 0xe9 = 11101001b */ 0,
    /* 0xea = 11101010b */ 0,
    /* 0xeb = 11101011b */ X86_EFL_PF,
    /* 0xec = 11101100b */ 0,
    /* 0xed = 11101101b */ X86_EFL_PF,
    /* 0xee = 11101110b */ X86_EFL_PF,
    /* 0xef = 11101111b */ 0,
    /* 0xf0 = 11110000b */ X86_EFL_PF,
    /* 0xf1 = 11110001b */ 0,
    /* 0xf2 = 11110010b */ 0,
    /* 0xf3 = 11110011b */ X86_EFL_PF,
    /* 0xf4 = 11110100b */ 0,
    /* 0xf5 = 11110101b */ X86_EFL_PF,
    /* 0xf6 = 11110110b */ X86_EFL_PF,
    /* 0xf7 = 11110111b */ 0,
    /* 0xf8 = 11111000b */ 0,
    /* 0xf9 = 11111001b */ X86_EFL_PF,
    /* 0xfa = 11111010b */ X86_EFL_PF,
    /* 0xfb = 11111011b */ 0,
    /* 0xfc = 11111100b */ X86_EFL_PF,
    /* 0xfd = 11111101b */ 0,
    /* 0xfe = 11111110b */ 0,
    /* 0xff = 11111111b */ X86_EFL_PF,
};
#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */



/*
 * There are a few 64-bit on 32-bit things we'd rather do in C. Actually, doing
 * it all in C is probably safer for now; we can optimize what's necessary
 * later, maybe.
 */
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)


/*********************************************************************************************************************************
*   Binary Operations                                                                                                            *
*********************************************************************************************************************************/

/*
 * ADD
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, 0);
}
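/** @remarks Usage sketch (not from the original source): an unsigned
 * wrap-around makes the result smaller than the destination, which is exactly
 * what the @c uResult < uDst CF expression tests.
 * @code
 *     uint64_t uDst = 1;
 *     uint32_t fEfl = 0;
 *     iemAImpl_add_u64(&uDst, UINT64_MAX, &fEfl);
 *     // uDst == 0; fEfl has CF, ZF, AF and PF set (SF and OF are clear).
 * @endcode
 */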

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * ADC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, 0);
    }
}
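/** @remarks Why the CF expression changes to @c uResult <= uDst (note not from
 * the original source): with the +1 carry-in, adding @c uSrc == UINT64_MAX
 * makes @c uResult equal to @c uDst even though a carry-out occurred, so
 * equality must also set CF.
 * @code
 *     uint64_t uDst = 5;
 *     uint32_t fEfl = X86_EFL_CF;
 *     iemAImpl_adc_u64(&uDst, UINT64_MAX, &fEfl);  // 5 + ~0 + 1 wraps to 5
 *     // uDst == 5 again and CF is set in fEfl.
 * @endcode
 */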

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, 0);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SUB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, 1);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, 1);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, 1);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, 1);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SBB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, 1);
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, 1);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, 1);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, 1);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * OR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * XOR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * AND
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * CMP
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDstTmp = *puDst;
    iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDstTmp = *puDst;
    iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDstTmp = *puDst;
    iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDstTmp = *puDst;
    iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * TEST
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * LOCK prefixed variants of the above
 */

/** Locked binary operand operation, generic over the operand width. */
# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    do { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp    = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, uSrc, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)
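/** @remarks The loop above is a standard optimistic compare-and-swap retry:
 * the new value and flags are computed on private copies, then published with
 * ASMAtomicCmpXchgEx*, which on failure re-reads the current value into uOld
 * so the next iteration redoes the operation against fresh data. Usage sketch
 * (not from the original source) for a worker emitted below by
 * EMIT_LOCKED_BIN_OP:
 * @code
 *     uint64_t uShared = 40;   // memory operand, possibly contended
 *     uint32_t fEfl    = 0;
 *     iemAImpl_add_u64_locked(&uShared, 2, &fEfl);  // atomic uShared += 2
 * @endcode
 */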


# define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint ## a_cBitsWidth ## _t uSrc, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
    }

EMIT_LOCKED_BIN_OP(add, 64)
EMIT_LOCKED_BIN_OP(adc, 64)
EMIT_LOCKED_BIN_OP(sub, 64)
EMIT_LOCKED_BIN_OP(sbb, 64)
EMIT_LOCKED_BIN_OP(or, 64)
EMIT_LOCKED_BIN_OP(xor, 64)
EMIT_LOCKED_BIN_OP(and, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(add, 32)
EMIT_LOCKED_BIN_OP(adc, 32)
EMIT_LOCKED_BIN_OP(sub, 32)
EMIT_LOCKED_BIN_OP(sbb, 32)
EMIT_LOCKED_BIN_OP(or, 32)
EMIT_LOCKED_BIN_OP(xor, 32)
EMIT_LOCKED_BIN_OP(and, 32)

EMIT_LOCKED_BIN_OP(add, 16)
EMIT_LOCKED_BIN_OP(adc, 16)
EMIT_LOCKED_BIN_OP(sub, 16)
EMIT_LOCKED_BIN_OP(sbb, 16)
EMIT_LOCKED_BIN_OP(or, 16)
EMIT_LOCKED_BIN_OP(xor, 16)
EMIT_LOCKED_BIN_OP(and, 16)

EMIT_LOCKED_BIN_OP(add, 8)
EMIT_LOCKED_BIN_OP(adc, 8)
EMIT_LOCKED_BIN_OP(sub, 8)
EMIT_LOCKED_BIN_OP(sbb, 8)
EMIT_LOCKED_BIN_OP(or, 8)
EMIT_LOCKED_BIN_OP(xor, 8)
EMIT_LOCKED_BIN_OP(and, 8)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * Bit operations (same signature as above).
 */

/*
 * BT
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t uDst = *puDst;
    if (uDst & RT_BIT_64(uSrc))
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    }
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    }
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    }
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTS
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst  = *puDst;
    if (uDst & fMask)
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


EMIT_LOCKED_BIN_OP(btc, 64)
EMIT_LOCKED_BIN_OP(btr, 64)
EMIT_LOCKED_BIN_OP(bts, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(btc, 32)
EMIT_LOCKED_BIN_OP(btr, 32)
EMIT_LOCKED_BIN_OP(bts, 32)

EMIT_LOCKED_BIN_OP(btc, 16)
EMIT_LOCKED_BIN_OP(btr, 16)
EMIT_LOCKED_BIN_OP(bts, 16)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * BSF - first (least significant) bit set
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitFirstSetU64(uSrc);
    if (iBit)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}
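/** @remarks ASMBitFirstSetU64 returns a 1-based bit index (0 when no bit is
 * set), hence the @c iBit - 1 adjustment to get the 0-based index BSF stores.
 * Illustration (not from the original source):
 * @code
 *     uint64_t uDst;
 *     uint32_t fEfl = 0;
 *     iemAImpl_bsf_u64(&uDst, 0x10, &fEfl);   // ASMBitFirstSetU64 -> 5
 *     // uDst == 4 and ZF is clear; with uSrc == 0, uDst is left
 *     // untouched and ZF is set instead.
 * @endcode
 */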

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitFirstSetU32(uSrc);
    if (iBit)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitFirstSetU16(uSrc);
    if (iBit)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BSR - last (most significant) bit set
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitLastSetU64(uSrc);
    if (uSrc)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitLastSetU32(uSrc);
    if (uSrc)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitLastSetU16(uSrc);
    if (uSrc)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XCHG
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64,(uint64_t *puMem, uint64_t *puReg))
{
    /* XCHG implies LOCK. */
    uint64_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32,(uint32_t *puMem, uint32_t *puReg))
{
    /* XCHG implies LOCK. */
    uint32_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU32(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16,(uint16_t *puMem, uint16_t *puReg))
{
    /* XCHG implies LOCK. */
    uint16_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU16(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8,(uint8_t *puMem, uint8_t *puReg))
{
    /* XCHG implies LOCK. */
    uint8_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU8(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XADD and LOCK XADD.
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst;
    iemAImpl_add_u64(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64_locked,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
{
    uint64_t uOld = ASMAtomicUoReadU64(puDst);
    uint64_t uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u64(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU64(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst;
    iemAImpl_add_u32(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32_locked,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
{
    uint32_t uOld = ASMAtomicUoReadU32(puDst);
    uint32_t uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u32(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU32(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst;
    iemAImpl_add_u16(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16_locked,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
{
    uint16_t uOld = ASMAtomicUoReadU16(puDst);
    uint16_t uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u16(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU16(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst;
    iemAImpl_add_u8(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8_locked,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
{
    uint8_t  uOld = ASMAtomicUoReadU8(puDst);
    uint8_t  uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u8(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU8(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
#endif

/*
 * CMPXCHG, CMPXCHG8B, CMPXCHG16B
 *
 * Note! We don't have non-locking/atomic cmpxchg primitives, so all cmpxchg
 *       instructions are emulated as locked.
 */
#if defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8_locked, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
{
    uint8_t const uOld = *puAl;
    if (ASMAtomicCmpXchgExU8(pu8Dst, uSrcReg, uOld, puAl))
    {
        Assert(*puAl == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}
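/** @remarks CMPXCHG semantics in brief: if the accumulator (AL here) equals
 * the destination, the destination is replaced by the source register and ZF
 * is set; otherwise ZF is cleared and the accumulator receives the old
 * destination value (which ASMAtomicCmpXchgExU8 writes back through puAl).
 * Illustration (not from the original source):
 * @code
 *     uint8_t  uMem = 7, uAl = 7;
 *     uint32_t fEfl = 0;
 *     iemAImpl_cmpxchg_u8_locked(&uMem, &uAl, 9, &fEfl);
 *     // uMem == 9 and ZF is set; had uAl differed from uMem, uMem would be
 *     // unchanged, uAl would now hold the old uMem and ZF would be clear.
 * @endcode
 */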


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16_locked,(uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
{
    uint16_t const uOld = *puAx;
    if (ASMAtomicCmpXchgExU16(pu16Dst, uSrcReg, uOld, puAx))
    {
        Assert(*puAx == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32_locked,(uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
{
    uint32_t const uOld = *puEax;
    if (ASMAtomicCmpXchgExU32(pu32Dst, uSrcReg, uOld, puEax))
    {
        Assert(*puEax == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}


# if ARCH_BITS == 32
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
# else
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
# endif
{
# if ARCH_BITS == 32
    uint64_t const uSrcReg = *puSrcReg;
# endif
    uint64_t const uOld = *puRax;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uSrcReg, uOld, puRax))
    {
        Assert(*puRax == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b_locked,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
                                                   uint32_t *pEFlags))
{
    uint64_t const uNew = pu64EbxEcx->u;
    uint64_t const uOld = pu64EaxEdx->u;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uNew, uOld, &pu64EaxEdx->u))
    {
        Assert(pu64EaxEdx->u == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}


# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
                                                    uint32_t *pEFlags))
{
#  ifdef VBOX_STRICT
    RTUINT128U const uOld = *pu128RaxRdx;
#  endif
#  if defined(RT_ARCH_AMD64)
    if (ASMAtomicCmpXchgU128v2(&pu128Dst->u, pu128RbxRcx->s.Hi, pu128RbxRcx->s.Lo, pu128RaxRdx->s.Hi, pu128RaxRdx->s.Lo,
                               &pu128RaxRdx->u))
#  else
    if (ASMAtomicCmpXchgU128(&pu128Dst->u, pu128RbxRcx->u, pu128RaxRdx->u, &pu128RaxRdx->u))
#  endif
    {
        Assert(pu128RaxRdx->s.Lo == uOld.s.Lo && pu128RaxRdx->s.Hi == uOld.s.Hi);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}
# endif

#endif /* defined(IEM_WITHOUT_ASSEMBLY) */

#if !defined(RT_ARCH_ARM64) /** @todo may need this for unaligned accesses... */
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
                                                      PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
{
    RTUINT128U u128Tmp = *pu128Dst;
    if (   u128Tmp.s.Lo == pu128RaxRdx->s.Lo
        && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
    {
        *pu128Dst = *pu128RbxRcx;
        *pEFlags |= X86_EFL_ZF;
    }
    else
    {
        *pu128RaxRdx = u128Tmp;
        *pEFlags &= ~X86_EFL_ZF;
    }
}
#endif /* !RT_ARCH_ARM64 */

#if defined(IEM_WITHOUT_ASSEMBLY)

/* Unlocked versions mapped to the locked ones: */

IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u8_locked(pu8Dst, puAl, uSrcReg, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16, (uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u16_locked(pu16Dst, puAx, uSrcReg, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32, (uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u32_locked(pu32Dst, puEax, uSrcReg, pEFlags);
}


# if ARCH_BITS == 32
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, puSrcReg, pEFlags);
}
# else
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, uSrcReg, pEFlags);
}
# endif


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg8b_locked(pu64Dst, pu64EaxEdx, pu64EbxEcx, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
                                             uint32_t *pEFlags))
{
    iemAImpl_cmpxchg16b_locked(pu128Dst, pu128RaxRdx, pu128RbxRcx, pEFlags);
}

#endif /* defined(IEM_WITHOUT_ASSEMBLY) */

#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)

/*
 * MUL
 */

IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
{
    RTUINT128U Result;
    RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
    *pu64RAX = Result.s.Lo;
    *pu64RDX = Result.s.Hi;

    /* MUL EFLAGS according to Skylake (similar to IMUL). */
    *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
    if (Result.s.Lo & RT_BIT_64(63))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    if (Result.s.Hi != 0)
        *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
    return 0;
}
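/** @remarks CF and OF signal that the product did not fit in the lower half,
 * i.e. that RDX received significant bits. Illustration (not from the
 * original source):
 * @code
 *     uint64_t uRax = UINT64_C(0x100000000);   // 2^32
 *     uint64_t uRdx;
 *     uint32_t fEfl = 0;
 *     iemAImpl_mul_u64(&uRax, &uRdx, UINT64_C(0x100000000), &fEfl);
 *     // 2^32 * 2^32 == 2^64: uRax == 0, uRdx == 1; CF and OF are set.
 * @endcode
 */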

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Factor, uint32_t *pfEFlags))
{
    RTUINT64U Result;
    Result.u = (uint64_t)*pu32RAX * u32Factor;
    *pu32RAX = Result.s.Lo;
    *pu32RDX = Result.s.Hi;

    /* MUL EFLAGS according to Skylake (similar to IMUL). */
    *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
    if (Result.s.Lo & RT_BIT_32(31))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    if (Result.s.Hi != 0)
        *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
    return 0;
}


IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Factor, uint32_t *pfEFlags))
{
    RTUINT32U Result;
    Result.u = (uint32_t)*pu16RAX * u16Factor;
    *pu16RAX = Result.s.Lo;
    *pu16RDX = Result.s.Hi;

    /* MUL EFLAGS according to Skylake (similar to IMUL). */
    *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
    if (Result.s.Lo & RT_BIT_32(15))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    if (Result.s.Hi != 0)
        *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
    return 0;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * IMUL
 */

IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
{
    RTUINT128U Result;
    *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
                    /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
                    /* Skylake may set: */       | X86_EFL_PF);

    if ((int64_t)*pu64RAX >= 0)
    {
        if ((int64_t)u64Factor >= 0)
        {
            RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
        else
        {
            RTUInt128MulU64ByU64(&Result, *pu64RAX, UINT64_C(0) - u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            RTUInt128AssignNeg(&Result);
        }
    }
    else
    {
        if ((int64_t)u64Factor >= 0)
        {
            RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
            RTUInt128AssignNeg(&Result);
        }
        else
        {
            RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, UINT64_C(0) - u64Factor);
            if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
                *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
        }
    }
    *pu64RAX = Result.s.Lo;
    if (Result.s.Lo & RT_BIT_64(63))
        *pfEFlags |= X86_EFL_SF;
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
    *pu64RDX = Result.s.Hi;

    return 0;
}
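/** @remarks The asymmetric overflow bounds (explanation not from the original
 * source) mirror two's complement: a non-negative 64-bit result may reach
 * 2^63 - 1, so a product magnitude >= 2^63 overflows, while a negative result
 * reaches down to -2^63, so on the negated paths only a magnitude strictly
 * greater than 2^63 overflows.
 * @code
 *     // IMUL with RAX = INT64_MIN, factor 1: |product| == 2^63, the result
 *     // is negated to INT64_MIN which fits -> '>' test, no CF/OF.
 *     // IMUL with RAX = -1, factor INT64_MIN: |product| == 2^63 with a
 *     // positive result, and +2^63 does not fit -> '>=' test, CF/OF set.
 * @endcode
 */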
1707
1708
1709IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1710{
1711/** @todo Testcase: IMUL 2 and 3 operands. */
1712 uint64_t u64Ign;
1713 iemAImpl_imul_u64(puDst, &u64Ign, uSrc, pfEFlags);
1714}
1715
1716# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1717
1718IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Factor, uint32_t *pfEFlags))
1719{
1720 RTUINT64U Result;
1721 *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
1722 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
1723 /* Skylake may set: */ | X86_EFL_PF);
1724
1725 if ((int32_t)*pu32RAX >= 0)
1726 {
1727 if ((int32_t)u32Factor >= 0)
1728 {
1729 Result.u = (uint64_t)*pu32RAX * u32Factor;
1730 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(31))
1731 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1732 }
1733 else
1734 {
1735 Result.u = (uint64_t)*pu32RAX * (UINT32_C(0) - u32Factor);
1736 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(31))
1737 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1738 Result.u = UINT64_C(0) - Result.u;
1739 }
1740 }
1741 else
1742 {
1743 if ((int32_t)u32Factor >= 0)
1744 {
1745 Result.u = (uint64_t)(UINT32_C(0) - *pu32RAX) * u32Factor;
1746 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(31))
1747 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1748 Result.u = UINT64_C(0) - Result.u;
1749 }
1750 else
1751 {
1752 Result.u = (uint64_t)(UINT32_C(0) - *pu32RAX) * (UINT32_C(0) - u32Factor);
1753 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(31))
1754 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1755 }
1756 }
1757 *pu32RAX = Result.s.Lo;
1758 if (Result.s.Lo & RT_BIT_32(31))
1759 *pfEFlags |= X86_EFL_SF;
1760 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1761 *pu32RDX = Result.s.Hi;
1762
1763 return 0;
1764}
1765
1766
1767IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1768{
1769/** @todo Testcase: IMUL 2 and 3 operands. */
1770 uint32_t u32Ign;
1771 iemAImpl_imul_u32(puDst, &u32Ign, uSrc, pfEFlags);
1772}
1773
1774
1775IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Factor, uint32_t *pfEFlags))
1776{
1777 RTUINT32U Result;
1778 *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
1779 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
1780 /* Skylake may set: */ | X86_EFL_PF);
1781
1782 if ((int16_t)*pu16RAX >= 0)
1783 {
1784 if ((int16_t)u16Factor >= 0)
1785 {
1786 Result.u = (uint32_t)*pu16RAX * u16Factor;
1787 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(15))
1788 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1789 }
1790 else
1791 {
1792 Result.u = (uint32_t)*pu16RAX * (uint16_t)(UINT16_C(0) - u16Factor);
1793 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(15))
1794 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1795 Result.u = UINT32_C(0) - Result.u;
1796 }
1797 }
1798 else
1799 {
1800 if ((int16_t)u16Factor >= 0)
1801 {
1802 Result.u = (uint32_t)(uint16_t)(UINT16_C(0) - *pu16RAX) * u16Factor;
1803 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(15))
1804 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1805 Result.u = UINT32_C(0) - Result.u;
1806 }
1807 else
1808 {
1809 Result.u = (uint32_t)(uint16_t)(UINT16_C(0) - *pu16RAX) * (uint16_t)(UINT16_C(0) - u16Factor);
1810 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(15))
1811 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1812 }
1813 }
1814 *pu16RAX = Result.s.Lo;
1815 if (Result.s.Lo & RT_BIT_32(15))
1816 *pfEFlags |= X86_EFL_SF;
1817 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1818 *pu16RDX = Result.s.Hi;
1819
1820 return 0;
1821}
1822
1823
1824IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1825{
1826/** @todo Testcase: IMUL 2 and 3 operands. */
1827 uint16_t u16Ign;
1828 iemAImpl_imul_u16(puDst, &u16Ign, uSrc, pfEFlags);
1829}
1830
1831# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1832
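/*
 * A minimal usage sketch for the one-operand IMUL workers above (illustrative
 * only and therefore compiled out; the helper name iemExampleImulU32 is made
 * up).  0x10000 * 0x10000 = 2^32 needs all of EDX:EAX, so CF and OF are set.
 */
#if 0
static void iemExampleImulU32(void)
{
    uint32_t uEAX = UINT32_C(0x00010000);
    uint32_t uEDX = 0;
    uint32_t fEfl = 0;
    iemAImpl_imul_u32(&uEAX, &uEDX, UINT32_C(0x00010000), &fEfl);
    /* uEDX:uEAX == 0x00000001:00000000; X86_EFL_CF and X86_EFL_OF are set
       because the product does not fit in the low 32 bits. */
}
#endif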
1833
1834/*
1835 * DIV and IDIV helpers.
1836 *
1837 * - The U64 versions must use 128-bit intermediates, so we need to abstract the
1838 * division step so we can select between using C operators and RTUInt128DivRem.
1839 *
1840 * - The U8 versions take the dividend in AX and return the quotient in AL and the remainder in AH,
1841 * instead of using xDX:xAX for input and xAX + xDX for output, so we need load and store wrappers to hide this.
1842 */
1843
1844DECLINLINE(void) RTUInt128DivRemByU64(PRTUINT128U pQuotient, PRTUINT128U pRemainder, PCRTUINT128U pDividend, uint64_t u64Divisor)
1845{
1846 # ifdef __GNUC__ /* GCC can be really annoying about this otherwise (thinks pQuotient may be used uninitialized). */
1847 pQuotient->s.Lo = 0;
1848 pQuotient->s.Hi = 0;
1849# endif
1850 RTUINT128U Divisor;
1851 Divisor.s.Lo = u64Divisor;
1852 Divisor.s.Hi = 0;
1853 RTUInt128DivRem(pQuotient, pRemainder, pDividend, &Divisor);
1854}
1855
1856# define DIV_LOAD(a_Dividend) \
1857 a_Dividend.s.Lo = *puA, a_Dividend.s.Hi = *puD
1858# define DIV_LOAD_U8(a_Dividend) \
1859 a_Dividend.u = *puAX
1860
1861 # define DIV_STORE(a_Quotient, a_uRemainder) *puA = (a_Quotient), *puD = (a_uRemainder)
1862 # define DIV_STORE_U8(a_Quotient, a_uRemainder) *puAX = (a_Quotient) | ((uint16_t)(a_uRemainder) << 8)
1863
1864# define DIV_NEG(a_Value, a_cBitsWidth2x) \
1865 (a_Value).u = UINT ## a_cBitsWidth2x ## _C(0) - (a_Value).u
1866# define DIV_NEG_U128(a_Value, a_cBitsWidth2x) \
1867 RTUInt128AssignNeg(&(a_Value))
1868
1869# define DIV_DO_DIVREM(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
1870 a_Quotient.u = (a_Dividend).u / (a_uDivisor), \
1871 a_Remainder.u = (a_Dividend).u % (a_uDivisor)
1872# define DIV_DO_DIVREM_U128(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
1873 RTUInt128DivRemByU64(&a_Quotient, &a_Remainder, &a_Dividend, a_uDivisor)
1874
1875
1876/*
1877 * DIV
1878 */
1879# define EMIT_DIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_fnLoad, a_fnStore, a_fnDivRem) \
1880IEM_DECL_IMPL_DEF(int, iemAImpl_div_u ## a_cBitsWidth,a_Args) \
1881{ \
1882 /* Note! Skylake leaves all flags alone. */ \
1883 RT_NOREF_PV(pfEFlags); \
1884 \
1885 RTUINT ## a_cBitsWidth2x ## U Dividend; \
1886 a_fnLoad(Dividend); \
1887 if ( uDivisor != 0 \
1888 && Dividend.s.Hi < uDivisor) \
1889 { \
1890 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
1891 a_fnDivRem(Quotient, Remainder, Dividend, uDivisor); \
1892 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
1893 /** @todo research the undefined DIV flags. */ \
1894 return 0; \
1895 } \
1896 /* #DE */ \
1897 return -1; \
1898}
1899EMIT_DIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, DIV_DO_DIVREM_U128)
1900# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1901EMIT_DIV(32,64, (uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, DIV_DO_DIVREM)
1902EMIT_DIV(16,32, (uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, DIV_DO_DIVREM)
1903EMIT_DIV(8,16, (uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), DIV_LOAD_U8, DIV_STORE_U8, DIV_DO_DIVREM)
1904# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1905
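/*
 * A minimal usage sketch for the DIV workers above (illustrative only and
 * therefore compiled out; the helper name is made up).  The dividend is
 * passed split across two registers, EDX:EAX style, and -1 signals #DE.
 */
#if 0
static void iemExampleDivU32(void)
{
    uint32_t uEAX = 0, uEDX = 1;                        /* EDX:EAX = 2^32 */
    uint32_t fEfl = 0;
    int rc = iemAImpl_div_u32(&uEAX, &uEDX, 3, &fEfl);
    /* rc == 0, uEAX == 0x55555555 (quotient), uEDX == 1 (remainder);
       fEfl is untouched since Skylake leaves all flags alone. */

    uEAX = 0; uEDX = 1;
    rc = iemAImpl_div_u32(&uEAX, &uEDX, 1, &fEfl);
    /* rc == -1 (#DE): Dividend.s.Hi >= uDivisor, i.e. the quotient 2^32
       would not fit in 32 bits. */
}
#endif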
1906
1907/*
1908 * IDIV
1909 */
1910# define EMIT_IDIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem) \
1911IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u ## a_cBitsWidth,a_Args) \
1912{ \
1913 /* Note! Skylake leaves all flags alone. */ \
1914 RT_NOREF_PV(pfEFlags); \
1915 \
1916 /** @todo overflow checks */ \
1917 if (uDivisor != 0) \
1918 { \
1919 /* \
1920 * Convert to unsigned division. \
1921 */ \
1922 RTUINT ## a_cBitsWidth2x ## U Dividend; \
1923 a_fnLoad(Dividend); \
1924 if ((int ## a_cBitsWidth ## _t)Dividend.s.Hi < 0) \
1925 a_fnNeg(Dividend, a_cBitsWidth2x); \
1926 \
1927 uint ## a_cBitsWidth ## _t uDivisorPositive; \
1928 if ((int ## a_cBitsWidth ## _t)uDivisor >= 0) \
1929 uDivisorPositive = uDivisor; \
1930 else \
1931 uDivisorPositive = UINT ## a_cBitsWidth ## _C(0) - uDivisor; \
1932 \
1933 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
1934 a_fnDivRem(Quotient, Remainder, Dividend, uDivisorPositive); \
1935 \
1936 /* \
1937 * Setup the result, checking for overflows. \
1938 */ \
1939 if ((int ## a_cBitsWidth ## _t)uDivisor >= 0) \
1940 { \
1941 if ((int ## a_cBitsWidth ## _t)Dividend.s.Hi >= 0) \
1942 { \
1943 /* Positive divisor, positive dividend => result positive. */ \
1944 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
1945 { \
1946 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
1947 return 0; \
1948 } \
1949 } \
1950 else \
1951 { \
1952 /* Positive divisor, negative dividend => result negative. */ \
1953 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
1954 { \
1955 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
1956 return 0; \
1957 } \
1958 } \
1959 } \
1960 else \
1961 { \
1962 if ((int ## a_cBitsWidth ## _t)Dividend.s.Hi >= 0) \
1963 { \
1964 /* Negative divisor, positive dividend => negative quotient, positive remainder. */ \
1965 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
1966 { \
1967 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, Remainder.s.Lo); \
1968 return 0; \
1969 } \
1970 } \
1971 else \
1972 { \
1973 /* Negative divisor, negative dividend => positive quotient, negative remainder. */ \
1974 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
1975 { \
1976 a_fnStore(Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
1977 return 0; \
1978 } \
1979 } \
1980 } \
1981 } \
1982 /* #DE */ \
1983 return -1; \
1984}
1985EMIT_IDIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, DIV_NEG_U128, DIV_DO_DIVREM_U128)
1986# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1987EMIT_IDIV(32,64,(uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, DIV_NEG, DIV_DO_DIVREM)
1988EMIT_IDIV(16,32,(uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, DIV_NEG, DIV_DO_DIVREM)
1989EMIT_IDIV(8,16,(uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), DIV_LOAD_U8, DIV_STORE_U8, DIV_NEG, DIV_DO_DIVREM)
1990# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1991
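/*
 * A minimal sketch of the IDIV semantics implemented above (illustrative only
 * and therefore compiled out; the helper name is made up).  x86 IDIV
 * truncates towards zero and the remainder takes the sign of the dividend.
 */
#if 0
static void iemExampleIdivU16(void)
{
    uint16_t uAX = UINT16_C(0xff9c);    /* DX:AX = -100 */
    uint16_t uDX = UINT16_C(0xffff);
    uint32_t fEfl = 0;
    int rc = iemAImpl_idiv_u16(&uAX, &uDX, 7, &fEfl);
    /* rc == 0, uAX == 0xfff2 (-14), uDX == 0xfffe (-2): -14 * 7 - 2 == -100. */

    uAX = UINT16_C(0x8000);             /* DX:AX = -32768 */
    uDX = UINT16_C(0xffff);
    rc = iemAImpl_idiv_u16(&uAX, &uDX, UINT16_C(0xffff) /* -1 */, &fEfl);
    /* rc == -1 (#DE): the quotient +32768 overflows the signed 16-bit range. */
}
#endif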
1992
1993/*********************************************************************************************************************************
1994* Unary operations. *
1995*********************************************************************************************************************************/
1996
1997/**
1998 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for an INC or DEC instruction.
1999 *
2000 * CF is NOT modified for hysterical raisins (allegedly for carrying and
2001 * borrowing in arithmetic loops on the Intel 8008).
2002 *
2003 * @returns Status bits.
2004 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2005 * @param a_uResult Unsigned result value.
2006 * @param a_uDst The original destination value (for AF calc).
2007 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2008 * @param a_OfMethod 0 for INC-style, 1 for DEC-style.
2009 */
2010#define IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth, a_OfMethod) \
2011 do { \
2012 uint32_t fEflTmp = *(a_pfEFlags); \
2013 fEflTmp &= ~X86_EFL_STATUS_BITS | X86_EFL_CF; \
2014 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2015 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2016 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2017 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2018 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(a_OfMethod == 0 ? (((a_uDst) ^ RT_BIT_64(63)) & (a_uResult)) \
2019 : ((a_uDst) & ((a_uResult) ^ RT_BIT_64(63))) ); \
2020 *(a_pfEFlags) = fEflTmp; \
2021 } while (0)
2022
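/*
 * A worked flag example for the macro above (illustrative only and therefore
 * compiled out; the helper name is made up): INC and DEC wrap around without
 * touching CF.
 */
#if 0
static void iemExampleIncDecFlags(void)
{
    uint8_t  u8   = 0xff;
    uint32_t fEfl = X86_EFL_CF;         /* pretend a prior op set CF */
    iemAImpl_inc_u8(&u8, &fEfl);
    /* u8 == 0x00; ZF, AF and PF are set, SF is clear, and CF is still set
       even though the addition wrapped - the INC/DEC hallmark. */

    iemAImpl_dec_u8(&u8, &fEfl);
    /* u8 == 0xff again; SF, AF and PF set, ZF clear, CF still untouched. */
}
#endif
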
2023/*
2024 * INC
2025 */
2026
2027IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2028{
2029 uint64_t uDst = *puDst;
2030 uint64_t uResult = uDst + 1;
2031 *puDst = uResult;
2032 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 0 /*INC*/);
2033}
2034
2035# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2036
2037IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2038{
2039 uint32_t uDst = *puDst;
2040 uint32_t uResult = uDst + 1;
2041 *puDst = uResult;
2042 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 0 /*INC*/);
2043}
2044
2045
2046IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2047{
2048 uint16_t uDst = *puDst;
2049 uint16_t uResult = uDst + 1;
2050 *puDst = uResult;
2051 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 0 /*INC*/);
2052}
2053
2054IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2055{
2056 uint8_t uDst = *puDst;
2057 uint8_t uResult = uDst + 1;
2058 *puDst = uResult;
2059 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 0 /*INC*/);
2060}
2061
2062# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2063
2064
2065/*
2066 * DEC
2067 */
2068
2069IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2070{
2071 uint64_t uDst = *puDst;
2072 uint64_t uResult = uDst - 1;
2073 *puDst = uResult;
2074 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 1 /*DEC*/);
2075}
2076
2077# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2078
2079IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2080{
2081 uint32_t uDst = *puDst;
2082 uint32_t uResult = uDst - 1;
2083 *puDst = uResult;
2084 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 1 /*DEC*/);
2085}
2086
2087
2088IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2089{
2090 uint16_t uDst = *puDst;
2091 uint16_t uResult = uDst - 1;
2092 *puDst = uResult;
2093 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 1 /*DEC*/);
2094}
2095
2096
2097IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2098{
2099 uint8_t uDst = *puDst;
2100 uint8_t uResult = uDst - 1;
2101 *puDst = uResult;
2102 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 1 /*DEC*/);
2103}
2104
2105# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2106
2107
2108/*
2109 * NOT
2110 */
2111
2112IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2113{
2114 uint64_t uDst = *puDst;
2115 uint64_t uResult = ~uDst;
2116 *puDst = uResult;
2117 /* EFLAGS are not modified. */
2118 RT_NOREF_PV(pfEFlags);
2119}
2120
2121# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2122
2123IEM_DECL_IMPL_DEF(void, iemAImpl_not_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2124{
2125 uint32_t uDst = *puDst;
2126 uint32_t uResult = ~uDst;
2127 *puDst = uResult;
2128 /* EFLAGS are not modified. */
2129 RT_NOREF_PV(pfEFlags);
2130}
2131
2132IEM_DECL_IMPL_DEF(void, iemAImpl_not_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2133{
2134 uint16_t uDst = *puDst;
2135 uint16_t uResult = ~uDst;
2136 *puDst = uResult;
2137 /* EFLAGS are not modified. */
2138 RT_NOREF_PV(pfEFlags);
2139}
2140
2141IEM_DECL_IMPL_DEF(void, iemAImpl_not_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2142{
2143 uint8_t uDst = *puDst;
2144 uint8_t uResult = ~uDst;
2145 *puDst = uResult;
2146 /* EFLAGS are not modified. */
2147 RT_NOREF_PV(pfEFlags);
2148}
2149
2150# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2151
2152
2153/*
2154 * NEG
2155 */
2156
2157/**
2158 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for a NEG instruction.
2159 *
2160 * @returns Status bits.
2161 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2162 * @param a_uResult Unsigned result value.
2163 * @param a_uDst The original destination value (for AF calc).
2164 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2165 */
2166#define IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth) \
2167 do { \
2168 uint32_t fEflTmp = *(a_pfEFlags); \
2169 fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; \
2170 fEflTmp |= ((a_uDst) != 0) << X86_EFL_CF_BIT; \
2171 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2172 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2173 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2174 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2175 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_uDst) & (a_uResult)); \
2176 *(a_pfEFlags) = fEflTmp; \
2177 } while (0)
2178
2179IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2180{
2181 uint64_t uDst = *puDst;
2182 uint64_t uResult = (uint64_t)0 - uDst;
2183 *puDst = uResult;
2184 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 64);
2185}
2186
2187# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2188
2189IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2190{
2191 uint32_t uDst = *puDst;
2192 uint32_t uResult = (uint32_t)0 - uDst;
2193 *puDst = uResult;
2194 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 32);
2195}
2196
2197
2198IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2199{
2200 uint16_t uDst = *puDst;
2201 uint16_t uResult = (uint16_t)0 - uDst;
2202 *puDst = uResult;
2203 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 16);
2204}
2205
2206
2207IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2208{
2209 uint8_t uDst = *puDst;
2210 uint8_t uResult = (uint8_t)0 - uDst;
2211 *puDst = uResult;
2212 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 8);
2213}
2214
2215# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2216
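/*
 * A minimal sketch of the NEG flag rules above (illustrative only and
 * therefore compiled out; the helper name is made up): CF is set for any
 * non-zero operand and clear for zero.
 */
#if 0
static void iemExampleNeg(void)
{
    uint32_t u32  = 0;
    uint32_t fEfl = 0;
    iemAImpl_neg_u32(&u32, &fEfl);
    /* u32 == 0; CF clear, ZF set. */

    u32 = UINT32_C(0x12345678);
    iemAImpl_neg_u32(&u32, &fEfl);
    /* u32 == 0xedcba988 (two's complement); CF and SF set, ZF clear. */
}
#endif
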
2217/*
2218 * Locked variants.
2219 */
2220
2221/** Emit a function for doing a locked unary operand operation. */
2222# define EMIT_LOCKED_UNARY_OP(a_Mnemonic, a_cBitsWidth) \
2223 IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
2224 uint32_t *pfEFlags)) \
2225 { \
2226 uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
2227 uint ## a_cBitsWidth ## _t uTmp; \
2228 uint32_t fEflTmp; \
2229 do \
2230 { \
2231 uTmp = uOld; \
2232 fEflTmp = *pfEFlags; \
2233 iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, &fEflTmp); \
2234 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
2235 *pfEFlags = fEflTmp; \
2236 }
2237
2238EMIT_LOCKED_UNARY_OP(inc, 64)
2239EMIT_LOCKED_UNARY_OP(dec, 64)
2240EMIT_LOCKED_UNARY_OP(not, 64)
2241EMIT_LOCKED_UNARY_OP(neg, 64)
2242# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2243EMIT_LOCKED_UNARY_OP(inc, 32)
2244EMIT_LOCKED_UNARY_OP(dec, 32)
2245EMIT_LOCKED_UNARY_OP(not, 32)
2246EMIT_LOCKED_UNARY_OP(neg, 32)
2247
2248EMIT_LOCKED_UNARY_OP(inc, 16)
2249EMIT_LOCKED_UNARY_OP(dec, 16)
2250EMIT_LOCKED_UNARY_OP(not, 16)
2251EMIT_LOCKED_UNARY_OP(neg, 16)
2252
2253EMIT_LOCKED_UNARY_OP(inc, 8)
2254EMIT_LOCKED_UNARY_OP(dec, 8)
2255EMIT_LOCKED_UNARY_OP(not, 8)
2256EMIT_LOCKED_UNARY_OP(neg, 8)
2257# endif
2258
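/*
 * Sketch of how the locked workers above are meant to be used (illustrative
 * only and therefore compiled out; the helper name is made up): puDst points
 * at memory other VCPUs may write concurrently, so the update loops until
 * the compare-exchange lands.
 */
#if 0
static void iemExampleLockedInc(uint32_t *puGuestWord)
{
    uint32_t fEfl = 0;
    /* If another writer changes *puGuestWord between the initial read and
       the cmpxchg, ASMAtomicCmpXchgExU32 fails, refreshes uOld, and the
       operation plus flag calculation is redone on the fresh value. */
    iemAImpl_inc_u32_locked(puGuestWord, &fEfl);
}
#endif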
2259
2260/*********************************************************************************************************************************
2261* Shifting and Rotating *
2262*********************************************************************************************************************************/
2263
2264/*
2265 * ROL
2266 */
2267
2268/**
2269 * Updates the status bits (OF and CF) for an ROL instruction.
2270 *
2271 * @returns Status bits.
2272 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2273 * @param a_uResult Unsigned result value.
2274 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2275 */
2276#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(a_pfEFlags, a_uResult, a_cBitsWidth) do { \
2277 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2278 it the same way as for 1 bit shifts. */ \
2279 AssertCompile(X86_EFL_CF_BIT == 0); \
2280 uint32_t fEflTmp = *(a_pfEFlags); \
2281 fEflTmp &= ~(X86_EFL_CF | X86_EFL_OF); \
2282 uint32_t const fCarry = ((a_uResult) & X86_EFL_CF); \
2283 fEflTmp |= fCarry; \
2284 fEflTmp |= (((a_uResult) >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2285 *(a_pfEFlags) = fEflTmp; \
2286 } while (0)
2287
2288IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2289{
2290 cShift &= 63;
2291 if (cShift)
2292 {
2293 uint64_t uResult = ASMRotateLeftU64(*puDst, cShift);
2294 *puDst = uResult;
2295 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 64);
2296 }
2297}
2298
2299# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2300
2301IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u32,(uint32_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2302{
2303 cShift &= 31;
2304 if (cShift)
2305 {
2306 uint32_t uResult = ASMRotateLeftU32(*puDst, cShift);
2307 *puDst = uResult;
2308 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 32);
2309 }
2310}
2311
2312
2313IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u16,(uint16_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2314{
2315 cShift &= 15;
2316 if (cShift)
2317 {
2318 uint16_t uDst = *puDst;
2319 uint16_t uResult = (uDst << cShift) | (uDst >> (16 - cShift));
2320 *puDst = uResult;
2321 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 16);
2322 }
2323}
2324
2325
2326IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u8,(uint8_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2327{
2328 cShift &= 7;
2329 if (cShift)
2330 {
2331 uint8_t uDst = *puDst;
2332 uint8_t uResult = (uDst << cShift) | (uDst >> (8 - cShift));
2333 *puDst = uResult;
2334 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 8);
2335 }
2336}
2337
2338# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2339
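/*
 * A worked ROL example (illustrative only and therefore compiled out; the
 * helper name is made up): rotating 0x81 left by one moves the old MSB into
 * both bit 0 and CF.
 */
#if 0
static void iemExampleRol(void)
{
    uint8_t  u8   = 0x81;
    uint32_t fEfl = 0;
    iemAImpl_rol_u8(&u8, 1, &fEfl);
    /* u8 == 0x03; CF == 1 (the bit rotated into bit 0), and OF is set
       because the new MSB (0) differs from the new CF (1).  A masked count
       of zero would have left both the operand and the flags untouched. */
}
#endif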
2340
2341/*
2342 * ROR
2343 */
2344
2345/**
2346 * Updates the status bits (OF and CF) for a ROR instruction.
2347 *
2348 * @returns Status bits.
2349 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2350 * @param a_uResult Unsigned result value.
2351 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2352 */
2353#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(a_pfEFlags, a_uResult, a_cBitsWidth) do { \
2354 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2355 it the same way as for 1 bit shifts. */ \
2356 AssertCompile(X86_EFL_CF_BIT == 0); \
2357 uint32_t fEflTmp = *(a_pfEFlags); \
2358 fEflTmp &= ~(X86_EFL_CF | X86_EFL_OF); \
2359 uint32_t const fCarry = ((a_uResult) >> ((a_cBitsWidth) - 1)) & X86_EFL_CF; \
2360 fEflTmp |= fCarry; \
2361 fEflTmp |= ((((a_uResult) >> ((a_cBitsWidth) - 2)) & X86_EFL_CF) ^ fCarry) << X86_EFL_OF_BIT; \
2362 *(a_pfEFlags) = fEflTmp; \
2363 } while (0)
2364
2365IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2366{
2367 cShift &= 63;
2368 if (cShift)
2369 {
2370 uint64_t const uResult = ASMRotateRightU64(*puDst, cShift);
2371 *puDst = uResult;
2372 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 64);
2373 }
2374}
2375
2376# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2377
2378IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u32,(uint32_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2379{
2380 cShift &= 31;
2381 if (cShift)
2382 {
2383 uint32_t const uResult = ASMRotateRightU32(*puDst, cShift);
2384 *puDst = uResult;
2385 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 32);
2386 }
2387}
2388
2389
2390IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u16,(uint16_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2391{
2392 cShift &= 15;
2393 if (cShift)
2394 {
2395 uint16_t uDst = *puDst;
2396 uint16_t uResult;
2397 uResult = uDst >> cShift;
2398 uResult |= uDst << (16 - cShift);
2399 *puDst = uResult;
2400 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 16);
2401 }
2402}
2403
2404
2405IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u8,(uint8_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2406{
2407 cShift &= 7;
2408 if (cShift)
2409 {
2410 uint8_t uDst = *puDst;
2411 uint8_t uResult;
2412 uResult = uDst >> cShift;
2413 uResult |= uDst << (8 - cShift);
2414 *puDst = uResult;
2415 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 8);
2416 }
2417}
2418
2419# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2420
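/*
 * A worked ROR example (illustrative only and therefore compiled out; the
 * helper name is made up): rotating 0x01 right by one wraps bit 0 into the
 * MSB and into CF.
 */
#if 0
static void iemExampleRor(void)
{
    uint8_t  u8   = 0x01;
    uint32_t fEfl = 0;
    iemAImpl_ror_u8(&u8, 1, &fEfl);
    /* u8 == 0x80; CF == 1 (a copy of the new MSB), and OF is set because
       the two most significant result bits differ (1 vs 0). */
}
#endif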
2421
2422/*
2423 * RCL
2424 */
2425#define EMIT_RCL(a_cBitsWidth) \
2426IEM_DECL_IMPL_DEF(void, iemAImpl_rcl_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2427{ \
2428 cShift &= a_cBitsWidth - 1; \
2429 if (cShift) \
2430 { \
2431 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2432 uint ## a_cBitsWidth ## _t uResult = uDst << cShift; \
2433 if (cShift > 1) \
2434 uResult |= uDst >> (a_cBitsWidth + 1 - cShift); \
2435 \
2436 uint32_t fEfl = *pfEFlags; \
2437 AssertCompile(X86_EFL_CF_BIT == 0); \
2438 uResult |= (uint ## a_cBitsWidth ## _t)(fEfl & X86_EFL_CF) << (cShift - 1); \
2439 \
2440 *puDst = uResult; \
2441 \
2442 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2443 it the same way as for 1 bit shifts. */ \
2444 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2445 uint32_t const fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2446 fEfl |= fCarry; \
2447 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2448 *pfEFlags = fEfl; \
2449 } \
2450}
2451EMIT_RCL(64)
2452# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2453EMIT_RCL(32)
2454EMIT_RCL(16)
2455EMIT_RCL(8)
2456# endif
2457
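/*
 * A worked RCL example (illustrative only and therefore compiled out; the
 * helper name is made up): RCL is a 9/17/33/65-bit rotation with CF acting
 * as the extra bit.
 */
#if 0
static void iemExampleRcl(void)
{
    uint8_t  u8   = 0x80;
    uint32_t fEfl = X86_EFL_CF;         /* CF = 1 going in */
    iemAImpl_rcl_u8(&u8, 1, &fEfl);
    /* u8 == 0x01: the old CF was shifted into bit 0 while the old bit 7
       became the new CF (still 1).  OF = new MSB (0) ^ new CF (1) = 1. */
}
#endif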
2458
2459/*
2460 * RCR
2461 */
2462#define EMIT_RCR(a_cBitsWidth) \
2463IEM_DECL_IMPL_DEF(void, iemAImpl_rcr_u ## a_cBitsWidth,(uint ## a_cBitsWidth ##_t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2464{ \
2465 cShift &= a_cBitsWidth - 1; \
2466 if (cShift) \
2467 { \
2468 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2469 uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
2470 if (cShift > 1) \
2471 uResult |= uDst << (a_cBitsWidth + 1 - cShift); \
2472 \
2473 AssertCompile(X86_EFL_CF_BIT == 0); \
2474 uint32_t fEfl = *pfEFlags; \
2475 uResult |= (uint ## a_cBitsWidth ## _t)(fEfl & X86_EFL_CF) << (a_cBitsWidth - cShift); \
2476 *puDst = uResult; \
2477 \
2478 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2479 it the same way as for 1 bit shifts. */ \
2480 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2481 uint32_t const fCarry = (uDst >> (cShift - 1)) & X86_EFL_CF; \
2482 fEfl |= fCarry; \
2483 fEfl |= (((uResult >> (a_cBitsWidth - 1)) ^ (uResult >> (a_cBitsWidth - 2))) & 1) << X86_EFL_OF_BIT; \
2484 *pfEFlags = fEfl; \
2485 } \
2486}
2487EMIT_RCR(64)
2488# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2489EMIT_RCR(32)
2490EMIT_RCR(16)
2491EMIT_RCR(8)
2492# endif
2493
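/*
 * A worked RCR example (illustrative only and therefore compiled out; the
 * helper name is made up): the mirror image of RCL, with CF entering at the
 * top.
 */
#if 0
static void iemExampleRcr(void)
{
    uint8_t  u8   = 0x01;
    uint32_t fEfl = X86_EFL_CF;         /* CF = 1 going in */
    iemAImpl_rcr_u8(&u8, 1, &fEfl);
    /* u8 == 0x80: the old CF landed in bit 7 while the old bit 0 became the
       new CF (still 1).  OF is set: the two most significant result bits
       (old CF and old MSB) differ. */
}
#endif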
2494
2495/*
2496 * SHL
2497 */
2498#define EMIT_SHL(a_cBitsWidth) \
2499IEM_DECL_IMPL_DEF(void, iemAImpl_shl_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2500{ \
2501 cShift &= a_cBitsWidth - 1; \
2502 if (cShift) \
2503 { \
2504 uint ## a_cBitsWidth ##_t const uDst = *puDst; \
2505 uint ## a_cBitsWidth ##_t uResult = uDst << cShift; \
2506 *puDst = uResult; \
2507 \
2508 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2509 it the same way as for 1 bit shifts. The AF bit is undefined, we \
2510 always set it to zero atm. */ \
2511 AssertCompile(X86_EFL_CF_BIT == 0); \
2512 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2513 uint32_t fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2514 fEfl |= fCarry; \
2515 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2516 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2517 fEfl |= X86_EFL_CALC_ZF(uResult); \
2518 fEfl |= g_afParity[uResult & 0xff]; \
2519 *pfEFlags = fEfl; \
2520 } \
2521}
2522EMIT_SHL(64)
2523# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2524EMIT_SHL(32)
2525EMIT_SHL(16)
2526EMIT_SHL(8)
2527# endif
2528
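/*
 * A worked SHL example (illustrative only and therefore compiled out; the
 * helper name is made up).
 */
#if 0
static void iemExampleShl(void)
{
    uint8_t  u8   = 0x81;
    uint32_t fEfl = 0;
    iemAImpl_shl_u8(&u8, 1, &fEfl);
    /* u8 == 0x02; CF == 1 (the bit shifted out of the MSB), and OF is set
       because the new MSB (0) differs from the new CF (1).  SF and ZF are
       clear, and AF is always cleared by this implementation. */
}
#endif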
2529
2530/*
2531 * SHR
2532 */
2533#define EMIT_SHR(a_cBitsWidth) \
2534IEM_DECL_IMPL_DEF(void, iemAImpl_shr_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2535{ \
2536 cShift &= a_cBitsWidth - 1; \
2537 if (cShift) \
2538 { \
2539 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2540 uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
2541 *puDst = uResult; \
2542 \
2543 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2544 it the same way as for 1 bit shifts. The AF bit is undefined, we \
2545 always set it to zero atm. */ \
2546 AssertCompile(X86_EFL_CF_BIT == 0); \
2547 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2548 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2549 fEfl |= (uDst >> (a_cBitsWidth - 1)) << X86_EFL_OF_BIT; \
2550 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2551 fEfl |= X86_EFL_CALC_ZF(uResult); \
2552 fEfl |= g_afParity[uResult & 0xff]; \
2553 *pfEFlags = fEfl; \
2554 } \
2555}
2556EMIT_SHR(64)
2557# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2558EMIT_SHR(32)
2559EMIT_SHR(16)
2560EMIT_SHR(8)
2561# endif
2562
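/*
 * A worked SHR example (illustrative only and therefore compiled out; the
 * helper name is made up).
 */
#if 0
static void iemExampleShr(void)
{
    uint8_t  u8   = 0x81;
    uint32_t fEfl = 0;
    iemAImpl_shr_u8(&u8, 1, &fEfl);
    /* u8 == 0x40; CF == 1 (the bit shifted out of bit 0), and OF is set
       because the MSB of the original operand was set. */
}
#endif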
2563
2564/*
2565 * SAR
2566 */
2567#define EMIT_SAR(a_cBitsWidth) \
2568IEM_DECL_IMPL_DEF(void, iemAImpl_sar_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2569{ \
2570 cShift &= a_cBitsWidth - 1; \
2571 if (cShift) \
2572 { \
2573 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2574 uint ## a_cBitsWidth ## _t uResult = (int ## a_cBitsWidth ## _t)uDst >> cShift; \
2575 *puDst = uResult; \
2576 \
2577 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2578 it the same way as for 1 bit shifts (0). The AF bit is undefined, \
2579 we always set it to zero atm. */ \
2580 AssertCompile(X86_EFL_CF_BIT == 0); \
2581 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2582 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2583 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2584 fEfl |= X86_EFL_CALC_ZF(uResult); \
2585 fEfl |= g_afParity[uResult & 0xff]; \
2586 *pfEFlags = fEfl; \
2587 } \
2588}
2589EMIT_SAR(64)
2590# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2591EMIT_SAR(32)
2592EMIT_SAR(16)
2593EMIT_SAR(8)
2594# endif
2595
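/*
 * A worked SAR example (illustrative only and therefore compiled out; the
 * helper name is made up): the arithmetic shift replicates the sign bit and
 * rounds towards minus infinity.
 */
#if 0
static void iemExampleSar(void)
{
    uint8_t  u8   = 0x81;               /* -127 */
    uint32_t fEfl = 0;
    iemAImpl_sar_u8(&u8, 1, &fEfl);
    /* u8 == 0xc0 (-64); CF == 1 (the bit shifted out of bit 0), SF set,
       and OF always ends up clear for SAR. */
}
#endif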
2596
2597/*
2598 * SHLD
2599 */
2600#define EMIT_SHLD(a_cBitsWidth) \
2601IEM_DECL_IMPL_DEF(void, iemAImpl_shld_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, \
2602 uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2603{ \
2604 cShift &= a_cBitsWidth - 1; \
2605 if (cShift) \
2606 { \
2607 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2608 uint ## a_cBitsWidth ## _t uResult = uDst << cShift; \
2609 uResult |= uSrc >> (a_cBitsWidth - cShift); \
2610 *puDst = uResult; \
2611 \
2612 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2613 it the same way as for 1 bit shifts. The AF bit is undefined, \
2614 we always set it to zero atm. */ \
2615 AssertCompile(X86_EFL_CF_BIT == 0); \
2616 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2617 fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2618 fEfl |= (uint32_t)((uDst >> (a_cBitsWidth - 1)) ^ (uint32_t)(uResult >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
2619 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2620 fEfl |= X86_EFL_CALC_ZF(uResult); \
2621 fEfl |= g_afParity[uResult & 0xff]; \
2622 *pfEFlags = fEfl; \
2623 } \
2624}
2625EMIT_SHLD(64)
2626# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2627EMIT_SHLD(32)
2628EMIT_SHLD(16)
2629EMIT_SHLD(8)
2630# endif
2631
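/*
 * A worked SHLD example (illustrative only and therefore compiled out; the
 * helper name is made up): bits vacated at the bottom of the destination are
 * filled from the top of the source.
 */
#if 0
static void iemExampleShld(void)
{
    uint16_t u16  = UINT16_C(0x1234);
    uint32_t fEfl = 0;
    iemAImpl_shld_u16(&u16, UINT16_C(0xabcd), 4, &fEfl);
    /* u16 == 0x234a: 0x1234 shifted left by 4 with the top nibble of
       0xabcd shifted in at the bottom.  CF == 1, the last bit shifted out
       of the destination's MSB. */
}
#endif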
2632
2633/*
2634 * SHRD
2635 */
2636#define EMIT_SHRD(a_cBitsWidth) \
2637IEM_DECL_IMPL_DEF(void, iemAImpl_shrd_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, \
2638 uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2639{ \
2640 cShift &= a_cBitsWidth - 1; \
2641 if (cShift) \
2642 { \
2643 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2644 uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
2645 uResult |= uSrc << (a_cBitsWidth - cShift); \
2646 *puDst = uResult; \
2647 \
2648 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2649 it the same way as for 1 bit shifts. The AF bit is undefined, \
2650 we always set it to zero atm. */ \
2651 AssertCompile(X86_EFL_CF_BIT == 0); \
2652 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2653 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2654 fEfl |= (uint32_t)((uDst >> (a_cBitsWidth - 1)) ^ (uint32_t)(uResult >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
2655 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2656 fEfl |= X86_EFL_CALC_ZF(uResult); \
2657 fEfl |= g_afParity[uResult & 0xff]; \
2658 *pfEFlags = fEfl; \
2659 } \
2660}
2661EMIT_SHRD(64)
2662# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2663EMIT_SHRD(32)
2664EMIT_SHRD(16)
2665EMIT_SHRD(8)
2666# endif
2667
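/*
 * A worked SHRD example (illustrative only and therefore compiled out; the
 * helper name is made up): the right-shifting counterpart, filling from the
 * bottom of the source.
 */
#if 0
static void iemExampleShrd(void)
{
    uint16_t u16  = UINT16_C(0x1234);
    uint32_t fEfl = 0;
    iemAImpl_shrd_u16(&u16, UINT16_C(0xabcd), 4, &fEfl);
    /* u16 == 0xd123: 0x1234 shifted right by 4 with the low nibble of
       0xabcd shifted in at the top.  CF is clear (bit 3 of 0x1234 was the
       last bit shifted out), while SF and OF are set since the sign bit
       changed from 0 to 1. */
}
#endif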
2668
2669# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2670/*
2671 * BSWAP
2672 */
2673
2674IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u64,(uint64_t *puDst))
2675{
2676 *puDst = ASMByteSwapU64(*puDst);
2677}
2678
2679
2680IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u32,(uint32_t *puDst))
2681{
2682 *puDst = ASMByteSwapU32(*puDst);
2683}
2684
2685
2686 /* Note! Undocumented 16-bit operand encoding, so 32-bit arg. */
2687IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u16,(uint32_t *puDst))
2688{
2689 *puDst = ASMByteSwapU16((uint16_t)*puDst) | (*puDst & UINT32_C(0xffff0000));
2690}
2691
2692# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2693
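/*
 * A minimal BSWAP sketch (illustrative only and therefore compiled out; the
 * helper name is made up), including the undocumented 16-bit form.
 */
#if 0
static void iemExampleBswap(void)
{
    uint32_t u32 = UINT32_C(0x11223344);
    iemAImpl_bswap_u32(&u32);
    /* u32 == 0x44332211. */

    u32 = UINT32_C(0x11223344);
    iemAImpl_bswap_u16(&u32);
    /* u32 == 0x11224433: only the low word is byte-swapped and the high
       half is passed through - one way of modelling the undocumented
       16-bit encoding; real CPUs may zero the low word instead. */
}
#endif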
2694
2695
2696# if defined(IEM_WITHOUT_ASSEMBLY)
2697
2698/*
2699 * LFENCE, SFENCE & MFENCE.
2700 */
2701
2702IEM_DECL_IMPL_DEF(void, iemAImpl_lfence,(void))
2703{
2704 ASMReadFence();
2705}
2706
2707
2708IEM_DECL_IMPL_DEF(void, iemAImpl_sfence,(void))
2709{
2710 ASMWriteFence();
2711}
2712
2713
2714IEM_DECL_IMPL_DEF(void, iemAImpl_mfence,(void))
2715{
2716 ASMMemoryFence();
2717}
2718
2719
2720# ifndef RT_ARCH_ARM64
2721IEM_DECL_IMPL_DEF(void, iemAImpl_alt_mem_fence,(void))
2722{
2723 ASMMemoryFence();
2724}
2725# endif
2726
2727# endif
2728
2729#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
2730
2731
2732IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
2733{
2734 if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
2735 {
2736 *pu16Dst &= X86_SEL_MASK_OFF_RPL;
2737 *pu16Dst |= u16Src & X86_SEL_RPL;
2738
2739 *pfEFlags |= X86_EFL_ZF;
2740 }
2741 else
2742 *pfEFlags &= ~X86_EFL_ZF;
2743}
2744
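/*
 * A minimal ARPL sketch (illustrative only and therefore compiled out; the
 * helper name is made up): the destination selector's RPL is raised to the
 * source's when it is lower, and ZF reports whether an adjustment was made.
 */
#if 0
static void iemExampleArpl(void)
{
    uint16_t uSel = UINT16_C(0x0021);   /* selector with RPL 1 */
    uint32_t fEfl = 0;
    iemAImpl_arpl(&uSel, UINT16_C(0x000b) /* RPL 3 */, &fEfl);
    /* uSel == 0x0023: RPL raised to 3 and ZF set.  Had the destination RPL
       already been >= the source RPL, uSel would be untouched and ZF
       cleared. */
}
#endif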
2745
2746IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
2747{
2748 RT_NOREF(pFpuState);
2749 puDst->au32[0] = puSrc->au32[0];
2750 puDst->au32[1] = puSrc->au32[0];
2751 puDst->au32[2] = puSrc->au32[2];
2752 puDst->au32[3] = puSrc->au32[2];
2753}
2754
2755#ifdef IEM_WITH_VEX
2756
2757IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
2758{
2759 pXState->x87.aXMM[iYRegDst].au32[0] = pXState->x87.aXMM[iYRegSrc].au32[0];
2760 pXState->x87.aXMM[iYRegDst].au32[1] = pXState->x87.aXMM[iYRegSrc].au32[0];
2761 pXState->x87.aXMM[iYRegDst].au32[2] = pXState->x87.aXMM[iYRegSrc].au32[2];
2762 pXState->x87.aXMM[iYRegDst].au32[3] = pXState->x87.aXMM[iYRegSrc].au32[2];
2763 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
2764 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
2765 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
2766 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
2767}
2768
2769
2770IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
2771{
2772 pXState->x87.aXMM[iYRegDst].au32[0] = pSrc->au32[0];
2773 pXState->x87.aXMM[iYRegDst].au32[1] = pSrc->au32[0];
2774 pXState->x87.aXMM[iYRegDst].au32[2] = pSrc->au32[2];
2775 pXState->x87.aXMM[iYRegDst].au32[3] = pSrc->au32[2];
2776 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
2777 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
2778 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
2779 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
2780}
2781
2782#endif /* IEM_WITH_VEX */
2783
2784
2785IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
2786{
2787 RT_NOREF(pFpuState);
2788 puDst->au32[0] = puSrc->au32[1];
2789 puDst->au32[1] = puSrc->au32[1];
2790 puDst->au32[2] = puSrc->au32[3];
2791 puDst->au32[3] = puSrc->au32[3];
2792}
2793
2794
2795IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
2796{
2797 RT_NOREF(pFpuState);
2798 puDst->au64[0] = uSrc;
2799 puDst->au64[1] = uSrc;
2800}
2801
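/*
 * A small sketch of the SSE3 duplicating moves above (illustrative only and
 * therefore compiled out; the helper name is made up).
 */
#if 0
static void iemExampleMovsldup(void)
{
    RTUINT128U uSrc, uDst;
    uSrc.au32[0] = UINT32_C(0x00000000); uSrc.au32[1] = UINT32_C(0x11111111);
    uSrc.au32[2] = UINT32_C(0x22222222); uSrc.au32[3] = UINT32_C(0x33333333);
    iemAImpl_movsldup(NULL /* pFpuState is unused */, &uDst, &uSrc);
    /* uDst.au32[] == { 0x00000000, 0x00000000, 0x22222222, 0x22222222 }:
       the even dwords are duplicated.  movshdup does the same with the odd
       dwords, and movddup duplicates the low qword. */
}
#endif
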
2802#ifdef IEM_WITH_VEX
2803
2804IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
2805{
2806 pXState->x87.aXMM[iYRegDst].au64[0] = pXState->x87.aXMM[iYRegSrc].au64[0];
2807 pXState->x87.aXMM[iYRegDst].au64[1] = pXState->x87.aXMM[iYRegSrc].au64[0];
2808 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
2809 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
2810}
2811
2812IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
2813{
2814 pXState->x87.aXMM[iYRegDst].au64[0] = pSrc->au64[0];
2815 pXState->x87.aXMM[iYRegDst].au64[1] = pSrc->au64[0];
2816 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
2817 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
2818}
2819
2820#endif /* IEM_WITH_VEX */
2821