VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp @ 93854

Last change on this file since 93854 was 93854, checked in by vboxsync, 3 years ago

VMM/IEM: Working on adding missing C version of IEMAllAImpl.asm functions. bugref:9898

/* $Id: IEMAllAImplC.cpp 93854 2022-02-19 15:15:31Z vboxsync $ */
/** @file
 * IEM - Instruction Implementation in Assembly, portable C variant.
 */

/*
 * Copyright (C) 2011-2022 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include "IEMInternal.h"
#include <VBox/vmm/vmcc.h>
#include <iprt/errcore.h>
#include <iprt/x86.h>
#include <iprt/uint128.h>


/*********************************************************************************************************************************
*   Defined Constants And Macros                                                                                                 *
*********************************************************************************************************************************/
/** @def IEM_WITHOUT_ASSEMBLY
 * Enables all the code in this file.
 */
#if !defined(IEM_WITHOUT_ASSEMBLY)
# if defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
#  define IEM_WITHOUT_ASSEMBLY
# endif
#endif

/**
 * Calculates the signed flag value given a result and its bit width.
 *
 * The signed flag (SF) is a duplication of the most significant bit in the
 * result.
 *
 * @returns X86_EFL_SF or 0.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )
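
/* Illustrative example (editorial addition, not from the upstream source):
   for an 8-bit result 0x80 the shift count is 8 - X86_EFL_SF_BIT - 1 = 0
   (SF lives in bit 7), so (0x80 >> 0) & X86_EFL_SF yields X86_EFL_SF; for a
   16-bit result 0x7fff the value is shifted right by 8 and the mask yields 0,
   i.e. SF clear. */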

/**
 * Calculates the zero flag value given a result.
 *
 * The zero flag (ZF) indicates whether the result is zero or not.
 *
 * @returns X86_EFL_ZF or 0.
 * @param   a_uResult       Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )

/**
 * Extracts the OF flag from an OF calculation result.
 *
 * These are typically used by concatenating with a bit count. The problem is
 * that 8-bit values need shifting in the other direction than the others.
 */
#define X86_EFL_GET_OF_8(a_uValue)  ((uint32_t)((a_uValue) << (X86_EFL_OF_BIT - 8)) & X86_EFL_OF)
#define X86_EFL_GET_OF_16(a_uValue) ((uint32_t)((a_uValue) >> (16 - X86_EFL_OF_BIT)) & X86_EFL_OF)
#define X86_EFL_GET_OF_32(a_uValue) ((uint32_t)((a_uValue) >> (32 - X86_EFL_OF_BIT)) & X86_EFL_OF)
#define X86_EFL_GET_OF_64(a_uValue) ((uint32_t)((a_uValue) >> (64 - X86_EFL_OF_BIT)) & X86_EFL_OF)
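
/* Illustrative example (editorial addition, not from the upstream source):
   the input value carries the overflow indicator in its top bit and
   X86_EFL_OF_BIT is 11, so an 8-bit value must shift its bit 7 LEFT by 3 to
   land on bit 11, while 16-, 32- and 64-bit values shift their top bit RIGHT
   by 5, 21 and 53 respectively. */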

/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after an arithmetic op.
 *
 * @returns Status bits.
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_uSrc          The source value (for AF calc).
 * @param   a_uDst          The original destination value (for AF calc).
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_CfExpr        Bool expression for the carry flag (CF).
 * @param   a_OfMethod      0 for ADD-style, 1 for SUB-style.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_OfMethod) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth( ((a_uDst) ^ (a_uSrc) ^ (a_OfMethod == 0 ? RT_BIT_64(a_cBitsWidth - 1) : 0)) \
                                                    & ((a_uResult) ^ (a_uDst)) ); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
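
/* Illustrative example (editorial addition, not from the upstream source):
   the AF line works because bit 4 of (result ^ src ^ dst) is exactly the
   carry out of the low nibble.  For the 8-bit ADD 0x0f + 0x01 = 0x10:
   (0x10 ^ 0x01 ^ 0x0f) & X86_EFL_AF = 0x1e & 0x10 = 0x10, so AF is set. */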

/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
 *
 * CF and OF are defined to be 0 by logical operations. AF on the other hand is
 * undefined. We do not set AF, as that seems to make the most sense (which
 * probably makes it the most wrong in real life).
 *
 * @returns Status bits.
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_fExtra        Additional bits to set.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= (a_fExtra); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
/**
 * Parity calculation table.
 *
 * The generator code:
 * @code
 * #include <stdio.h>
 *
 * int main()
 * {
 *     unsigned b;
 *     for (b = 0; b < 256; b++)
 *     {
 *         int cOnes = ( b       & 1)
 *                   + ((b >> 1) & 1)
 *                   + ((b >> 2) & 1)
 *                   + ((b >> 3) & 1)
 *                   + ((b >> 4) & 1)
 *                   + ((b >> 5) & 1)
 *                   + ((b >> 6) & 1)
 *                   + ((b >> 7) & 1);
 *         printf("    /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
 *                b,
 *                (b >> 7) & 1,
 *                (b >> 6) & 1,
 *                (b >> 5) & 1,
 *                (b >> 4) & 1,
 *                (b >> 3) & 1,
 *                (b >> 2) & 1,
 *                (b >> 1) & 1,
 *                b & 1,
 *                cOnes & 1 ? "0" : "X86_EFL_PF");
 *     }
 *     return 0;
 * }
 * @endcode
 */
static uint8_t const g_afParity[256] =
{
    /* 0000 = 00000000b */ X86_EFL_PF,
    /* 0x01 = 00000001b */ 0,
    /* 0x02 = 00000010b */ 0,
    /* 0x03 = 00000011b */ X86_EFL_PF,
    /* 0x04 = 00000100b */ 0,
    /* 0x05 = 00000101b */ X86_EFL_PF,
    /* 0x06 = 00000110b */ X86_EFL_PF,
    /* 0x07 = 00000111b */ 0,
    /* 0x08 = 00001000b */ 0,
    /* 0x09 = 00001001b */ X86_EFL_PF,
    /* 0x0a = 00001010b */ X86_EFL_PF,
    /* 0x0b = 00001011b */ 0,
    /* 0x0c = 00001100b */ X86_EFL_PF,
    /* 0x0d = 00001101b */ 0,
    /* 0x0e = 00001110b */ 0,
    /* 0x0f = 00001111b */ X86_EFL_PF,
    /* 0x10 = 00010000b */ 0,
    /* 0x11 = 00010001b */ X86_EFL_PF,
    /* 0x12 = 00010010b */ X86_EFL_PF,
    /* 0x13 = 00010011b */ 0,
    /* 0x14 = 00010100b */ X86_EFL_PF,
    /* 0x15 = 00010101b */ 0,
    /* 0x16 = 00010110b */ 0,
    /* 0x17 = 00010111b */ X86_EFL_PF,
    /* 0x18 = 00011000b */ X86_EFL_PF,
    /* 0x19 = 00011001b */ 0,
    /* 0x1a = 00011010b */ 0,
    /* 0x1b = 00011011b */ X86_EFL_PF,
    /* 0x1c = 00011100b */ 0,
    /* 0x1d = 00011101b */ X86_EFL_PF,
    /* 0x1e = 00011110b */ X86_EFL_PF,
    /* 0x1f = 00011111b */ 0,
    /* 0x20 = 00100000b */ 0,
    /* 0x21 = 00100001b */ X86_EFL_PF,
    /* 0x22 = 00100010b */ X86_EFL_PF,
    /* 0x23 = 00100011b */ 0,
    /* 0x24 = 00100100b */ X86_EFL_PF,
    /* 0x25 = 00100101b */ 0,
    /* 0x26 = 00100110b */ 0,
    /* 0x27 = 00100111b */ X86_EFL_PF,
    /* 0x28 = 00101000b */ X86_EFL_PF,
    /* 0x29 = 00101001b */ 0,
    /* 0x2a = 00101010b */ 0,
    /* 0x2b = 00101011b */ X86_EFL_PF,
    /* 0x2c = 00101100b */ 0,
    /* 0x2d = 00101101b */ X86_EFL_PF,
    /* 0x2e = 00101110b */ X86_EFL_PF,
    /* 0x2f = 00101111b */ 0,
    /* 0x30 = 00110000b */ X86_EFL_PF,
    /* 0x31 = 00110001b */ 0,
    /* 0x32 = 00110010b */ 0,
    /* 0x33 = 00110011b */ X86_EFL_PF,
    /* 0x34 = 00110100b */ 0,
    /* 0x35 = 00110101b */ X86_EFL_PF,
    /* 0x36 = 00110110b */ X86_EFL_PF,
    /* 0x37 = 00110111b */ 0,
    /* 0x38 = 00111000b */ 0,
    /* 0x39 = 00111001b */ X86_EFL_PF,
    /* 0x3a = 00111010b */ X86_EFL_PF,
    /* 0x3b = 00111011b */ 0,
    /* 0x3c = 00111100b */ X86_EFL_PF,
    /* 0x3d = 00111101b */ 0,
    /* 0x3e = 00111110b */ 0,
    /* 0x3f = 00111111b */ X86_EFL_PF,
    /* 0x40 = 01000000b */ 0,
    /* 0x41 = 01000001b */ X86_EFL_PF,
    /* 0x42 = 01000010b */ X86_EFL_PF,
    /* 0x43 = 01000011b */ 0,
    /* 0x44 = 01000100b */ X86_EFL_PF,
    /* 0x45 = 01000101b */ 0,
    /* 0x46 = 01000110b */ 0,
    /* 0x47 = 01000111b */ X86_EFL_PF,
    /* 0x48 = 01001000b */ X86_EFL_PF,
    /* 0x49 = 01001001b */ 0,
    /* 0x4a = 01001010b */ 0,
    /* 0x4b = 01001011b */ X86_EFL_PF,
    /* 0x4c = 01001100b */ 0,
    /* 0x4d = 01001101b */ X86_EFL_PF,
    /* 0x4e = 01001110b */ X86_EFL_PF,
    /* 0x4f = 01001111b */ 0,
    /* 0x50 = 01010000b */ X86_EFL_PF,
    /* 0x51 = 01010001b */ 0,
    /* 0x52 = 01010010b */ 0,
    /* 0x53 = 01010011b */ X86_EFL_PF,
    /* 0x54 = 01010100b */ 0,
    /* 0x55 = 01010101b */ X86_EFL_PF,
    /* 0x56 = 01010110b */ X86_EFL_PF,
    /* 0x57 = 01010111b */ 0,
    /* 0x58 = 01011000b */ 0,
    /* 0x59 = 01011001b */ X86_EFL_PF,
    /* 0x5a = 01011010b */ X86_EFL_PF,
    /* 0x5b = 01011011b */ 0,
    /* 0x5c = 01011100b */ X86_EFL_PF,
    /* 0x5d = 01011101b */ 0,
    /* 0x5e = 01011110b */ 0,
    /* 0x5f = 01011111b */ X86_EFL_PF,
    /* 0x60 = 01100000b */ X86_EFL_PF,
    /* 0x61 = 01100001b */ 0,
    /* 0x62 = 01100010b */ 0,
    /* 0x63 = 01100011b */ X86_EFL_PF,
    /* 0x64 = 01100100b */ 0,
    /* 0x65 = 01100101b */ X86_EFL_PF,
    /* 0x66 = 01100110b */ X86_EFL_PF,
    /* 0x67 = 01100111b */ 0,
    /* 0x68 = 01101000b */ 0,
    /* 0x69 = 01101001b */ X86_EFL_PF,
    /* 0x6a = 01101010b */ X86_EFL_PF,
    /* 0x6b = 01101011b */ 0,
    /* 0x6c = 01101100b */ X86_EFL_PF,
    /* 0x6d = 01101101b */ 0,
    /* 0x6e = 01101110b */ 0,
    /* 0x6f = 01101111b */ X86_EFL_PF,
    /* 0x70 = 01110000b */ 0,
    /* 0x71 = 01110001b */ X86_EFL_PF,
    /* 0x72 = 01110010b */ X86_EFL_PF,
    /* 0x73 = 01110011b */ 0,
    /* 0x74 = 01110100b */ X86_EFL_PF,
    /* 0x75 = 01110101b */ 0,
    /* 0x76 = 01110110b */ 0,
    /* 0x77 = 01110111b */ X86_EFL_PF,
    /* 0x78 = 01111000b */ X86_EFL_PF,
    /* 0x79 = 01111001b */ 0,
    /* 0x7a = 01111010b */ 0,
    /* 0x7b = 01111011b */ X86_EFL_PF,
    /* 0x7c = 01111100b */ 0,
    /* 0x7d = 01111101b */ X86_EFL_PF,
    /* 0x7e = 01111110b */ X86_EFL_PF,
    /* 0x7f = 01111111b */ 0,
    /* 0x80 = 10000000b */ 0,
    /* 0x81 = 10000001b */ X86_EFL_PF,
    /* 0x82 = 10000010b */ X86_EFL_PF,
    /* 0x83 = 10000011b */ 0,
    /* 0x84 = 10000100b */ X86_EFL_PF,
    /* 0x85 = 10000101b */ 0,
    /* 0x86 = 10000110b */ 0,
    /* 0x87 = 10000111b */ X86_EFL_PF,
    /* 0x88 = 10001000b */ X86_EFL_PF,
    /* 0x89 = 10001001b */ 0,
    /* 0x8a = 10001010b */ 0,
    /* 0x8b = 10001011b */ X86_EFL_PF,
    /* 0x8c = 10001100b */ 0,
    /* 0x8d = 10001101b */ X86_EFL_PF,
    /* 0x8e = 10001110b */ X86_EFL_PF,
    /* 0x8f = 10001111b */ 0,
    /* 0x90 = 10010000b */ X86_EFL_PF,
    /* 0x91 = 10010001b */ 0,
    /* 0x92 = 10010010b */ 0,
    /* 0x93 = 10010011b */ X86_EFL_PF,
    /* 0x94 = 10010100b */ 0,
    /* 0x95 = 10010101b */ X86_EFL_PF,
    /* 0x96 = 10010110b */ X86_EFL_PF,
    /* 0x97 = 10010111b */ 0,
    /* 0x98 = 10011000b */ 0,
    /* 0x99 = 10011001b */ X86_EFL_PF,
    /* 0x9a = 10011010b */ X86_EFL_PF,
    /* 0x9b = 10011011b */ 0,
    /* 0x9c = 10011100b */ X86_EFL_PF,
    /* 0x9d = 10011101b */ 0,
    /* 0x9e = 10011110b */ 0,
    /* 0x9f = 10011111b */ X86_EFL_PF,
    /* 0xa0 = 10100000b */ X86_EFL_PF,
    /* 0xa1 = 10100001b */ 0,
    /* 0xa2 = 10100010b */ 0,
    /* 0xa3 = 10100011b */ X86_EFL_PF,
    /* 0xa4 = 10100100b */ 0,
    /* 0xa5 = 10100101b */ X86_EFL_PF,
    /* 0xa6 = 10100110b */ X86_EFL_PF,
    /* 0xa7 = 10100111b */ 0,
    /* 0xa8 = 10101000b */ 0,
    /* 0xa9 = 10101001b */ X86_EFL_PF,
    /* 0xaa = 10101010b */ X86_EFL_PF,
    /* 0xab = 10101011b */ 0,
    /* 0xac = 10101100b */ X86_EFL_PF,
    /* 0xad = 10101101b */ 0,
    /* 0xae = 10101110b */ 0,
    /* 0xaf = 10101111b */ X86_EFL_PF,
    /* 0xb0 = 10110000b */ 0,
    /* 0xb1 = 10110001b */ X86_EFL_PF,
    /* 0xb2 = 10110010b */ X86_EFL_PF,
    /* 0xb3 = 10110011b */ 0,
    /* 0xb4 = 10110100b */ X86_EFL_PF,
    /* 0xb5 = 10110101b */ 0,
    /* 0xb6 = 10110110b */ 0,
    /* 0xb7 = 10110111b */ X86_EFL_PF,
    /* 0xb8 = 10111000b */ X86_EFL_PF,
    /* 0xb9 = 10111001b */ 0,
    /* 0xba = 10111010b */ 0,
    /* 0xbb = 10111011b */ X86_EFL_PF,
    /* 0xbc = 10111100b */ 0,
    /* 0xbd = 10111101b */ X86_EFL_PF,
    /* 0xbe = 10111110b */ X86_EFL_PF,
    /* 0xbf = 10111111b */ 0,
    /* 0xc0 = 11000000b */ X86_EFL_PF,
    /* 0xc1 = 11000001b */ 0,
    /* 0xc2 = 11000010b */ 0,
    /* 0xc3 = 11000011b */ X86_EFL_PF,
    /* 0xc4 = 11000100b */ 0,
    /* 0xc5 = 11000101b */ X86_EFL_PF,
    /* 0xc6 = 11000110b */ X86_EFL_PF,
    /* 0xc7 = 11000111b */ 0,
    /* 0xc8 = 11001000b */ 0,
    /* 0xc9 = 11001001b */ X86_EFL_PF,
    /* 0xca = 11001010b */ X86_EFL_PF,
    /* 0xcb = 11001011b */ 0,
    /* 0xcc = 11001100b */ X86_EFL_PF,
    /* 0xcd = 11001101b */ 0,
    /* 0xce = 11001110b */ 0,
    /* 0xcf = 11001111b */ X86_EFL_PF,
    /* 0xd0 = 11010000b */ 0,
    /* 0xd1 = 11010001b */ X86_EFL_PF,
    /* 0xd2 = 11010010b */ X86_EFL_PF,
    /* 0xd3 = 11010011b */ 0,
    /* 0xd4 = 11010100b */ X86_EFL_PF,
    /* 0xd5 = 11010101b */ 0,
    /* 0xd6 = 11010110b */ 0,
    /* 0xd7 = 11010111b */ X86_EFL_PF,
    /* 0xd8 = 11011000b */ X86_EFL_PF,
    /* 0xd9 = 11011001b */ 0,
    /* 0xda = 11011010b */ 0,
    /* 0xdb = 11011011b */ X86_EFL_PF,
    /* 0xdc = 11011100b */ 0,
    /* 0xdd = 11011101b */ X86_EFL_PF,
    /* 0xde = 11011110b */ X86_EFL_PF,
    /* 0xdf = 11011111b */ 0,
    /* 0xe0 = 11100000b */ 0,
    /* 0xe1 = 11100001b */ X86_EFL_PF,
    /* 0xe2 = 11100010b */ X86_EFL_PF,
    /* 0xe3 = 11100011b */ 0,
    /* 0xe4 = 11100100b */ X86_EFL_PF,
    /* 0xe5 = 11100101b */ 0,
    /* 0xe6 = 11100110b */ 0,
    /* 0xe7 = 11100111b */ X86_EFL_PF,
    /* 0xe8 = 11101000b */ X86_EFL_PF,
    /* 0xe9 = 11101001b */ 0,
    /* 0xea = 11101010b */ 0,
    /* 0xeb = 11101011b */ X86_EFL_PF,
    /* 0xec = 11101100b */ 0,
    /* 0xed = 11101101b */ X86_EFL_PF,
    /* 0xee = 11101110b */ X86_EFL_PF,
    /* 0xef = 11101111b */ 0,
    /* 0xf0 = 11110000b */ X86_EFL_PF,
    /* 0xf1 = 11110001b */ 0,
    /* 0xf2 = 11110010b */ 0,
    /* 0xf3 = 11110011b */ X86_EFL_PF,
    /* 0xf4 = 11110100b */ 0,
    /* 0xf5 = 11110101b */ X86_EFL_PF,
    /* 0xf6 = 11110110b */ X86_EFL_PF,
    /* 0xf7 = 11110111b */ 0,
    /* 0xf8 = 11111000b */ 0,
    /* 0xf9 = 11111001b */ X86_EFL_PF,
    /* 0xfa = 11111010b */ X86_EFL_PF,
    /* 0xfb = 11111011b */ 0,
    /* 0xfc = 11111100b */ X86_EFL_PF,
    /* 0xfd = 11111101b */ 0,
    /* 0xfe = 11111110b */ 0,
    /* 0xff = 11111111b */ X86_EFL_PF,
};
#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */



/*
 * There are a few 64-bit on 32-bit things we'd rather do in C. Actually, doing
 * it all in C is probably safer at the moment; we can optimize what's
 * necessary later, maybe.
 */
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)


/*********************************************************************************************************************************
*   Binary Operations                                                                                                            *
*********************************************************************************************************************************/

/*
 * ADD
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, 0);
}
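
/* Illustrative example (editorial addition, not from the upstream source):
   the carry expression 'uResult < uDst' relies on wrap-around, e.g.
   UINT64_MAX + 1 wraps to 0 and 0 < UINT64_MAX is true, so X86_EFL_CF ends up
   set exactly when the unsigned add overflowed. */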

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * ADC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, 0);
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, 0);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SUB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, 1);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, 1);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, 1);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, 1);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SBB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, 1);
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, 1);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, 1);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /** @todo verify AF and OF calculations. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, 1);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * OR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * XOR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * AND
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * CMP
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDstTmp = *puDst;
    iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDstTmp = *puDst;
    iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDstTmp = *puDst;
    iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDstTmp = *puDst;
    iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * TEST
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * LOCK prefixed variants of the above
 */

/** Locked binary operand operation. */
# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    do { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp    = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, uSrc, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)


#define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint ## a_cBitsWidth ## _t uSrc, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
    }
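
/* Editorial sketch (not from the upstream source) of what
   EMIT_LOCKED_BIN_OP(add, 32) expands to, showing the token pasting:

       IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32_locked,(uint32_t *puDst,
                                                        uint32_t uSrc,
                                                        uint32_t *pfEFlags))
       {
           uint32_t uOld = ASMAtomicUoReadU32(puDst);
           uint32_t uTmp;
           uint32_t fEflTmp;
           do
           {
               uTmp    = uOld;
               fEflTmp = *pfEFlags;
               iemAImpl_add_u32(&uTmp, uSrc, &fEflTmp);
           } while (!ASMAtomicCmpXchgExU32(puDst, uTmp, uOld, &uOld));
           *pfEFlags = fEflTmp;
       }

   The compare-and-swap loop recomputes the result from a fresh uOld on every
   failed exchange, so the destination and EFLAGS reflect one atomic add. */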

EMIT_LOCKED_BIN_OP(add, 64)
EMIT_LOCKED_BIN_OP(adc, 64)
EMIT_LOCKED_BIN_OP(sub, 64)
EMIT_LOCKED_BIN_OP(sbb, 64)
EMIT_LOCKED_BIN_OP(or, 64)
EMIT_LOCKED_BIN_OP(xor, 64)
EMIT_LOCKED_BIN_OP(and, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(add, 32)
EMIT_LOCKED_BIN_OP(adc, 32)
EMIT_LOCKED_BIN_OP(sub, 32)
EMIT_LOCKED_BIN_OP(sbb, 32)
EMIT_LOCKED_BIN_OP(or, 32)
EMIT_LOCKED_BIN_OP(xor, 32)
EMIT_LOCKED_BIN_OP(and, 32)

EMIT_LOCKED_BIN_OP(add, 16)
EMIT_LOCKED_BIN_OP(adc, 16)
EMIT_LOCKED_BIN_OP(sub, 16)
EMIT_LOCKED_BIN_OP(sbb, 16)
EMIT_LOCKED_BIN_OP(or, 16)
EMIT_LOCKED_BIN_OP(xor, 16)
EMIT_LOCKED_BIN_OP(and, 16)

EMIT_LOCKED_BIN_OP(add, 8)
EMIT_LOCKED_BIN_OP(adc, 8)
EMIT_LOCKED_BIN_OP(sub, 8)
EMIT_LOCKED_BIN_OP(sbb, 8)
EMIT_LOCKED_BIN_OP(or, 8)
EMIT_LOCKED_BIN_OP(xor, 8)
EMIT_LOCKED_BIN_OP(and, 8)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * Bit operations (same signature as above).
 */

/*
 * BT
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t uDst = *puDst;
    if (uDst & RT_BIT_64(uSrc))
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    }
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    }
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    }
    else
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTS
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst  = *puDst;
    if (uDst & fMask)
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


EMIT_LOCKED_BIN_OP(btc, 64)
EMIT_LOCKED_BIN_OP(btr, 64)
EMIT_LOCKED_BIN_OP(bts, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(btc, 32)
EMIT_LOCKED_BIN_OP(btr, 32)
EMIT_LOCKED_BIN_OP(bts, 32)

EMIT_LOCKED_BIN_OP(btc, 16)
EMIT_LOCKED_BIN_OP(btr, 16)
EMIT_LOCKED_BIN_OP(bts, 16)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * BSF - first (least significant) bit set
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitFirstSetU64(uSrc);
    if (iBit)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}
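
/* Illustrative example (editorial addition, not from the upstream source):
   ASMBitFirstSetU64 returns a 1-based index (0 when no bit is set), so for
   uSrc = 0x00f0 it returns 5 and the destination receives 4, the BSF result;
   for uSrc = 0 the destination is left untouched and only ZF is set. */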

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitFirstSetU32(uSrc);
    if (iBit)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitFirstSetU16(uSrc);
    if (iBit)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BSR - last (most significant) bit set
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitLastSetU64(uSrc);
    if (uSrc)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}
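
/* Illustrative example (editorial addition, not from the upstream source):
   ASMBitLastSetU64 is likewise 1-based, so for uSrc = 0x00f0 it returns 8 and
   BSR stores 7, the index of the most significant set bit. */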

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitLastSetU32(uSrc);
    if (uSrc)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
    /** @todo check what real CPUs do. */
    unsigned iBit = ASMBitLastSetU16(uSrc);
    if (uSrc)
    {
        *puDst = iBit - 1;
        *pfEFlags &= ~X86_EFL_ZF;
    }
    else
        *pfEFlags |= X86_EFL_ZF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XCHG
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_locked,(uint64_t *puMem, uint64_t *puReg))
{
#if ARCH_BITS >= 64
    *puReg = ASMAtomicXchgU64(puMem, *puReg);
#else
    uint64_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
#endif
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_locked,(uint32_t *puMem, uint32_t *puReg))
{
    *puReg = ASMAtomicXchgU32(puMem, *puReg);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_locked,(uint16_t *puMem, uint16_t *puReg))
{
    *puReg = ASMAtomicXchgU16(puMem, *puReg);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_locked,(uint8_t *puMem, uint8_t *puReg))
{
    *puReg = ASMAtomicXchgU8(puMem, *puReg);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/* Unlocked variants for fDisregardLock mode: */

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_unlocked,(uint64_t *puMem, uint64_t *puReg))
{
    uint64_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_unlocked,(uint32_t *puMem, uint32_t *puReg))
{
    uint32_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_unlocked,(uint16_t *puMem, uint16_t *puReg))
{
    uint16_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_unlocked,(uint8_t *puMem, uint8_t *puReg))
{
    uint8_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XADD and LOCK XADD.
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst;
    iemAImpl_add_u64(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64_locked,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
{
    uint64_t uOld = ASMAtomicUoReadU64(puDst);
    uint64_t uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u64(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU64(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst;
    iemAImpl_add_u32(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32_locked,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
{
    uint32_t uOld = ASMAtomicUoReadU32(puDst);
    uint32_t uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u32(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU32(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst;
    iemAImpl_add_u16(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16_locked,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
{
    uint16_t uOld = ASMAtomicUoReadU16(puDst);
    uint16_t uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u16(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU16(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst;
    iemAImpl_add_u8(&uResult, *puReg, pfEFlags);
    *puDst = uResult;
    *puReg = uDst;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8_locked,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
{
    uint8_t  uOld = ASMAtomicUoReadU8(puDst);
    uint8_t  uTmpDst;
    uint32_t fEflTmp;
    do
    {
        uTmpDst = uOld;
        fEflTmp = *pfEFlags;
        iemAImpl_add_u8(&uTmpDst, *puReg, &fEflTmp);
    } while (!ASMAtomicCmpXchgExU8(puDst, uTmpDst, uOld, &uOld));
    *puReg    = uOld;
    *pfEFlags = fEflTmp;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
#endif /* !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * CMPXCHG, CMPXCHG8B, CMPXCHG16B
 *
 * Note! We don't have non-locking/atomic cmpxchg primitives, so all cmpxchg
 *       instructions are emulated as locked.
 */
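
/* Editorial note (not from the upstream source): the emulated semantics match
   the instruction - if the accumulator (AL/AX/EAX/RAX) equals the destination,
   the source is stored and ZF is set; otherwise the current destination value
   is loaded into the accumulator and ZF is cleared.  E.g. with *pu32Dst == 7
   and *puEax == 7, iemAImpl_cmpxchg_u32_locked below stores uSrcReg and sets
   ZF; with *puEax == 9 it copies 7 into *puEax instead. */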

#if defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8_locked, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
{
    uint8_t const uOld = *puAl;
    if (ASMAtomicCmpXchgExU8(pu8Dst, uSrcReg, uOld, puAl))
    {
        Assert(*puAl == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16_locked,(uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
{
    uint16_t const uOld = *puAx;
    if (ASMAtomicCmpXchgExU16(pu16Dst, uSrcReg, uOld, puAx))
    {
        Assert(*puAx == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32_locked,(uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
{
    uint32_t const uOld = *puEax;
    if (ASMAtomicCmpXchgExU32(pu32Dst, uSrcReg, uOld, puEax))
    {
        Assert(*puEax == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}


# if ARCH_BITS == 32
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
# else
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
# endif
{
# if ARCH_BITS == 32
    uint64_t const uSrcReg = *puSrcReg;
# endif
    uint64_t const uOld = *puRax;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uSrcReg, uOld, puRax))
    {
        Assert(*puRax == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b_locked,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
                                                   uint32_t *pEFlags))
{
    uint64_t const uNew = pu64EbxEcx->u;
    uint64_t const uOld = pu64EaxEdx->u;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uNew, uOld, &pu64EaxEdx->u))
    {
        Assert(pu64EaxEdx->u == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}


# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
                                                    uint32_t *pEFlags))
{
#  ifdef VBOX_STRICT
    RTUINT128U const uOld = *pu128RaxRdx;
#  endif
#  if defined(RT_ARCH_AMD64)
    if (ASMAtomicCmpXchgU128v2(&pu128Dst->u, pu128RbxRcx->s.Hi, pu128RbxRcx->s.Lo, pu128RaxRdx->s.Hi, pu128RaxRdx->s.Lo,
                               &pu128RaxRdx->u))
#  else
    if (ASMAtomicCmpXchgU128(&pu128Dst->u, pu128RbxRcx->u, pu128RaxRdx->u, &pu128RaxRdx->u))
#  endif
    {
        Assert(pu128RaxRdx->s.Lo == uOld.s.Lo && pu128RaxRdx->s.Hi == uOld.s.Hi);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}
# endif

#endif /* defined(IEM_WITHOUT_ASSEMBLY) */

#if !defined(RT_ARCH_ARM64) /** @todo may need this for unaligned accesses... */
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
                                                      PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
{
    RTUINT128U u128Tmp = *pu128Dst;
    if (   u128Tmp.s.Lo == pu128RaxRdx->s.Lo
        && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
    {
        *pu128Dst = *pu128RbxRcx;
        *pEFlags |= X86_EFL_ZF;
    }
    else
    {
        *pu128RaxRdx = u128Tmp;
        *pEFlags &= ~X86_EFL_ZF;
    }
}
#endif /* !RT_ARCH_ARM64 */

#if defined(IEM_WITHOUT_ASSEMBLY)

/* Unlocked versions mapped to the locked ones: */

IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u8_locked(pu8Dst, puAl, uSrcReg, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16, (uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u16_locked(pu16Dst, puAx, uSrcReg, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32, (uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u32_locked(pu32Dst, puEax, uSrcReg, pEFlags);
}


# if ARCH_BITS == 32
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, puSrcReg, pEFlags);
}
# else
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, uSrcReg, pEFlags);
}
# endif


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg8b_locked(pu64Dst, pu64EaxEdx, pu64EbxEcx, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
                                             uint32_t *pEFlags))
{
    iemAImpl_cmpxchg16b_locked(pu128Dst, pu128RaxRdx, pu128RbxRcx, pEFlags);
}

#endif /* defined(IEM_WITHOUT_ASSEMBLY) */

#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)

/*
 * MUL, IMUL, DIV and IDIV helpers.
 *
 * - The U64 versions must use 128-bit intermediates, so we need to abstract the
 *   division step so we can select between using C operators and
 *   RTUInt128DivRem/RTUInt128MulU64ByU64.
 *
 * - The U8 versions return their output in AL + AH instead of xDX + xAX, with
 *   IDIV/DIV taking all their input from AX as well. This means we have to
 *   abstract some of the input loads and the result storing.
 */

DECLINLINE(void) RTUInt128DivRemByU64(PRTUINT128U pQuotient, PRTUINT128U pRemainder, PCRTUINT128U pDividend, uint64_t u64Divisor)
{
# ifdef __GNUC__ /* GCC can be really annoying in this function. */
    pQuotient->s.Lo = 0;
    pQuotient->s.Hi = 0;
# endif
    RTUINT128U Divisor;
    Divisor.s.Lo = u64Divisor;
    Divisor.s.Hi = 0;
    RTUInt128DivRem(pQuotient, pRemainder, pDividend, &Divisor);
}

# define DIV_LOAD(a_Dividend) \
    a_Dividend.s.Lo = *puA, a_Dividend.s.Hi = *puD
# define DIV_LOAD_U8(a_Dividend) \
    a_Dividend.u = *puAX

# define DIV_STORE(a_Quotient, a_uRemainder)    *puA  = (a_Quotient), *puD = (a_uRemainder)
# define DIV_STORE_U8(a_Quotient, a_uRemainder) *puAX = (a_Quotient) | ((uint16_t)(a_uRemainder) << 8)
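
/* Illustrative example (editorial addition, not from the upstream source):
   DIV_STORE_U8 packs the 8-bit results the way the hardware does - quotient
   in AL, remainder in AH.  Dividing AX = 0x0064 (100) by 9 gives quotient 11
   and remainder 1, so *puAX becomes 0x0b | (0x01 << 8) = 0x010b. */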

# define MUL_LOAD_F1()    *puA
# define MUL_LOAD_F1_U8() ((uint8_t)*puAX)

# define MUL_STORE(a_Result)    *puA = (a_Result).s.Lo, *puD = (a_Result).s.Hi
# define MUL_STORE_U8(a_Result) *puAX = a_Result.u

# define MULDIV_NEG(a_Value, a_cBitsWidth2x) \
    (a_Value).u = UINT ## a_cBitsWidth2x ## _C(0) - (a_Value).u
# define MULDIV_NEG_U128(a_Value, a_cBitsWidth2x) \
    RTUInt128AssignNeg(&(a_Value))

# define MULDIV_MUL(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
    (a_Result).u = (uint ## a_cBitsWidth2x ## _t)(a_Factor1) * (a_Factor2)
# define MULDIV_MUL_U128(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
    RTUInt128MulU64ByU64(&(a_Result), a_Factor1, a_Factor2);

# define MULDIV_MODDIV(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
    a_Quotient.u = (a_Dividend).u / (a_uDivisor), \
    a_Remainder.u = (a_Dividend).u % (a_uDivisor)
# define MULDIV_MODDIV_U128(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
    RTUInt128DivRemByU64(&a_Quotient, &a_Remainder, &a_Dividend, a_uDivisor)


/*
 * MUL
 */
# define EMIT_MUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_fnLoadF1, a_fnStore, a_fnMul) \
IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u ## a_cBitsWidth, a_Args) \
{ \
    RTUINT ## a_cBitsWidth2x ## U Result; \
    a_fnMul(Result, a_fnLoadF1(), uFactor, a_cBitsWidth2x); \
    a_fnStore(Result); \
    \
    /* MUL EFLAGS according to Skylake (similar to IMUL). */ \
    *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF); \
    if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
        *pfEFlags |= X86_EFL_SF; \
    *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */ \
    if (Result.s.Hi != 0) \
        *pfEFlags |= X86_EFL_CF | X86_EFL_OF; \
    return 0; \
}
EMIT_MUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor, uint32_t *pfEFlags), MUL_LOAD_F1, MUL_STORE, MULDIV_MUL_U128)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_MUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor, uint32_t *pfEFlags), MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
EMIT_MUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor, uint32_t *pfEFlags), MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
EMIT_MUL(8, 16, (uint16_t *puAX, uint8_t uFactor, uint32_t *pfEFlags), MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_MUL)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
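
/* Illustrative example (editorial addition, not from the upstream source):
   calling iemAImpl_mul_u8 with *puAX = 0x0080 (AL = 128) and uFactor = 2
   yields Result = 0x0100, so AX becomes 0x0100; the non-zero high byte sets
   CF and OF, while SF follows bit 7 of the low byte (clear here). */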
1703
1704
1705/*
1706 * IMUL
1707 */
1708# define EMIT_IMUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul) \
1709IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u ## a_cBitsWidth,a_Args) \
1710{ \
1711 RTUINT ## a_cBitsWidth2x ## U Result; \
1712 *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF \
1713 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF \
1714 /* Skylake may set: */ | X86_EFL_PF); \
1715 \
1716 uint ## a_cBitsWidth ## _t const uFactor1 = a_fnLoadF1(); \
1717 if ((int ## a_cBitsWidth ## _t)uFactor1 >= 0) \
1718 { \
1719 if ((int ## a_cBitsWidth ## _t)uFactor2 >= 0) \
1720 { \
1721 a_fnMul(Result, uFactor1, uFactor2, a_cBitsWidth2x); \
1722 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
1723 *pfEFlags |= X86_EFL_CF | X86_EFL_OF; \
1724 } \
1725 else \
1726 { \
1727 a_fnMul(Result, uFactor1, UINT ## a_cBitsWidth ## _C(0) - uFactor2, a_cBitsWidth2x); \
1728 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
1729 *pfEFlags |= X86_EFL_CF | X86_EFL_OF; \
1730 a_fnNeg(Result, a_cBitsWidth2x); \
1731 } \
1732 } \
1733 else \
1734 { \
1735 if ((int ## a_cBitsWidth ## _t)uFactor2 >= 0) \
1736 { \
1737 a_fnMul(Result, UINT ## a_cBitsWidth ## _C(0) - uFactor1, uFactor2, a_cBitsWidth2x); \
1738 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
1739 *pfEFlags |= X86_EFL_CF | X86_EFL_OF; \
1740 a_fnNeg(Result, a_cBitsWidth2x); \
1741 } \
1742 else \
1743 { \
1744 a_fnMul(Result, UINT ## a_cBitsWidth ## _C(0) - uFactor1, UINT ## a_cBitsWidth ## _C(0) - uFactor2, a_cBitsWidth2x); \
1745 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
1746 *pfEFlags |= X86_EFL_CF | X86_EFL_OF; \
1747 } \
1748 } \
1749 a_fnStore(Result); \
1750 if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
1751 *pfEFlags |= X86_EFL_SF; \
1752 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */ \
1753 return 0; \
1754}
1755/** @todo Testcase: IMUL 2 and 3 operands. */
1756EMIT_IMUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor2, uint32_t *pfEFlags), MUL_LOAD_F1, MUL_STORE, MULDIV_NEG_U128, MULDIV_MUL_U128)
1757# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1758EMIT_IMUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor2, uint32_t *pfEFlags), MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
1759EMIT_IMUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor2, uint32_t *pfEFlags), MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
1760EMIT_IMUL(8, 16, (uint16_t *puAX, uint8_t uFactor2, uint32_t *pfEFlags), MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_NEG, MULDIV_MUL)
1761# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
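
/*
 * Hand-computed sketch of the sign handling above: the product is formed on
 * unsigned magnitudes and negated afterwards, so the flags come out the same
 * as for a native signed multiply.
 *
 *      uint16_t uAX  = 0x00ff;             // AL = 0xff (-1)
 *      uint32_t fEfl = 0;
 *      iemAImpl_imul_u8(&uAX, 0xff, &fEfl);   // factor -1
 *      // uAX == 0x0001 (-1 * -1); it fits in AL, so CF and OF stay clear.
 *
 *      uAX = 0x0040;                       // AL = 64
 *      iemAImpl_imul_u8(&uAX, 2, &fEfl);
 *      // uAX == 0x0080; 128 > INT8_MAX, so CF and OF are set.
 */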
1762
1763
1764IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1765{
1766 uint64_t uIgn;
1767 iemAImpl_imul_u64(puDst, &uIgn, uSrc, pfEFlags);
1768}
1769
1770# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1771
1772IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1773{
1774 uint32_t uIgn;
1775 iemAImpl_imul_u32(puDst, &uIgn, uSrc, pfEFlags);
1776}
1777
1778
1779IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1780{
1781 uint16_t uIgn;
1782 iemAImpl_imul_u16(puDst, &uIgn, uSrc, pfEFlags);
1783}
1784
1785 # endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
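
/*
 * Hand-computed sketch of the two-operand form: the high half of the product
 * is computed only for the CF/OF decision and then thrown away.
 *
 *      uint16_t uDst = 300;
 *      uint32_t fEfl = 0;
 *      iemAImpl_imul_two_u16(&uDst, 200, &fEfl);
 *      // uDst == 0xea60 (60000, i.e. -5536 as int16_t); since 60000 exceeds
 *      // INT16_MAX, CF and OF are set.
 */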
1786
1787/*
1788 * DIV
1789 */
1790# define EMIT_DIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_fnLoad, a_fnStore, a_fnDivRem) \
1791IEM_DECL_IMPL_DEF(int, iemAImpl_div_u ## a_cBitsWidth,a_Args) \
1792{ \
1793 /* Note! Skylake leaves all flags alone. */ \
1794 RT_NOREF_PV(pfEFlags); \
1795 \
1796 RTUINT ## a_cBitsWidth2x ## U Dividend; \
1797 a_fnLoad(Dividend); \
1798 if ( uDivisor != 0 \
1799 && Dividend.s.Hi < uDivisor) \
1800 { \
1801 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
1802         a_fnDivRem(Quotient, Remainder, Dividend, uDivisor); \
1803 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
1804 /** @todo research the undefined DIV flags. */ \
1805 return 0; \
1806 } \
1807 /* #DE */ \
1808 return -1; \
1809}
1810EMIT_DIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, MULDIV_MODDIV_U128)
1811# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1812EMIT_DIV(32,64, (uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1813EMIT_DIV(16,32, (uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1814EMIT_DIV(8,16, (uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), DIV_LOAD_U8, DIV_STORE_U8, MULDIV_MODDIV)
1815# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
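
/*
 * Hand-computed example: the quotient must fit in the narrow half, otherwise
 * the helper signals #DE by returning non-zero.
 *
 *      uint16_t uAX  = 100;                // 16-bit dividend in AX
 *      uint32_t fEfl = 0;
 *      int rc = iemAImpl_div_u8(&uAX, 7, &fEfl);
 *      // rc == 0; AL == 14 (quotient), AH == 2 (remainder), uAX == 0x020e.
 *
 *      uAX = 0x0200;                       // 512 / 2 == 256, too big for AL
 *      rc = iemAImpl_div_u8(&uAX, 2, &fEfl);
 *      // rc != 0, so the caller raises #DE; uAX is left untouched.
 */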
1816
1817
1818/*
1819 * IDIV
1820 */
1821# define EMIT_IDIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem) \
1822IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u ## a_cBitsWidth,a_Args) \
1823{ \
1824 /* Note! Skylake leaves all flags alone. */ \
1825 RT_NOREF_PV(pfEFlags); \
1826 \
1827 /** @todo overflow checks */ \
1828 if (uDivisor != 0) \
1829 { \
1830 /* \
1831 * Convert to unsigned division. \
1832 */ \
1833 RTUINT ## a_cBitsWidth2x ## U Dividend; \
1834 a_fnLoad(Dividend); \
1835 if ((int ## a_cBitsWidth ## _t)Dividend.s.Hi < 0) \
1836 a_fnNeg(Dividend, a_cBitsWidth2x); \
1837 \
1838 uint ## a_cBitsWidth ## _t uDivisorPositive; \
1839 if ((int ## a_cBitsWidth ## _t)uDivisor >= 0) \
1840 uDivisorPositive = uDivisor; \
1841 else \
1842 uDivisorPositive = UINT ## a_cBitsWidth ## _C(0) - uDivisor; \
1843 \
1844 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
1845         a_fnDivRem(Quotient, Remainder, Dividend, uDivisorPositive); \
1846 \
1847 /* \
1848 * Setup the result, checking for overflows. \
1849 */ \
1850 if ((int ## a_cBitsWidth ## _t)uDivisor >= 0) \
1851 { \
1852 if ((int ## a_cBitsWidth ## _t)Dividend.s.Hi >= 0) \
1853 { \
1854 /* Positive divisor, positive dividend => result positive. */ \
1855 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
1856 { \
1857 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
1858 return 0; \
1859 } \
1860 } \
1861 else \
1862 { \
1863                 /* Positive divisor, negative dividend => result negative. */ \
1864 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
1865 { \
1866 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
1867 return 0; \
1868 } \
1869 } \
1870 } \
1871 else \
1872 { \
1873 if ((int ## a_cBitsWidth ## _t)Dividend.s.Hi >= 0) \
1874 { \
1875 /* Negative divisor, positive dividend => negative quotient, positive remainder. */ \
1876 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
1877 { \
1878 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, Remainder.s.Lo); \
1879 return 0; \
1880 } \
1881 } \
1882 else \
1883 { \
1884 /* Negative divisor, negative dividend => positive quotient, negative remainder. */ \
1885 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
1886 { \
1887 a_fnStore(Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
1888 return 0; \
1889 } \
1890 } \
1891 } \
1892 } \
1893 /* #DE */ \
1894 return -1; \
1895}
1896EMIT_IDIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, MULDIV_NEG_U128, MULDIV_MODDIV_U128)
1897# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1898EMIT_IDIV(32,64,(uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
1899EMIT_IDIV(16,32,(uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
1900EMIT_IDIV(8,16,(uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), DIV_LOAD_U8, DIV_STORE_U8, MULDIV_NEG, MULDIV_MODDIV)
1901# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
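
/*
 * Hand-computed example of the unsigned reduction above: x86 IDIV truncates
 * towards zero, the remainder takes the dividend's sign, and INT8_MIN
 * divided by -1 is the classic overflow case.
 *
 *      uint16_t uAX  = 0xff85;             // AX = -123
 *      uint32_t fEfl = 0;
 *      int rc = iemAImpl_idiv_u8(&uAX, 4, &fEfl);
 *      // rc == 0; AL == 0xe2 (-30), AH == 0xfd (-3), uAX == 0xfde2.
 *
 *      uAX = 0xff80;                       // AX = -128
 *      rc = iemAImpl_idiv_u8(&uAX, 0xff, &fEfl);   // divisor -1
 *      // rc != 0: +128 does not fit in AL, so the caller raises #DE.
 */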
1902
1903
1904/*********************************************************************************************************************************
1905* Unary operations. *
1906*********************************************************************************************************************************/
1907
1908/**
1909 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for an INC or DEC instruction.
1910 *
1911 * CF is NOT modified for hysterical raisins (allegedly for carrying and
1912 * borrowing in arithmetic loops on intel 8008).
1913 *
1914 * @returns Status bits.
1915 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
1916 * @param a_uResult Unsigned result value.
1917 * @param a_uDst The original destination value (for AF calc).
1918 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
1919 * @param a_OfMethod 0 for INC-style, 1 for DEC-style.
1920 */
1921#define IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth, a_OfMethod) \
1922 do { \
1923 uint32_t fEflTmp = *(a_pfEFlags); \
1924         fEflTmp &= ~X86_EFL_STATUS_BITS | X86_EFL_CF; /* CF is preserved, see above */ \
1925 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
1926 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
1927 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
1928 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
1929         fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(a_OfMethod == 0 ? (((a_uDst) ^ RT_BIT_64(a_cBitsWidth - 1)) & (a_uResult)) \
1930                                                                    : ((a_uDst) & ((a_uResult) ^ RT_BIT_64(a_cBitsWidth - 1))) ); \
1931 *(a_pfEFlags) = fEflTmp; \
1932 } while (0)
1933
1934/*
1935 * INC
1936 */
1937
1938IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
1939{
1940 uint64_t uDst = *puDst;
1941 uint64_t uResult = uDst + 1;
1942 *puDst = uResult;
1943 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 0 /*INC*/);
1944}
1945
1946# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1947
1948IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u32,(uint32_t *puDst, uint32_t *pfEFlags))
1949{
1950 uint32_t uDst = *puDst;
1951 uint32_t uResult = uDst + 1;
1952 *puDst = uResult;
1953 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 0 /*INC*/);
1954}
1955
1956
1957IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u16,(uint16_t *puDst, uint32_t *pfEFlags))
1958{
1959 uint16_t uDst = *puDst;
1960 uint16_t uResult = uDst + 1;
1961 *puDst = uResult;
1962 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 0 /*INC*/);
1963}
1964
1965IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u8,(uint8_t *puDst, uint32_t *pfEFlags))
1966{
1967 uint8_t uDst = *puDst;
1968 uint8_t uResult = uDst + 1;
1969 *puDst = uResult;
1970 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 0 /*INC*/);
1971}
1972
1973# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
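
/*
 * Hand-computed example of the CF quirk documented above: INC wrapping 0xff
 * around to zero leaves a previously set carry untouched.
 *
 *      uint8_t  uDst = 0xff;
 *      uint32_t fEfl = X86_EFL_CF;         // pretend a prior op set carry
 *      iemAImpl_inc_u8(&uDst, &fEfl);
 *      // uDst == 0; ZF, AF and PF are set, OF is clear, and CF is still set.
 */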
1974
1975
1976/*
1977 * DEC
1978 */
1979
1980IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
1981{
1982 uint64_t uDst = *puDst;
1983 uint64_t uResult = uDst - 1;
1984 *puDst = uResult;
1985     IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 1 /*DEC*/);
1986}
1987
1988# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1989
1990IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u32,(uint32_t *puDst, uint32_t *pfEFlags))
1991{
1992 uint32_t uDst = *puDst;
1993 uint32_t uResult = uDst - 1;
1994 *puDst = uResult;
1995     IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 1 /*DEC*/);
1996}
1997
1998
1999IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2000{
2001 uint16_t uDst = *puDst;
2002 uint16_t uResult = uDst - 1;
2003 *puDst = uResult;
2004     IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 1 /*DEC*/);
2005}
2006
2007
2008IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2009{
2010 uint8_t uDst = *puDst;
2011 uint8_t uResult = uDst - 1;
2012 *puDst = uResult;
2013     IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 1 /*DEC*/);
2014}
2015
2016# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2017
2018
2019/*
2020 * NOT
2021 */
2022
2023IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2024{
2025 uint64_t uDst = *puDst;
2026 uint64_t uResult = ~uDst;
2027 *puDst = uResult;
2028 /* EFLAGS are not modified. */
2029 RT_NOREF_PV(pfEFlags);
2030}
2031
2032# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2033
2034IEM_DECL_IMPL_DEF(void, iemAImpl_not_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2035{
2036 uint32_t uDst = *puDst;
2037 uint32_t uResult = ~uDst;
2038 *puDst = uResult;
2039 /* EFLAGS are not modified. */
2040 RT_NOREF_PV(pfEFlags);
2041}
2042
2043IEM_DECL_IMPL_DEF(void, iemAImpl_not_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2044{
2045 uint16_t uDst = *puDst;
2046 uint16_t uResult = ~uDst;
2047 *puDst = uResult;
2048 /* EFLAGS are not modified. */
2049 RT_NOREF_PV(pfEFlags);
2050}
2051
2052IEM_DECL_IMPL_DEF(void, iemAImpl_not_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2053{
2054 uint8_t uDst = *puDst;
2055 uint8_t uResult = ~uDst;
2056 *puDst = uResult;
2057 /* EFLAGS are not modified. */
2058 RT_NOREF_PV(pfEFlags);
2059}
2060
2061# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2062
2063
2064/*
2065 * NEG
2066 */
2067
2068/**
2069 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for an NEG instruction.
2070 *
2071 * @returns Status bits.
2072 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2073 * @param a_uResult Unsigned result value.
2074 * @param a_uDst The original destination value (for AF calc).
2075 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2076 */
2077#define IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth) \
2078 do { \
2079 uint32_t fEflTmp = *(a_pfEFlags); \
2080 fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; \
2081 fEflTmp |= ((a_uDst) != 0) << X86_EFL_CF_BIT; \
2082 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2083 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2084 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2085 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2086 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_uDst) & (a_uResult)); \
2087 *(a_pfEFlags) = fEflTmp; \
2088 } while (0)
2089
2090IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2091{
2092 uint64_t uDst = *puDst;
2093 uint64_t uResult = (uint64_t)0 - uDst;
2094 *puDst = uResult;
2095 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 64);
2096}
2097
2098# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2099
2100IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2101{
2102 uint32_t uDst = *puDst;
2103 uint32_t uResult = (uint32_t)0 - uDst;
2104 *puDst = uResult;
2105 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 32);
2106}
2107
2108
2109IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2110{
2111 uint16_t uDst = *puDst;
2112 uint16_t uResult = (uint16_t)0 - uDst;
2113 *puDst = uResult;
2114 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 16);
2115}
2116
2117
2118IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2119{
2120 uint8_t uDst = *puDst;
2121 uint8_t uResult = (uint8_t)0 - uDst;
2122 *puDst = uResult;
2123 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 8);
2124}
2125
2126# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
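
/*
 * Hand-computed example: NEG sets CF for any non-zero operand, and negating
 * the most negative value overflows back onto itself.
 *
 *      uint8_t  uDst = 0x80;               // -128
 *      uint32_t fEfl = 0;
 *      iemAImpl_neg_u8(&uDst, &fEfl);
 *      // uDst == 0x80 again; CF, SF and OF are set, ZF is clear.
 */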
2127
2128/*
2129 * Locked variants.
2130 */
2131
2132/** Emit a function for doing a locked unary operand operation. */
2133# define EMIT_LOCKED_UNARY_OP(a_Mnemonic, a_cBitsWidth) \
2134 IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
2135 uint32_t *pfEFlags)) \
2136 { \
2137 uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
2138 uint ## a_cBitsWidth ## _t uTmp; \
2139 uint32_t fEflTmp; \
2140 do \
2141 { \
2142 uTmp = uOld; \
2143 fEflTmp = *pfEFlags; \
2144 iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, &fEflTmp); \
2145 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
2146 *pfEFlags = fEflTmp; \
2147 }
2148
2149EMIT_LOCKED_UNARY_OP(inc, 64)
2150EMIT_LOCKED_UNARY_OP(dec, 64)
2151EMIT_LOCKED_UNARY_OP(not, 64)
2152EMIT_LOCKED_UNARY_OP(neg, 64)
2153# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2154EMIT_LOCKED_UNARY_OP(inc, 32)
2155EMIT_LOCKED_UNARY_OP(dec, 32)
2156EMIT_LOCKED_UNARY_OP(not, 32)
2157EMIT_LOCKED_UNARY_OP(neg, 32)
2158
2159EMIT_LOCKED_UNARY_OP(inc, 16)
2160EMIT_LOCKED_UNARY_OP(dec, 16)
2161EMIT_LOCKED_UNARY_OP(not, 16)
2162EMIT_LOCKED_UNARY_OP(neg, 16)
2163
2164EMIT_LOCKED_UNARY_OP(inc, 8)
2165EMIT_LOCKED_UNARY_OP(dec, 8)
2166EMIT_LOCKED_UNARY_OP(not, 8)
2167EMIT_LOCKED_UNARY_OP(neg, 8)
2168# endif
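
/*
 * The locked forms above follow an optimistic retry pattern: snapshot the
 * destination, run the plain worker on the copy, then publish the result
 * with a compare-exchange, retrying if another agent modified the memory in
 * the meantime.  A rough standalone analogue using C11 atomics instead of
 * the IPRT ASMAtomic* API (hypothetical helper name, sketch only):
 *
 *      #include <stdatomic.h>
 *      #include <stdint.h>
 *
 *      static void locked_inc_u8(_Atomic uint8_t *puDst, uint32_t *pfEFlags)
 *      {
 *          uint8_t  uOld = atomic_load_explicit(puDst, memory_order_relaxed);
 *          uint8_t  uTmp;
 *          uint32_t fEflTmp;
 *          do
 *          {
 *              uTmp    = uOld;             // work on a snapshot
 *              fEflTmp = *pfEFlags;        // flags recomputed on each retry
 *              iemAImpl_inc_u8(&uTmp, &fEflTmp);
 *          } while (!atomic_compare_exchange_weak(puDst, &uOld, uTmp));
 *          *pfEFlags = fEflTmp;            // only the winning pass counts
 *      }
 */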
2169
2170
2171/*********************************************************************************************************************************
2172* Shifting and Rotating *
2173*********************************************************************************************************************************/
2174
2175/*
2176 * ROL
2177 */
2178
2179/**
2180 * Updates the status bits (OF and CF) for an ROL instruction.
2181 *
2182 * @returns Status bits.
2183 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2184 * @param a_uResult Unsigned result value.
2185 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2186 */
2187#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(a_pfEFlags, a_uResult, a_cBitsWidth) do { \
2188 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2189 it the same way as for 1 bit shifts. */ \
2190 AssertCompile(X86_EFL_CF_BIT == 0); \
2191 uint32_t fEflTmp = *(a_pfEFlags); \
2192 fEflTmp &= ~(X86_EFL_CF | X86_EFL_OF); \
2193 uint32_t const fCarry = ((a_uResult) & X86_EFL_CF); \
2194 fEflTmp |= fCarry; \
2195 fEflTmp |= (((a_uResult) >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2196 *(a_pfEFlags) = fEflTmp; \
2197 } while (0)
2198
2199IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2200{
2201 cShift &= 63;
2202 if (cShift)
2203 {
2204 uint64_t uResult = ASMRotateLeftU64(*puDst, cShift);
2205 *puDst = uResult;
2206 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 64);
2207 }
2208}
2209
2210# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2211
2212IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u32,(uint32_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2213{
2214 cShift &= 31;
2215 if (cShift)
2216 {
2217 uint32_t uResult = ASMRotateLeftU32(*puDst, cShift);
2218 *puDst = uResult;
2219 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 32);
2220 }
2221}
2222
2223
2224IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u16,(uint16_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2225{
2226 cShift &= 15;
2227 if (cShift)
2228 {
2229 uint16_t uDst = *puDst;
2230 uint16_t uResult = (uDst << cShift) | (uDst >> (16 - cShift));
2231 *puDst = uResult;
2232 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 16);
2233 }
2234}
2235
2236
2237IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u8,(uint8_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2238{
2239 cShift &= 7;
2240 if (cShift)
2241 {
2242 uint8_t uDst = *puDst;
2243 uint8_t uResult = (uDst << cShift) | (uDst >> (8 - cShift));
2244 *puDst = uResult;
2245 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 8);
2246 }
2247}
2248
2249# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
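
/*
 * Hand-computed example: after a left rotate the freshly rotated-in bit 0 is
 * the new CF, and OF is that carry XORed with the new MSB.
 *
 *      uint8_t  uDst = 0x81;
 *      uint32_t fEfl = 0;
 *      iemAImpl_rol_u8(&uDst, 1, &fEfl);
 *      // uDst == 0x03; CF == 1 (old bit 7), OF == 1 (new MSB 0 ^ CF 1).
 */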
2250
2251
2252/*
2253 * ROR
2254 */
2255
2256/**
2257  * Updates the status bits (OF and CF) for an ROR instruction.
2258 *
2259 * @returns Status bits.
2260 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2261 * @param a_uResult Unsigned result value.
2262 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2263 */
2264#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(a_pfEFlags, a_uResult, a_cBitsWidth) do { \
2265 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2266 it the same way as for 1 bit shifts. */ \
2267 AssertCompile(X86_EFL_CF_BIT == 0); \
2268 uint32_t fEflTmp = *(a_pfEFlags); \
2269 fEflTmp &= ~(X86_EFL_CF | X86_EFL_OF); \
2270 uint32_t const fCarry = ((a_uResult) >> ((a_cBitsWidth) - 1)) & X86_EFL_CF; \
2271 fEflTmp |= fCarry; \
2272         fEflTmp |= ((((a_uResult) >> ((a_cBitsWidth) - 2)) ^ fCarry) & 1) << X86_EFL_OF_BIT; \
2273 *(a_pfEFlags) = fEflTmp; \
2274 } while (0)
2275
2276IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2277{
2278 cShift &= 63;
2279 if (cShift)
2280 {
2281 uint64_t const uResult = ASMRotateRightU64(*puDst, cShift);
2282 *puDst = uResult;
2283 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 64);
2284 }
2285}
2286
2287# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2288
2289IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u32,(uint32_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2290{
2291 cShift &= 31;
2292 if (cShift)
2293 {
2294         uint32_t const uResult = ASMRotateRightU32(*puDst, cShift);
2295 *puDst = uResult;
2296 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 32);
2297 }
2298}
2299
2300
2301IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u16,(uint16_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2302{
2303 cShift &= 15;
2304 if (cShift)
2305 {
2306 uint16_t uDst = *puDst;
2307 uint16_t uResult;
2308 uResult = uDst >> cShift;
2309 uResult |= uDst << (16 - cShift);
2310 *puDst = uResult;
2311 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 16);
2312 }
2313}
2314
2315
2316IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u8,(uint8_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2317{
2318 cShift &= 7;
2319 if (cShift)
2320 {
2321 uint8_t uDst = *puDst;
2322 uint8_t uResult;
2323 uResult = uDst >> cShift;
2324 uResult |= uDst << (8 - cShift);
2325 *puDst = uResult;
2326 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 8);
2327 }
2328}
2329
2330# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
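
/*
 * Hand-computed example: after a right rotate CF is the new MSB, and OF is
 * the XOR of the two most significant result bits.
 *
 *      uint8_t  uDst = 0x01;
 *      uint32_t fEfl = 0;
 *      iemAImpl_ror_u8(&uDst, 1, &fEfl);
 *      // uDst == 0x80; CF == 1 (bit 7), OF == 1 (bit 7 ^ bit 6 == 1 ^ 0).
 */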
2331
2332
2333/*
2334 * RCL
2335 */
2336#define EMIT_RCL(a_cBitsWidth) \
2337IEM_DECL_IMPL_DEF(void, iemAImpl_rcl_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2338{ \
2339 cShift &= a_cBitsWidth - 1; \
2340 if (cShift) \
2341 { \
2342 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2343 uint ## a_cBitsWidth ## _t uResult = uDst << cShift; \
2344 if (cShift > 1) \
2345 uResult |= uDst >> (a_cBitsWidth + 1 - cShift); \
2346 \
2347 uint32_t fEfl = *pfEFlags; \
2348 AssertCompile(X86_EFL_CF_BIT == 0); \
2349 uResult |= (uint ## a_cBitsWidth ## _t)(fEfl & X86_EFL_CF) << (cShift - 1); \
2350 \
2351 *puDst = uResult; \
2352 \
2353 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2354 it the same way as for 1 bit shifts. */ \
2355 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2356 uint32_t const fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2357 fEfl |= fCarry; \
2358 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2359 *pfEFlags = fEfl; \
2360 } \
2361}
2362EMIT_RCL(64)
2363# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2364EMIT_RCL(32)
2365EMIT_RCL(16)
2366EMIT_RCL(8)
2367# endif
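
/*
 * Hand-computed example: RCL rotates through the carry flag, so the old CF
 * enters at bit 0 while the old MSB becomes the new CF.
 *
 *      uint8_t  uDst = 0x80;
 *      uint32_t fEfl = X86_EFL_CF;
 *      iemAImpl_rcl_u8(&uDst, 1, &fEfl);
 *      // uDst == 0x01 (the old CF); CF == 1 (old bit 7),
 *      // OF == 1 (new MSB 0 ^ new CF 1).
 */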
2368
2369
2370/*
2371 * RCR
2372 */
2373#define EMIT_RCR(a_cBitsWidth) \
2374IEM_DECL_IMPL_DEF(void, iemAImpl_rcr_u ## a_cBitsWidth,(uint ## a_cBitsWidth ##_t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2375{ \
2376 cShift &= a_cBitsWidth - 1; \
2377 if (cShift) \
2378 { \
2379 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2380 uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
2381 if (cShift > 1) \
2382 uResult |= uDst << (a_cBitsWidth + 1 - cShift); \
2383 \
2384 AssertCompile(X86_EFL_CF_BIT == 0); \
2385 uint32_t fEfl = *pfEFlags; \
2386 uResult |= (uint ## a_cBitsWidth ## _t)(fEfl & X86_EFL_CF) << (a_cBitsWidth - cShift); \
2387 *puDst = uResult; \
2388 \
2389 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2390 it the same way as for 1 bit shifts. */ \
2391 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2392 uint32_t const fCarry = (uDst >> (cShift - 1)) & X86_EFL_CF; \
2393 fEfl |= fCarry; \
2394 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2395 *pfEFlags = fEfl; \
2396 } \
2397}
2398EMIT_RCR(64)
2399# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2400EMIT_RCR(32)
2401EMIT_RCR(16)
2402EMIT_RCR(8)
2403# endif
2404
2405
2406/*
2407 * SHL
2408 */
2409#define EMIT_SHL(a_cBitsWidth) \
2410IEM_DECL_IMPL_DEF(void, iemAImpl_shl_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2411{ \
2412 cShift &= a_cBitsWidth - 1; \
2413 if (cShift) \
2414 { \
2415 uint ## a_cBitsWidth ##_t const uDst = *puDst; \
2416 uint ## a_cBitsWidth ##_t uResult = uDst << cShift; \
2417 *puDst = uResult; \
2418 \
2419 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2420 it the same way as for 1 bit shifts. The AF bit is undefined, we \
2421 always set it to zero atm. */ \
2422 AssertCompile(X86_EFL_CF_BIT == 0); \
2423 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2424 uint32_t fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2425 fEfl |= fCarry; \
2426 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2427 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2428 fEfl |= X86_EFL_CALC_ZF(uResult); \
2429 fEfl |= g_afParity[uResult & 0xff]; \
2430 *pfEFlags = fEfl; \
2431 } \
2432}
2433EMIT_SHL(64)
2434# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2435EMIT_SHL(32)
2436EMIT_SHL(16)
2437EMIT_SHL(8)
2438# endif
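
/*
 * Hand-computed example: the last bit shifted out of the top becomes CF, and
 * a 1-bit SHL flags signed overflow whenever the sign bit changes.
 *
 *      uint8_t  uDst = 0x40;
 *      uint32_t fEfl = 0;
 *      iemAImpl_shl_u8(&uDst, 1, &fEfl);
 *      // uDst == 0x80; CF == 0, OF == 1 (sign changed), SF set, ZF clear.
 */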
2439
2440
2441/*
2442 * SHR
2443 */
2444#define EMIT_SHR(a_cBitsWidth) \
2445IEM_DECL_IMPL_DEF(void, iemAImpl_shr_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2446{ \
2447 cShift &= a_cBitsWidth - 1; \
2448 if (cShift) \
2449 { \
2450 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2451 uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
2452 *puDst = uResult; \
2453 \
2454 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2455 it the same way as for 1 bit shifts. The AF bit is undefined, we \
2456 always set it to zero atm. */ \
2457 AssertCompile(X86_EFL_CF_BIT == 0); \
2458 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2459 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2460 fEfl |= (uDst >> (a_cBitsWidth - 1)) << X86_EFL_OF_BIT; \
2461 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2462 fEfl |= X86_EFL_CALC_ZF(uResult); \
2463 fEfl |= g_afParity[uResult & 0xff]; \
2464 *pfEFlags = fEfl; \
2465 } \
2466}
2467EMIT_SHR(64)
2468# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2469EMIT_SHR(32)
2470EMIT_SHR(16)
2471EMIT_SHR(8)
2472# endif
2473
2474
2475/*
2476 * SAR
2477 */
2478#define EMIT_SAR(a_cBitsWidth) \
2479IEM_DECL_IMPL_DEF(void, iemAImpl_sar_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2480{ \
2481 cShift &= a_cBitsWidth - 1; \
2482 if (cShift) \
2483 { \
2484 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2485 uint ## a_cBitsWidth ## _t uResult = (int ## a_cBitsWidth ## _t)uDst >> cShift; \
2486 *puDst = uResult; \
2487 \
2488 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2489 it the same way as for 1 bit shifts (0). The AF bit is undefined, \
2490 we always set it to zero atm. */ \
2491 AssertCompile(X86_EFL_CF_BIT == 0); \
2492 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2493 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2494 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2495 fEfl |= X86_EFL_CALC_ZF(uResult); \
2496 fEfl |= g_afParity[uResult & 0xff]; \
2497 *pfEFlags = fEfl; \
2498 } \
2499}
2500EMIT_SAR(64)
2501# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2502EMIT_SAR(32)
2503EMIT_SAR(16)
2504EMIT_SAR(8)
2505# endif
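
/*
 * Hand-computed example: the signed cast above relies on the compiler doing
 * an arithmetic (sign-extending) right shift, which in practice all
 * supported compilers do; the sign is preserved and OF always ends up clear.
 *
 *      uint8_t  uDst = 0x81;               // -127
 *      uint32_t fEfl = 0;
 *      iemAImpl_sar_u8(&uDst, 1, &fEfl);
 *      // uDst == 0xc0 (-64); CF == 1 (old bit 0), SF set, OF clear.
 */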
2506
2507
2508/*
2509 * SHLD
2510 */
2511#define EMIT_SHLD(a_cBitsWidth) \
2512IEM_DECL_IMPL_DEF(void, iemAImpl_shld_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, \
2513 uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2514{ \
2515 cShift &= a_cBitsWidth - 1; \
2516 if (cShift) \
2517 { \
2518 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2519 uint ## a_cBitsWidth ## _t uResult = uDst << cShift; \
2520 uResult |= uSrc >> (a_cBitsWidth - cShift); \
2521 *puDst = uResult; \
2522 \
2523 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2524 it the same way as for 1 bit shifts. The AF bit is undefined, \
2525 we always set it to zero atm. */ \
2526 AssertCompile(X86_EFL_CF_BIT == 0); \
2527 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2528 fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2529 fEfl |= (uint32_t)((uDst >> (a_cBitsWidth - 1)) ^ (uint32_t)(uResult >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
2530 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2531 fEfl |= X86_EFL_CALC_ZF(uResult); \
2532 fEfl |= g_afParity[uResult & 0xff]; \
2533 *pfEFlags = fEfl; \
2534 } \
2535}
2536EMIT_SHLD(64)
2537# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2538EMIT_SHLD(32)
2539EMIT_SHLD(16)
2540EMIT_SHLD(8)
2541# endif
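
/*
 * Hand-computed example: SHLD shifts uDst left and fills the vacated low
 * bits from the top of uSrc; CF is the last bit shifted out of uDst.
 *
 *      uint16_t uDst = 0x1234;
 *      uint32_t fEfl = 0;
 *      iemAImpl_shld_u16(&uDst, 0xabcd, 4, &fEfl);
 *      // uDst == 0x234a (0x1234 << 4 | 0xabcd >> 12); CF == 1 (bit 12 of
 *      // the old uDst).
 */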
2542
2543
2544/*
2545 * SHRD
2546 */
2547#define EMIT_SHRD(a_cBitsWidth) \
2548IEM_DECL_IMPL_DEF(void, iemAImpl_shrd_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, \
2549 uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2550{ \
2551 cShift &= a_cBitsWidth - 1; \
2552 if (cShift) \
2553 { \
2554 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2555 uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
2556 uResult |= uSrc << (a_cBitsWidth - cShift); \
2557 *puDst = uResult; \
2558 \
2559 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2560 it the same way as for 1 bit shifts. The AF bit is undefined, \
2561 we always set it to zero atm. */ \
2562 AssertCompile(X86_EFL_CF_BIT == 0); \
2563 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2564 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2565 fEfl |= (uint32_t)((uDst >> (a_cBitsWidth - 1)) ^ (uint32_t)(uResult >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
2566 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2567 fEfl |= X86_EFL_CALC_ZF(uResult); \
2568 fEfl |= g_afParity[uResult & 0xff]; \
2569 *pfEFlags = fEfl; \
2570 } \
2571}
2572EMIT_SHRD(64)
2573# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2574EMIT_SHRD(32)
2575EMIT_SHRD(16)
2576EMIT_SHRD(8)
2577# endif
2578
2579
2580# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2581/*
2582 * BSWAP
2583 */
2584
2585IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u64,(uint64_t *puDst))
2586{
2587 *puDst = ASMByteSwapU64(*puDst);
2588}
2589
2590
2591IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u32,(uint32_t *puDst))
2592{
2593 *puDst = ASMByteSwapU32(*puDst);
2594}
2595
2596
2597 /* Note! Behaviour is undocumented for 16-bit operands, hence the 32-bit argument. */
2598IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u16,(uint32_t *puDst))
2599{
2600 *puDst = ASMByteSwapU16((uint16_t)*puDst) | (*puDst & UINT32_C(0xffff0000));
2601}
2602
2603# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2604
2605
2606
2607# if defined(IEM_WITHOUT_ASSEMBLY)
2608
2609/*
2610 * LFENCE, SFENCE & MFENCE.
2611 */
2612
2613IEM_DECL_IMPL_DEF(void, iemAImpl_lfence,(void))
2614{
2615 ASMReadFence();
2616}
2617
2618
2619IEM_DECL_IMPL_DEF(void, iemAImpl_sfence,(void))
2620{
2621 ASMWriteFence();
2622}
2623
2624
2625IEM_DECL_IMPL_DEF(void, iemAImpl_mfence,(void))
2626{
2627 ASMMemoryFence();
2628}
2629
2630
2631# ifndef RT_ARCH_ARM64
2632IEM_DECL_IMPL_DEF(void, iemAImpl_alt_mem_fence,(void))
2633{
2634 ASMMemoryFence();
2635}
2636# endif
2637
2638# endif
2639
2640#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
2641
2642
2643IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
2644{
2645 if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
2646 {
2647 *pu16Dst &= X86_SEL_MASK_OFF_RPL;
2648 *pu16Dst |= u16Src & X86_SEL_RPL;
2649
2650 *pfEFlags |= X86_EFL_ZF;
2651 }
2652 else
2653 *pfEFlags &= ~X86_EFL_ZF;
2654}
2655
2656
2657IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
2658{
2659 RT_NOREF(pFpuState);
2660 puDst->au32[0] = puSrc->au32[0];
2661 puDst->au32[1] = puSrc->au32[0];
2662 puDst->au32[2] = puSrc->au32[2];
2663 puDst->au32[3] = puSrc->au32[2];
2664}
2665
2666#ifdef IEM_WITH_VEX
2667
2668IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
2669{
2670 pXState->x87.aXMM[iYRegDst].au32[0] = pXState->x87.aXMM[iYRegSrc].au32[0];
2671 pXState->x87.aXMM[iYRegDst].au32[1] = pXState->x87.aXMM[iYRegSrc].au32[0];
2672 pXState->x87.aXMM[iYRegDst].au32[2] = pXState->x87.aXMM[iYRegSrc].au32[2];
2673 pXState->x87.aXMM[iYRegDst].au32[3] = pXState->x87.aXMM[iYRegSrc].au32[2];
2674 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
2675 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
2676 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
2677 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
2678}
2679
2680
2681IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
2682{
2683 pXState->x87.aXMM[iYRegDst].au32[0] = pSrc->au32[0];
2684 pXState->x87.aXMM[iYRegDst].au32[1] = pSrc->au32[0];
2685 pXState->x87.aXMM[iYRegDst].au32[2] = pSrc->au32[2];
2686 pXState->x87.aXMM[iYRegDst].au32[3] = pSrc->au32[2];
2687 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
2688 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
2689 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
2690 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
2691}
2692
2693#endif /* IEM_WITH_VEX */
2694
2695
2696IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
2697{
2698 RT_NOREF(pFpuState);
2699 puDst->au32[0] = puSrc->au32[1];
2700 puDst->au32[1] = puSrc->au32[1];
2701 puDst->au32[2] = puSrc->au32[3];
2702 puDst->au32[3] = puSrc->au32[3];
2703}
2704
2705
2706IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
2707{
2708 RT_NOREF(pFpuState);
2709 puDst->au64[0] = uSrc;
2710 puDst->au64[1] = uSrc;
2711}
2712
2713#ifdef IEM_WITH_VEX
2714
2715IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
2716{
2717 pXState->x87.aXMM[iYRegDst].au64[0] = pXState->x87.aXMM[iYRegSrc].au64[0];
2718 pXState->x87.aXMM[iYRegDst].au64[1] = pXState->x87.aXMM[iYRegSrc].au64[0];
2719 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
2720 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
2721}
2722
2723IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
2724{
2725 pXState->x87.aXMM[iYRegDst].au64[0] = pSrc->au64[0];
2726 pXState->x87.aXMM[iYRegDst].au64[1] = pSrc->au64[0];
2727 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
2728 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
2729}
2730
2731#endif /* IEM_WITH_VEX */
2732