VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp@ 93848

Last change on this file since 93848 was 93848, checked in by vboxsync, 3 years ago

VMM/IEM: Working on adding missing C version of IEMAllAImpl.asm functions. [build fix] bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 93.9 KB
Line 
1/* $Id: IEMAllAImplC.cpp 93848 2022-02-18 15:35:59Z vboxsync $ */
2/** @file
3 * IEM - Instruction Implementation in Assembly, portable C variant.
4 */
5
6/*
7 * Copyright (C) 2011-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "IEMInternal.h"
23#include <VBox/vmm/vmcc.h>
24#include <VBox/err.h>
25#include <iprt/x86.h>
26#include <iprt/uint128.h>
27
28
29/*********************************************************************************************************************************
30* Defined Constants And Macros *
31*********************************************************************************************************************************/
/** @def IEM_WITHOUT_ASSEMBLY
 * Enables all the code in this file.
 *
 * Unless the build already defines it, it is switched on automatically when
 * targeting 32-bit or 64-bit ARM and when doxygen is running.
 */
#if    !defined(IEM_WITHOUT_ASSEMBLY) \
    && (defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING))
# define IEM_WITHOUT_ASSEMBLY
#endif
40
/**
 * Calculates the sign flag value given a result and its bit width.
 *
 * The sign flag (SF) is a copy of the most significant bit of the result, so
 * that bit is isolated first and then moved into the SF position.
 *
 * @returns X86_EFL_SF or 0.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( ((uint32_t)((a_uResult) >> ((a_cBitsWidth) - 1)) & (uint32_t)1) << X86_EFL_SF_BIT )
53
/**
 * Calculates the zero flag value given a result.
 *
 * The zero flag (ZF) is set when the result is zero and clear otherwise.
 *
 * @returns X86_EFL_ZF or 0.
 * @param   a_uResult       Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (a_uResult) != 0 ? (uint32_t)0 : (uint32_t)1 << X86_EFL_ZF_BIT )
64
/**
 * Extracts the OF flag from a OF calculation result.
 *
 * The calculation result carries the overflow indicator in its most
 * significant bit (bit a_cBitsWidth - 1); these macros move that bit to the
 * X86_EFL_OF_BIT position and mask everything else away.
 *
 * These are typically used by concatenating with a bit count.  The problem is
 * that 8-bit values need shifting in the other direction than the others,
 * since their top bit (7) sits below the OF bit.
 *
 * @returns X86_EFL_OF or 0.
 * @param   a_uValue        The OF calculation result of the given width.
 */
#define X86_EFL_GET_OF_8(a_uValue)  (((uint32_t)(a_uValue) << (X86_EFL_OF_BIT - 8 + 1))   & X86_EFL_OF)
#define X86_EFL_GET_OF_16(a_uValue) ((uint32_t)((a_uValue) >> (16 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_32(a_uValue) ((uint32_t)((a_uValue) >> (32 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_64(a_uValue) ((uint32_t)((a_uValue) >> (64 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
75
/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after arithmetic op.
 *
 * This is a statement macro: it yields no value, the recalculated flags are
 * written back through @a a_pfEFlags.  Several arguments are evaluated more
 * than once, so they must be free of side effects.
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_uDst          The original destination value (for AF and OF calc).
 * @param   a_uSrc          The source value (for AF and OF calc).
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).  Must be a
 *                          plain literal since it is pasted onto
 *                          X86_EFL_GET_OF_.
 * @param   a_CfExpr        Bool expression for the carry flag (CF).
 * @param   a_OfMethod      0 for ADD-style, 1 for SUB-style.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_OfMethod) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        /* OF: ADD-style flips the sign bit of dst^src, so the first factor is set when the \
           operand signs are equal; SUB-style leaves it set when they differ.  The second \
           factor is set when the result sign differs from the destination sign. */ \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(  ((a_uDst) ^ (a_uSrc) ^ ((a_OfMethod) == 0 ? RT_BIT_64((a_cBitsWidth) - 1) : 0)) \
                                                   & ((a_uResult) ^ (a_uDst)) ); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
101
/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
 *
 * All status bits are cleared first; PF, ZF and SF are then derived from the
 * result and @a a_fExtra is OR'ed in on top.  CF and OF are defined to be 0 by
 * logical operations.  AF on the other hand is undefined.  We do not set AF,
 * as that seems to make the most sense (which probably makes it the most wrong
 * in real life).
 *
 * This is a statement macro; the new flags are written back through
 * @a a_pfEFlags.
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_fExtra        Additional bits to set.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t const fEflKept = *(a_pfEFlags) & ~X86_EFL_STATUS_BITS; \
        *(a_pfEFlags) = fEflKept \
                      | g_afParity[(a_uResult) & 0xff] \
                      | X86_EFL_CALC_ZF(a_uResult) \
                      | X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
                      | (a_fExtra); \
    } while (0)
125
126
127/*********************************************************************************************************************************
128* Global Variables *
129*********************************************************************************************************************************/
130#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
131/**
132 * Parity calculation table.
133 *
134 * The generator code:
135 * @code
136 * #include <stdio.h>
137 *
138 * int main()
139 * {
140 * unsigned b;
141 * for (b = 0; b < 256; b++)
142 * {
143 * int cOnes = ( b & 1)
144 * + ((b >> 1) & 1)
145 * + ((b >> 2) & 1)
146 * + ((b >> 3) & 1)
147 * + ((b >> 4) & 1)
148 * + ((b >> 5) & 1)
149 * + ((b >> 6) & 1)
150 * + ((b >> 7) & 1);
151 * printf(" /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
152 * b,
153 * (b >> 7) & 1,
154 * (b >> 6) & 1,
155 * (b >> 5) & 1,
156 * (b >> 4) & 1,
157 * (b >> 3) & 1,
158 * (b >> 2) & 1,
159 * (b >> 1) & 1,
160 * b & 1,
161 * cOnes & 1 ? "0" : "X86_EFL_PF");
162 * }
163 * return 0;
164 * }
165 * @endcode
166 */
167static uint8_t const g_afParity[256] =
168{
169 /* 0000 = 00000000b */ X86_EFL_PF,
170 /* 0x01 = 00000001b */ 0,
171 /* 0x02 = 00000010b */ 0,
172 /* 0x03 = 00000011b */ X86_EFL_PF,
173 /* 0x04 = 00000100b */ 0,
174 /* 0x05 = 00000101b */ X86_EFL_PF,
175 /* 0x06 = 00000110b */ X86_EFL_PF,
176 /* 0x07 = 00000111b */ 0,
177 /* 0x08 = 00001000b */ 0,
178 /* 0x09 = 00001001b */ X86_EFL_PF,
179 /* 0x0a = 00001010b */ X86_EFL_PF,
180 /* 0x0b = 00001011b */ 0,
181 /* 0x0c = 00001100b */ X86_EFL_PF,
182 /* 0x0d = 00001101b */ 0,
183 /* 0x0e = 00001110b */ 0,
184 /* 0x0f = 00001111b */ X86_EFL_PF,
185 /* 0x10 = 00010000b */ 0,
186 /* 0x11 = 00010001b */ X86_EFL_PF,
187 /* 0x12 = 00010010b */ X86_EFL_PF,
188 /* 0x13 = 00010011b */ 0,
189 /* 0x14 = 00010100b */ X86_EFL_PF,
190 /* 0x15 = 00010101b */ 0,
191 /* 0x16 = 00010110b */ 0,
192 /* 0x17 = 00010111b */ X86_EFL_PF,
193 /* 0x18 = 00011000b */ X86_EFL_PF,
194 /* 0x19 = 00011001b */ 0,
195 /* 0x1a = 00011010b */ 0,
196 /* 0x1b = 00011011b */ X86_EFL_PF,
197 /* 0x1c = 00011100b */ 0,
198 /* 0x1d = 00011101b */ X86_EFL_PF,
199 /* 0x1e = 00011110b */ X86_EFL_PF,
200 /* 0x1f = 00011111b */ 0,
201 /* 0x20 = 00100000b */ 0,
202 /* 0x21 = 00100001b */ X86_EFL_PF,
203 /* 0x22 = 00100010b */ X86_EFL_PF,
204 /* 0x23 = 00100011b */ 0,
205 /* 0x24 = 00100100b */ X86_EFL_PF,
206 /* 0x25 = 00100101b */ 0,
207 /* 0x26 = 00100110b */ 0,
208 /* 0x27 = 00100111b */ X86_EFL_PF,
209 /* 0x28 = 00101000b */ X86_EFL_PF,
210 /* 0x29 = 00101001b */ 0,
211 /* 0x2a = 00101010b */ 0,
212 /* 0x2b = 00101011b */ X86_EFL_PF,
213 /* 0x2c = 00101100b */ 0,
214 /* 0x2d = 00101101b */ X86_EFL_PF,
215 /* 0x2e = 00101110b */ X86_EFL_PF,
216 /* 0x2f = 00101111b */ 0,
217 /* 0x30 = 00110000b */ X86_EFL_PF,
218 /* 0x31 = 00110001b */ 0,
219 /* 0x32 = 00110010b */ 0,
220 /* 0x33 = 00110011b */ X86_EFL_PF,
221 /* 0x34 = 00110100b */ 0,
222 /* 0x35 = 00110101b */ X86_EFL_PF,
223 /* 0x36 = 00110110b */ X86_EFL_PF,
224 /* 0x37 = 00110111b */ 0,
225 /* 0x38 = 00111000b */ 0,
226 /* 0x39 = 00111001b */ X86_EFL_PF,
227 /* 0x3a = 00111010b */ X86_EFL_PF,
228 /* 0x3b = 00111011b */ 0,
229 /* 0x3c = 00111100b */ X86_EFL_PF,
230 /* 0x3d = 00111101b */ 0,
231 /* 0x3e = 00111110b */ 0,
232 /* 0x3f = 00111111b */ X86_EFL_PF,
233 /* 0x40 = 01000000b */ 0,
234 /* 0x41 = 01000001b */ X86_EFL_PF,
235 /* 0x42 = 01000010b */ X86_EFL_PF,
236 /* 0x43 = 01000011b */ 0,
237 /* 0x44 = 01000100b */ X86_EFL_PF,
238 /* 0x45 = 01000101b */ 0,
239 /* 0x46 = 01000110b */ 0,
240 /* 0x47 = 01000111b */ X86_EFL_PF,
241 /* 0x48 = 01001000b */ X86_EFL_PF,
242 /* 0x49 = 01001001b */ 0,
243 /* 0x4a = 01001010b */ 0,
244 /* 0x4b = 01001011b */ X86_EFL_PF,
245 /* 0x4c = 01001100b */ 0,
246 /* 0x4d = 01001101b */ X86_EFL_PF,
247 /* 0x4e = 01001110b */ X86_EFL_PF,
248 /* 0x4f = 01001111b */ 0,
249 /* 0x50 = 01010000b */ X86_EFL_PF,
250 /* 0x51 = 01010001b */ 0,
251 /* 0x52 = 01010010b */ 0,
252 /* 0x53 = 01010011b */ X86_EFL_PF,
253 /* 0x54 = 01010100b */ 0,
254 /* 0x55 = 01010101b */ X86_EFL_PF,
255 /* 0x56 = 01010110b */ X86_EFL_PF,
256 /* 0x57 = 01010111b */ 0,
257 /* 0x58 = 01011000b */ 0,
258 /* 0x59 = 01011001b */ X86_EFL_PF,
259 /* 0x5a = 01011010b */ X86_EFL_PF,
260 /* 0x5b = 01011011b */ 0,
261 /* 0x5c = 01011100b */ X86_EFL_PF,
262 /* 0x5d = 01011101b */ 0,
263 /* 0x5e = 01011110b */ 0,
264 /* 0x5f = 01011111b */ X86_EFL_PF,
265 /* 0x60 = 01100000b */ X86_EFL_PF,
266 /* 0x61 = 01100001b */ 0,
267 /* 0x62 = 01100010b */ 0,
268 /* 0x63 = 01100011b */ X86_EFL_PF,
269 /* 0x64 = 01100100b */ 0,
270 /* 0x65 = 01100101b */ X86_EFL_PF,
271 /* 0x66 = 01100110b */ X86_EFL_PF,
272 /* 0x67 = 01100111b */ 0,
273 /* 0x68 = 01101000b */ 0,
274 /* 0x69 = 01101001b */ X86_EFL_PF,
275 /* 0x6a = 01101010b */ X86_EFL_PF,
276 /* 0x6b = 01101011b */ 0,
277 /* 0x6c = 01101100b */ X86_EFL_PF,
278 /* 0x6d = 01101101b */ 0,
279 /* 0x6e = 01101110b */ 0,
280 /* 0x6f = 01101111b */ X86_EFL_PF,
281 /* 0x70 = 01110000b */ 0,
282 /* 0x71 = 01110001b */ X86_EFL_PF,
283 /* 0x72 = 01110010b */ X86_EFL_PF,
284 /* 0x73 = 01110011b */ 0,
285 /* 0x74 = 01110100b */ X86_EFL_PF,
286 /* 0x75 = 01110101b */ 0,
287 /* 0x76 = 01110110b */ 0,
288 /* 0x77 = 01110111b */ X86_EFL_PF,
289 /* 0x78 = 01111000b */ X86_EFL_PF,
290 /* 0x79 = 01111001b */ 0,
291 /* 0x7a = 01111010b */ 0,
292 /* 0x7b = 01111011b */ X86_EFL_PF,
293 /* 0x7c = 01111100b */ 0,
294 /* 0x7d = 01111101b */ X86_EFL_PF,
295 /* 0x7e = 01111110b */ X86_EFL_PF,
296 /* 0x7f = 01111111b */ 0,
297 /* 0x80 = 10000000b */ 0,
298 /* 0x81 = 10000001b */ X86_EFL_PF,
299 /* 0x82 = 10000010b */ X86_EFL_PF,
300 /* 0x83 = 10000011b */ 0,
301 /* 0x84 = 10000100b */ X86_EFL_PF,
302 /* 0x85 = 10000101b */ 0,
303 /* 0x86 = 10000110b */ 0,
304 /* 0x87 = 10000111b */ X86_EFL_PF,
305 /* 0x88 = 10001000b */ X86_EFL_PF,
306 /* 0x89 = 10001001b */ 0,
307 /* 0x8a = 10001010b */ 0,
308 /* 0x8b = 10001011b */ X86_EFL_PF,
309 /* 0x8c = 10001100b */ 0,
310 /* 0x8d = 10001101b */ X86_EFL_PF,
311 /* 0x8e = 10001110b */ X86_EFL_PF,
312 /* 0x8f = 10001111b */ 0,
313 /* 0x90 = 10010000b */ X86_EFL_PF,
314 /* 0x91 = 10010001b */ 0,
315 /* 0x92 = 10010010b */ 0,
316 /* 0x93 = 10010011b */ X86_EFL_PF,
317 /* 0x94 = 10010100b */ 0,
318 /* 0x95 = 10010101b */ X86_EFL_PF,
319 /* 0x96 = 10010110b */ X86_EFL_PF,
320 /* 0x97 = 10010111b */ 0,
321 /* 0x98 = 10011000b */ 0,
322 /* 0x99 = 10011001b */ X86_EFL_PF,
323 /* 0x9a = 10011010b */ X86_EFL_PF,
324 /* 0x9b = 10011011b */ 0,
325 /* 0x9c = 10011100b */ X86_EFL_PF,
326 /* 0x9d = 10011101b */ 0,
327 /* 0x9e = 10011110b */ 0,
328 /* 0x9f = 10011111b */ X86_EFL_PF,
329 /* 0xa0 = 10100000b */ X86_EFL_PF,
330 /* 0xa1 = 10100001b */ 0,
331 /* 0xa2 = 10100010b */ 0,
332 /* 0xa3 = 10100011b */ X86_EFL_PF,
333 /* 0xa4 = 10100100b */ 0,
334 /* 0xa5 = 10100101b */ X86_EFL_PF,
335 /* 0xa6 = 10100110b */ X86_EFL_PF,
336 /* 0xa7 = 10100111b */ 0,
337 /* 0xa8 = 10101000b */ 0,
338 /* 0xa9 = 10101001b */ X86_EFL_PF,
339 /* 0xaa = 10101010b */ X86_EFL_PF,
340 /* 0xab = 10101011b */ 0,
341 /* 0xac = 10101100b */ X86_EFL_PF,
342 /* 0xad = 10101101b */ 0,
343 /* 0xae = 10101110b */ 0,
344 /* 0xaf = 10101111b */ X86_EFL_PF,
345 /* 0xb0 = 10110000b */ 0,
346 /* 0xb1 = 10110001b */ X86_EFL_PF,
347 /* 0xb2 = 10110010b */ X86_EFL_PF,
348 /* 0xb3 = 10110011b */ 0,
349 /* 0xb4 = 10110100b */ X86_EFL_PF,
350 /* 0xb5 = 10110101b */ 0,
351 /* 0xb6 = 10110110b */ 0,
352 /* 0xb7 = 10110111b */ X86_EFL_PF,
353 /* 0xb8 = 10111000b */ X86_EFL_PF,
354 /* 0xb9 = 10111001b */ 0,
355 /* 0xba = 10111010b */ 0,
356 /* 0xbb = 10111011b */ X86_EFL_PF,
357 /* 0xbc = 10111100b */ 0,
358 /* 0xbd = 10111101b */ X86_EFL_PF,
359 /* 0xbe = 10111110b */ X86_EFL_PF,
360 /* 0xbf = 10111111b */ 0,
361 /* 0xc0 = 11000000b */ X86_EFL_PF,
362 /* 0xc1 = 11000001b */ 0,
363 /* 0xc2 = 11000010b */ 0,
364 /* 0xc3 = 11000011b */ X86_EFL_PF,
365 /* 0xc4 = 11000100b */ 0,
366 /* 0xc5 = 11000101b */ X86_EFL_PF,
367 /* 0xc6 = 11000110b */ X86_EFL_PF,
368 /* 0xc7 = 11000111b */ 0,
369 /* 0xc8 = 11001000b */ 0,
370 /* 0xc9 = 11001001b */ X86_EFL_PF,
371 /* 0xca = 11001010b */ X86_EFL_PF,
372 /* 0xcb = 11001011b */ 0,
373 /* 0xcc = 11001100b */ X86_EFL_PF,
374 /* 0xcd = 11001101b */ 0,
375 /* 0xce = 11001110b */ 0,
376 /* 0xcf = 11001111b */ X86_EFL_PF,
377 /* 0xd0 = 11010000b */ 0,
378 /* 0xd1 = 11010001b */ X86_EFL_PF,
379 /* 0xd2 = 11010010b */ X86_EFL_PF,
380 /* 0xd3 = 11010011b */ 0,
381 /* 0xd4 = 11010100b */ X86_EFL_PF,
382 /* 0xd5 = 11010101b */ 0,
383 /* 0xd6 = 11010110b */ 0,
384 /* 0xd7 = 11010111b */ X86_EFL_PF,
385 /* 0xd8 = 11011000b */ X86_EFL_PF,
386 /* 0xd9 = 11011001b */ 0,
387 /* 0xda = 11011010b */ 0,
388 /* 0xdb = 11011011b */ X86_EFL_PF,
389 /* 0xdc = 11011100b */ 0,
390 /* 0xdd = 11011101b */ X86_EFL_PF,
391 /* 0xde = 11011110b */ X86_EFL_PF,
392 /* 0xdf = 11011111b */ 0,
393 /* 0xe0 = 11100000b */ 0,
394 /* 0xe1 = 11100001b */ X86_EFL_PF,
395 /* 0xe2 = 11100010b */ X86_EFL_PF,
396 /* 0xe3 = 11100011b */ 0,
397 /* 0xe4 = 11100100b */ X86_EFL_PF,
398 /* 0xe5 = 11100101b */ 0,
399 /* 0xe6 = 11100110b */ 0,
400 /* 0xe7 = 11100111b */ X86_EFL_PF,
401 /* 0xe8 = 11101000b */ X86_EFL_PF,
402 /* 0xe9 = 11101001b */ 0,
403 /* 0xea = 11101010b */ 0,
404 /* 0xeb = 11101011b */ X86_EFL_PF,
405 /* 0xec = 11101100b */ 0,
406 /* 0xed = 11101101b */ X86_EFL_PF,
407 /* 0xee = 11101110b */ X86_EFL_PF,
408 /* 0xef = 11101111b */ 0,
409 /* 0xf0 = 11110000b */ X86_EFL_PF,
410 /* 0xf1 = 11110001b */ 0,
411 /* 0xf2 = 11110010b */ 0,
412 /* 0xf3 = 11110011b */ X86_EFL_PF,
413 /* 0xf4 = 11110100b */ 0,
414 /* 0xf5 = 11110101b */ X86_EFL_PF,
415 /* 0xf6 = 11110110b */ X86_EFL_PF,
416 /* 0xf7 = 11110111b */ 0,
417 /* 0xf8 = 11111000b */ 0,
418 /* 0xf9 = 11111001b */ X86_EFL_PF,
419 /* 0xfa = 11111010b */ X86_EFL_PF,
420 /* 0xfb = 11111011b */ 0,
421 /* 0xfc = 11111100b */ X86_EFL_PF,
422 /* 0xfd = 11111101b */ 0,
423 /* 0xfe = 11111110b */ 0,
424 /* 0xff = 11111111b */ X86_EFL_PF,
425};
426#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
427
428
429
430/*
431 * There are a few 64-bit on 32-bit things we'd rather do in C. Actually, doing
432 * it all in C is probably safer atm., optimize what's necessary later, maybe.
433 */
434#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
435
436
437/*********************************************************************************************************************************
438* Binary Operations *
439*********************************************************************************************************************************/
440
441/*
442 * ADD
443 */
444
445IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
446{
447 uint64_t uDst = *puDst;
448 uint64_t uResult = uDst + uSrc;
449 *puDst = uResult;
450 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, 0);
451}
452
453# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
454
455IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
456{
457 uint32_t uDst = *puDst;
458 uint32_t uResult = uDst + uSrc;
459 *puDst = uResult;
460 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, 0);
461}
462
463
464IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
465{
466 uint16_t uDst = *puDst;
467 uint16_t uResult = uDst + uSrc;
468 *puDst = uResult;
469 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, 0);
470}
471
472
473IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
474{
475 uint8_t uDst = *puDst;
476 uint8_t uResult = uDst + uSrc;
477 *puDst = uResult;
478 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, 0);
479}
480
481# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
482
483/*
484 * ADC
485 */
486
487IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
488{
489 if (!(*pfEFlags & X86_EFL_CF))
490 iemAImpl_add_u64(puDst, uSrc, pfEFlags);
491 else
492 {
493 uint64_t uDst = *puDst;
494 uint64_t uResult = uDst + uSrc + 1;
495 *puDst = uResult;
496 /** @todo verify AF and OF calculations. */
497 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, 0);
498 }
499}
500
501# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
502
503IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
504{
505 if (!(*pfEFlags & X86_EFL_CF))
506 iemAImpl_add_u32(puDst, uSrc, pfEFlags);
507 else
508 {
509 uint32_t uDst = *puDst;
510 uint32_t uResult = uDst + uSrc + 1;
511 *puDst = uResult;
512 /** @todo verify AF and OF calculations. */
513 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, 0);
514 }
515}
516
517
518IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
519{
520 if (!(*pfEFlags & X86_EFL_CF))
521 iemAImpl_add_u16(puDst, uSrc, pfEFlags);
522 else
523 {
524 uint16_t uDst = *puDst;
525 uint16_t uResult = uDst + uSrc + 1;
526 *puDst = uResult;
527 /** @todo verify AF and OF calculations. */
528 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, 0);
529 }
530}
531
532
533IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
534{
535 if (!(*pfEFlags & X86_EFL_CF))
536 iemAImpl_add_u8(puDst, uSrc, pfEFlags);
537 else
538 {
539 uint8_t uDst = *puDst;
540 uint8_t uResult = uDst + uSrc + 1;
541 *puDst = uResult;
542 /** @todo verify AF and OF calculations. */
543 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, 0);
544 }
545}
546
547# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
548
549/*
550 * SUB
551 */
552
553IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
554{
555 uint64_t uDst = *puDst;
556 uint64_t uResult = uDst - uSrc;
557 *puDst = uResult;
558 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, 1);
559}
560
561# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
562
563IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
564{
565 uint32_t uDst = *puDst;
566 uint32_t uResult = uDst - uSrc;
567 *puDst = uResult;
568 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, 1);
569}
570
571
572IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
573{
574 uint16_t uDst = *puDst;
575 uint16_t uResult = uDst - uSrc;
576 *puDst = uResult;
577 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, 1);
578}
579
580
581IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
582{
583 uint8_t uDst = *puDst;
584 uint8_t uResult = uDst - uSrc;
585 *puDst = uResult;
586 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, 1);
587}
588
589# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
590
591/*
592 * SBB
593 */
594
595IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
596{
597 if (!(*pfEFlags & X86_EFL_CF))
598 iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
599 else
600 {
601 uint64_t uDst = *puDst;
602 uint64_t uResult = uDst - uSrc - 1;
603 *puDst = uResult;
604 /** @todo verify AF and OF calculations. */
605 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, 1);
606 }
607}
608
609# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
610
611IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
612{
613 if (!(*pfEFlags & X86_EFL_CF))
614 iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
615 else
616 {
617 uint32_t uDst = *puDst;
618 uint32_t uResult = uDst - uSrc - 1;
619 *puDst = uResult;
620 /** @todo verify AF and OF calculations. */
621 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, 1);
622 }
623}
624
625
626IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
627{
628 if (!(*pfEFlags & X86_EFL_CF))
629 iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
630 else
631 {
632 uint16_t uDst = *puDst;
633 uint16_t uResult = uDst - uSrc - 1;
634 *puDst = uResult;
635 /** @todo verify AF and OF calculations. */
636 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, 1);
637 }
638}
639
640
641IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
642{
643 if (!(*pfEFlags & X86_EFL_CF))
644 iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
645 else
646 {
647 uint8_t uDst = *puDst;
648 uint8_t uResult = uDst - uSrc - 1;
649 *puDst = uResult;
650 /** @todo verify AF and OF calculations. */
651 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, 1);
652 }
653}
654
655# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
656
657
658/*
659 * OR
660 */
661
662IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
663{
664 uint64_t uResult = *puDst | uSrc;
665 *puDst = uResult;
666 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
667}
668
669# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
670
671IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
672{
673 uint32_t uResult = *puDst | uSrc;
674 *puDst = uResult;
675 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
676}
677
678
679IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
680{
681 uint16_t uResult = *puDst | uSrc;
682 *puDst = uResult;
683 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
684}
685
686
687IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
688{
689 uint8_t uResult = *puDst | uSrc;
690 *puDst = uResult;
691 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
692}
693
694# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
695
696/*
697 * XOR
698 */
699
700IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
701{
702 uint64_t uResult = *puDst ^ uSrc;
703 *puDst = uResult;
704 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
705}
706
707# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
708
709IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
710{
711 uint32_t uResult = *puDst ^ uSrc;
712 *puDst = uResult;
713 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
714}
715
716
717IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
718{
719 uint16_t uResult = *puDst ^ uSrc;
720 *puDst = uResult;
721 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
722}
723
724
725IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
726{
727 uint8_t uResult = *puDst ^ uSrc;
728 *puDst = uResult;
729 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
730}
731
732# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
733
734/*
735 * AND
736 */
737
738IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
739{
740 uint64_t uResult = *puDst & uSrc;
741 *puDst = uResult;
742 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
743}
744
745# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
746
747IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
748{
749 uint32_t uResult = *puDst & uSrc;
750 *puDst = uResult;
751 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
752}
753
754
755IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
756{
757 uint16_t uResult = *puDst & uSrc;
758 *puDst = uResult;
759 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
760}
761
762
763IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
764{
765 uint8_t uResult = *puDst & uSrc;
766 *puDst = uResult;
767 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
768}
769
770# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
771
772/*
773 * CMP
774 */
775
776IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
777{
778 uint64_t uDstTmp = *puDst;
779 iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
780}
781
782# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
783
784IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
785{
786 uint32_t uDstTmp = *puDst;
787 iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
788}
789
790
791IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
792{
793 uint16_t uDstTmp = *puDst;
794 iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
795}
796
797
798IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
799{
800 uint8_t uDstTmp = *puDst;
801 iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
802}
803
804# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
805
806/*
807 * TEST
808 */
809
810IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
811{
812 uint64_t uResult = *puDst & uSrc;
813 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
814}
815
816# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
817
818IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
819{
820 uint32_t uResult = *puDst & uSrc;
821 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
822}
823
824
825IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
826{
827 uint16_t uResult = *puDst & uSrc;
828 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
829}
830
831
832IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
833{
834 uint8_t uResult = *puDst & uSrc;
835 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
836}
837
838# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
839
840
841/*
842 * LOCK prefixed variants of the above
843 */
844
/**
 * Locked binary operand operation body, generic over the operand width.
 *
 * Expects @c puDst, @c uSrc and @c pfEFlags to be in scope where it is
 * expanded.  The plain iemAImpl_<mnemonic>_u<width> worker is run on private
 * copies of the destination value and of EFLAGS, and the new value is then
 * committed with a compare-and-exchange against the value the calculation
 * started from.  The calculation is redone for as long as the exchange
 * reports failure; EFLAGS is only written back once it has succeeded.
 *
 * @param   a_Mnemonic      The instruction mnemonic (add, adc, sub, ...).
 * @param   a_cBitsWidth    The operand width (8, 16, 32, 64).
 */
# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    do { \
        uint ## a_cBitsWidth ## _t uExpected = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uNew; \
        uint32_t                   fEflNew; \
        for (;;) \
        { \
            uNew    = uExpected; \
            fEflNew = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uNew, uSrc, &fEflNew); \
            if (ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uNew, uExpected, &uExpected)) \
                break; \
        } \
        *pfEFlags = fEflNew; \
    } while (0)
859
860
/**
 * Emits the function iemAImpl_<mnemonic>_u<width>_locked.
 *
 * The emitted function has the same parameters as the non-locked worker
 * (puDst, uSrc, pfEFlags) and consists solely of DO_LOCKED_BIN_OP.
 *
 * @param   a_Mnemonic      The instruction mnemonic (add, adc, sub, ...).
 * @param   a_cBitsWidth    The operand width (8, 16, 32, 64).
 */
#define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked, \
                      (uint ## a_cBitsWidth ## _t *puDst, uint ## a_cBitsWidth ## _t uSrc, uint32_t *pfEFlags)) \
    { \
        DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
    }
868
869EMIT_LOCKED_BIN_OP(add, 64)
870EMIT_LOCKED_BIN_OP(adc, 64)
871EMIT_LOCKED_BIN_OP(sub, 64)
872EMIT_LOCKED_BIN_OP(sbb, 64)
873EMIT_LOCKED_BIN_OP(or, 64)
874EMIT_LOCKED_BIN_OP(xor, 64)
875EMIT_LOCKED_BIN_OP(and, 64)
876# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
877EMIT_LOCKED_BIN_OP(add, 32)
878EMIT_LOCKED_BIN_OP(adc, 32)
879EMIT_LOCKED_BIN_OP(sub, 32)
880EMIT_LOCKED_BIN_OP(sbb, 32)
881EMIT_LOCKED_BIN_OP(or, 32)
882EMIT_LOCKED_BIN_OP(xor, 32)
883EMIT_LOCKED_BIN_OP(and, 32)
884
885EMIT_LOCKED_BIN_OP(add, 16)
886EMIT_LOCKED_BIN_OP(adc, 16)
887EMIT_LOCKED_BIN_OP(sub, 16)
888EMIT_LOCKED_BIN_OP(sbb, 16)
889EMIT_LOCKED_BIN_OP(or, 16)
890EMIT_LOCKED_BIN_OP(xor, 16)
891EMIT_LOCKED_BIN_OP(and, 16)
892
893EMIT_LOCKED_BIN_OP(add, 8)
894EMIT_LOCKED_BIN_OP(adc, 8)
895EMIT_LOCKED_BIN_OP(sub, 8)
896EMIT_LOCKED_BIN_OP(sbb, 8)
897EMIT_LOCKED_BIN_OP(or, 8)
898EMIT_LOCKED_BIN_OP(xor, 8)
899EMIT_LOCKED_BIN_OP(and, 8)
900# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
901
902
903/*
904 * Bit operations (same signature as above).
905 */
906
907/*
908 * BT
909 */
910
911IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
912{
913 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
914 logical operation (AND/OR/whatever). */
915 Assert(uSrc < 64);
916 uint64_t uDst = *puDst;
917 if (uDst & RT_BIT_64(uSrc))
918 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
919 else
920 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
921}
922
923# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
924
925IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
926{
927 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
928 logical operation (AND/OR/whatever). */
929 Assert(uSrc < 32);
930 uint32_t uDst = *puDst;
931 if (uDst & RT_BIT_32(uSrc))
932 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
933 else
934 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
935}
936
937IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
938{
939 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
940 logical operation (AND/OR/whatever). */
941 Assert(uSrc < 16);
942 uint16_t uDst = *puDst;
943 if (uDst & RT_BIT_32(uSrc))
944 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
945 else
946 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
947}
948
949# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
950
951/*
952 * BTC
953 */
954
955IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
956{
957 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
958 logical operation (AND/OR/whatever). */
959 Assert(uSrc < 64);
960 uint64_t fMask = RT_BIT_64(uSrc);
961 uint64_t uDst = *puDst;
962 if (uDst & fMask)
963 {
964 uDst &= ~fMask;
965 *puDst = uDst;
966 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
967 }
968 else
969 {
970 uDst |= fMask;
971 *puDst = uDst;
972 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
973 }
974}
975
976# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
977
978IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
979{
980 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
981 logical operation (AND/OR/whatever). */
982 Assert(uSrc < 32);
983 uint32_t fMask = RT_BIT_32(uSrc);
984 uint32_t uDst = *puDst;
985 if (uDst & fMask)
986 {
987 uDst &= ~fMask;
988 *puDst = uDst;
989 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
990 }
991 else
992 {
993 uDst |= fMask;
994 *puDst = uDst;
995 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
996 }
997}
998
999
1000IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1001{
1002 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1003 logical operation (AND/OR/whatever). */
1004 Assert(uSrc < 16);
1005 uint16_t fMask = RT_BIT_32(uSrc);
1006 uint16_t uDst = *puDst;
1007 if (uDst & fMask)
1008 {
1009 uDst &= ~fMask;
1010 *puDst = uDst;
1011 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
1012 }
1013 else
1014 {
1015 uDst |= fMask;
1016 *puDst = uDst;
1017 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
1018 }
1019}
1020
1021# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1022
1023/*
1024 * BTR
1025 */
1026
1027IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1028{
1029 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1030 logical operation (AND/OR/whatever). */
1031 Assert(uSrc < 64);
1032 uint64_t fMask = RT_BIT_64(uSrc);
1033 uint64_t uDst = *puDst;
1034 if (uDst & fMask)
1035 {
1036 uDst &= ~fMask;
1037 *puDst = uDst;
1038 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
1039 }
1040 else
1041 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
1042}
1043
1044# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1045
1046IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1047{
1048 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1049 logical operation (AND/OR/whatever). */
1050 Assert(uSrc < 32);
1051 uint32_t fMask = RT_BIT_32(uSrc);
1052 uint32_t uDst = *puDst;
1053 if (uDst & fMask)
1054 {
1055 uDst &= ~fMask;
1056 *puDst = uDst;
1057 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
1058 }
1059 else
1060 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
1061}
1062
1063
1064IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1065{
1066 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1067 logical operation (AND/OR/whatever). */
1068 Assert(uSrc < 16);
1069 uint16_t fMask = RT_BIT_32(uSrc);
1070 uint16_t uDst = *puDst;
1071 if (uDst & fMask)
1072 {
1073 uDst &= ~fMask;
1074 *puDst = uDst;
1075 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, X86_EFL_CF);
1076 }
1077 else
1078 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 16, 0);
1079}
1080
1081# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1082
1083/*
1084 * BTS
1085 */
1086
1087IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1088{
1089 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1090 logical operation (AND/OR/whatever). */
1091 Assert(uSrc < 64);
1092 uint64_t fMask = RT_BIT_64(uSrc);
1093 uint64_t uDst = *puDst;
1094 if (uDst & fMask)
1095 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, X86_EFL_CF);
1096 else
1097 {
1098 uDst |= fMask;
1099 *puDst = uDst;
1100 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 64, 0);
1101 }
1102}
1103
1104# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1105
1106IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1107{
1108 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1109 logical operation (AND/OR/whatever). */
1110 Assert(uSrc < 32);
1111 uint32_t fMask = RT_BIT_32(uSrc);
1112 uint32_t uDst = *puDst;
1113 if (uDst & fMask)
1114 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
1115 else
1116 {
1117 uDst |= fMask;
1118 *puDst = uDst;
1119 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
1120 }
1121}
1122
1123
1124IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1125{
1126 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1127 logical operation (AND/OR/whatever). */
1128 Assert(uSrc < 16);
1129 uint16_t fMask = RT_BIT_32(uSrc);
1130 uint32_t uDst = *puDst;
1131 if (uDst & fMask)
1132 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, X86_EFL_CF);
1133 else
1134 {
1135 uDst |= fMask;
1136 *puDst = uDst;
1137 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uDst, 32, 0);
1138 }
1139}
1140
1141# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1142
1143
/*
 * Locked variants of the bit operations that write their operand (BTC, BTR
 * and BTS; BT has no entry here).  The 32-bit and 16-bit variants are only
 * instantiated when not building for RT_ARCH_X86 or when IEM_WITHOUT_ASSEMBLY
 * is defined, mirroring the guards around the plain 32/16-bit workers.
 *
 * NOTE(review): EMIT_LOCKED_BIN_OP is defined outside this section; presumably
 * it emits iemAImpl_<op>_u<bits>_locked on top of the plain worker - confirm.
 */
1144EMIT_LOCKED_BIN_OP(btc, 64)
1145EMIT_LOCKED_BIN_OP(btr, 64)
1146EMIT_LOCKED_BIN_OP(bts, 64)
1147# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1148EMIT_LOCKED_BIN_OP(btc, 32)
1149EMIT_LOCKED_BIN_OP(btr, 32)
1150EMIT_LOCKED_BIN_OP(bts, 32)
1151
1152EMIT_LOCKED_BIN_OP(btc, 16)
1153EMIT_LOCKED_BIN_OP(btr, 16)
1154EMIT_LOCKED_BIN_OP(bts, 16)
1155# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1156
1157
1158/*
1159 * BSF - first (least significant) bit set
1160 */
1161
1162IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1163{
1164 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1165 /** @todo check what real CPUs do. */
1166 unsigned iBit = ASMBitFirstSetU64(uSrc);
1167 if (iBit)
1168 {
1169 *puDst = iBit - 1;
1170 *pfEFlags &= ~X86_EFL_ZF;
1171 }
1172 else
1173 *pfEFlags |= X86_EFL_ZF;
1174}
1175
1176# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1177
1178IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1179{
1180 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1181 /** @todo check what real CPUs do. */
1182 unsigned iBit = ASMBitFirstSetU32(uSrc);
1183 if (iBit)
1184 {
1185 *puDst = iBit - 1;
1186 *pfEFlags &= ~X86_EFL_ZF;
1187 }
1188 else
1189 *pfEFlags |= X86_EFL_ZF;
1190}
1191
1192
1193IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1194{
1195 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1196 /** @todo check what real CPUs do. */
1197 unsigned iBit = ASMBitFirstSetU16(uSrc);
1198 if (iBit)
1199 {
1200 *puDst = iBit - 1;
1201 *pfEFlags &= ~X86_EFL_ZF;
1202 }
1203 else
1204 *pfEFlags |= X86_EFL_ZF;
1205}
1206
1207# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1208
1209/*
1210 * BSR - last (most significant) bit set
1211 */
1212
1213IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1214{
1215 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1216 /** @todo check what real CPUs do. */
1217 unsigned iBit = ASMBitLastSetU64(uSrc);
1218 if (uSrc)
1219 {
1220 *puDst = iBit - 1;
1221 *pfEFlags &= ~X86_EFL_ZF;
1222 }
1223 else
1224 *pfEFlags |= X86_EFL_ZF;
1225}
1226
1227# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1228
1229IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1230{
1231 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1232 /** @todo check what real CPUs do. */
1233 unsigned iBit = ASMBitLastSetU32(uSrc);
1234 if (uSrc)
1235 {
1236 *puDst = iBit - 1;
1237 *pfEFlags &= ~X86_EFL_ZF;
1238 }
1239 else
1240 *pfEFlags |= X86_EFL_ZF;
1241}
1242
1243
1244IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1245{
1246 /* Note! "undefined" flags: OF, SF, AF, PF, CF. */
1247 /** @todo check what real CPUs do. */
1248 unsigned iBit = ASMBitLastSetU16(uSrc);
1249 if (uSrc)
1250 {
1251 *puDst = iBit - 1;
1252 *pfEFlags &= ~X86_EFL_ZF;
1253 }
1254 else
1255 *pfEFlags |= X86_EFL_ZF;
1256}
1257
1258# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1259
1260
1261/*
1262 * XCHG
1263 */
1264
1265IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64,(uint64_t *puMem, uint64_t *puReg))
1266{
1267 /* XCHG implies LOCK. */
1268 uint64_t uOldMem = *puMem;
1269 while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
1270 ASMNopPause();
1271 *puReg = uOldMem;
1272}
1273
1274# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1275
1276IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32,(uint32_t *puMem, uint32_t *puReg))
1277{
1278 /* XCHG implies LOCK. */
1279 uint32_t uOldMem = *puMem;
1280 while (!ASMAtomicCmpXchgExU32(puMem, *puReg, uOldMem, &uOldMem))
1281 ASMNopPause();
1282 *puReg = uOldMem;
1283}
1284
1285
1286IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16,(uint16_t *puMem, uint16_t *puReg))
1287{
1288 /* XCHG implies LOCK. */
1289 uint16_t uOldMem = *puMem;
1290 while (!ASMAtomicCmpXchgExU16(puMem, *puReg, uOldMem, &uOldMem))
1291 ASMNopPause();
1292 *puReg = uOldMem;
1293}
1294
1295
1296IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8,(uint8_t *puMem, uint8_t *puReg))
1297{
1298 /* XCHG implies LOCK. */
1299 uint8_t uOldMem = *puMem;
1300 while (!ASMAtomicCmpXchgExU8(puMem, *puReg, uOldMem, &uOldMem))
1301 ASMNopPause();
1302 *puReg = uOldMem;
1303}
1304
1305# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1306
1307
1308/*
1309 * XADD and LOCK XADD.
1310 */
1311
1312IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
1313{
1314 uint64_t uDst = *puDst;
1315 uint64_t uResult = uDst;
1316 iemAImpl_add_u64(&uResult, *puReg, pfEFlags);
1317 *puDst = uResult;
1318 *puReg = uDst;
1319}
1320
1321
1322IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64_locked,(uint64_t *puDst, uint64_t *puReg, uint32_t *pfEFlags))
1323{
1324 uint64_t uOld = ASMAtomicUoReadU64(puDst);
1325 uint64_t uTmpDst;
1326 uint32_t fEflTmp;
1327 do
1328 {
1329 uTmpDst = uOld;
1330 fEflTmp = *pfEFlags;
1331 iemAImpl_add_u64(&uTmpDst, *puReg, pfEFlags);
1332 } while (!ASMAtomicCmpXchgExU64(puDst, uTmpDst, uOld, &uOld));
1333 *puReg = uOld;
1334 *pfEFlags = fEflTmp;
1335}
1336
1337# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1338
1339IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
1340{
1341 uint32_t uDst = *puDst;
1342 uint32_t uResult = uDst;
1343 iemAImpl_add_u32(&uResult, *puReg, pfEFlags);
1344 *puDst = uResult;
1345 *puReg = uDst;
1346}
1347
1348
1349IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32_locked,(uint32_t *puDst, uint32_t *puReg, uint32_t *pfEFlags))
1350{
1351 uint32_t uOld = ASMAtomicUoReadU32(puDst);
1352 uint32_t uTmpDst;
1353 uint32_t fEflTmp;
1354 do
1355 {
1356 uTmpDst = uOld;
1357 fEflTmp = *pfEFlags;
1358 iemAImpl_add_u32(&uTmpDst, *puReg, pfEFlags);
1359 } while (!ASMAtomicCmpXchgExU32(puDst, uTmpDst, uOld, &uOld));
1360 *puReg = uOld;
1361 *pfEFlags = fEflTmp;
1362}
1363
1364
1365IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
1366{
1367 uint16_t uDst = *puDst;
1368 uint16_t uResult = uDst;
1369 iemAImpl_add_u16(&uResult, *puReg, pfEFlags);
1370 *puDst = uResult;
1371 *puReg = uDst;
1372}
1373
1374
1375IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16_locked,(uint16_t *puDst, uint16_t *puReg, uint32_t *pfEFlags))
1376{
1377 uint16_t uOld = ASMAtomicUoReadU16(puDst);
1378 uint16_t uTmpDst;
1379 uint32_t fEflTmp;
1380 do
1381 {
1382 uTmpDst = uOld;
1383 fEflTmp = *pfEFlags;
1384 iemAImpl_add_u16(&uTmpDst, *puReg, pfEFlags);
1385 } while (!ASMAtomicCmpXchgExU16(puDst, uTmpDst, uOld, &uOld));
1386 *puReg = uOld;
1387 *pfEFlags = fEflTmp;
1388}
1389
1390
1391IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
1392{
1393 uint8_t uDst = *puDst;
1394 uint8_t uResult = uDst;
1395 iemAImpl_add_u8(&uResult, *puReg, pfEFlags);
1396 *puDst = uResult;
1397 *puReg = uDst;
1398}
1399
1400
1401IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8_locked,(uint8_t *puDst, uint8_t *puReg, uint32_t *pfEFlags))
1402{
1403 uint8_t uOld = ASMAtomicUoReadU8(puDst);
1404 uint8_t uTmpDst;
1405 uint32_t fEflTmp;
1406 do
1407 {
1408 uTmpDst = uOld;
1409 fEflTmp = *pfEFlags;
1410 iemAImpl_add_u8(&uTmpDst, *puReg, pfEFlags);
1411 } while (!ASMAtomicCmpXchgExU8(puDst, uTmpDst, uOld, &uOld));
1412 *puReg = uOld;
1413 *pfEFlags = fEflTmp;
1414}
1415
1416# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1417#endif
1418
1419/*
1420 * CMPXCHG, CMPXCHG8B, CMPXCHG16B
1421 *
1422 * Note! We don't have non-locking/atomic cmpxchg primitives, so all cmpxchg
1423 * instructions are emulated as locked.
1424 */
1425#if defined(IEM_WITHOUT_ASSEMBLY)
1426
1427IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8_locked, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
1428{
1429 uint8_t const uOld = *puAl;
1430 if (ASMAtomicCmpXchgExU8(pu8Dst, uSrcReg, uOld, puAl))
1431 {
1432 Assert(*puAl == uOld);
1433 *pEFlags |= X86_EFL_ZF;
1434 }
1435 else
1436 *pEFlags &= ~X86_EFL_ZF;
1437}
1438
1439
1440IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16_locked,(uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
1441{
1442 uint16_t const uOld = *puAx;
1443 if (ASMAtomicCmpXchgExU16(pu16Dst, uSrcReg, uOld, puAx))
1444 {
1445 Assert(*puAx == uOld);
1446 *pEFlags |= X86_EFL_ZF;
1447 }
1448 else
1449 *pEFlags &= ~X86_EFL_ZF;
1450}
1451
1452
1453IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32_locked,(uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
1454{
1455 uint32_t const uOld = *puEax;
1456 if (ASMAtomicCmpXchgExU32(pu32Dst, uSrcReg, uOld, puEax))
1457 {
1458 Assert(*puEax == uOld);
1459 *pEFlags |= X86_EFL_ZF;
1460 }
1461 else
1462 *pEFlags &= ~X86_EFL_ZF;
1463}
1464
1465
1466# if ARCH_BITS == 32
1467IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
1468# else
1469IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
1470# endif
1471{
1472# if ARCH_BITS == 32
1473 uint64_t const uSrcReg = *puSrcReg;
1474# endif
1475 uint64_t const uOld = *puRax;
1476 if (ASMAtomicCmpXchgExU64(pu64Dst, uSrcReg, uOld, puRax))
1477 {
1478 Assert(*puRax == uOld);
1479 *pEFlags |= X86_EFL_ZF;
1480 }
1481 else
1482 *pEFlags &= ~X86_EFL_ZF;
1483}
1484
1485
1486IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b_locked,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
1487 uint32_t *pEFlags))
1488{
1489 uint64_t const uNew = pu64EbxEcx->u;
1490 uint64_t const uOld = pu64EaxEdx->u;
1491 if (ASMAtomicCmpXchgExU64(pu64Dst, uNew, uOld, &pu64EaxEdx->u))
1492 {
1493 Assert(pu64EaxEdx->u == uOld);
1494 *pEFlags |= X86_EFL_ZF;
1495 }
1496 else
1497 *pEFlags &= ~X86_EFL_ZF;
1498}
1499
1500
1501# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
1502IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1503 uint32_t *pEFlags))
1504{
1505# ifdef VBOX_STRICT
1506 RTUINT128U const uOld = *pu128RaxRdx;
1507# endif
1508# if defined(RT_ARCH_AMD64)
1509 if (ASMAtomicCmpXchgU128v2(&pu128Dst->u, pu128RbxRcx->s.Hi, pu128RbxRcx->s.Lo, pu128RaxRdx->s.Hi, pu128RaxRdx->s.Lo,
1510 &pu128RaxRdx->u))
1511# else
1512 if (ASMAtomicCmpXchgU128(&pu128Dst->u, pu128RbxRcx->u, pu128RaxRdx->u, &pu128RaxRdx->u))
1513# endif
1514 {
1515 Assert(pu128RaxRdx->s.Lo == uOld.s.Lo && pu128RaxRdx->s.Hi == uOld.s.Hi);
1516 *pEFlags |= X86_EFL_ZF;
1517 }
1518 else
1519 *pEFlags &= ~X86_EFL_ZF;
1520}
1521# endif
1522
1523#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1524
1525# if !defined(RT_ARCH_ARM64) /** @todo may need this for unaligned accesses... */
1526IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
1527 PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
1528{
1529 RTUINT128U u128Tmp = *pu128Dst;
1530 if ( u128Tmp.s.Lo == pu128RaxRdx->s.Lo
1531 && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
1532 {
1533 *pu128Dst = *pu128RbxRcx;
1534 *pEFlags |= X86_EFL_ZF;
1535 }
1536 else
1537 {
1538 *pu128RaxRdx = u128Tmp;
1539 *pEFlags &= ~X86_EFL_ZF;
1540 }
1541}
1542#endif /* !RT_ARCH_ARM64 */
1543
1544#if defined(IEM_WITHOUT_ASSEMBLY)
1545
1546/* Unlocked versions mapped to the locked ones: */
1547
1548IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
1549{
1550 iemAImpl_cmpxchg_u8_locked(pu8Dst, puAl, uSrcReg, pEFlags);
1551}
1552
1553
1554IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16, (uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
1555{
1556 iemAImpl_cmpxchg_u16_locked(pu16Dst, puAx, uSrcReg, pEFlags);
1557}
1558
1559
1560IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32, (uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
1561{
1562 iemAImpl_cmpxchg_u32_locked(pu32Dst, puEax, uSrcReg, pEFlags);
1563}
1564
1565
1566# if ARCH_BITS == 32
1567IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
1568{
1569 iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, puSrcReg, pEFlags);
1570}
1571# else
1572IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
1573{
1574 iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, uSrcReg, pEFlags);
1575}
1576# endif
1577
1578
1579IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx, uint32_t *pEFlags))
1580{
1581 iemAImpl_cmpxchg8b_locked(pu64Dst, pu64EaxEdx, pu64EbxEcx, pEFlags);
1582}
1583
1584
1585IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1586 uint32_t *pEFlags))
1587{
1588 iemAImpl_cmpxchg16b_locked(pu128Dst, pu128RaxRdx, pu128RbxRcx, pEFlags);
1589}
1590
1591#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1592
1593#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
1594
1595/*
1596 * MUL
1597 */
1598
1599IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
1600{
1601 RTUINT128U Result;
1602 RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
1603 *pu64RAX = Result.s.Lo;
1604 *pu64RDX = Result.s.Hi;
1605
1606 /* MUL EFLAGS according to Skylake (similar to IMUL). */
1607 *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
1608 if (Result.s.Lo & RT_BIT_64(63))
1609 *pfEFlags |= X86_EFL_SF;
1610 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1611 if (Result.s.Hi != 0)
1612 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1613 return 0;
1614}
1615
1616# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1617
1618IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Factor, uint32_t *pfEFlags))
1619{
1620 RTUINT64U Result;
1621 Result.u = (uint64_t)*pu32RAX * u32Factor;
1622 *pu32RAX = Result.s.Lo;
1623 *pu32RDX = Result.s.Hi;
1624
1625 /* MUL EFLAGS according to Skylake (similar to IMUL). */
1626 *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
1627 if (Result.s.Lo & RT_BIT_32(31))
1628 *pfEFlags |= X86_EFL_SF;
1629 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1630 if (Result.s.Hi != 0)
1631 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1632 return 0;
1633}
1634
1635
1636IEM_DECL_IMPL_DEF(int, iemAImpl_mul_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Factor, uint32_t *pfEFlags))
1637{
1638 RTUINT32U Result;
1639 Result.u = (uint32_t)*pu16RAX * u16Factor;
1640 *pu16RAX = Result.s.Lo;
1641 *pu16RDX = Result.s.Hi;
1642
1643 /* MUL EFLAGS according to Skylake (similar to IMUL). */
1644 *pfEFlags &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF);
1645 if (Result.s.Lo & RT_BIT_32(15))
1646 *pfEFlags |= X86_EFL_SF;
1647 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1648 if (Result.s.Hi != 0)
1649 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1650 return 0;
1651}
1652
1653# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1654
1655
1656/*
1657 * IMUL
1658 */
1659
1660IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Factor, uint32_t *pfEFlags))
1661{
1662 RTUINT128U Result;
1663 *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
1664 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
1665 /* Skylake may set: */ | X86_EFL_PF);
1666
1667 if ((int64_t)*pu64RAX >= 0)
1668 {
1669 if ((int64_t)u64Factor >= 0)
1670 {
1671 RTUInt128MulU64ByU64(&Result, *pu64RAX, u64Factor);
1672 if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
1673 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1674 }
1675 else
1676 {
1677 RTUInt128MulU64ByU64(&Result, *pu64RAX, UINT64_C(0) - u64Factor);
1678 if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
1679 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1680 RTUInt128AssignNeg(&Result);
1681 }
1682 }
1683 else
1684 {
1685 if ((int64_t)u64Factor >= 0)
1686 {
1687 RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, u64Factor);
1688 if (Result.s.Hi != 0 || Result.s.Lo > UINT64_C(0x8000000000000000))
1689 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1690 RTUInt128AssignNeg(&Result);
1691 }
1692 else
1693 {
1694 RTUInt128MulU64ByU64(&Result, UINT64_C(0) - *pu64RAX, UINT64_C(0) - u64Factor);
1695 if (Result.s.Hi != 0 || Result.s.Lo >= UINT64_C(0x8000000000000000))
1696 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1697 }
1698 }
1699 *pu64RAX = Result.s.Lo;
1700 if (Result.s.Lo & RT_BIT_64(63))
1701 *pfEFlags |= X86_EFL_SF;
1702 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1703 *pu64RDX = Result.s.Hi;
1704
1705 return 0;
1706}
1707
1708
1709IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1710{
1711/** @todo Testcase: IMUL 2 and 3 operands. */
1712 uint64_t u64Ign;
1713 iemAImpl_imul_u64(puDst, &u64Ign, uSrc, pfEFlags);
1714}
1715
1716# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1717
1718IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Factor, uint32_t *pfEFlags))
1719{
1720 RTUINT64U Result;
1721 *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
1722 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
1723 /* Skylake may set: */ | X86_EFL_PF);
1724
1725 if ((int32_t)*pu32RAX >= 0)
1726 {
1727 if ((int32_t)u32Factor >= 0)
1728 {
1729 Result.u = (uint64_t)*pu32RAX * u32Factor;
1730 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(31))
1731 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1732 }
1733 else
1734 {
1735 Result.u = (uint64_t)*pu32RAX * (UINT32_C(0) - u32Factor);
1736 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(31))
1737 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1738 Result.u = UINT64_C(0) - Result.u;
1739 }
1740 }
1741 else
1742 {
1743 if ((int32_t)u32Factor >= 0)
1744 {
1745 Result.u = (uint64_t)(UINT32_C(0) - *pu32RAX) * u32Factor;
1746 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(31))
1747 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1748 Result.u = UINT64_C(0) - Result.u;
1749 }
1750 else
1751 {
1752 Result.u = (uint64_t)(UINT32_C(0) - *pu32RAX) * (UINT32_C(0) - u32Factor);
1753 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(31))
1754 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1755 }
1756 }
1757 *pu32RAX = Result.s.Lo;
1758 if (Result.s.Lo & RT_BIT_32(31))
1759 *pfEFlags |= X86_EFL_SF;
1760 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1761 *pu32RDX = Result.s.Hi;
1762
1763 return 0;
1764}
1765
1766
1767IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1768{
1769/** @todo Testcase: IMUL 2 and 3 operands. */
1770 uint32_t u32Ign;
1771 iemAImpl_imul_u32(puDst, &u32Ign, uSrc, pfEFlags);
1772}
1773
1774
1775IEM_DECL_IMPL_DEF(int, iemAImpl_imul_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Factor, uint32_t *pfEFlags))
1776{
1777 RTUINT32U Result;
1778 *pfEFlags &= ~( X86_EFL_SF | X86_EFL_CF | X86_EFL_OF
1779 /* Skylake always clears: */ | X86_EFL_AF | X86_EFL_ZF
1780 /* Skylake may set: */ | X86_EFL_PF);
1781
1782 if ((int16_t)*pu16RAX >= 0)
1783 {
1784 if ((int16_t)u16Factor >= 0)
1785 {
1786 Result.u = (uint32_t)*pu16RAX * u16Factor;
1787 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(15))
1788 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1789 }
1790 else
1791 {
1792 Result.u = (uint32_t)*pu16RAX * (UINT16_C(0) - u16Factor);
1793 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(15))
1794 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1795 Result.u = UINT32_C(0) - Result.u;
1796 }
1797 }
1798 else
1799 {
1800 if ((int16_t)u16Factor >= 0)
1801 {
1802 Result.u = (uint32_t)(UINT16_C(0) - *pu16RAX) * u16Factor;
1803 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_32(15))
1804 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1805 Result.u = UINT32_C(0) - Result.u;
1806 }
1807 else
1808 {
1809 Result.u = (uint32_t)(UINT16_C(0) - *pu16RAX) * (UINT16_C(0) - u16Factor);
1810 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_32(15))
1811 *pfEFlags |= X86_EFL_CF | X86_EFL_OF;
1812 }
1813 }
1814 *pu16RAX = Result.s.Lo;
1815 if (Result.s.Lo & RT_BIT_32(15))
1816 *pfEFlags |= X86_EFL_SF;
1817 *pfEFlags |= g_afParity[Result.s.Lo & 0xff]; /* (Skylake behaviour) */
1818 *pu16RDX = Result.s.Hi;
1819
1820 return 0;
1821}
1822
1823
1824IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1825{
1826/** @todo Testcase: IMUL 2 and 3 operands. */
1827 uint16_t u16Ign;
1828 iemAImpl_imul_u16(puDst, &u16Ign, uSrc, pfEFlags);
1829}
1830
1831# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1832
1833
1834/*
1835 * DIV
1836 */
1837
1838IEM_DECL_IMPL_DEF(int, iemAImpl_div_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
1839{
1840 /* Note! Skylake leaves all flags alone. */
1841 RT_NOREF_PV(pfEFlags);
1842
1843 if ( u64Divisor != 0
1844 && *pu64RDX < u64Divisor)
1845 {
1846 RTUINT128U Dividend;
1847 Dividend.s.Lo = *pu64RAX;
1848 Dividend.s.Hi = *pu64RDX;
1849
1850 RTUINT128U Divisor;
1851 Divisor.s.Lo = u64Divisor;
1852 Divisor.s.Hi = 0;
1853
1854 RTUINT128U Remainder;
1855 RTUINT128U Quotient;
1856# ifdef __GNUC__ /* GCC maybe really annoying in function. */
1857 Quotient.s.Lo = 0;
1858 Quotient.s.Hi = 0;
1859# endif
1860 RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);
1861 Assert(Quotient.s.Hi == 0);
1862 Assert(Remainder.s.Hi == 0);
1863
1864 *pu64RAX = Quotient.s.Lo;
1865 *pu64RDX = Remainder.s.Lo;
1866 /** @todo research the undefined DIV flags. */
1867 return 0;
1868
1869 }
1870 /* #DE */
1871 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1872}
1873
1874# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1875
1876IEM_DECL_IMPL_DEF(int, iemAImpl_div_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Divisor, uint32_t *pfEFlags))
1877{
1878 /* Note! Skylake leaves all flags alone. */
1879 RT_NOREF_PV(pfEFlags);
1880
1881 if ( u32Divisor != 0
1882 && *pu32RDX < u32Divisor)
1883 {
1884 RTUINT64U Dividend;
1885 Dividend.s.Lo = *pu32RAX;
1886 Dividend.s.Hi = *pu32RDX;
1887
1888 RTUINT64U Remainder;
1889 RTUINT64U Quotient;
1890 Quotient.u = Dividend.u / u32Divisor;
1891 Remainder.u = Dividend.u % u32Divisor;
1892
1893 *pu32RAX = Quotient.s.Lo;
1894 *pu32RDX = Remainder.s.Lo;
1895 /** @todo research the undefined DIV flags. */
1896 return 0;
1897
1898 }
1899 /* #DE */
1900 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1901}
1902
1903
1904IEM_DECL_IMPL_DEF(int, iemAImpl_div_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Divisor, uint32_t *pfEFlags))
1905{
1906 /* Note! Skylake leaves all flags alone. */
1907 RT_NOREF_PV(pfEFlags);
1908
1909 if ( u16Divisor != 0
1910 && *pu16RDX < u16Divisor)
1911 {
1912 RTUINT32U Dividend;
1913 Dividend.s.Lo = *pu16RAX;
1914 Dividend.s.Hi = *pu16RDX;
1915
1916 RTUINT32U Remainder;
1917 RTUINT32U Quotient;
1918 Quotient.u = Dividend.u / u16Divisor;
1919 Remainder.u = Dividend.u % u16Divisor;
1920
1921 *pu16RAX = Quotient.s.Lo;
1922 *pu16RDX = Remainder.s.Lo;
1923 /** @todo research the undefined DIV flags. */
1924 return 0;
1925
1926 }
1927 /* #DE */
1928 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1929}
1930
1931
1932IEM_DECL_IMPL_DEF(int, iemAImpl_div_u8,(uint8_t *pu8RAX, uint8_t *pu8RDX, uint8_t u8Divisor, uint32_t *pfEFlags))
1933{
1934 /* Note! Skylake leaves all flags alone. */
1935 RT_NOREF_PV(pfEFlags);
1936
1937 if ( u8Divisor != 0
1938 && *pu8RDX < u8Divisor)
1939 {
1940 RTUINT16U Dividend;
1941 Dividend.s.Lo = *pu8RAX;
1942 Dividend.s.Hi = *pu8RDX;
1943
1944 RTUINT16U Remainder;
1945 RTUINT16U Quotient;
1946 Quotient.u = Dividend.u / u8Divisor;
1947 Remainder.u = Dividend.u % u8Divisor;
1948
1949 *pu8RAX = Quotient.s.Lo;
1950 *pu8RDX = Remainder.s.Lo;
1951 /** @todo research the undefined DIV flags. */
1952 return 0;
1953
1954 }
1955 /* #DE */
1956 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
1957}
1958
1959# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1960
1961
1962/*
1963 * IDIV
1964 */
1965
1966IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64Divisor, uint32_t *pfEFlags))
1967{
1968 /* Note! Skylake leaves all flags alone. */
1969 RT_NOREF_PV(pfEFlags);
1970
1971 /** @todo overflow checks */
1972 if (u64Divisor != 0)
1973 {
1974 /*
1975 * Convert to unsigned division.
1976 */
1977 RTUINT128U Dividend;
1978 Dividend.s.Lo = *pu64RAX;
1979 Dividend.s.Hi = *pu64RDX;
1980 if ((int64_t)*pu64RDX < 0)
1981 RTUInt128AssignNeg(&Dividend);
1982
1983 RTUINT128U Divisor;
1984 Divisor.s.Hi = 0;
1985 if ((int64_t)u64Divisor >= 0)
1986 Divisor.s.Lo = u64Divisor;
1987 else
1988 Divisor.s.Lo = UINT64_C(0) - u64Divisor;
1989
1990 RTUINT128U Remainder;
1991 RTUINT128U Quotient;
1992# ifdef __GNUC__ /* GCC maybe really annoying. */
1993 Quotient.s.Lo = 0;
1994 Quotient.s.Hi = 0;
1995# endif
1996 RTUInt128DivRem(&Quotient, &Remainder, &Dividend, &Divisor);
1997
1998 /*
1999 * Setup the result, checking for overflows.
2000 */
2001 if ((int64_t)u64Divisor >= 0)
2002 {
2003 if ((int64_t)*pu64RDX >= 0)
2004 {
2005 /* Positive divisor, positive dividend => result positive. */
2006 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
2007 {
2008 *pu64RAX = Quotient.s.Lo;
2009 *pu64RDX = Remainder.s.Lo;
2010 return 0;
2011 }
2012 }
2013 else
2014 {
2015 /* Positive divisor, positive dividend => result negative. */
2016 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
2017 {
2018 *pu64RAX = UINT64_C(0) - Quotient.s.Lo;
2019 *pu64RDX = UINT64_C(0) - Remainder.s.Lo;
2020 return 0;
2021 }
2022 }
2023 }
2024 else
2025 {
2026 if ((int64_t)*pu64RDX >= 0)
2027 {
2028 /* Negative divisor, positive dividend => negative quotient, positive remainder. */
2029 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= UINT64_C(0x8000000000000000))
2030 {
2031 *pu64RAX = UINT64_C(0) - Quotient.s.Lo;
2032 *pu64RDX = Remainder.s.Lo;
2033 return 0;
2034 }
2035 }
2036 else
2037 {
2038 /* Negative divisor, negative dividend => positive quotient, negative remainder. */
2039 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint64_t)INT64_MAX)
2040 {
2041 *pu64RAX = Quotient.s.Lo;
2042 *pu64RDX = UINT64_C(0) - Remainder.s.Lo;
2043 return 0;
2044 }
2045 }
2046 }
2047 }
2048 /* #DE */
2049 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
2050}
2051
2052# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2053
2054IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u32,(uint32_t *pu32RAX, uint32_t *pu32RDX, uint32_t u32Divisor, uint32_t *pfEFlags))
2055{
2056 /* Note! Skylake leaves all flags alone. */
2057 RT_NOREF_PV(pfEFlags);
2058
2059 /** @todo overflow checks */
2060 if (u32Divisor != 0)
2061 {
2062 /*
2063 * Convert to unsigned division.
2064 */
2065 RTUINT64U Dividend;
2066 Dividend.s.Lo = *pu32RAX;
2067 Dividend.s.Hi = *pu32RDX;
2068 if ((int32_t)*pu32RDX < 0)
2069 Dividend.u = UINT64_C(0) - Dividend.u;
2070
2071 uint32_t u32DivisorPositive;
2072 if ((int32_t)u32Divisor >= 0)
2073 u32DivisorPositive = u32Divisor;
2074 else
2075 u32DivisorPositive = UINT32_C(0) - u32Divisor;
2076
2077 RTUINT64U Remainder;
2078 RTUINT64U Quotient;
2079 Quotient.u = Dividend.u / u32DivisorPositive;
2080 Remainder.u = Dividend.u % u32DivisorPositive;
2081
2082 /*
2083 * Setup the result, checking for overflows.
2084 */
2085 if ((int32_t)u32Divisor >= 0)
2086 {
2087 if ((int32_t)*pu32RDX >= 0)
2088 {
2089 /* Positive divisor, positive dividend => result positive. */
2090 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint32_t)INT32_MAX)
2091 {
2092 *pu32RAX = Quotient.s.Lo;
2093 *pu32RDX = Remainder.s.Lo;
2094 return 0;
2095 }
2096 }
2097 else
2098 {
2099 /* Positive divisor, positive dividend => result negative. */
2100 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(31))
2101 {
2102 *pu32RAX = UINT32_C(0) - Quotient.s.Lo;
2103 *pu32RDX = UINT32_C(0) - Remainder.s.Lo;
2104 return 0;
2105 }
2106 }
2107 }
2108 else
2109 {
2110 if ((int32_t)*pu32RDX >= 0)
2111 {
2112 /* Negative divisor, positive dividend => negative quotient, positive remainder. */
2113 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(31))
2114 {
2115 *pu32RAX = UINT32_C(0) - Quotient.s.Lo;
2116 *pu32RDX = Remainder.s.Lo;
2117 return 0;
2118 }
2119 }
2120 else
2121 {
2122 /* Negative divisor, negative dividend => positive quotient, negative remainder. */
2123 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint32_t)INT32_MAX)
2124 {
2125 *pu32RAX = Quotient.s.Lo;
2126 *pu32RDX = UINT32_C(0) - Remainder.s.Lo;
2127 return 0;
2128 }
2129 }
2130 }
2131 }
2132 /* #DE */
2133 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
2134}
2135
2136
2137IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u16,(uint16_t *pu16RAX, uint16_t *pu16RDX, uint16_t u16Divisor, uint32_t *pfEFlags))
2138{
2139 /* Note! Skylake leaves all flags alone. */
2140 RT_NOREF_PV(pfEFlags);
2141
2142 if (u16Divisor != 0)
2143 {
2144 /*
2145 * Convert to unsigned division.
2146 */
2147 RTUINT32U Dividend;
2148 Dividend.s.Lo = *pu16RAX;
2149 Dividend.s.Hi = *pu16RDX;
2150 if ((int16_t)*pu16RDX < 0)
2151 Dividend.u = UINT32_C(0) - Dividend.u;
2152
2153 uint16_t u16DivisorPositive;
2154 if ((int16_t)u16Divisor >= 0)
2155 u16DivisorPositive = u16Divisor;
2156 else
2157 u16DivisorPositive = UINT16_C(0) - u16Divisor;
2158
2159 RTUINT32U Remainder;
2160 RTUINT32U Quotient;
2161 Quotient.u = Dividend.u / u16DivisorPositive;
2162 Remainder.u = Dividend.u % u16DivisorPositive;
2163
2164 /*
2165 * Setup the result, checking for overflows.
2166 */
2167 if ((int16_t)u16Divisor >= 0)
2168 {
2169 if ((int16_t)*pu16RDX >= 0)
2170 {
2171 /* Positive divisor, positive dividend => result positive. */
2172 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint16_t)INT16_MAX)
2173 {
2174 *pu16RAX = Quotient.s.Lo;
2175 *pu16RDX = Remainder.s.Lo;
2176 return 0;
2177 }
2178 }
2179 else
2180 {
2181 /* Positive divisor, positive dividend => result negative. */
2182 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(15))
2183 {
2184 *pu16RAX = UINT16_C(0) - Quotient.s.Lo;
2185 *pu16RDX = UINT16_C(0) - Remainder.s.Lo;
2186 return 0;
2187 }
2188 }
2189 }
2190 else
2191 {
2192 if ((int16_t)*pu16RDX >= 0)
2193 {
2194 /* Negative divisor, positive dividend => negative quotient, positive remainder. */
2195 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_32(15))
2196 {
2197 *pu16RAX = UINT16_C(0) - Quotient.s.Lo;
2198 *pu16RDX = Remainder.s.Lo;
2199 return 0;
2200 }
2201 }
2202 else
2203 {
2204 /* Negative divisor, negative dividend => positive quotient, negative remainder. */
2205 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint16_t)INT16_MAX)
2206 {
2207 *pu16RAX = Quotient.s.Lo;
2208 *pu16RDX = UINT16_C(0) - Remainder.s.Lo;
2209 return 0;
2210 }
2211 }
2212 }
2213 }
2214 /* #DE */
2215 return VERR_IEM_ASPECT_NOT_IMPLEMENTED;
2216}
2217
2218# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2219
2220
2221/*********************************************************************************************************************************
2222* Unary operations. *
2223*********************************************************************************************************************************/
2224
/**
 * Updates the status bits (PF, AF, ZF, SF, and OF) for an INC or DEC instruction.
 *
 * CF is NOT modified for hysterical raisins (allegedly for carrying and
 * borrowing in arithmetic loops on intel 8008).
 *
 * OF is taken from the sign bit (bit a_cBitsWidth - 1) of:
 *      - INC: ~a_uDst &  a_uResult  (non-negative value became negative),
 *      - DEC:  a_uDst & ~a_uResult  (negative value became non-negative).
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_uDst          The original destination value (for AF and OF calc).
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_OfMethod      0 for INC-style, 1 for DEC-style.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth, a_OfMethod) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        /* Clear all the status flags except CF, which must survive. */ \
        fEflTmp &= ~(X86_EFL_STATUS_BITS & ~X86_EFL_CF); \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        /* Flip the sign bit of the operand width, not always bit 63. */ \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_OfMethod) == 0 \
                                                   ? (((a_uDst) ^ RT_BIT_64((a_cBitsWidth) - 1)) & (a_uResult)) \
                                                   : ((a_uDst) & ((a_uResult) ^ RT_BIT_64((a_cBitsWidth) - 1))) ); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
2250
2251/*
2252 * INC
2253 */
2254
2255IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2256{
2257 uint64_t uDst = *puDst;
2258 uint64_t uResult = uDst + 1;
2259 *puDst = uResult;
2260 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 0 /*INC*/);
2261}
2262
2263# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2264
2265IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2266{
2267 uint32_t uDst = *puDst;
2268 uint32_t uResult = uDst + 1;
2269 *puDst = uResult;
2270 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 0 /*INC*/);
2271}
2272
2273
2274IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2275{
2276 uint16_t uDst = *puDst;
2277 uint16_t uResult = uDst + 1;
2278 *puDst = uResult;
2279 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 0 /*INC*/);
2280}
2281
2282IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2283{
2284 uint8_t uDst = *puDst;
2285 uint8_t uResult = uDst + 1;
2286 *puDst = uResult;
2287 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 0 /*INC*/);
2288}
2289
2290# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2291
2292
2293/*
2294 * DEC
2295 */
2296
2297IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2298{
2299 uint64_t uDst = *puDst;
2300 uint64_t uResult = uDst - 1;
2301 *puDst = uResult;
2302 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 1 /*INC*/);
2303}
2304
2305# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2306
2307IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2308{
2309 uint32_t uDst = *puDst;
2310 uint32_t uResult = uDst - 1;
2311 *puDst = uResult;
2312 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 1 /*INC*/);
2313}
2314
2315
2316IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2317{
2318 uint16_t uDst = *puDst;
2319 uint16_t uResult = uDst - 1;
2320 *puDst = uResult;
2321 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 1 /*INC*/);
2322}
2323
2324
2325IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2326{
2327 uint8_t uDst = *puDst;
2328 uint8_t uResult = uDst - 1;
2329 *puDst = uResult;
2330 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 1 /*INC*/);
2331}
2332
2333# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2334
2335
2336/*
2337 * NOT
2338 */
2339
2340IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2341{
2342 uint64_t uDst = *puDst;
2343 uint64_t uResult = ~uDst;
2344 *puDst = uResult;
2345 /* EFLAGS are not modified. */
2346 RT_NOREF_PV(pfEFlags);
2347}
2348
2349# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2350
2351IEM_DECL_IMPL_DEF(void, iemAImpl_not_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2352{
2353 uint32_t uDst = *puDst;
2354 uint32_t uResult = ~uDst;
2355 *puDst = uResult;
2356 /* EFLAGS are not modified. */
2357 RT_NOREF_PV(pfEFlags);
2358}
2359
2360IEM_DECL_IMPL_DEF(void, iemAImpl_not_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2361{
2362 uint16_t uDst = *puDst;
2363 uint16_t uResult = ~uDst;
2364 *puDst = uResult;
2365 /* EFLAGS are not modified. */
2366 RT_NOREF_PV(pfEFlags);
2367}
2368
2369IEM_DECL_IMPL_DEF(void, iemAImpl_not_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2370{
2371 uint8_t uDst = *puDst;
2372 uint8_t uResult = ~uDst;
2373 *puDst = uResult;
2374 /* EFLAGS are not modified. */
2375 RT_NOREF_PV(pfEFlags);
2376}
2377
2378# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2379
2380
2381/*
2382 * NEG
2383 */
2384
/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for a NEG instruction.
 *
 * CF is set when the original value was non-zero.  OF is taken from the sign
 * bit of (a_uDst & a_uResult), i.e. it is set when both the original value
 * and the result have their sign bit set.
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_uDst          The original destination value (for CF, AF and OF calc).
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags) & ~(X86_EFL_STATUS_BITS | X86_EFL_CF); \
        fEflTmp |= ((a_uDst) != 0) << X86_EFL_CF_BIT; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_uDst) & (a_uResult)); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
2406
2407IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2408{
2409 uint64_t uDst = *puDst;
2410 uint64_t uResult = (uint64_t)0 - uDst;
2411 *puDst = uResult;
2412 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 64);
2413}
2414
2415# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2416
2417IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2418{
2419 uint32_t uDst = *puDst;
2420 uint32_t uResult = (uint32_t)0 - uDst;
2421 *puDst = uResult;
2422 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 32);
2423}
2424
2425
2426IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2427{
2428 uint16_t uDst = *puDst;
2429 uint16_t uResult = (uint16_t)0 - uDst;
2430 *puDst = uResult;
2431 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 16);
2432}
2433
2434
2435IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2436{
2437 uint8_t uDst = *puDst;
2438 uint8_t uResult = (uint8_t)0 - uDst;
2439 *puDst = uResult;
2440 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 8);
2441}
2442
2443# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2444
2445/*
2446 * Locked variants.
2447 */
2448
2449/** Emit a function for doing a locked unary operand operation. */
2450# define EMIT_LOCKED_UNARY_OP(a_Mnemonic, a_cBitsWidth) \
2451 IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
2452 uint32_t *pfEFlags)) \
2453 { \
2454 uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
2455 uint ## a_cBitsWidth ## _t uTmp; \
2456 uint32_t fEflTmp; \
2457 do \
2458 { \
2459 uTmp = uOld; \
2460 fEflTmp = *pfEFlags; \
2461 iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, &fEflTmp); \
2462 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
2463 *pfEFlags = fEflTmp; \
2464 }
2465
2466EMIT_LOCKED_UNARY_OP(inc, 64);
2467EMIT_LOCKED_UNARY_OP(dec, 64);
2468EMIT_LOCKED_UNARY_OP(not, 64);
2469EMIT_LOCKED_UNARY_OP(neg, 64);
2470# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2471EMIT_LOCKED_UNARY_OP(inc, 32);
2472EMIT_LOCKED_UNARY_OP(dec, 32);
2473EMIT_LOCKED_UNARY_OP(not, 32);
2474EMIT_LOCKED_UNARY_OP(neg, 32);
2475
2476EMIT_LOCKED_UNARY_OP(inc, 16);
2477EMIT_LOCKED_UNARY_OP(dec, 16);
2478EMIT_LOCKED_UNARY_OP(not, 16);
2479EMIT_LOCKED_UNARY_OP(neg, 16);
2480
2481EMIT_LOCKED_UNARY_OP(inc, 8);
2482EMIT_LOCKED_UNARY_OP(dec, 8);
2483EMIT_LOCKED_UNARY_OP(not, 8);
2484EMIT_LOCKED_UNARY_OP(neg, 8);
2485# endif
2486
2487
2488/*********************************************************************************************************************************
2489* Shifting and Rotating *
2490*********************************************************************************************************************************/
2491
2492/*
2493 * ROL
2494 */
2495
/**
 * Updates the status bits (OF and CF) for a ROL instruction.
 *
 * CF becomes the least significant bit of the result.  OF becomes the most
 * significant bit of the result XOR'ed with the new CF; it is undefined for
 * cShift > 1, but is calculated the same way as for 1 bit rotates.
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value; must be exactly
 *                          a_cBitsWidth bits wide.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(a_pfEFlags, a_uResult, a_cBitsWidth) do { \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t const fCarry  = ((a_uResult) & X86_EFL_CF); \
        uint32_t       fEflTmp = *(a_pfEFlags) & ~(X86_EFL_CF | X86_EFL_OF); \
        fEflTmp |= fCarry; \
        fEflTmp |= (((a_uResult) >> ((a_cBitsWidth) - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
2515
2516IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2517{
2518 cShift &= 63;
2519 if (cShift)
2520 {
2521 uint64_t uResult = ASMRotateLeftU64(*puDst, cShift);
2522 *puDst = uResult;
2523 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 64);
2524 }
2525}
2526
2527# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2528
2529IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u32,(uint32_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2530{
2531 cShift &= 31;
2532 if (cShift)
2533 {
2534 uint32_t uResult = ASMRotateLeftU32(*puDst, cShift);
2535 *puDst = uResult;
2536 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 32);
2537 }
2538}
2539
2540
2541IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u16,(uint16_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2542{
2543 cShift &= 15;
2544 if (cShift)
2545 {
2546 uint16_t uDst = *puDst;
2547 uint16_t uResult = (uDst << cShift) | (uDst >> (16 - cShift));
2548 *puDst = uResult;
2549 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 16);
2550 }
2551}
2552
2553
2554IEM_DECL_IMPL_DEF(void, iemAImpl_rol_u8,(uint8_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2555{
2556 cShift &= 7;
2557 if (cShift)
2558 {
2559 uint8_t uDst = *puDst;
2560 uint8_t uResult = (uDst << cShift) | (uDst >> (8 - cShift));
2561 *puDst = uResult;
2562 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROL(pfEFlags, uResult, 8);
2563 }
2564}
2565
2566# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2567
2568
2569/*
2570 * ROR
2571 */
2572
/**
 * Updates the status bits (OF and CF) for a ROR instruction.
 *
 * CF becomes the most significant bit of the result.  OF becomes the XOR of
 * the two most significant bits of the result; it is undefined for
 * cShift > 1, but is calculated the same way as for 1 bit rotates.
 *
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(a_pfEFlags, a_uResult, a_cBitsWidth) do { \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~(X86_EFL_CF | X86_EFL_OF); \
        uint32_t const fCarry = ((a_uResult) >> ((a_cBitsWidth) - 1)) & X86_EFL_CF; \
        fEflTmp |= fCarry; \
        /* The shift by width - 2 leaves two bits; keep only the XOR'ed low \
           one so that the MSB cannot spill into the EFLAGS bit above OF. */ \
        fEflTmp |= (uint32_t)((((a_uResult) >> ((a_cBitsWidth) - 2)) ^ fCarry) & 1) << X86_EFL_OF_BIT; \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
2592
2593IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u64,(uint64_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2594{
2595 cShift &= 63;
2596 if (cShift)
2597 {
2598 uint64_t const uResult = ASMRotateRightU64(*puDst, cShift);
2599 *puDst = uResult;
2600 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 64);
2601 }
2602}
2603
2604# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2605
2606IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u32,(uint32_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2607{
2608 cShift &= 31;
2609 if (cShift)
2610 {
2611 uint64_t const uResult = ASMRotateRightU32(*puDst, cShift);
2612 *puDst = uResult;
2613 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 32);
2614 }
2615}
2616
2617
2618IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u16,(uint16_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2619{
2620 cShift &= 15;
2621 if (cShift)
2622 {
2623 uint16_t uDst = *puDst;
2624 uint16_t uResult;
2625 uResult = uDst >> cShift;
2626 uResult |= uDst << (16 - cShift);
2627 *puDst = uResult;
2628 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 16);
2629 }
2630}
2631
2632
2633IEM_DECL_IMPL_DEF(void, iemAImpl_ror_u8,(uint8_t *puDst, uint8_t cShift, uint32_t *pfEFlags))
2634{
2635 cShift &= 7;
2636 if (cShift)
2637 {
2638 uint8_t uDst = *puDst;
2639 uint8_t uResult;
2640 uResult = uDst >> cShift;
2641 uResult |= uDst << (8 - cShift);
2642 *puDst = uResult;
2643 IEM_EFL_UPDATE_STATUS_BITS_FOR_ROR(pfEFlags, uResult, 8);
2644 }
2645}
2646
2647# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2648
2649
2650/*
2651 * RCL
2652 */
2653#define EMIT_RCL(a_cBitsWidth) \
2654IEM_DECL_IMPL_DEF(void, iemAImpl_rcl_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2655{ \
2656 cShift &= a_cBitsWidth - 1; \
2657 if (cShift) \
2658 { \
2659 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2660 uint ## a_cBitsWidth ## _t uResult = uDst << cShift; \
2661 if (cShift > 1) \
2662 uResult |= uDst >> (a_cBitsWidth + 1 - cShift); \
2663 \
2664 uint32_t fEfl = *pfEFlags; \
2665 AssertCompile(X86_EFL_CF_BIT == 0); \
2666 uResult |= (uint ## a_cBitsWidth ## _t)(fEfl & X86_EFL_CF) << (cShift - 1); \
2667 \
2668 *puDst = uResult; \
2669 \
2670 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2671 it the same way as for 1 bit shifts. */ \
2672 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2673 uint32_t const fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2674 fEfl |= fCarry; \
2675 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2676 *pfEFlags = fEfl; \
2677 } \
2678}
2679EMIT_RCL(64);
2680# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2681EMIT_RCL(32);
2682EMIT_RCL(16);
2683EMIT_RCL(8);
2684# endif
2685
2686
2687/*
2688 * RCR
2689 */
2690#define EMIT_RCR(a_cBitsWidth) \
2691IEM_DECL_IMPL_DEF(void, iemAImpl_rcr_u ## a_cBitsWidth,(uint ## a_cBitsWidth ##_t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2692{ \
2693 cShift &= a_cBitsWidth - 1; \
2694 if (cShift) \
2695 { \
2696 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2697 uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
2698 if (cShift > 1) \
2699 uResult |= uDst << (a_cBitsWidth + 1 - cShift); \
2700 \
2701 AssertCompile(X86_EFL_CF_BIT == 0); \
2702 uint32_t fEfl = *pfEFlags; \
2703 uResult |= (uint ## a_cBitsWidth ## _t)(fEfl & X86_EFL_CF) << (a_cBitsWidth - cShift); \
2704 *puDst = uResult; \
2705 \
2706 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2707 it the same way as for 1 bit shifts. */ \
2708 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2709 uint32_t const fCarry = (uDst >> (cShift - 1)) & X86_EFL_CF; \
2710 fEfl |= fCarry; \
2711 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2712 *pfEFlags = fEfl; \
2713 } \
2714}
2715EMIT_RCR(64);
2716# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2717EMIT_RCR(32);
2718EMIT_RCR(16);
2719EMIT_RCR(8);
2720# endif
2721
2722
2723/*
2724 * SHL
2725 */
2726#define EMIT_SHL(a_cBitsWidth) \
2727IEM_DECL_IMPL_DEF(void, iemAImpl_shl_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2728{ \
2729 cShift &= a_cBitsWidth - 1; \
2730 if (cShift) \
2731 { \
2732 uint ## a_cBitsWidth ##_t const uDst = *puDst; \
2733 uint ## a_cBitsWidth ##_t uResult = uDst << cShift; \
2734 *puDst = uResult; \
2735 \
2736 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2737 it the same way as for 1 bit shifts. The AF bit is undefined, we \
2738 always set it to zero atm. */ \
2739 AssertCompile(X86_EFL_CF_BIT == 0); \
2740 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2741 uint32_t fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2742 fEfl |= fCarry; \
2743 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2744 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2745 fEfl |= X86_EFL_CALC_ZF(uResult); \
2746 fEfl |= g_afParity[uResult & 0xff]; \
2747 *pfEFlags = fEfl; \
2748 } \
2749}
2750EMIT_SHL(64)
2751# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2752EMIT_SHL(32)
2753EMIT_SHL(16)
2754EMIT_SHL(8)
2755# endif
2756
2757
2758/*
2759 * SHR
2760 */
2761#define EMIT_SHR(a_cBitsWidth) \
2762IEM_DECL_IMPL_DEF(void, iemAImpl_shr_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2763{ \
2764 cShift &= a_cBitsWidth - 1; \
2765 if (cShift) \
2766 { \
2767 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2768 uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
2769 *puDst = uResult; \
2770 \
2771 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2772 it the same way as for 1 bit shifts. The AF bit is undefined, we \
2773 always set it to zero atm. */ \
2774 AssertCompile(X86_EFL_CF_BIT == 0); \
2775 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2776 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2777 fEfl |= (uDst >> (a_cBitsWidth - 1)) << X86_EFL_OF_BIT; \
2778 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2779 fEfl |= X86_EFL_CALC_ZF(uResult); \
2780 fEfl |= g_afParity[uResult & 0xff]; \
2781 *pfEFlags = fEfl; \
2782 } \
2783}
2784EMIT_SHR(64)
2785# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2786EMIT_SHR(32)
2787EMIT_SHR(16)
2788EMIT_SHR(8)
2789# endif
2790
2791
2792/*
2793 * SAR
2794 */
2795#define EMIT_SAR(a_cBitsWidth) \
2796IEM_DECL_IMPL_DEF(void, iemAImpl_sar_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2797{ \
2798 cShift &= a_cBitsWidth - 1; \
2799 if (cShift) \
2800 { \
2801 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2802 uint ## a_cBitsWidth ## _t uResult = (int ## a_cBitsWidth ## _t)uDst >> cShift; \
2803 *puDst = uResult; \
2804 \
2805 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2806 it the same way as for 1 bit shifts (0). The AF bit is undefined, \
2807 we always set it to zero atm. */ \
2808 AssertCompile(X86_EFL_CF_BIT == 0); \
2809 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2810 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2811 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2812 fEfl |= X86_EFL_CALC_ZF(uResult); \
2813 fEfl |= g_afParity[uResult & 0xff]; \
2814 *pfEFlags = fEfl; \
2815 } \
2816}
2817EMIT_SAR(64)
2818# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2819EMIT_SAR(32)
2820EMIT_SAR(16)
2821EMIT_SAR(8)
2822# endif
2823
2824
2825/*
2826 * SHLD
2827 */
2828#define EMIT_SHLD(a_cBitsWidth) \
2829IEM_DECL_IMPL_DEF(void, iemAImpl_shld_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, \
2830 uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2831{ \
2832 cShift &= a_cBitsWidth - 1; \
2833 if (cShift) \
2834 { \
2835 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2836 uint ## a_cBitsWidth ## _t uResult = uDst << cShift; \
2837 uResult |= uSrc >> (a_cBitsWidth - cShift); \
2838 *puDst = uResult; \
2839 \
2840 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2841 it the same way as for 1 bit shifts. The AF bit is undefined, \
2842 we always set it to zero atm. */ \
2843 AssertCompile(X86_EFL_CF_BIT == 0); \
2844 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2845 fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2846 fEfl |= (uint32_t)((uDst >> (a_cBitsWidth - 1)) ^ (uint32_t)(uResult >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
2847 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2848 fEfl |= X86_EFL_CALC_ZF(uResult); \
2849 fEfl |= g_afParity[uResult & 0xff]; \
2850 *pfEFlags = fEfl; \
2851 } \
2852}
2853EMIT_SHLD(64)
2854# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2855EMIT_SHLD(32)
2856EMIT_SHLD(16)
2857EMIT_SHLD(8)
2858# endif
2859
2860
2861/*
2862 * SHRD
2863 */
2864#define EMIT_SHRD(a_cBitsWidth) \
2865IEM_DECL_IMPL_DEF(void, iemAImpl_shrd_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, \
2866 uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2867{ \
2868 cShift &= a_cBitsWidth - 1; \
2869 if (cShift) \
2870 { \
2871 uint ## a_cBitsWidth ## _t const uDst = *puDst; \
2872 uint ## a_cBitsWidth ## _t uResult = uDst >> cShift; \
2873 uResult |= uSrc << (a_cBitsWidth - cShift); \
2874 *puDst = uResult; \
2875 \
2876 /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
2877 it the same way as for 1 bit shifts. The AF bit is undefined, \
2878 we always set it to zero atm. */ \
2879 AssertCompile(X86_EFL_CF_BIT == 0); \
2880 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2881 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2882 fEfl |= (uint32_t)((uDst >> (a_cBitsWidth - 1)) ^ (uint32_t)(uResult >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
2883 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2884 fEfl |= X86_EFL_CALC_ZF(uResult); \
2885 fEfl |= g_afParity[uResult & 0xff]; \
2886 *pfEFlags = fEfl; \
2887 } \
2888}
2889EMIT_SHRD(64)
2890# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2891EMIT_SHRD(32)
2892EMIT_SHRD(16)
2893EMIT_SHRD(8)
2894# endif
2895
2896
2897# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2898/*
2899 * BSWAP
2900 */
2901
2902IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u64,(uint64_t *puDst))
2903{
2904 *puDst = ASMByteSwapU64(*puDst);
2905}
2906
2907
2908IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u32,(uint32_t *puDst))
2909{
2910 *puDst = ASMByteSwapU32(*puDst);
2911}
2912
2913
2914/* Note! undocument, so 32-bit arg */
2915IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u16,(uint32_t *puDst))
2916{
2917 *puDst = ASMByteSwapU16((uint16_t)*puDst) | (*puDst & UINT32_C(0xffff0000));
2918}
2919
2920# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2921
2922
2923
2924# if defined(IEM_WITHOUT_ASSEMBLY)
2925
2926/*
2927 * LFENCE, SFENCE & MFENCE.
2928 */
2929
2930IEM_DECL_IMPL_DEF(void, iemAImpl_lfence,(void))
2931{
2932 ASMReadFence();
2933}
2934
2935
2936IEM_DECL_IMPL_DEF(void, iemAImpl_sfence,(void))
2937{
2938 ASMWriteFence();
2939}
2940
2941
2942IEM_DECL_IMPL_DEF(void, iemAImpl_mfence,(void))
2943{
2944 ASMMemoryFence();
2945}
2946
2947
2948# ifndef RT_ARCH_ARM64
2949IEM_DECL_IMPL_DEF(void, iemAImpl_alt_mem_fence,(void))
2950{
2951 ASMMemoryFence();
2952}
2953# endif
2954
2955# endif
2956
2957#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
2958
2959
2960IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
2961{
2962 if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
2963 {
2964 *pu16Dst &= X86_SEL_MASK_OFF_RPL;
2965 *pu16Dst |= u16Src & X86_SEL_RPL;
2966
2967 *pfEFlags |= X86_EFL_ZF;
2968 }
2969 else
2970 *pfEFlags &= ~X86_EFL_ZF;
2971}
2972
2973
2974IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
2975{
2976 RT_NOREF(pFpuState);
2977 puDst->au32[0] = puSrc->au32[0];
2978 puDst->au32[1] = puSrc->au32[0];
2979 puDst->au32[2] = puSrc->au32[2];
2980 puDst->au32[3] = puSrc->au32[2];
2981}
2982
2983#ifdef IEM_WITH_VEX
2984
2985IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
2986{
2987 pXState->x87.aXMM[iYRegDst].au32[0] = pXState->x87.aXMM[iYRegSrc].au32[0];
2988 pXState->x87.aXMM[iYRegDst].au32[1] = pXState->x87.aXMM[iYRegSrc].au32[0];
2989 pXState->x87.aXMM[iYRegDst].au32[2] = pXState->x87.aXMM[iYRegSrc].au32[2];
2990 pXState->x87.aXMM[iYRegDst].au32[3] = pXState->x87.aXMM[iYRegSrc].au32[2];
2991 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
2992 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
2993 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
2994 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
2995}
2996
2997
2998IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
2999{
3000 pXState->x87.aXMM[iYRegDst].au32[0] = pSrc->au32[0];
3001 pXState->x87.aXMM[iYRegDst].au32[1] = pSrc->au32[0];
3002 pXState->x87.aXMM[iYRegDst].au32[2] = pSrc->au32[2];
3003 pXState->x87.aXMM[iYRegDst].au32[3] = pSrc->au32[2];
3004 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
3005 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
3006 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
3007 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
3008}
3009
3010#endif /* IEM_WITH_VEX */
3011
3012
3013IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
3014{
3015 RT_NOREF(pFpuState);
3016 puDst->au32[0] = puSrc->au32[1];
3017 puDst->au32[1] = puSrc->au32[1];
3018 puDst->au32[2] = puSrc->au32[3];
3019 puDst->au32[3] = puSrc->au32[3];
3020}
3021
3022
3023IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
3024{
3025 RT_NOREF(pFpuState);
3026 puDst->au64[0] = uSrc;
3027 puDst->au64[1] = uSrc;
3028}
3029
3030#ifdef IEM_WITH_VEX
3031
3032IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
3033{
3034 pXState->x87.aXMM[iYRegDst].au64[0] = pXState->x87.aXMM[iYRegSrc].au64[0];
3035 pXState->x87.aXMM[iYRegDst].au64[1] = pXState->x87.aXMM[iYRegSrc].au64[0];
3036 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
3037 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
3038}
3039
3040IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
3041{
3042 pXState->x87.aXMM[iYRegDst].au64[0] = pSrc->au64[0];
3043 pXState->x87.aXMM[iYRegDst].au64[1] = pSrc->au64[0];
3044 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
3045 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
3046}
3047
3048#endif /* IEM_WITH_VEX */
3049
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette