VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp@94195

Last change on this file since 94195 was 94195, checked in by vboxsync, 3 years ago

VMM/IEM: Adjusted double shifts C code to match intel behaviour. bugref:9898

1/* $Id: IEMAllAImplC.cpp 94195 2022-03-12 13:42:58Z vboxsync $ */
2/** @file
3 * IEM - Instruction Implementation in Assembly, portable C variant.
4 */
5
6/*
7 * Copyright (C) 2011-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "IEMInternal.h"
23#include <VBox/vmm/vmcc.h>
24#include <iprt/errcore.h>
25#include <iprt/x86.h>
26#include <iprt/uint128.h>
27
28
29/*********************************************************************************************************************************
30* Defined Constants And Macros *
31*********************************************************************************************************************************/
32/** @def IEM_WITHOUT_ASSEMBLY
33 * Enables all the code in this file.
34 */
35#if !defined(IEM_WITHOUT_ASSEMBLY)
36# if defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
37# define IEM_WITHOUT_ASSEMBLY
38# endif
39#endif
40/* IEM_WITH_ASSEMBLY trumps IEM_WITHOUT_ASSEMBLY for tstIEMAImplAsm purposes. */
41#ifdef IEM_WITH_ASSEMBLY
42# undef IEM_WITHOUT_ASSEMBLY
43#endif
44
45/**
46 * Calculates the signed flag value given a result and its bit width.
47 *
48 * The signed flag (SF) is a duplication of the most significant bit in the
49 * result.
50 *
51 * @returns X86_EFL_SF or 0.
52 * @param a_uResult Unsigned result value.
53 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
54 */
55#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
56 ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )
57
58/**
59 * Calculates the zero flag value given a result.
60 *
61 * The zero flag (ZF) indicates whether the result is zero or not.
62 *
63 * @returns X86_EFL_ZF or 0.
64 * @param a_uResult Unsigned result value.
65 */
66#define X86_EFL_CALC_ZF(a_uResult) \
67 ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )
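
/*
 * Illustrative example (not part of the original file): for an 8-bit result
 * of 0x80 the sign bit is set and the value is non-zero, so:
 * @code
 *   uint32_t fSf = X86_EFL_CALC_SF(0x80, 8); // == X86_EFL_SF (bit 7 already lines up)
 *   uint32_t fZf = X86_EFL_CALC_ZF(0x80);    // == 0 (result is non-zero)
 * @endcode
 */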
68
69/**
70 * Extracts the OF flag from an OF calculation result.
71 *
72 * These are typically used by concatenating with a bit count. The problem is that
73 * 8-bit values need shifting in the opposite direction from the others.
74 */
75#define X86_EFL_GET_OF_8(a_uValue) (((uint32_t)(a_uValue) << (X86_EFL_OF_BIT - 8 + 1)) & X86_EFL_OF)
76#define X86_EFL_GET_OF_16(a_uValue) ((uint32_t)((a_uValue) >> (16 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
77#define X86_EFL_GET_OF_32(a_uValue) ((uint32_t)((a_uValue) >> (32 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
78#define X86_EFL_GET_OF_64(a_uValue) ((uint32_t)((a_uValue) >> (64 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
79
80/**
81 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after an arithmetic op.
82 *
83 * @returns Status bits.
84 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
85 * @param a_uResult Unsigned result value.
86 * @param a_uDst The original destination value (for AF calc).
87 * @param a_uSrc The source value (for AF calc).
88 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
89 * @param a_CfExpr Bool expression for the carry flag (CF).
90 * @param a_uSrcOf The a_uSrc value to use for overflow calculation.
91 */
92#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_uSrcOf) \
93 do { \
94 uint32_t fEflTmp = *(a_pfEFlags); \
95 fEflTmp &= ~X86_EFL_STATUS_BITS; \
96 fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
97 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
98 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
99 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
100 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
101 \
102 /* Overflow during ADDition happens when both inputs have the same signed \
103 bit value and the result has a different sign bit value. \
104 \
105 Since subtraction can be rewritten as addition: 2 - 1 == 2 + -1, it \
106 follows that for SUBtraction the signed bit value must differ between \
107 * the two inputs and the result's sign bit must differ from the first input's. \
108 Note! Must xor with sign bit to convert, not do (0 - a_uSrc). \
109 \
110 See also: http://teaching.idallen.com/dat2343/10f/notes/040_overflow.txt */ \
111 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth( ( ((uint ## a_cBitsWidth ## _t)~((a_uDst) ^ (a_uSrcOf))) \
112 & RT_BIT_64(a_cBitsWidth - 1)) \
113 & ((a_uResult) ^ (a_uDst)) ); \
114 *(a_pfEFlags) = fEflTmp; \
115 } while (0)
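
/*
 * A worked example of the OF rule above (illustrative only): for the 8-bit
 * ADD 0x50 + 0x50 = 0xa0 both inputs have a clear sign bit while the result
 * has it set, so OF must be raised. The expression reduces to exactly that
 * test:
 * @code
 *   uint8_t uDst = 0x50, uSrc = 0x50;
 *   uint8_t uResult = uDst + uSrc;           // 0xa0
 *   uint8_t fOf = (uint8_t)~(uDst ^ uSrc)    // inputs have the same sign
 *               & (uResult ^ uDst)           // result sign differs from dst
 *               & 0x80;                      // keep only the sign bit
 *   // fOf == 0x80 here; X86_EFL_GET_OF_8() then shifts it into the
 *   // X86_EFL_OF position (left for 8-bit, right for the wider types).
 * @endcode
 */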
116
117/**
118 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
119 *
120 * CF and OF are defined to be 0 by logical operations. AF on the other hand is
121 * undefined. We do not set AF, as that seems to make the most sense (which
122 * probably makes it the most wrong in real life).
123 *
124 * @returns Status bits.
125 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
126 * @param a_uResult Unsigned result value.
127 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
128 * @param a_fExtra Additional bits to set.
129 */
130#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
131 do { \
132 uint32_t fEflTmp = *(a_pfEFlags); \
133 fEflTmp &= ~X86_EFL_STATUS_BITS; \
134 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
135 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
136 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
137 fEflTmp |= (a_fExtra); \
138 *(a_pfEFlags) = fEflTmp; \
139 } while (0)
140
141
142/*********************************************************************************************************************************
143* Global Variables *
144*********************************************************************************************************************************/
145/**
146 * Parity calculation table.
147 *
148 * This is also used by iemAllAImpl.asm.
149 *
150 * The generator code:
151 * @code
152 * #include <stdio.h>
153 *
154 * int main()
155 * {
156 * unsigned b;
157 * for (b = 0; b < 256; b++)
158 * {
159 * int cOnes = ( b & 1)
160 * + ((b >> 1) & 1)
161 * + ((b >> 2) & 1)
162 * + ((b >> 3) & 1)
163 * + ((b >> 4) & 1)
164 * + ((b >> 5) & 1)
165 * + ((b >> 6) & 1)
166 * + ((b >> 7) & 1);
167 * printf(" /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
168 * b,
169 * (b >> 7) & 1,
170 * (b >> 6) & 1,
171 * (b >> 5) & 1,
172 * (b >> 4) & 1,
173 * (b >> 3) & 1,
174 * (b >> 2) & 1,
175 * (b >> 1) & 1,
176 * b & 1,
177 * cOnes & 1 ? "0" : "X86_EFL_PF");
178 * }
179 * return 0;
180 * }
181 * @endcode
182 */
183uint8_t const g_afParity[256] =
184{
185 /* 0x00 = 00000000b */ X86_EFL_PF,
186 /* 0x01 = 00000001b */ 0,
187 /* 0x02 = 00000010b */ 0,
188 /* 0x03 = 00000011b */ X86_EFL_PF,
189 /* 0x04 = 00000100b */ 0,
190 /* 0x05 = 00000101b */ X86_EFL_PF,
191 /* 0x06 = 00000110b */ X86_EFL_PF,
192 /* 0x07 = 00000111b */ 0,
193 /* 0x08 = 00001000b */ 0,
194 /* 0x09 = 00001001b */ X86_EFL_PF,
195 /* 0x0a = 00001010b */ X86_EFL_PF,
196 /* 0x0b = 00001011b */ 0,
197 /* 0x0c = 00001100b */ X86_EFL_PF,
198 /* 0x0d = 00001101b */ 0,
199 /* 0x0e = 00001110b */ 0,
200 /* 0x0f = 00001111b */ X86_EFL_PF,
201 /* 0x10 = 00010000b */ 0,
202 /* 0x11 = 00010001b */ X86_EFL_PF,
203 /* 0x12 = 00010010b */ X86_EFL_PF,
204 /* 0x13 = 00010011b */ 0,
205 /* 0x14 = 00010100b */ X86_EFL_PF,
206 /* 0x15 = 00010101b */ 0,
207 /* 0x16 = 00010110b */ 0,
208 /* 0x17 = 00010111b */ X86_EFL_PF,
209 /* 0x18 = 00011000b */ X86_EFL_PF,
210 /* 0x19 = 00011001b */ 0,
211 /* 0x1a = 00011010b */ 0,
212 /* 0x1b = 00011011b */ X86_EFL_PF,
213 /* 0x1c = 00011100b */ 0,
214 /* 0x1d = 00011101b */ X86_EFL_PF,
215 /* 0x1e = 00011110b */ X86_EFL_PF,
216 /* 0x1f = 00011111b */ 0,
217 /* 0x20 = 00100000b */ 0,
218 /* 0x21 = 00100001b */ X86_EFL_PF,
219 /* 0x22 = 00100010b */ X86_EFL_PF,
220 /* 0x23 = 00100011b */ 0,
221 /* 0x24 = 00100100b */ X86_EFL_PF,
222 /* 0x25 = 00100101b */ 0,
223 /* 0x26 = 00100110b */ 0,
224 /* 0x27 = 00100111b */ X86_EFL_PF,
225 /* 0x28 = 00101000b */ X86_EFL_PF,
226 /* 0x29 = 00101001b */ 0,
227 /* 0x2a = 00101010b */ 0,
228 /* 0x2b = 00101011b */ X86_EFL_PF,
229 /* 0x2c = 00101100b */ 0,
230 /* 0x2d = 00101101b */ X86_EFL_PF,
231 /* 0x2e = 00101110b */ X86_EFL_PF,
232 /* 0x2f = 00101111b */ 0,
233 /* 0x30 = 00110000b */ X86_EFL_PF,
234 /* 0x31 = 00110001b */ 0,
235 /* 0x32 = 00110010b */ 0,
236 /* 0x33 = 00110011b */ X86_EFL_PF,
237 /* 0x34 = 00110100b */ 0,
238 /* 0x35 = 00110101b */ X86_EFL_PF,
239 /* 0x36 = 00110110b */ X86_EFL_PF,
240 /* 0x37 = 00110111b */ 0,
241 /* 0x38 = 00111000b */ 0,
242 /* 0x39 = 00111001b */ X86_EFL_PF,
243 /* 0x3a = 00111010b */ X86_EFL_PF,
244 /* 0x3b = 00111011b */ 0,
245 /* 0x3c = 00111100b */ X86_EFL_PF,
246 /* 0x3d = 00111101b */ 0,
247 /* 0x3e = 00111110b */ 0,
248 /* 0x3f = 00111111b */ X86_EFL_PF,
249 /* 0x40 = 01000000b */ 0,
250 /* 0x41 = 01000001b */ X86_EFL_PF,
251 /* 0x42 = 01000010b */ X86_EFL_PF,
252 /* 0x43 = 01000011b */ 0,
253 /* 0x44 = 01000100b */ X86_EFL_PF,
254 /* 0x45 = 01000101b */ 0,
255 /* 0x46 = 01000110b */ 0,
256 /* 0x47 = 01000111b */ X86_EFL_PF,
257 /* 0x48 = 01001000b */ X86_EFL_PF,
258 /* 0x49 = 01001001b */ 0,
259 /* 0x4a = 01001010b */ 0,
260 /* 0x4b = 01001011b */ X86_EFL_PF,
261 /* 0x4c = 01001100b */ 0,
262 /* 0x4d = 01001101b */ X86_EFL_PF,
263 /* 0x4e = 01001110b */ X86_EFL_PF,
264 /* 0x4f = 01001111b */ 0,
265 /* 0x50 = 01010000b */ X86_EFL_PF,
266 /* 0x51 = 01010001b */ 0,
267 /* 0x52 = 01010010b */ 0,
268 /* 0x53 = 01010011b */ X86_EFL_PF,
269 /* 0x54 = 01010100b */ 0,
270 /* 0x55 = 01010101b */ X86_EFL_PF,
271 /* 0x56 = 01010110b */ X86_EFL_PF,
272 /* 0x57 = 01010111b */ 0,
273 /* 0x58 = 01011000b */ 0,
274 /* 0x59 = 01011001b */ X86_EFL_PF,
275 /* 0x5a = 01011010b */ X86_EFL_PF,
276 /* 0x5b = 01011011b */ 0,
277 /* 0x5c = 01011100b */ X86_EFL_PF,
278 /* 0x5d = 01011101b */ 0,
279 /* 0x5e = 01011110b */ 0,
280 /* 0x5f = 01011111b */ X86_EFL_PF,
281 /* 0x60 = 01100000b */ X86_EFL_PF,
282 /* 0x61 = 01100001b */ 0,
283 /* 0x62 = 01100010b */ 0,
284 /* 0x63 = 01100011b */ X86_EFL_PF,
285 /* 0x64 = 01100100b */ 0,
286 /* 0x65 = 01100101b */ X86_EFL_PF,
287 /* 0x66 = 01100110b */ X86_EFL_PF,
288 /* 0x67 = 01100111b */ 0,
289 /* 0x68 = 01101000b */ 0,
290 /* 0x69 = 01101001b */ X86_EFL_PF,
291 /* 0x6a = 01101010b */ X86_EFL_PF,
292 /* 0x6b = 01101011b */ 0,
293 /* 0x6c = 01101100b */ X86_EFL_PF,
294 /* 0x6d = 01101101b */ 0,
295 /* 0x6e = 01101110b */ 0,
296 /* 0x6f = 01101111b */ X86_EFL_PF,
297 /* 0x70 = 01110000b */ 0,
298 /* 0x71 = 01110001b */ X86_EFL_PF,
299 /* 0x72 = 01110010b */ X86_EFL_PF,
300 /* 0x73 = 01110011b */ 0,
301 /* 0x74 = 01110100b */ X86_EFL_PF,
302 /* 0x75 = 01110101b */ 0,
303 /* 0x76 = 01110110b */ 0,
304 /* 0x77 = 01110111b */ X86_EFL_PF,
305 /* 0x78 = 01111000b */ X86_EFL_PF,
306 /* 0x79 = 01111001b */ 0,
307 /* 0x7a = 01111010b */ 0,
308 /* 0x7b = 01111011b */ X86_EFL_PF,
309 /* 0x7c = 01111100b */ 0,
310 /* 0x7d = 01111101b */ X86_EFL_PF,
311 /* 0x7e = 01111110b */ X86_EFL_PF,
312 /* 0x7f = 01111111b */ 0,
313 /* 0x80 = 10000000b */ 0,
314 /* 0x81 = 10000001b */ X86_EFL_PF,
315 /* 0x82 = 10000010b */ X86_EFL_PF,
316 /* 0x83 = 10000011b */ 0,
317 /* 0x84 = 10000100b */ X86_EFL_PF,
318 /* 0x85 = 10000101b */ 0,
319 /* 0x86 = 10000110b */ 0,
320 /* 0x87 = 10000111b */ X86_EFL_PF,
321 /* 0x88 = 10001000b */ X86_EFL_PF,
322 /* 0x89 = 10001001b */ 0,
323 /* 0x8a = 10001010b */ 0,
324 /* 0x8b = 10001011b */ X86_EFL_PF,
325 /* 0x8c = 10001100b */ 0,
326 /* 0x8d = 10001101b */ X86_EFL_PF,
327 /* 0x8e = 10001110b */ X86_EFL_PF,
328 /* 0x8f = 10001111b */ 0,
329 /* 0x90 = 10010000b */ X86_EFL_PF,
330 /* 0x91 = 10010001b */ 0,
331 /* 0x92 = 10010010b */ 0,
332 /* 0x93 = 10010011b */ X86_EFL_PF,
333 /* 0x94 = 10010100b */ 0,
334 /* 0x95 = 10010101b */ X86_EFL_PF,
335 /* 0x96 = 10010110b */ X86_EFL_PF,
336 /* 0x97 = 10010111b */ 0,
337 /* 0x98 = 10011000b */ 0,
338 /* 0x99 = 10011001b */ X86_EFL_PF,
339 /* 0x9a = 10011010b */ X86_EFL_PF,
340 /* 0x9b = 10011011b */ 0,
341 /* 0x9c = 10011100b */ X86_EFL_PF,
342 /* 0x9d = 10011101b */ 0,
343 /* 0x9e = 10011110b */ 0,
344 /* 0x9f = 10011111b */ X86_EFL_PF,
345 /* 0xa0 = 10100000b */ X86_EFL_PF,
346 /* 0xa1 = 10100001b */ 0,
347 /* 0xa2 = 10100010b */ 0,
348 /* 0xa3 = 10100011b */ X86_EFL_PF,
349 /* 0xa4 = 10100100b */ 0,
350 /* 0xa5 = 10100101b */ X86_EFL_PF,
351 /* 0xa6 = 10100110b */ X86_EFL_PF,
352 /* 0xa7 = 10100111b */ 0,
353 /* 0xa8 = 10101000b */ 0,
354 /* 0xa9 = 10101001b */ X86_EFL_PF,
355 /* 0xaa = 10101010b */ X86_EFL_PF,
356 /* 0xab = 10101011b */ 0,
357 /* 0xac = 10101100b */ X86_EFL_PF,
358 /* 0xad = 10101101b */ 0,
359 /* 0xae = 10101110b */ 0,
360 /* 0xaf = 10101111b */ X86_EFL_PF,
361 /* 0xb0 = 10110000b */ 0,
362 /* 0xb1 = 10110001b */ X86_EFL_PF,
363 /* 0xb2 = 10110010b */ X86_EFL_PF,
364 /* 0xb3 = 10110011b */ 0,
365 /* 0xb4 = 10110100b */ X86_EFL_PF,
366 /* 0xb5 = 10110101b */ 0,
367 /* 0xb6 = 10110110b */ 0,
368 /* 0xb7 = 10110111b */ X86_EFL_PF,
369 /* 0xb8 = 10111000b */ X86_EFL_PF,
370 /* 0xb9 = 10111001b */ 0,
371 /* 0xba = 10111010b */ 0,
372 /* 0xbb = 10111011b */ X86_EFL_PF,
373 /* 0xbc = 10111100b */ 0,
374 /* 0xbd = 10111101b */ X86_EFL_PF,
375 /* 0xbe = 10111110b */ X86_EFL_PF,
376 /* 0xbf = 10111111b */ 0,
377 /* 0xc0 = 11000000b */ X86_EFL_PF,
378 /* 0xc1 = 11000001b */ 0,
379 /* 0xc2 = 11000010b */ 0,
380 /* 0xc3 = 11000011b */ X86_EFL_PF,
381 /* 0xc4 = 11000100b */ 0,
382 /* 0xc5 = 11000101b */ X86_EFL_PF,
383 /* 0xc6 = 11000110b */ X86_EFL_PF,
384 /* 0xc7 = 11000111b */ 0,
385 /* 0xc8 = 11001000b */ 0,
386 /* 0xc9 = 11001001b */ X86_EFL_PF,
387 /* 0xca = 11001010b */ X86_EFL_PF,
388 /* 0xcb = 11001011b */ 0,
389 /* 0xcc = 11001100b */ X86_EFL_PF,
390 /* 0xcd = 11001101b */ 0,
391 /* 0xce = 11001110b */ 0,
392 /* 0xcf = 11001111b */ X86_EFL_PF,
393 /* 0xd0 = 11010000b */ 0,
394 /* 0xd1 = 11010001b */ X86_EFL_PF,
395 /* 0xd2 = 11010010b */ X86_EFL_PF,
396 /* 0xd3 = 11010011b */ 0,
397 /* 0xd4 = 11010100b */ X86_EFL_PF,
398 /* 0xd5 = 11010101b */ 0,
399 /* 0xd6 = 11010110b */ 0,
400 /* 0xd7 = 11010111b */ X86_EFL_PF,
401 /* 0xd8 = 11011000b */ X86_EFL_PF,
402 /* 0xd9 = 11011001b */ 0,
403 /* 0xda = 11011010b */ 0,
404 /* 0xdb = 11011011b */ X86_EFL_PF,
405 /* 0xdc = 11011100b */ 0,
406 /* 0xdd = 11011101b */ X86_EFL_PF,
407 /* 0xde = 11011110b */ X86_EFL_PF,
408 /* 0xdf = 11011111b */ 0,
409 /* 0xe0 = 11100000b */ 0,
410 /* 0xe1 = 11100001b */ X86_EFL_PF,
411 /* 0xe2 = 11100010b */ X86_EFL_PF,
412 /* 0xe3 = 11100011b */ 0,
413 /* 0xe4 = 11100100b */ X86_EFL_PF,
414 /* 0xe5 = 11100101b */ 0,
415 /* 0xe6 = 11100110b */ 0,
416 /* 0xe7 = 11100111b */ X86_EFL_PF,
417 /* 0xe8 = 11101000b */ X86_EFL_PF,
418 /* 0xe9 = 11101001b */ 0,
419 /* 0xea = 11101010b */ 0,
420 /* 0xeb = 11101011b */ X86_EFL_PF,
421 /* 0xec = 11101100b */ 0,
422 /* 0xed = 11101101b */ X86_EFL_PF,
423 /* 0xee = 11101110b */ X86_EFL_PF,
424 /* 0xef = 11101111b */ 0,
425 /* 0xf0 = 11110000b */ X86_EFL_PF,
426 /* 0xf1 = 11110001b */ 0,
427 /* 0xf2 = 11110010b */ 0,
428 /* 0xf3 = 11110011b */ X86_EFL_PF,
429 /* 0xf4 = 11110100b */ 0,
430 /* 0xf5 = 11110101b */ X86_EFL_PF,
431 /* 0xf6 = 11110110b */ X86_EFL_PF,
432 /* 0xf7 = 11110111b */ 0,
433 /* 0xf8 = 11111000b */ 0,
434 /* 0xf9 = 11111001b */ X86_EFL_PF,
435 /* 0xfa = 11111010b */ X86_EFL_PF,
436 /* 0xfb = 11111011b */ 0,
437 /* 0xfc = 11111100b */ X86_EFL_PF,
438 /* 0xfd = 11111101b */ 0,
439 /* 0xfe = 11111110b */ 0,
440 /* 0xff = 11111111b */ X86_EFL_PF,
441};
442
443
444/*
445 * There are a few 64-bit-on-32-bit-host things we'd rather do in C. Actually, doing
446 * it all in C is probably safer for now; what's necessary can be optimized later.
447 */
448#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
449
450
451/*********************************************************************************************************************************
452* Binary Operations *
453*********************************************************************************************************************************/
454
455/*
456 * ADD
457 */
458
459IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
460{
461 uint64_t uDst = *puDst;
462 uint64_t uResult = uDst + uSrc;
463 *puDst = uResult;
464 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, uSrc);
465}
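
/*
 * Carry detection sketch (illustrative, not used by the code): with
 * modulo-2^64 arithmetic the sum wraps around below the first addend exactly
 * when the addition carries out of bit 63, hence the 'uResult < uDst' CF
 * expression above:
 * @code
 *   uint64_t uDst = UINT64_MAX, uSrc = 1;
 *   uint64_t uResult = uDst + uSrc;    // wraps to 0
 *   bool     fCarry  = uResult < uDst; // true, so CF must be set
 * @endcode
 */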
466
467# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
468
469IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
470{
471 uint32_t uDst = *puDst;
472 uint32_t uResult = uDst + uSrc;
473 *puDst = uResult;
474 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, uSrc);
475}
476
477
478IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
479{
480 uint16_t uDst = *puDst;
481 uint16_t uResult = uDst + uSrc;
482 *puDst = uResult;
483 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, uSrc);
484}
485
486
487IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
488{
489 uint8_t uDst = *puDst;
490 uint8_t uResult = uDst + uSrc;
491 *puDst = uResult;
492 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, uSrc);
493}
494
495# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
496
497/*
498 * ADC
499 */
500
501IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
502{
503 if (!(*pfEFlags & X86_EFL_CF))
504 iemAImpl_add_u64(puDst, uSrc, pfEFlags);
505 else
506 {
507 uint64_t uDst = *puDst;
508 uint64_t uResult = uDst + uSrc + 1;
509 *puDst = uResult;
510 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, uSrc);
511 }
512}
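
/*
 * With a carry-in of 1 the sum is uDst + uSrc + 1, so a result equal to uDst
 * (i.e. uSrc + 1 wrapped to zero) also means a carry out; that is why the CF
 * expression weakens from '<' to '<=' in this branch. Illustrative edge case:
 * @code
 *   uint64_t uDst = 5, uSrc = UINT64_MAX;  // entered with CF=1
 *   uint64_t uResult = uDst + uSrc + 1;    // == uDst (carried out)
 *   bool     fCarry  = uResult <= uDst;    // true
 * @endcode
 */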
513
514# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
515
516IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
517{
518 if (!(*pfEFlags & X86_EFL_CF))
519 iemAImpl_add_u32(puDst, uSrc, pfEFlags);
520 else
521 {
522 uint32_t uDst = *puDst;
523 uint32_t uResult = uDst + uSrc + 1;
524 *puDst = uResult;
525 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, uSrc);
526 }
527}
528
529
530IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
531{
532 if (!(*pfEFlags & X86_EFL_CF))
533 iemAImpl_add_u16(puDst, uSrc, pfEFlags);
534 else
535 {
536 uint16_t uDst = *puDst;
537 uint16_t uResult = uDst + uSrc + 1;
538 *puDst = uResult;
539 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, uSrc);
540 }
541}
542
543
544IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
545{
546 if (!(*pfEFlags & X86_EFL_CF))
547 iemAImpl_add_u8(puDst, uSrc, pfEFlags);
548 else
549 {
550 uint8_t uDst = *puDst;
551 uint8_t uResult = uDst + uSrc + 1;
552 *puDst = uResult;
553 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, uSrc);
554 }
555}
556
557# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
558
559/*
560 * SUB
561 */
562
563IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
564{
565 uint64_t uDst = *puDst;
566 uint64_t uResult = uDst - uSrc;
567 *puDst = uResult;
568 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst < uSrc, uSrc ^ RT_BIT_64(63));
569}
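
/*
 * A sketch of the "xor with sign bit, not 0 - a_uSrc" note in the arithmetic
 * EFLAGS macro (illustrative only): the OF formula only cares about the sign
 * bit of the converted source, and flipping that bit also handles the one
 * value two's complement negation cannot, the minimum signed integer:
 * @code
 *   uint64_t uSrc  = RT_BIT_64(63);        // INT64_MIN
 *   uint64_t uNeg  = 0 - uSrc;             // still RT_BIT_64(63) - sign unchanged, wrong
 *   uint64_t uFlip = uSrc ^ RT_BIT_64(63); // sign bit now correct
 * @endcode
 */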
570
571# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
572
573IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
574{
575 uint32_t uDst = *puDst;
576 uint32_t uResult = uDst - uSrc;
577 *puDst = uResult;
578 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst < uSrc, uSrc ^ RT_BIT_32(31));
579}
580
581
582IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
583{
584 uint16_t uDst = *puDst;
585 uint16_t uResult = uDst - uSrc;
586 *puDst = uResult;
587 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst < uSrc, uSrc ^ (uint16_t)0x8000);
588}
589
590
591IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
592{
593 uint8_t uDst = *puDst;
594 uint8_t uResult = uDst - uSrc;
595 *puDst = uResult;
596 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst < uSrc, uSrc ^ (uint8_t)0x80);
597}
598
599# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
600
601/*
602 * SBB
603 */
604
605IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
606{
607 if (!(*pfEFlags & X86_EFL_CF))
608 iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
609 else
610 {
611 uint64_t uDst = *puDst;
612 uint64_t uResult = uDst - uSrc - 1;
613 *puDst = uResult;
614 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst <= uSrc, uSrc ^ RT_BIT_64(63));
615 }
616}
617
618# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
619
620IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
621{
622 if (!(*pfEFlags & X86_EFL_CF))
623 iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
624 else
625 {
626 uint32_t uDst = *puDst;
627 uint32_t uResult = uDst - uSrc - 1;
628 *puDst = uResult;
629 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst <= uSrc, uSrc ^ RT_BIT_32(31));
630 }
631}
632
633
634IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
635{
636 if (!(*pfEFlags & X86_EFL_CF))
637 iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
638 else
639 {
640 uint16_t uDst = *puDst;
641 uint16_t uResult = uDst - uSrc - 1;
642 *puDst = uResult;
643 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst <= uSrc, uSrc ^ (uint16_t)0x8000);
644 }
645}
646
647
648IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
649{
650 if (!(*pfEFlags & X86_EFL_CF))
651 iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
652 else
653 {
654 uint8_t uDst = *puDst;
655 uint8_t uResult = uDst - uSrc - 1;
656 *puDst = uResult;
657 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst <= uSrc, uSrc ^ (uint8_t)0x80);
658 }
659}
660
661# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
662
663
664/*
665 * OR
666 */
667
668IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
669{
670 uint64_t uResult = *puDst | uSrc;
671 *puDst = uResult;
672 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
673}
674
675# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
676
677IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
678{
679 uint32_t uResult = *puDst | uSrc;
680 *puDst = uResult;
681 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
682}
683
684
685IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
686{
687 uint16_t uResult = *puDst | uSrc;
688 *puDst = uResult;
689 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
690}
691
692
693IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
694{
695 uint8_t uResult = *puDst | uSrc;
696 *puDst = uResult;
697 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
698}
699
700# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
701
702/*
703 * XOR
704 */
705
706IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
707{
708 uint64_t uResult = *puDst ^ uSrc;
709 *puDst = uResult;
710 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
711}
712
713# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
714
715IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
716{
717 uint32_t uResult = *puDst ^ uSrc;
718 *puDst = uResult;
719 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
720}
721
722
723IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
724{
725 uint16_t uResult = *puDst ^ uSrc;
726 *puDst = uResult;
727 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
728}
729
730
731IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
732{
733 uint8_t uResult = *puDst ^ uSrc;
734 *puDst = uResult;
735 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
736}
737
738# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
739
740/*
741 * AND
742 */
743
744IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
745{
746 uint64_t uResult = *puDst & uSrc;
747 *puDst = uResult;
748 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
749}
750
751# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
752
753IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
754{
755 uint32_t uResult = *puDst & uSrc;
756 *puDst = uResult;
757 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
758}
759
760
761IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
762{
763 uint16_t uResult = *puDst & uSrc;
764 *puDst = uResult;
765 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
766}
767
768
769IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
770{
771 uint8_t uResult = *puDst & uSrc;
772 *puDst = uResult;
773 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
774}
775
776# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
777
778/*
779 * CMP
780 */
781
782IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
783{
784 uint64_t uDstTmp = *puDst;
785 iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
786}
787
788# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
789
790IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
791{
792 uint32_t uDstTmp = *puDst;
793 iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
794}
795
796
797IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
798{
799 uint16_t uDstTmp = *puDst;
800 iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
801}
802
803
804IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
805{
806 uint8_t uDstTmp = *puDst;
807 iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
808}
809
810# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
811
812/*
813 * TEST
814 */
815
816IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
817{
818 uint64_t uResult = *puDst & uSrc;
819 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
820}
821
822# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
823
824IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
825{
826 uint32_t uResult = *puDst & uSrc;
827 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
828}
829
830
831IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
832{
833 uint16_t uResult = *puDst & uSrc;
834 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
835}
836
837
838IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
839{
840 uint8_t uResult = *puDst & uSrc;
841 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
842}
843
844# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
845
846
847/*
848 * LOCK prefixed variants of the above
849 */
850
851/** Locked binary operand operation; the width is given by a_cBitsWidth. */
852# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
853 do { \
854 uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
855 uint ## a_cBitsWidth ## _t uTmp; \
856 uint32_t fEflTmp; \
857 do \
858 { \
859 uTmp = uOld; \
860 fEflTmp = *pfEFlags; \
861 iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, uSrc, &fEflTmp); \
862 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
863 *pfEFlags = fEflTmp; \
864 } while (0)
865
866
867#define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
868 IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
869 uint ## a_cBitsWidth ## _t uSrc, \
870 uint32_t *pfEFlags)) \
871 { \
872 DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
873 }
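
/*
 * For reference, a hand-expanded sketch of what EMIT_LOCKED_BIN_OP(add, 64)
 * below produces (the real thing comes from the preprocessor; shown here
 * only to illustrate the compare-exchange retry pattern):
 * @code
 *   IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
 *   {
 *       uint64_t uOld = ASMAtomicUoReadU64(puDst);
 *       uint64_t uTmp;
 *       uint32_t fEflTmp;
 *       do
 *       {
 *           uTmp    = uOld;                      // redo the op on the current value
 *           fEflTmp = *pfEFlags;                 // ... with fresh input flags
 *           iemAImpl_add_u64(&uTmp, uSrc, &fEflTmp);
 *       } while (!ASMAtomicCmpXchgExU64(puDst, uTmp, uOld, &uOld)); // uOld reloaded on failure
 *       *pfEFlags = fEflTmp;
 *   }
 * @endcode
 */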
874
875EMIT_LOCKED_BIN_OP(add, 64)
876EMIT_LOCKED_BIN_OP(adc, 64)
877EMIT_LOCKED_BIN_OP(sub, 64)
878EMIT_LOCKED_BIN_OP(sbb, 64)
879EMIT_LOCKED_BIN_OP(or, 64)
880EMIT_LOCKED_BIN_OP(xor, 64)
881EMIT_LOCKED_BIN_OP(and, 64)
882# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
883EMIT_LOCKED_BIN_OP(add, 32)
884EMIT_LOCKED_BIN_OP(adc, 32)
885EMIT_LOCKED_BIN_OP(sub, 32)
886EMIT_LOCKED_BIN_OP(sbb, 32)
887EMIT_LOCKED_BIN_OP(or, 32)
888EMIT_LOCKED_BIN_OP(xor, 32)
889EMIT_LOCKED_BIN_OP(and, 32)
890
891EMIT_LOCKED_BIN_OP(add, 16)
892EMIT_LOCKED_BIN_OP(adc, 16)
893EMIT_LOCKED_BIN_OP(sub, 16)
894EMIT_LOCKED_BIN_OP(sbb, 16)
895EMIT_LOCKED_BIN_OP(or, 16)
896EMIT_LOCKED_BIN_OP(xor, 16)
897EMIT_LOCKED_BIN_OP(and, 16)
898
899EMIT_LOCKED_BIN_OP(add, 8)
900EMIT_LOCKED_BIN_OP(adc, 8)
901EMIT_LOCKED_BIN_OP(sub, 8)
902EMIT_LOCKED_BIN_OP(sbb, 8)
903EMIT_LOCKED_BIN_OP(or, 8)
904EMIT_LOCKED_BIN_OP(xor, 8)
905EMIT_LOCKED_BIN_OP(and, 8)
906# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
907
908
909/*
910 * Bit operations (same signature as above).
911 */
912
913/*
914 * BT
915 */
916
917IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
918{
919 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
920 not modified by either AMD (3990x) or Intel (i9-9980HK). */
921 Assert(uSrc < 64);
922 uint64_t uDst = *puDst;
923 if (uDst & RT_BIT_64(uSrc))
924 *pfEFlags |= X86_EFL_CF;
925 else
926 *pfEFlags &= ~X86_EFL_CF;
927}
928
929# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
930
931IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
932{
933 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
934 not modified by either AMD (3990x) or Intel (i9-9980HK). */
935 Assert(uSrc < 32);
936 uint32_t uDst = *puDst;
937 if (uDst & RT_BIT_32(uSrc))
938 *pfEFlags |= X86_EFL_CF;
939 else
940 *pfEFlags &= ~X86_EFL_CF;
941}
942
943IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
944{
945 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
946 not modified by either AMD (3990x) or Intel (i9-9980HK). */
947 Assert(uSrc < 16);
948 uint16_t uDst = *puDst;
949 if (uDst & RT_BIT_32(uSrc))
950 *pfEFlags |= X86_EFL_CF;
951 else
952 *pfEFlags &= ~X86_EFL_CF;
953}
954
955# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
956
957/*
958 * BTC
959 */
960
961IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
962{
963 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
964 not modified by either AMD (3990x) or Intel (i9-9980HK). */
965 Assert(uSrc < 64);
966 uint64_t fMask = RT_BIT_64(uSrc);
967 uint64_t uDst = *puDst;
968 if (uDst & fMask)
969 {
970 uDst &= ~fMask;
971 *puDst = uDst;
972 *pfEFlags |= X86_EFL_CF;
973 }
974 else
975 {
976 uDst |= fMask;
977 *puDst = uDst;
978 *pfEFlags &= ~X86_EFL_CF;
979 }
980}
981
982# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
983
984IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
985{
986 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
987 not modified by either AMD (3990x) or Intel (i9-9980HK). */
988 Assert(uSrc < 32);
989 uint32_t fMask = RT_BIT_32(uSrc);
990 uint32_t uDst = *puDst;
991 if (uDst & fMask)
992 {
993 uDst &= ~fMask;
994 *puDst = uDst;
995 *pfEFlags |= X86_EFL_CF;
996 }
997 else
998 {
999 uDst |= fMask;
1000 *puDst = uDst;
1001 *pfEFlags &= ~X86_EFL_CF;
1002 }
1003}
1004
1005
1006IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1007{
1008 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1009 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1010 Assert(uSrc < 16);
1011 uint16_t fMask = RT_BIT_32(uSrc);
1012 uint16_t uDst = *puDst;
1013 if (uDst & fMask)
1014 {
1015 uDst &= ~fMask;
1016 *puDst = uDst;
1017 *pfEFlags |= X86_EFL_CF;
1018 }
1019 else
1020 {
1021 uDst |= fMask;
1022 *puDst = uDst;
1023 *pfEFlags &= ~X86_EFL_CF;
1024 }
1025}
1026
1027# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1028
1029/*
1030 * BTR
1031 */
1032
1033IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1034{
1035 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1036 logical operation (AND/OR/whatever). */
1037 Assert(uSrc < 64);
1038 uint64_t fMask = RT_BIT_64(uSrc);
1039 uint64_t uDst = *puDst;
1040 if (uDst & fMask)
1041 {
1042 uDst &= ~fMask;
1043 *puDst = uDst;
1044 *pfEFlags |= X86_EFL_CF;
1045 }
1046 else
1047 *pfEFlags &= ~X86_EFL_CF;
1048}
1049
1050# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1051
1052IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1053{
1054 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1055 logical operation (AND/OR/whatever). */
1056 Assert(uSrc < 32);
1057 uint32_t fMask = RT_BIT_32(uSrc);
1058 uint32_t uDst = *puDst;
1059 if (uDst & fMask)
1060 {
1061 uDst &= ~fMask;
1062 *puDst = uDst;
1063 *pfEFlags |= X86_EFL_CF;
1064 }
1065 else
1066 *pfEFlags &= ~X86_EFL_CF;
1067}
1068
1069
1070IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1071{
1072 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1073 logical operation (AND/OR/whatever). */
1074 Assert(uSrc < 16);
1075 uint16_t fMask = RT_BIT_32(uSrc);
1076 uint16_t uDst = *puDst;
1077 if (uDst & fMask)
1078 {
1079 uDst &= ~fMask;
1080 *puDst = uDst;
1081 *pfEFlags |= X86_EFL_CF;
1082 }
1083 else
1084 *pfEFlags &= ~X86_EFL_CF;
1085}
1086
1087# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1088
1089/*
1090 * BTS
1091 */
1092
1093IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1094{
1095 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1096 logical operation (AND/OR/whatever). */
1097 Assert(uSrc < 64);
1098 uint64_t fMask = RT_BIT_64(uSrc);
1099 uint64_t uDst = *puDst;
1100 if (uDst & fMask)
1101 *pfEFlags |= X86_EFL_CF;
1102 else
1103 {
1104 uDst |= fMask;
1105 *puDst = uDst;
1106 *pfEFlags &= ~X86_EFL_CF;
1107 }
1108}
1109
1110# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1111
1112IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1113{
1114 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1115 logical operation (AND/OR/whatever). */
1116 Assert(uSrc < 32);
1117 uint32_t fMask = RT_BIT_32(uSrc);
1118 uint32_t uDst = *puDst;
1119 if (uDst & fMask)
1120 *pfEFlags |= X86_EFL_CF;
1121 else
1122 {
1123 uDst |= fMask;
1124 *puDst = uDst;
1125 *pfEFlags &= ~X86_EFL_CF;
1126 }
1127}
1128
1129
1130IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1131{
1132 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1133 logical operation (AND/OR/whatever). */
1134 Assert(uSrc < 16);
1135 uint16_t fMask = RT_BIT_32(uSrc);
1136 uint16_t uDst = *puDst;
1137 if (uDst & fMask)
1138 *pfEFlags |= X86_EFL_CF;
1139 else
1140 {
1141 uDst |= fMask;
1142 *puDst = uDst;
1143 *pfEFlags &= ~X86_EFL_CF;
1144 }
1145}
1146
1147# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1148
1149
1150EMIT_LOCKED_BIN_OP(btc, 64)
1151EMIT_LOCKED_BIN_OP(btr, 64)
1152EMIT_LOCKED_BIN_OP(bts, 64)
1153# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1154EMIT_LOCKED_BIN_OP(btc, 32)
1155EMIT_LOCKED_BIN_OP(btr, 32)
1156EMIT_LOCKED_BIN_OP(bts, 32)
1157
1158EMIT_LOCKED_BIN_OP(btc, 16)
1159EMIT_LOCKED_BIN_OP(btr, 16)
1160EMIT_LOCKED_BIN_OP(bts, 16)
1161# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1162
1163
1164/*
1165 * Helpers for BSR and BSF.
1166 *
1167 * Note! "undefined" flags: OF, SF, AF, PF, CF.
1168 * Intel behavior modelled on 10980XE, AMD on 3990X. Other microarchitectures may
1169 * produce different results (see https://www.sandpile.org/x86/flags.htm),
1170 * but we restrict ourselves to emulating these recent microarchitectures.
1171 */
1172#define SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, a_iBit) do { \
1173 unsigned iBit = (a_iBit); \
1174 uint32_t fEfl = *pfEFlags & ~(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF); \
1175 if (iBit) \
1176 { \
1177 *puDst = --iBit; \
1178 fEfl |= g_afParity[iBit]; \
1179 } \
1180 else \
1181 fEfl |= X86_EFL_ZF | X86_EFL_PF; \
1182 *pfEFlags = fEfl; \
1183 } while (0)
1184#define SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, a_iBit) do { \
1185 unsigned const iBit = (a_iBit); \
1186 if (iBit) \
1187 { \
1188 *puDst = iBit - 1; \
1189 *pfEFlags &= ~X86_EFL_ZF; \
1190 } \
1191 else \
1192 *pfEFlags |= X86_EFL_ZF; \
1193 } while (0)
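
/*
 * The practical difference between the two variants as modelled here
 * (illustrative): on a zero input both leave the destination untouched, but
 * Intel clears the other status flags and sets ZF and PF while AMD merely
 * sets ZF; on a non-zero input Intel recomputes PF from the bit index and
 * clears the rest, while AMD only clears ZF and leaves the other flags as-is.
 * @code
 *   uint32_t uDst = 0xdeadbeef;
 *   uint32_t fEfl = 0;
 *   iemAImpl_bsf_u32_intel(&uDst, 0, &fEfl);  // uDst untouched; fEfl == ZF | PF
 *   fEfl = 0;
 *   iemAImpl_bsf_u32_amd(&uDst, 0, &fEfl);    // uDst untouched; fEfl == ZF
 * @endcode
 */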
1194
1195
1196/*
1197 * BSF - first (least significant) bit set
1198 */
1199IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1200{
1201 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
1202}
1203
1204IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1205{
1206 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
1207}
1208
1209IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1210{
1211 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
1212}
1213
1214# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1215
1216IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1217{
1218 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
1219}
1220
1221IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1222{
1223 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
1224}
1225
1226IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1227{
1228 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
1229}
1230
1231
1232IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1233{
1234 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
1235}
1236
1237IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1238{
1239 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
1240}
1241
1242IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1243{
1244 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
1245}
1246
1247# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1248
1249
1250/*
1251 * BSR - last (most significant) bit set
1252 */
1253IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1254{
1255 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
1256}
1257
1258IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1259{
1260 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
1261}
1262
1263IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1264{
1265 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
1266}
1267
1268# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1269
1270IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1271{
1272 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
1273}
1274
1275IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1276{
1277 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
1278}
1279
1280IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1281{
1282 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
1283}
1284
1285
1286IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1287{
1288 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
1289}
1290
1291IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1292{
1293 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
1294}
1295
1296IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1297{
1298 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
1299}
1300
1301# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1302
1303
1304/*
1305 * XCHG
1306 */
1307
1308IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_locked,(uint64_t *puMem, uint64_t *puReg))
1309{
1310#if ARCH_BITS >= 64
1311 *puReg = ASMAtomicXchgU64(puMem, *puReg);
1312#else
1313 uint64_t uOldMem = *puMem;
1314 while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
1315 ASMNopPause();
1316 *puReg = uOldMem;
1317#endif
1318}
1319
1320# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1321
1322IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_locked,(uint32_t *puMem, uint32_t *puReg))
1323{
1324 *puReg = ASMAtomicXchgU32(puMem, *puReg);
1325}
1326
1327
1328IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_locked,(uint16_t *puMem, uint16_t *puReg))
1329{
1330 *puReg = ASMAtomicXchgU16(puMem, *puReg);
1331}
1332
1333
1334IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_locked,(uint8_t *puMem, uint8_t *puReg))
1335{
1336 *puReg = ASMAtomicXchgU8(puMem, *puReg);
1337}
1338
1339# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1340
1341
1342/* Unlocked variants for fDisregardLock mode: */
1343
1344IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_unlocked,(uint64_t *puMem, uint64_t *puReg))
1345{
1346 uint64_t const uOld = *puMem;
1347 *puMem = *puReg;
1348 *puReg = uOld;
1349}
1350
1351# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1352
1353IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_unlocked,(uint32_t *puMem, uint32_t *puReg))
1354{
1355 uint32_t const uOld = *puMem;
1356 *puMem = *puReg;
1357 *puReg = uOld;
1358}
1359
1360
1361IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_unlocked,(uint16_t *puMem, uint16_t *puReg))
1362{
1363 uint16_t const uOld = *puMem;
1364 *puMem = *puReg;
1365 *puReg = uOld;
1366}
1367
1368
1369IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_unlocked,(uint8_t *puMem, uint8_t *puReg))
1370{
1371 uint8_t const uOld = *puMem;
1372 *puMem = *puReg;
1373 *puReg = uOld;
1374}
1375
1376# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1377
1378
1379/*
1380 * XADD and LOCK XADD.
1381 */
1382#define EMIT_XADD(a_cBitsWidth, a_Type) \
1383IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
1384{ \
1385 a_Type uDst = *puDst; \
1386 a_Type uResult = uDst; \
1387 iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, pfEFlags); \
1388 *puDst = uResult; \
1389 *puReg = uDst; \
1390} \
1391\
1392IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth ## _locked,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
1393{ \
1394 a_Type uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
1395 a_Type uResult; \
1396 uint32_t fEflTmp; \
1397 do \
1398 { \
1399 uResult = uOld; \
1400 fEflTmp = *pfEFlags; \
1401 iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, &fEflTmp); \
1402 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uResult, uOld, &uOld)); \
1403 *puReg = uOld; \
1404 *pfEFlags = fEflTmp; \
1405}
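
/*
 * XADD in a nutshell (illustrative): the destination receives the sum and
 * the register operand receives the old destination value, which is what
 * makes the instruction useful for fetch-and-add constructs:
 * @code
 *   uint32_t uMem = 10, uReg = 3, fEfl = 0;
 *   iemAImpl_xadd_u32(&uMem, &uReg, &fEfl);
 *   // uMem == 13, uReg == 10 (the pre-add destination value)
 * @endcode
 */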
1406EMIT_XADD(64, uint64_t)
1407# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1408EMIT_XADD(32, uint32_t)
1409EMIT_XADD(16, uint16_t)
1410EMIT_XADD(8, uint8_t)
1411# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1412
1413#endif
1414
1415/*
1416 * CMPXCHG, CMPXCHG8B, CMPXCHG16B
1417 *
1418 * Note! We don't have non-locking (non-atomic) cmpxchg primitives, so all cmpxchg
1419 * instructions are emulated as locked.
1420 */
1421#if defined(IEM_WITHOUT_ASSEMBLY)
1422
1423IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8_locked, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
1424{
1425 uint8_t uOld = *puAl;
1426 if (ASMAtomicCmpXchgExU8(pu8Dst, uSrcReg, uOld, puAl))
1427 Assert(*puAl == uOld);
1428 iemAImpl_cmp_u8(&uOld, *puAl, pEFlags);
1429}
1430
1431
1432IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16_locked,(uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
1433{
1434 uint16_t uOld = *puAx;
1435 if (ASMAtomicCmpXchgExU16(pu16Dst, uSrcReg, uOld, puAx))
1436 Assert(*puAx == uOld);
1437 iemAImpl_cmp_u16(&uOld, *puAx, pEFlags);
1438}
1439
1440
1441IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32_locked,(uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
1442{
1443 uint32_t uOld = *puEax;
1444 if (ASMAtomicCmpXchgExU32(pu32Dst, uSrcReg, uOld, puEax))
1445 Assert(*puEax == uOld);
1446 iemAImpl_cmp_u32(&uOld, *puEax, pEFlags);
1447}
1448
1449
1450# if ARCH_BITS == 32
1451IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
1452# else
1453IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
1454# endif
1455{
1456# if ARCH_BITS == 32
1457 uint64_t const uSrcReg = *puSrcReg;
1458# endif
1459 uint64_t uOld = *puRax;
1460 if (ASMAtomicCmpXchgExU64(pu64Dst, uSrcReg, uOld, puRax))
1461 Assert(*puRax == uOld);
1462 iemAImpl_cmp_u64(&uOld, *puRax, pEFlags);
1463}
1464
1465
1466IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b_locked,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
1467 uint32_t *pEFlags))
1468{
1469 uint64_t const uNew = pu64EbxEcx->u;
1470 uint64_t const uOld = pu64EaxEdx->u;
1471 if (ASMAtomicCmpXchgExU64(pu64Dst, uNew, uOld, &pu64EaxEdx->u))
1472 {
1473 Assert(pu64EaxEdx->u == uOld);
1474 *pEFlags |= X86_EFL_ZF;
1475 }
1476 else
1477 *pEFlags &= ~X86_EFL_ZF;
1478}
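
/*
 * CMPXCHG8B semantics as implemented above (illustrative): the 64-bit
 * destination is replaced by EBX:ECX only when it equals EDX:EAX, with ZF
 * reporting the outcome; on failure ASMAtomicCmpXchgExU64 writes the current
 * memory value back into EDX:EAX via &pu64EaxEdx->u.
 * @code
 *   uint64_t  u64Mem = 42;
 *   RTUINT64U uEaxEdx; uEaxEdx.u = 42;  // matches, so the swap happens
 *   RTUINT64U uEbxEcx; uEbxEcx.u = 99;
 *   uint32_t  fEfl = 0;
 *   iemAImpl_cmpxchg8b_locked(&u64Mem, &uEaxEdx, &uEbxEcx, &fEfl);
 *   // u64Mem == 99 and ZF is set; had u64Mem differed, uEaxEdx.u would now
 *   // hold the memory value and ZF would be clear.
 * @endcode
 */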
1479
1480
1481# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
1482IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1483 uint32_t *pEFlags))
1484{
1485# ifdef VBOX_STRICT
1486 RTUINT128U const uOld = *pu128RaxRdx;
1487# endif
1488# if defined(RT_ARCH_AMD64)
1489 if (ASMAtomicCmpXchgU128v2(&pu128Dst->u, pu128RbxRcx->s.Hi, pu128RbxRcx->s.Lo, pu128RaxRdx->s.Hi, pu128RaxRdx->s.Lo,
1490 &pu128RaxRdx->u))
1491# else
1492 if (ASMAtomicCmpXchgU128(&pu128Dst->u, pu128RbxRcx->u, pu128RaxRdx->u, &pu128RaxRdx->u))
1493# endif
1494 {
1495 Assert(pu128RaxRdx->s.Lo == uOld.s.Lo && pu128RaxRdx->s.Hi == uOld.s.Hi);
1496 *pEFlags |= X86_EFL_ZF;
1497 }
1498 else
1499 *pEFlags &= ~X86_EFL_ZF;
1500}
1501# endif
1502
1503#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1504
1505# if !defined(RT_ARCH_ARM64) /** @todo may need this for unaligned accesses... */
1506IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
1507 PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
1508{
1509 RTUINT128U u128Tmp = *pu128Dst;
1510 if ( u128Tmp.s.Lo == pu128RaxRdx->s.Lo
1511 && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
1512 {
1513 *pu128Dst = *pu128RbxRcx;
1514 *pEFlags |= X86_EFL_ZF;
1515 }
1516 else
1517 {
1518 *pu128RaxRdx = u128Tmp;
1519 *pEFlags &= ~X86_EFL_ZF;
1520 }
1521}
1522#endif /* !RT_ARCH_ARM64 */
1523
1524#if defined(IEM_WITHOUT_ASSEMBLY)
1525
1526/* Unlocked versions mapped to the locked ones: */
1527
1528IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
1529{
1530 iemAImpl_cmpxchg_u8_locked(pu8Dst, puAl, uSrcReg, pEFlags);
1531}
1532
1533
1534IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16, (uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
1535{
1536 iemAImpl_cmpxchg_u16_locked(pu16Dst, puAx, uSrcReg, pEFlags);
1537}
1538
1539
1540IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32, (uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
1541{
1542 iemAImpl_cmpxchg_u32_locked(pu32Dst, puEax, uSrcReg, pEFlags);
1543}
1544
1545
1546# if ARCH_BITS == 32
1547IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
1548{
1549 iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, puSrcReg, pEFlags);
1550}
1551# else
1552IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
1553{
1554 iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, uSrcReg, pEFlags);
1555}
1556# endif
1557
1558
1559IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx, uint32_t *pEFlags))
1560{
1561 iemAImpl_cmpxchg8b_locked(pu64Dst, pu64EaxEdx, pu64EbxEcx, pEFlags);
1562}
1563
1564
1565IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1566 uint32_t *pEFlags))
1567{
1568 iemAImpl_cmpxchg16b_locked(pu128Dst, pu128RaxRdx, pu128RbxRcx, pEFlags);
1569}
1570
1571#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1572
1573#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
1574
1575/*
1576 * MUL, IMUL, DIV and IDIV helpers.
1577 *
1578 * - The U64 versions must use 128-bit intermediates, so we need to abstract the
1579 * division step so we can select between using C operators and
1580 * RTUInt128DivRem/RTUInt128MulU64ByU64.
1581 *
1582 * - The U8 versions return output in AL + AH instead of xDX + xAX, with the
1583 * IDIV/DIV taking all the input in AX too. This means we have to abstract some
1584 * input loads and the result storing.
1585 */
1586
1587DECLINLINE(void) RTUInt128DivRemByU64(PRTUINT128U pQuotient, PRTUINT128U pRemainder, PCRTUINT128U pDividend, uint64_t u64Divisor)
1588{
1589# ifdef __GNUC__ /* GCC can otherwise be really annoying about maybe-uninitialized use here. */
1590 pQuotient->s.Lo = 0;
1591 pQuotient->s.Hi = 0;
1592# endif
1593 RTUINT128U Divisor;
1594 Divisor.s.Lo = u64Divisor;
1595 Divisor.s.Hi = 0;
1596 RTUInt128DivRem(pQuotient, pRemainder, pDividend, &Divisor);
1597}
1598
1599# define DIV_LOAD(a_Dividend) \
1600 a_Dividend.s.Lo = *puA, a_Dividend.s.Hi = *puD
1601# define DIV_LOAD_U8(a_Dividend) \
1602 a_Dividend.u = *puAX
1603
1604# define DIV_STORE(a_Quotient, a_uRemainder) *puA = (a_Quotient), *puD = (a_uRemainder)
1605# define DIV_STORE_U8(a_Quotient, a_uRemainder) *puAX = (uint8_t)(a_Quotient) | ((uint16_t)(a_uRemainder) << 8)
1606
1607# define MUL_LOAD_F1() *puA
1608# define MUL_LOAD_F1_U8() ((uint8_t)*puAX)
1609
1610# define MUL_STORE(a_Result) *puA = (a_Result).s.Lo, *puD = (a_Result).s.Hi
1611# define MUL_STORE_U8(a_Result) *puAX = a_Result.u
1612
1613# define MULDIV_NEG(a_Value, a_cBitsWidth2x) \
1614 (a_Value).u = UINT ## a_cBitsWidth2x ## _C(0) - (a_Value).u
1615# define MULDIV_NEG_U128(a_Value, a_cBitsWidth2x) \
1616 RTUInt128AssignNeg(&(a_Value))
1617
1618# define MULDIV_MUL(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
1619 (a_Result).u = (uint ## a_cBitsWidth2x ## _t)(a_Factor1) * (a_Factor2)
1620# define MULDIV_MUL_U128(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
1621 RTUInt128MulU64ByU64(&(a_Result), a_Factor1, a_Factor2);
1622
1623# define MULDIV_MODDIV(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
1624 a_Quotient.u = (a_Dividend).u / (a_uDivisor), \
1625 a_Remainder.u = (a_Dividend).u % (a_uDivisor)
1626# define MULDIV_MODDIV_U128(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
1627 RTUInt128DivRemByU64(&a_Quotient, &a_Remainder, &a_Dividend, a_uDivisor)
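
/*
 * How these load/store/op abstractions compose, hand-expanded for the 8-bit
 * MUL case (a sketch of what EMIT_MUL(8, 16, ...) further below generates;
 * AL is the implicit factor and AX receives the product):
 * @code
 *   RTUINT16U Result;
 *   Result.u = (uint16_t)((uint8_t)*puAX * uFactor);  // MUL_LOAD_F1_U8 + MULDIV_MUL
 *   *puAX = Result.u;                                 // MUL_STORE_U8
 * @endcode
 */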
1628
1629
1630/*
1631 * MUL
1632 */
1633# define EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, a_Suffix, a_fIntelFlags) \
1634IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_mul_u,a_cBitsWidth,a_Suffix), a_Args) \
1635{ \
1636 RTUINT ## a_cBitsWidth2x ## U Result; \
1637 a_fnMul(Result, a_fnLoadF1(), uFactor, a_cBitsWidth2x); \
1638 a_fnStore(Result); \
1639 \
1640 /* Calc EFLAGS: */ \
1641 uint32_t fEfl = *pfEFlags; \
1642 if (a_fIntelFlags) \
1643 { /* Intel: 6700K and 10980XE behavior */ \
1644 fEfl &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF); \
1645 if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
1646 fEfl |= X86_EFL_SF; \
1647 fEfl |= g_afParity[Result.s.Lo & 0xff]; \
1648 if (Result.s.Hi != 0) \
1649 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1650 } \
1651 else \
1652 { /* AMD: 3990X */ \
1653 if (Result.s.Hi != 0) \
1654 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1655 else \
1656 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
1657 } \
1658 *pfEFlags = fEfl; \
1659 return 0; \
1660} \
1661
1662# define EMIT_MUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul) \
1663 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, RT_NOTHING, 1) \
1664 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _intel, 1) \
1665 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _amd, 0) \
1666
1667EMIT_MUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1668 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL_U128)
1669# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1670EMIT_MUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1671 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
1672EMIT_MUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1673 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
1674EMIT_MUL(8, 16, (uint16_t *puAX, uint8_t uFactor, uint32_t *pfEFlags), (puAX, uFactor, pfEFlags),
1675 MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_MUL)
1676# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1677
1678
1679/*
1680 * IMUL
1681 *
1682 * The SF, ZF, AF and PF flags are "undefined". AMD (3990X) leaves these
1683 * flags as is, whereas Intel Skylake (6700K) and Cascade Lake (10980XE) always
1684 * clear AF and ZF and calculate SF and PF from the lower half of the result.
1685 */
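
/*
 * The CF/OF rule implemented below, stated once (illustrative 8-bit cases):
 * CF and OF are set if and only if the full double-width product is not the
 * sign extension of its lower half.
 * @code
 *   (int8_t)100  * (int8_t)2   // =  200 = 0x00c8: 0xc8 sign-extends to 0xffc8 -> CF=OF=1
 *   (int8_t)-100 * (int8_t)2   // = -200: does not fit in int8_t            -> CF=OF=1
 *   (int8_t)10   * (int8_t)-3  // =  -30 = 0xffe2: proper sign extension    -> CF=OF=0
 * @endcode
 */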
1686# define EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, \
1687 a_Suffix, a_fIntelFlags) \
1688IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_imul_u,a_cBitsWidth,a_Suffix),a_Args) \
1689{ \
1690 RTUINT ## a_cBitsWidth2x ## U Result; \
1691 uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF | X86_EFL_OF); \
1692 \
1693 uint ## a_cBitsWidth ## _t const uFactor1 = a_fnLoadF1(); \
1694 if (!(uFactor1 & RT_BIT_64(a_cBitsWidth - 1))) \
1695 { \
1696 if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
1697 { \
1698 a_fnMul(Result, uFactor1, uFactor2, a_cBitsWidth2x); \
1699 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
1700 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1701 } \
1702 else \
1703 { \
1704 uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
1705 a_fnMul(Result, uFactor1, uPositiveFactor2, a_cBitsWidth2x); \
1706 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
1707 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1708 a_fnNeg(Result, a_cBitsWidth2x); \
1709 } \
1710 } \
1711 else \
1712 { \
1713 if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
1714 { \
1715 uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
1716 a_fnMul(Result, uPositiveFactor1, uFactor2, a_cBitsWidth2x); \
1717 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
1718 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1719 a_fnNeg(Result, a_cBitsWidth2x); \
1720 } \
1721 else \
1722 { \
1723 uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
1724 uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
1725 a_fnMul(Result, uPositiveFactor1, uPositiveFactor2, a_cBitsWidth2x); \
1726 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
1727 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1728 } \
1729 } \
1730 a_fnStore(Result); \
1731 \
1732 if (a_fIntelFlags) \
1733 { \
1734 fEfl &= ~(X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_PF); \
1735 if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
1736 fEfl |= X86_EFL_SF; \
1737 fEfl |= g_afParity[Result.s.Lo & 0xff]; \
1738 } \
1739 *pfEFlags = fEfl; \
1740 return 0; \
1741}
1742# define EMIT_IMUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul) \
1743 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, RT_NOTHING, 1) \
1744 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _intel, 1) \
1745 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _amd, 0)
1746
1747EMIT_IMUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1748 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG_U128, MULDIV_MUL_U128)
1749# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1750EMIT_IMUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1751 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
1752EMIT_IMUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1753 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
1754EMIT_IMUL(8, 16, (uint16_t *puAX, uint8_t uFactor2, uint32_t *pfEFlags), (puAX, uFactor2, pfEFlags),
1755 MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_NEG, MULDIV_MUL)
1756# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
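
/*
 * Example (compiled out): 8-bit IMUL illustrating the sign handling above.
 * -64 * 4 = -256 does not fit into AL, so CF and OF are set; SF and PF then
 * differ per vendor as described.  Illustrative helper name only.
 */
# if 0
static void iemAImplSketchImulU8(void)
{
    uint16_t uAX  = 0x00c0;                     /* AL = 0xC0 = -64. */
    uint32_t fEfl = 0;
    iemAImpl_imul_u8(&uAX, 4, &fEfl);           /* -64 * 4 = -256 = 0xFF00. */
    Assert(uAX == 0xff00);                      /* AH is not a sign extension of AL... */
    Assert((fEfl & (X86_EFL_CF | X86_EFL_OF)) == (X86_EFL_CF | X86_EFL_OF)); /* ...so CF+OF. */
}
# endif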
1757
1758
1759/*
1760 * IMUL with two operands are mapped onto the three operand variant, ignoring
1761 * the high part of the product.
1762 */
1763# define EMIT_IMUL_TWO(a_cBits, a_uType) \
1764IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1765{ \
1766 a_uType uIgn; \
1767 iemAImpl_imul_u ## a_cBits(puDst, &uIgn, uSrc, pfEFlags); \
1768} \
1769\
1770IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _intel,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1771{ \
1772 a_uType uIgn; \
1773 iemAImpl_imul_u ## a_cBits ## _intel(puDst, &uIgn, uSrc, pfEFlags); \
1774} \
1775\
1776IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _amd,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1777{ \
1778 a_uType uIgn; \
1779 iemAImpl_imul_u ## a_cBits ## _amd(puDst, &uIgn, uSrc, pfEFlags); \
1780}
1781
1782EMIT_IMUL_TWO(64, uint64_t)
1783# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1784EMIT_IMUL_TWO(32, uint32_t)
1785EMIT_IMUL_TWO(16, uint16_t)
1786# endif
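
/*
 * Example (compiled out): the two operand form discards the high half of the
 * product but still reports the truncation via CF and OF.  Illustrative
 * helper name only.
 */
# if 0
static void iemAImplSketchImulTwoU16(void)
{
    uint16_t uDst = 0x1000;                     /* 4096 */
    uint32_t fEfl = 0;
    iemAImpl_imul_two_u16(&uDst, 0x10, &fEfl);  /* 4096 * 16 = 0x10000. */
    Assert(uDst == 0);                          /* High word (0x0001) discarded. */
    Assert((fEfl & (X86_EFL_CF | X86_EFL_OF)) == (X86_EFL_CF | X86_EFL_OF));
}
# endif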
1787
1788
1789/*
1790 * DIV
1791 */
1792# define EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, \
1793 a_Suffix, a_fIntelFlags) \
1794IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_div_u,a_cBitsWidth,a_Suffix),a_Args) \
1795{ \
1796 RTUINT ## a_cBitsWidth2x ## U Dividend; \
1797 a_fnLoad(Dividend); \
1798 if ( uDivisor != 0 \
1799 && Dividend.s.Hi < uDivisor) \
1800 { \
1801 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
1802 a_fnDivRem(Quotient, Remainder, Dividend, uDivisor); \
1803 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
1804 \
1805 /* Calc EFLAGS: Intel 6700K and 10980XE leave them alone. AMD 3990X sets AF and clears PF, ZF and SF. */ \
1806 if (!a_fIntelFlags) \
1807 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1808 return 0; \
1809 } \
1810 /* #DE */ \
1811 return -1; \
1812}
1813# define EMIT_DIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem) \
1814 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, RT_NOTHING, 1) \
1815 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _intel, 1) \
1816 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _amd, 0)
1817
1818EMIT_DIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1819 DIV_LOAD, DIV_STORE, MULDIV_MODDIV_U128)
1820# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1821EMIT_DIV(32,64, (uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1822 DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1823EMIT_DIV(16,32, (uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1824 DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1825EMIT_DIV(8,16, (uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
1826 DIV_LOAD_U8, DIV_STORE_U8, MULDIV_MODDIV)
1827# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
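
/*
 * Example (compiled out): 8-bit DIV.  The guard above (Dividend.s.Hi must be
 * below the divisor) is exactly the condition for the quotient to fit, so
 * the second call is a #DE case.  Illustrative helper name only.
 */
# if 0
static void iemAImplSketchDivU8(void)
{
    uint16_t uAX  = 0x0101;                     /* 257 */
    uint32_t fEfl = 0;
    int rc = iemAImpl_div_u8(&uAX, 2, &fEfl);   /* 257 / 2 = 128, remainder 1. */
    Assert(rc == 0 && uAX == 0x0180);           /* AH = remainder, AL = quotient. */

    uAX = 0x0200;                               /* AH = 2, not below the divisor 2... */
    rc = iemAImpl_div_u8(&uAX, 2, &fEfl);
    Assert(rc == -1);                           /* ...so the caller must raise #DE. */
}
# endif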
1828
1829
1830/*
1831 * IDIV
1832 *
1833 * EFLAGS are ignored and left as-is by Intel 6700K and 10980XE. AMD 3990X will
1834 * set AF and clear PF, ZF and SF just like it does for DIV.
1835 *
1836 */
1837# define EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, \
1838 a_Suffix, a_fIntelFlags) \
1839IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_idiv_u,a_cBitsWidth,a_Suffix),a_Args) \
1840{ \
1841 /* Note! Skylake leaves all flags alone. */ \
1842 \
1843 /** @todo overflow checks */ \
1844 if (uDivisor != 0) \
1845 { \
1846 /* \
1847 * Convert to unsigned division. \
1848 */ \
1849 RTUINT ## a_cBitsWidth2x ## U Dividend; \
1850 a_fnLoad(Dividend); \
1851 bool const fSignedDividend = RT_BOOL(Dividend.s.Hi & RT_BIT_64(a_cBitsWidth - 1)); \
1852 if (fSignedDividend) \
1853 a_fnNeg(Dividend, a_cBitsWidth2x); \
1854 \
1855 uint ## a_cBitsWidth ## _t uDivisorPositive; \
1856 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
1857 uDivisorPositive = uDivisor; \
1858 else \
1859 uDivisorPositive = UINT ## a_cBitsWidth ## _C(0) - uDivisor; \
1860 \
1861 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
1862 a_fnDivRem(Quotient, Remainder, Dividend, uDivisorPositive); \
1863 \
1864 /* \
1865 * Setup the result, checking for overflows. \
1866 */ \
1867 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
1868 { \
1869 if (!fSignedDividend) \
1870 { \
1871 /* Positive divisor, positive dividend => result positive. */ \
1872 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
1873 { \
1874 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
1875 if (!a_fIntelFlags) \
1876 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1877 return 0; \
1878 } \
1879 } \
1880 else \
1881 { \
1882 /* Positive divisor, negative dividend => result negative. */ \
1883 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
1884 { \
1885 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
1886 if (!a_fIntelFlags) \
1887 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1888 return 0; \
1889 } \
1890 } \
1891 } \
1892 else \
1893 { \
1894 if (!fSignedDividend) \
1895 { \
1896 /* Negative divisor, positive dividend => negative quotient, positive remainder. */ \
1897 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
1898 { \
1899 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, Remainder.s.Lo); \
1900 if (!a_fIntelFlags) \
1901 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1902 return 0; \
1903 } \
1904 } \
1905 else \
1906 { \
1907 /* Negative divisor, negative dividend => positive quotient, negative remainder. */ \
1908 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
1909 { \
1910 a_fnStore(Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
1911 if (!a_fIntelFlags) \
1912 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1913 return 0; \
1914 } \
1915 } \
1916 } \
1917 } \
1918 /* #DE */ \
1919 return -1; \
1920}
1921# define EMIT_IDIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem) \
1922 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, RT_NOTHING, 1) \
1923 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _intel, 1) \
1924 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _amd, 0)
1925
1926EMIT_IDIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1927 DIV_LOAD, DIV_STORE, MULDIV_NEG_U128, MULDIV_MODDIV_U128)
1928# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1929EMIT_IDIV(32,64,(uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1930 DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
1931EMIT_IDIV(16,32,(uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1932 DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
1933EMIT_IDIV(8,16,(uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
1934 DIV_LOAD_U8, DIV_STORE_U8, MULDIV_NEG, MULDIV_MODDIV)
1935# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
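
/*
 * Example (compiled out): 8-bit IDIV.  -200 / 10 yields -20 with remainder 0,
 * while dividing -32768 by -1 would need a quotient of +32768, which does not
 * fit in AL and is signalled as #DE.  Illustrative helper name only.
 */
# if 0
static void iemAImplSketchIdivU8(void)
{
    uint16_t uAX  = 0xff38;                     /* AX = -200. */
    uint32_t fEfl = 0;
    int rc = iemAImpl_idiv_u8(&uAX, 10, &fEfl); /* -200 / 10 = -20, remainder 0. */
    Assert(rc == 0 && uAX == 0x00ec);           /* AH = 0, AL = 0xEC = -20. */

    uAX = 0x8000;                               /* AX = -32768. */
    rc = iemAImpl_idiv_u8(&uAX, 0xff /*-1*/, &fEfl);
    Assert(rc == -1);                           /* Quotient overflow => #DE. */
}
# endif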
1936
1937
1938/*********************************************************************************************************************************
1939* Unary operations. *
1940*********************************************************************************************************************************/
1941
1942/**
1943 * Updates the status bits (PF, AF, ZF, SF, and OF) for an INC or DEC instruction.
1944 *
1945 * CF is NOT modified for hysterical raisins (allegedly for carrying and
1946 * borrowing in arithmetic loops on the Intel 8008).
1947 *
1948 * @returns Status bits.
1949 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
1950 * @param a_uResult Unsigned result value.
1951 * @param a_uDst The original destination value (for AF calc).
1952 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
1953 * @param a_OfMethod 0 for INC-style, 1 for DEC-style.
1954 */
1955#define IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth, a_OfMethod) \
1956 do { \
1957 uint32_t fEflTmp = *(a_pfEFlags); \
1958 fEflTmp &= ~X86_EFL_STATUS_BITS | X86_EFL_CF; \
1959 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
1960 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
1961 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
1962 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
1963 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(a_OfMethod == 0 ? (((a_uDst) ^ RT_BIT_64(a_cBitsWidth - 1)) & (a_uResult)) \
1964 : ((a_uDst) & ((a_uResult) ^ RT_BIT_64(a_cBitsWidth - 1))) ); \
1965 *(a_pfEFlags) = fEflTmp; \
1966 } while (0)
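
/*
 * Example (compiled out): INC wraps 0xFF to zero, setting ZF and AF, while CF
 * survives untouched as described above.  Illustrative helper name only.
 */
# if 0
static void iemAImplSketchIncU8(void)
{
    uint8_t  uDst = 0xff;
    uint32_t fEfl = X86_EFL_CF;                 /* Pretend CF was set beforehand. */
    iemAImpl_inc_u8(&uDst, &fEfl);
    Assert(uDst == 0);
    Assert(fEfl & X86_EFL_ZF);
    Assert(fEfl & X86_EFL_CF);                  /* CF is preserved by INC/DEC. */
}
# endif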
1967
1968/*
1969 * INC
1970 */
1971
1972IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
1973{
1974 uint64_t uDst = *puDst;
1975 uint64_t uResult = uDst + 1;
1976 *puDst = uResult;
1977 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 0 /*INC*/);
1978}
1979
1980# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1981
1982IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u32,(uint32_t *puDst, uint32_t *pfEFlags))
1983{
1984 uint32_t uDst = *puDst;
1985 uint32_t uResult = uDst + 1;
1986 *puDst = uResult;
1987 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 0 /*INC*/);
1988}
1989
1990
1991IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u16,(uint16_t *puDst, uint32_t *pfEFlags))
1992{
1993 uint16_t uDst = *puDst;
1994 uint16_t uResult = uDst + 1;
1995 *puDst = uResult;
1996 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 0 /*INC*/);
1997}
1998
1999IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2000{
2001 uint8_t uDst = *puDst;
2002 uint8_t uResult = uDst + 1;
2003 *puDst = uResult;
2004 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 0 /*INC*/);
2005}
2006
2007# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2008
2009
2010/*
2011 * DEC
2012 */
2013
2014IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2015{
2016 uint64_t uDst = *puDst;
2017 uint64_t uResult = uDst - 1;
2018 *puDst = uResult;
2019 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 1 /*DEC*/);
2020}
2021
2022# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2023
2024IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2025{
2026 uint32_t uDst = *puDst;
2027 uint32_t uResult = uDst - 1;
2028 *puDst = uResult;
2029 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 1 /*DEC*/);
2030}
2031
2032
2033IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2034{
2035 uint16_t uDst = *puDst;
2036 uint16_t uResult = uDst - 1;
2037 *puDst = uResult;
2038 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 1 /*DEC*/);
2039}
2040
2041
2042IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2043{
2044 uint8_t uDst = *puDst;
2045 uint8_t uResult = uDst - 1;
2046 *puDst = uResult;
2047 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 1 /*DEC*/);
2048}
2049
2050# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2051
2052
2053/*
2054 * NOT
2055 */
2056
2057IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2058{
2059 uint64_t uDst = *puDst;
2060 uint64_t uResult = ~uDst;
2061 *puDst = uResult;
2062 /* EFLAGS are not modified. */
2063 RT_NOREF_PV(pfEFlags);
2064}
2065
2066# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2067
2068IEM_DECL_IMPL_DEF(void, iemAImpl_not_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2069{
2070 uint32_t uDst = *puDst;
2071 uint32_t uResult = ~uDst;
2072 *puDst = uResult;
2073 /* EFLAGS are not modified. */
2074 RT_NOREF_PV(pfEFlags);
2075}
2076
2077IEM_DECL_IMPL_DEF(void, iemAImpl_not_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2078{
2079 uint16_t uDst = *puDst;
2080 uint16_t uResult = ~uDst;
2081 *puDst = uResult;
2082 /* EFLAGS are not modified. */
2083 RT_NOREF_PV(pfEFlags);
2084}
2085
2086IEM_DECL_IMPL_DEF(void, iemAImpl_not_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2087{
2088 uint8_t uDst = *puDst;
2089 uint8_t uResult = ~uDst;
2090 *puDst = uResult;
2091 /* EFLAGS are not modified. */
2092 RT_NOREF_PV(pfEFlags);
2093}
2094
2095# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2096
2097
2098/*
2099 * NEG
2100 */
2101
2102/**
2103 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for a NEG instruction.
2104 *
2105 * @returns Status bits.
2106 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2107 * @param a_uResult Unsigned result value.
2108 * @param a_uDst The original destination value (for AF calc).
2109 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2110 */
2111#define IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth) \
2112 do { \
2113 uint32_t fEflTmp = *(a_pfEFlags); \
2114 fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; \
2115 fEflTmp |= ((a_uDst) != 0) << X86_EFL_CF_BIT; \
2116 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2117 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2118 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2119 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2120 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_uDst) & (a_uResult)); \
2121 *(a_pfEFlags) = fEflTmp; \
2122 } while (0)
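
/*
 * Example (compiled out): NEG sets CF for any non-zero operand, and negating
 * the most negative value overflows, leaving it unchanged with OF set.
 * Illustrative helper name only.
 */
# if 0
static void iemAImplSketchNegU8(void)
{
    uint8_t  uDst = 0x80;                       /* -128 */
    uint32_t fEfl = 0;
    iemAImpl_neg_u8(&uDst, &fEfl);
    Assert(uDst == 0x80);                       /* 0 - (-128) is still -128 in 8 bits. */
    Assert((fEfl & (X86_EFL_CF | X86_EFL_OF)) == (X86_EFL_CF | X86_EFL_OF));
}
# endif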
2123
2124IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2125{
2126 uint64_t uDst = *puDst;
2127 uint64_t uResult = (uint64_t)0 - uDst;
2128 *puDst = uResult;
2129 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 64);
2130}
2131
2132# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2133
2134IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2135{
2136 uint32_t uDst = *puDst;
2137 uint32_t uResult = (uint32_t)0 - uDst;
2138 *puDst = uResult;
2139 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 32);
2140}
2141
2142
2143IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2144{
2145 uint16_t uDst = *puDst;
2146 uint16_t uResult = (uint16_t)0 - uDst;
2147 *puDst = uResult;
2148 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 16);
2149}
2150
2151
2152IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2153{
2154 uint8_t uDst = *puDst;
2155 uint8_t uResult = (uint8_t)0 - uDst;
2156 *puDst = uResult;
2157 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 8);
2158}
2159
2160# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2161
2162/*
2163 * Locked variants.
2164 */
2165
2166/** Emit a function for doing a locked unary operand operation. */
2167# define EMIT_LOCKED_UNARY_OP(a_Mnemonic, a_cBitsWidth) \
2168 IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
2169 uint32_t *pfEFlags)) \
2170 { \
2171 uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
2172 uint ## a_cBitsWidth ## _t uTmp; \
2173 uint32_t fEflTmp; \
2174 do \
2175 { \
2176 uTmp = uOld; \
2177 fEflTmp = *pfEFlags; \
2178 iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, &fEflTmp); \
2179 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
2180 *pfEFlags = fEflTmp; \
2181 }
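
/*
 * The loop above re-runs the plain worker on the current memory value until
 * the compare-and-exchange succeeds, so the stored result and the returned
 * EFLAGS always describe one atomic transition.  Compiled-out usage sketch
 * (illustrative name only):
 */
# if 0
static uint32_t g_uSketchShared = 0;
static void iemAImplSketchLockedInc(void)
{
    uint32_t fEfl = 0;
    iemAImpl_inc_u32_locked(&g_uSketchShared, &fEfl);   /* Safe to race with other callers. */
}
# endif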
2182
2183EMIT_LOCKED_UNARY_OP(inc, 64)
2184EMIT_LOCKED_UNARY_OP(dec, 64)
2185EMIT_LOCKED_UNARY_OP(not, 64)
2186EMIT_LOCKED_UNARY_OP(neg, 64)
2187# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2188EMIT_LOCKED_UNARY_OP(inc, 32)
2189EMIT_LOCKED_UNARY_OP(dec, 32)
2190EMIT_LOCKED_UNARY_OP(not, 32)
2191EMIT_LOCKED_UNARY_OP(neg, 32)
2192
2193EMIT_LOCKED_UNARY_OP(inc, 16)
2194EMIT_LOCKED_UNARY_OP(dec, 16)
2195EMIT_LOCKED_UNARY_OP(not, 16)
2196EMIT_LOCKED_UNARY_OP(neg, 16)
2197
2198EMIT_LOCKED_UNARY_OP(inc, 8)
2199EMIT_LOCKED_UNARY_OP(dec, 8)
2200EMIT_LOCKED_UNARY_OP(not, 8)
2201EMIT_LOCKED_UNARY_OP(neg, 8)
2202# endif
2203
2204#endif /* !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) */
2205
2206
2207/*********************************************************************************************************************************
2208* Shifting and Rotating *
2209*********************************************************************************************************************************/
2210
2211/*
2212 * ROL
2213 */
2214#define EMIT_ROL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
2215IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rol_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2216{ \
2217 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2218 if (cShift) \
2219 { \
2220 if (a_cBitsWidth < 32) \
2221 cShift &= a_cBitsWidth - 1; \
2222 a_uType const uDst = *puDst; \
2223 a_uType const uResult = a_fnHlp(uDst, cShift); \
2224 *puDst = uResult; \
2225 \
2226 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2227 it the same way as for 1-bit shifts. */ \
2228 AssertCompile(X86_EFL_CF_BIT == 0); \
2229 uint32_t fEfl = *pfEFlags; \
2230 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2231 uint32_t const fCarry = (uResult & X86_EFL_CF); \
2232 fEfl |= fCarry; \
2233 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2234 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2235 else /* Intel 10980XE: According to the first sub-shift: */ \
2236 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2237 *pfEFlags = fEfl; \
2238 } \
2239}
2240
2241#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2242EMIT_ROL(64, uint64_t, RT_NOTHING, 1, ASMRotateLeftU64)
2243#endif
2244EMIT_ROL(64, uint64_t, _intel, 1, ASMRotateLeftU64)
2245EMIT_ROL(64, uint64_t, _amd, 0, ASMRotateLeftU64)
2246
2247#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2248EMIT_ROL(32, uint32_t, RT_NOTHING, 1, ASMRotateLeftU32)
2249#endif
2250EMIT_ROL(32, uint32_t, _intel, 1, ASMRotateLeftU32)
2251EMIT_ROL(32, uint32_t, _amd, 0, ASMRotateLeftU32)
2252
2253DECL_FORCE_INLINE(uint16_t) iemAImpl_rol_u16_hlp(uint16_t uValue, uint8_t cShift)
2254{
2255 return (uValue << cShift) | (uValue >> (16 - cShift));
2256}
2257#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2258EMIT_ROL(16, uint16_t, RT_NOTHING, 1, iemAImpl_rol_u16_hlp)
2259#endif
2260EMIT_ROL(16, uint16_t, _intel, 1, iemAImpl_rol_u16_hlp)
2261EMIT_ROL(16, uint16_t, _amd, 0, iemAImpl_rol_u16_hlp)
2262
2263DECL_FORCE_INLINE(uint8_t) iemAImpl_rol_u8_hlp(uint8_t uValue, uint8_t cShift)
2264{
2265 return (uValue << cShift) | (uValue >> (8 - cShift));
2266}
2267#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2268EMIT_ROL(8, uint8_t, RT_NOTHING, 1, iemAImpl_rol_u8_hlp)
2269#endif
2270EMIT_ROL(8, uint8_t, _intel, 1, iemAImpl_rol_u8_hlp)
2271EMIT_ROL(8, uint8_t, _amd, 0, iemAImpl_rol_u8_hlp)
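
/*
 * Example (compiled out): ROL of 0x80 by one rotates the MSB into bit 0 and
 * into CF; for this single-bit case OF ends up set on both vendor variants.
 * Illustrative helper name only.
 */
#if 0
static void iemAImplSketchRolU8(void)
{
    uint8_t  uDst = 0x80;
    uint32_t fEfl = 0;
    iemAImpl_rol_u8(&uDst, 1, &fEfl);
    Assert(uDst == 0x01);
    Assert(fEfl & X86_EFL_CF);                  /* CF = the bit rotated around. */
    Assert(fEfl & X86_EFL_OF);                  /* The MSB changed on the single step. */
}
#endif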
2272
2273
2274/*
2275 * ROR
2276 */
2277#define EMIT_ROR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
2278IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_ror_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2279{ \
2280 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2281 if (cShift) \
2282 { \
2283 if (a_cBitsWidth < 32) \
2284 cShift &= a_cBitsWidth - 1; \
2285 a_uType const uDst = *puDst; \
2286 a_uType const uResult = a_fnHlp(uDst, cShift); \
2287 *puDst = uResult; \
2288 \
2289 /* Calc EFLAGS: */ \
2290 AssertCompile(X86_EFL_CF_BIT == 0); \
2291 uint32_t fEfl = *pfEFlags; \
2292 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2293 uint32_t const fCarry = (uResult >> ((a_cBitsWidth) - 1)) & X86_EFL_CF; \
2294 fEfl |= fCarry; \
2295 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2296 fEfl |= (((uResult >> ((a_cBitsWidth) - 2)) ^ fCarry) & 1) << X86_EFL_OF_BIT; \
2297 else /* Intel 10980XE: According to the first sub-shift: */ \
2298 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << (a_cBitsWidth - 1))); \
2299 *pfEFlags = fEfl; \
2300 } \
2301}
2302
2303#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2304EMIT_ROR(64, uint64_t, RT_NOTHING, 1, ASMRotateRightU64)
2305#endif
2306EMIT_ROR(64, uint64_t, _intel, 1, ASMRotateRightU64)
2307EMIT_ROR(64, uint64_t, _amd, 0, ASMRotateRightU64)
2308
2309#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2310EMIT_ROR(32, uint32_t, RT_NOTHING, 1, ASMRotateRightU32)
2311#endif
2312EMIT_ROR(32, uint32_t, _intel, 1, ASMRotateRightU32)
2313EMIT_ROR(32, uint32_t, _amd, 0, ASMRotateRightU32)
2314
2315DECL_FORCE_INLINE(uint16_t) iemAImpl_ror_u16_hlp(uint16_t uValue, uint8_t cShift)
2316{
2317 return (uValue >> cShift) | (uValue << (16 - cShift));
2318}
2319#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2320EMIT_ROR(16, uint16_t, RT_NOTHING, 1, iemAImpl_ror_u16_hlp)
2321#endif
2322EMIT_ROR(16, uint16_t, _intel, 1, iemAImpl_ror_u16_hlp)
2323EMIT_ROR(16, uint16_t, _amd, 0, iemAImpl_ror_u16_hlp)
2324
2325DECL_FORCE_INLINE(uint8_t) iemAImpl_ror_u8_hlp(uint8_t uValue, uint8_t cShift)
2326{
2327 return (uValue >> cShift) | (uValue << (8 - cShift));
2328}
2329#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2330EMIT_ROR(8, uint8_t, RT_NOTHING, 1, iemAImpl_ror_u8_hlp)
2331#endif
2332EMIT_ROR(8, uint8_t, _intel, 1, iemAImpl_ror_u8_hlp)
2333EMIT_ROR(8, uint8_t, _amd, 0, iemAImpl_ror_u8_hlp)
2334
2335
2336/*
2337 * RCL
2338 */
2339#define EMIT_RCL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2340IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2341{ \
2342 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2343 if (cShift) \
2344 { \
2345 if (a_cBitsWidth < 32) \
2346 cShift %= a_cBitsWidth + 1; \
2347 a_uType const uDst = *puDst; \
2348 a_uType uResult = uDst << cShift; \
2349 if (cShift > 1) \
2350 uResult |= uDst >> (a_cBitsWidth + 1 - cShift); \
2351 \
2352 AssertCompile(X86_EFL_CF_BIT == 0); \
2353 uint32_t fEfl = *pfEFlags; \
2354 uint32_t fInCarry = fEfl & X86_EFL_CF; \
2355 uResult |= (a_uType)fInCarry << (cShift - 1); \
2356 \
2357 *puDst = uResult; \
2358 \
2359 /* Calc EFLAGS. */ \
2360 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2361 uint32_t const fOutCarry = a_cBitsWidth >= 32 || cShift \
2362 ? (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF : fInCarry; \
2363 fEfl |= fOutCarry; \
2364 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2365 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fOutCarry) << X86_EFL_OF_BIT; \
2366 else /* Intel 10980XE: According to the first sub-shift: */ \
2367 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1) ); \
2368 *pfEFlags = fEfl; \
2369 } \
2370}
2371
2372#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2373EMIT_RCL(64, uint64_t, RT_NOTHING, 1)
2374#endif
2375EMIT_RCL(64, uint64_t, _intel, 1)
2376EMIT_RCL(64, uint64_t, _amd, 0)
2377
2378#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2379EMIT_RCL(32, uint32_t, RT_NOTHING, 1)
2380#endif
2381EMIT_RCL(32, uint32_t, _intel, 1)
2382EMIT_RCL(32, uint32_t, _amd, 0)
2383
2384#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2385EMIT_RCL(16, uint16_t, RT_NOTHING, 1)
2386#endif
2387EMIT_RCL(16, uint16_t, _intel, 1)
2388EMIT_RCL(16, uint16_t, _amd, 0)
2389
2390#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2391EMIT_RCL(8, uint8_t, RT_NOTHING, 1)
2392#endif
2393EMIT_RCL(8, uint8_t, _intel, 1)
2394EMIT_RCL(8, uint8_t, _amd, 0)
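
/*
 * Example (compiled out): RCL is a 9/17/33/65-bit rotate through CF.  With CF
 * set, rotating zero left by one pulls the old carry into bit 0 and shifts a
 * zero bit out into CF.  Illustrative helper name only.
 */
#if 0
static void iemAImplSketchRclU8(void)
{
    uint8_t  uDst = 0x00;
    uint32_t fEfl = X86_EFL_CF;
    iemAImpl_rcl_u8(&uDst, 1, &fEfl);
    Assert(uDst == 0x01);                       /* Old CF rotated into bit 0. */
    Assert(!(fEfl & X86_EFL_CF));               /* Old MSB (zero) rotated out. */
}
#endif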
2395
2396
2397/*
2398 * RCR
2399 */
2400#define EMIT_RCR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2401IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2402{ \
2403 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2404 if (cShift) \
2405 { \
2406 if (a_cBitsWidth < 32) \
2407 cShift %= a_cBitsWidth + 1; \
2408 a_uType const uDst = *puDst; \
2409 a_uType uResult = uDst >> cShift; \
2410 if (cShift > 1) \
2411 uResult |= uDst << (a_cBitsWidth + 1 - cShift); \
2412 \
2413 AssertCompile(X86_EFL_CF_BIT == 0); \
2414 uint32_t fEfl = *pfEFlags; \
2415 uint32_t fInCarry = fEfl & X86_EFL_CF; \
2416 uResult |= (a_uType)fInCarry << (a_cBitsWidth - cShift); \
2417 *puDst = uResult; \
2418 \
2419 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2420 it the same way as for 1-bit shifts. */ \
2421 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2422 uint32_t const fOutCarry = (uDst >> (cShift - 1)) & X86_EFL_CF; \
2423 fEfl |= fOutCarry; \
2424 if (!a_fIntelFlags) /* AMD 3990X: XOR the two most significant bits of the result: */ \
2425 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uResult ^ (uResult << 1)); \
2426 else /* Intel 10980XE: same as AMD, but only for the first sub-shift: */ \
2427 fEfl |= (fInCarry ^ (uint32_t)(uDst >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
2428 *pfEFlags = fEfl; \
2429 } \
2430}
2431
2432#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2433EMIT_RCR(64, uint64_t, RT_NOTHING, 1)
2434#endif
2435EMIT_RCR(64, uint64_t, _intel, 1)
2436EMIT_RCR(64, uint64_t, _amd, 0)
2437
2438#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2439EMIT_RCR(32, uint32_t, RT_NOTHING, 1)
2440#endif
2441EMIT_RCR(32, uint32_t, _intel, 1)
2442EMIT_RCR(32, uint32_t, _amd, 0)
2443
2444#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2445EMIT_RCR(16, uint16_t, RT_NOTHING, 1)
2446#endif
2447EMIT_RCR(16, uint16_t, _intel, 1)
2448EMIT_RCR(16, uint16_t, _amd, 0)
2449
2450#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2451EMIT_RCR(8, uint8_t, RT_NOTHING, 1)
2452#endif
2453EMIT_RCR(8, uint8_t, _intel, 1)
2454EMIT_RCR(8, uint8_t, _amd, 0)
2455
2456
2457/*
2458 * SHL
2459 */
2460#define EMIT_SHL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2461IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2462{ \
2463 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2464 if (cShift) \
2465 { \
2466 a_uType const uDst = *puDst; \
2467 a_uType uResult = uDst << cShift; \
2468 *puDst = uResult; \
2469 \
2470 /* Calc EFLAGS. */ \
2471 AssertCompile(X86_EFL_CF_BIT == 0); \
2472 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2473 uint32_t fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2474 fEfl |= fCarry; \
2475 if (!a_fIntelFlags) \
2476 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; /* AMD 3990X: Last shift result. */ \
2477 else \
2478 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); /* Intel 10980XE: First shift result. */ \
2479 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2480 fEfl |= X86_EFL_CALC_ZF(uResult); \
2481 fEfl |= g_afParity[uResult & 0xff]; \
2482 if (!a_fIntelFlags) \
2483 fEfl |= X86_EFL_AF; /* AMD 3990X sets it unconditionally, Intel 10980XE does the opposite */ \
2484 *pfEFlags = fEfl; \
2485 } \
2486}
2487
2488#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2489EMIT_SHL(64, uint64_t, RT_NOTHING, 1)
2490#endif
2491EMIT_SHL(64, uint64_t, _intel, 1)
2492EMIT_SHL(64, uint64_t, _amd, 0)
2493
2494#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2495EMIT_SHL(32, uint32_t, RT_NOTHING, 1)
2496#endif
2497EMIT_SHL(32, uint32_t, _intel, 1)
2498EMIT_SHL(32, uint32_t, _amd, 0)
2499
2500#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2501EMIT_SHL(16, uint16_t, RT_NOTHING, 1)
2502#endif
2503EMIT_SHL(16, uint16_t, _intel, 1)
2504EMIT_SHL(16, uint16_t, _amd, 0)
2505
2506#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2507EMIT_SHL(8, uint8_t, RT_NOTHING, 1)
2508#endif
2509EMIT_SHL(8, uint8_t, _intel, 1)
2510EMIT_SHL(8, uint8_t, _amd, 0)
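
/*
 * Example (compiled out): SHL of 0x40 by two pushes a set bit out last, so CF
 * is set while the truncated result is zero (ZF set).  Illustrative helper
 * name only.
 */
#if 0
static void iemAImplSketchShlU8(void)
{
    uint8_t  uDst = 0x40;
    uint32_t fEfl = 0;
    iemAImpl_shl_u8(&uDst, 2, &fEfl);
    Assert(uDst == 0);                          /* 0x40 << 2 = 0x100, truncated to 0. */
    Assert(fEfl & X86_EFL_ZF);
    Assert(fEfl & X86_EFL_CF);                  /* Last bit shifted out was set. */
}
#endif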
2511
2512
2513/*
2514 * SHR
2515 */
2516#define EMIT_SHR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2517IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2518{ \
2519 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2520 if (cShift) \
2521 { \
2522 a_uType const uDst = *puDst; \
2523 a_uType uResult = uDst >> cShift; \
2524 *puDst = uResult; \
2525 \
2526 /* Calc EFLAGS. */ \
2527 AssertCompile(X86_EFL_CF_BIT == 0); \
2528 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2529 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2530 if (a_fIntelFlags || cShift == 1) /* AMD 3990X does what Intel documents; Intel 10980XE does this for all shift counts. */ \
2531 fEfl |= (uDst >> (a_cBitsWidth - 1)) << X86_EFL_OF_BIT; \
2532 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2533 fEfl |= X86_EFL_CALC_ZF(uResult); \
2534 fEfl |= g_afParity[uResult & 0xff]; \
2535 if (!a_fIntelFlags) \
2536 fEfl |= X86_EFL_AF; /* AMD 3990X sets it unconditionally, Intel 10980XE does the opposite */ \
2537 *pfEFlags = fEfl; \
2538 } \
2539}
2540
2541#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2542EMIT_SHR(64, uint64_t, RT_NOTHING, 1)
2543#endif
2544EMIT_SHR(64, uint64_t, _intel, 1)
2545EMIT_SHR(64, uint64_t, _amd, 0)
2546
2547#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2548EMIT_SHR(32, uint32_t, RT_NOTHING, 1)
2549#endif
2550EMIT_SHR(32, uint32_t, _intel, 1)
2551EMIT_SHR(32, uint32_t, _amd, 0)
2552
2553#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2554EMIT_SHR(16, uint16_t, RT_NOTHING, 1)
2555#endif
2556EMIT_SHR(16, uint16_t, _intel, 1)
2557EMIT_SHR(16, uint16_t, _amd, 0)
2558
2559#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2560EMIT_SHR(8, uint8_t, RT_NOTHING, 1)
2561#endif
2562EMIT_SHR(8, uint8_t, _intel, 1)
2563EMIT_SHR(8, uint8_t, _amd, 0)
2564
2565
2566/*
2567 * SAR
2568 */
2569#define EMIT_SAR(a_cBitsWidth, a_uType, a_iType, a_Suffix, a_fIntelFlags) \
2570IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_sar_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2571{ \
2572 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2573 if (cShift) \
2574 { \
2575 a_iType const iDst = (a_iType)*puDst; \
2576 a_uType uResult = iDst >> cShift; \
2577 *puDst = uResult; \
2578 \
2579 /* Calc EFLAGS. \
2580 Note! The OF flag is always zero because the sign of the result never differs from the input's. */ \
2581 AssertCompile(X86_EFL_CF_BIT == 0); \
2582 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2583 fEfl |= (iDst >> (cShift - 1)) & X86_EFL_CF; \
2584 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2585 fEfl |= X86_EFL_CALC_ZF(uResult); \
2586 fEfl |= g_afParity[uResult & 0xff]; \
2587 if (!a_fIntelFlags) \
2588 fEfl |= X86_EFL_AF; /* AMD 3990X sets it unconditionally, Intel 10980XE does the opposite */ \
2589 *pfEFlags = fEfl; \
2590 } \
2591}
2592
2593#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2594EMIT_SAR(64, uint64_t, int64_t, RT_NOTHING, 1)
2595#endif
2596EMIT_SAR(64, uint64_t, int64_t, _intel, 1)
2597EMIT_SAR(64, uint64_t, int64_t, _amd, 0)
2598
2599#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2600EMIT_SAR(32, uint32_t, int32_t, RT_NOTHING, 1)
2601#endif
2602EMIT_SAR(32, uint32_t, int32_t, _intel, 1)
2603EMIT_SAR(32, uint32_t, int32_t, _amd, 0)
2604
2605#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2606EMIT_SAR(16, uint16_t, int16_t, RT_NOTHING, 1)
2607#endif
2608EMIT_SAR(16, uint16_t, int16_t, _intel, 1)
2609EMIT_SAR(16, uint16_t, int16_t, _amd, 0)
2610
2611#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2612EMIT_SAR(8, uint8_t, int8_t, RT_NOTHING, 1)
2613#endif
2614EMIT_SAR(8, uint8_t, int8_t, _intel, 1)
2615EMIT_SAR(8, uint8_t, int8_t, _amd, 0)
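
/*
 * Example (compiled out): SAR replicates the sign bit, so shifting 0x80 right
 * by seven yields 0xFF; per the note above OF is always cleared.  Illustrative
 * helper name only.
 */
#if 0
static void iemAImplSketchSarU8(void)
{
    uint8_t  uDst = 0x80;                       /* -128 */
    uint32_t fEfl = X86_EFL_OF;
    iemAImpl_sar_u8(&uDst, 7, &fEfl);
    Assert(uDst == 0xff);                       /* -128 >> 7 = -1 arithmetically. */
    Assert(!(fEfl & X86_EFL_OF));               /* Always cleared by SAR. */
    Assert(!(fEfl & X86_EFL_CF));               /* All bits shifted out were zero. */
}
#endif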
2616
2617
2618/*
2619 * SHLD
2620 *
2621 * - CF is the last bit shifted out of puDst.
2622 * - AF is always cleared by Intel 10980XE.
2623 * - AF is always set by AMD 3990X.
2624 * - OF is set according to the first shift on Intel 10980XE, it seems.
2625 * - OF is set according to the last sub-shift on AMD 3990X.
2626 * - ZF, SF and PF are calculated according to the result by both vendors.
2627 *
2628 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
2629 * pick either the source register or the destination register for input bits
2630 * when going beyond 16. According to https://www.sandpile.org/x86/flags.htm
2631 * Intel has changed behaviour here several times. We implement what current
2632 * Skylake-based CPUs do for now; we can extend this later as needed.
2633 */
2634#define EMIT_SHLD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2635IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shld_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, \
2636 uint32_t *pfEFlags)) \
2637{ \
2638 cShift &= a_cBitsWidth - 1; \
2639 if (cShift) \
2640 { \
2641 a_uType const uDst = *puDst; \
2642 a_uType uResult = uDst << cShift; \
2643 uResult |= uSrc >> (a_cBitsWidth - cShift); \
2644 *puDst = uResult; \
2645 \
2646 /* CALC EFLAGS: */ \
2647 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2648 if (a_fIntelFlags) \
2649 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2650 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2651 else \
2652 { /* AMD 3990X: Set according to last shift. AF always set. */ \
2653 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uDst << (cShift - 1)) ^ uResult); \
2654 fEfl |= X86_EFL_AF; \
2655 } \
2656 AssertCompile(X86_EFL_CF_BIT == 0); \
2657 fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; /* CF = last bit shifted out */ \
2658 fEfl |= g_afParity[uResult & 0xff]; \
2659 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2660 fEfl |= X86_EFL_CALC_ZF(uResult); \
2661 *pfEFlags = fEfl; \
2662 } \
2663}
2664
2665#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2666EMIT_SHLD(64, uint64_t, RT_NOTHING, 1)
2667#endif
2668EMIT_SHLD(64, uint64_t, _intel, 1)
2669EMIT_SHLD(64, uint64_t, _amd, 0)
2670
2671#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2672EMIT_SHLD(32, uint32_t, RT_NOTHING, 1)
2673#endif
2674EMIT_SHLD(32, uint32_t, _intel, 1)
2675EMIT_SHLD(32, uint32_t, _amd, 0)
2676
2677#define EMIT_SHLD_16(a_Suffix, a_fIntelFlags) \
2678IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shld_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2679{ \
2680 cShift &= 31; \
2681 if (cShift) \
2682 { \
2683 uint16_t const uDst = *puDst; \
2684 uint64_t const uTmp = a_fIntelFlags \
2685 ? ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uDst \
2686 : ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uSrc; \
2687 uint16_t const uResult = (uint16_t)((uTmp << cShift) >> 32); \
2688 *puDst = uResult; \
2689 \
2690 /* CALC EFLAGS: */ \
2691 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2692 AssertCompile(X86_EFL_CF_BIT == 0); \
2693 if (a_fIntelFlags) \
2694 { \
2695 fEfl |= (uTmp >> (48 - cShift)) & X86_EFL_CF; /* CF = last bit shifted out of the combined operand */ \
2696 /* Intel 6700K & 10980XE: OF is set according to the first shift. AF always cleared. */ \
2697 fEfl |= X86_EFL_GET_OF_16(uDst ^ (uDst << 1)); \
2698 } \
2699 else \
2700 { \
2701 /* AMD 3990X: OF is set according to last shift, with some weirdness. AF always set. CF = last bit shifted out of uDst. */ \
2702 if (cShift < 16) \
2703 { \
2704 fEfl |= (uDst >> (16 - cShift)) & X86_EFL_CF; \
2705 fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ uResult); \
2706 } \
2707 else \
2708 fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ 0); \
2709 fEfl |= X86_EFL_AF; \
2710 } \
2711 fEfl |= g_afParity[uResult & 0xff]; \
2712 fEfl |= X86_EFL_CALC_SF(uResult, 16); \
2713 fEfl |= X86_EFL_CALC_ZF(uResult); \
2714 *pfEFlags = fEfl; \
2715 } \
2716}
2717
2718#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2719EMIT_SHLD_16(RT_NOTHING, 1)
2720#endif
2721EMIT_SHLD_16(_intel, 1)
2722EMIT_SHLD_16(_amd, 0)
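
/*
 * Example (compiled out): 16-bit SHLD with a count above 15.  The count is
 * masked with 31 and, per the notes above, Intel refills from uDst:uSrc:uDst
 * while AMD uses uDst:uSrc:uSrc, so counts 17..31 can differ per vendor.  The
 * values below follow the Intel variant; illustrative helper name only.
 */
#if 0
static void iemAImplSketchShldU16(void)
{
    uint16_t uDst = 0x8000;
    uint32_t fEfl = 0;
    iemAImpl_shld_u16_intel(&uDst, 0x0001, 17, &fEfl);
    /* uTmp = uDst:uSrc:uDst = 0x800000018000; shifted left by 17, bits 32..47 remain. */
    Assert(uDst == 0x0003);
}
#endif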
2723
2724
2725/*
2726 * SHRD
2727 *
2728 * EFLAGS behaviour seems to be the same as with SHLD:
2729 * - CF is the last bit shifted out of puDst.
2730 * - AF is always cleared by Intel 10980XE.
2731 * - AF is always set by AMD 3990X.
2732 * - OF is set according to the first shift on Intel 10980XE, it seems.
2733 * - OF is set according to the last sub-shift on AMD 3990X.
2734 * - ZF, SF and PF are calculated according to the result by both vendors.
2735 *
2736 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
2737 * pick either the source register or the destination register for input bits
2738 * when going beyond 16. According to https://www.sandpile.org/x86/flags.htm
2739 * Intel has changed behaviour here several times. We implement what current
2740 * Skylake-based CPUs do for now; we can extend this later as needed.
2741 */
2742#define EMIT_SHRD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2743IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shrd_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2744{ \
2745 cShift &= a_cBitsWidth - 1; \
2746 if (cShift) \
2747 { \
2748 a_uType const uDst = *puDst; \
2749 a_uType uResult = uDst >> cShift; \
2750 uResult |= uSrc << (a_cBitsWidth - cShift); \
2751 *puDst = uResult; \
2752 \
2753 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2754 AssertCompile(X86_EFL_CF_BIT == 0); \
2755 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2756 if (a_fIntelFlags) \
2757 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2758 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uSrc << (a_cBitsWidth - 1))); \
2759 else \
2760 { /* AMD 3990X: Set according to last shift. AF always set. */ \
2761 if (cShift > 1) /* Set according to last shift. */ \
2762 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uSrc << (a_cBitsWidth - cShift + 1)) ^ uResult); \
2763 else \
2764 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ uResult); \
2765 fEfl |= X86_EFL_AF; \
2766 } \
2767 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2768 fEfl |= X86_EFL_CALC_ZF(uResult); \
2769 fEfl |= g_afParity[uResult & 0xff]; \
2770 *pfEFlags = fEfl; \
2771 } \
2772}
2773
2774#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2775EMIT_SHRD(64, uint64_t, RT_NOTHING, 1)
2776#endif
2777EMIT_SHRD(64, uint64_t, _intel, 1)
2778EMIT_SHRD(64, uint64_t, _amd, 0)
2779
2780#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2781EMIT_SHRD(32, uint32_t, RT_NOTHING, 1)
2782#endif
2783EMIT_SHRD(32, uint32_t, _intel, 1)
2784EMIT_SHRD(32, uint32_t, _amd, 0)
2785
2786#define EMIT_SHRD_16(a_Suffix, a_fIntelFlags) \
2787IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shrd_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2788{ \
2789 cShift &= 31; \
2790 if (cShift) \
2791 { \
2792 uint16_t const uDst = *puDst; \
2793 uint64_t const uTmp = a_fIntelFlags \
2794 ? uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uDst << 32) \
2795 : uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uSrc << 32); \
2796 uint16_t const uResult = (uint16_t)(uTmp >> cShift); \
2797 *puDst = uResult; \
2798 \
2799 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2800 AssertCompile(X86_EFL_CF_BIT == 0); \
2801 if (a_fIntelFlags) \
2802 { \
2803 /* Intel 10980XE: The CF is the last bit shifted out of the combined uTmp operand. */ \
2804 fEfl |= (uTmp >> (cShift - 1)) & X86_EFL_CF; \
2805 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2806 fEfl |= X86_EFL_GET_OF_16(uDst ^ (uSrc << 15)); \
2807 } \
2808 else \
2809 { \
2810 /* AMD 3990X: The CF flag seems to be the last bit shifted out of uDst, not of the combined uSrc:uSrc:uDst operand. */ \
2811 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2812 /* AMD 3990X: Set according to last shift. AF always set. */ \
2813 if (cShift > 1) /* Set according to last shift. */ \
2814 fEfl |= X86_EFL_GET_OF_16((uint16_t)(uTmp >> (cShift - 1)) ^ uResult); \
2815 else \
2816 fEfl |= X86_EFL_GET_OF_16(uDst ^ uResult); \
2817 fEfl |= X86_EFL_AF; \
2818 } \
2819 fEfl |= X86_EFL_CALC_SF(uResult, 16); \
2820 fEfl |= X86_EFL_CALC_ZF(uResult); \
2821 fEfl |= g_afParity[uResult & 0xff]; \
2822 *pfEFlags = fEfl; \
2823 } \
2824}
2825
2826#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2827EMIT_SHRD_16(RT_NOTHING, 1)
2828#endif
2829EMIT_SHRD_16(_intel, 1)
2830EMIT_SHRD_16(_amd, 0)
2831
2832
2833#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2834
2835# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2836/*
2837 * BSWAP
2838 */
2839
2840IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u64,(uint64_t *puDst))
2841{
2842 *puDst = ASMByteSwapU64(*puDst);
2843}
2844
2845
2846IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u32,(uint32_t *puDst))
2847{
2848 *puDst = ASMByteSwapU32(*puDst);
2849}
2850
2851
2852/* Note! Undocumented instruction, so the worker takes a 32-bit argument. */
2853IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u16,(uint32_t *puDst))
2854{
2855#if 0
2856 *(uint16_t *)puDst = ASMByteSwapU16(*(uint16_t *)puDst);
2857#else
2858 /* This is the behaviour of the AMD 3990X (64-bit mode): */
2859 *(uint16_t *)puDst = 0;
2860#endif
2861}
2862
2863# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2864
2865
2866
2867# if defined(IEM_WITHOUT_ASSEMBLY)
2868
2869/*
2870 * LFENCE, SFENCE & MFENCE.
2871 */
2872
2873IEM_DECL_IMPL_DEF(void, iemAImpl_lfence,(void))
2874{
2875 ASMReadFence();
2876}
2877
2878
2879IEM_DECL_IMPL_DEF(void, iemAImpl_sfence,(void))
2880{
2881 ASMWriteFence();
2882}
2883
2884
2885IEM_DECL_IMPL_DEF(void, iemAImpl_mfence,(void))
2886{
2887 ASMMemoryFence();
2888}
2889
2890
2891# ifndef RT_ARCH_ARM64
2892IEM_DECL_IMPL_DEF(void, iemAImpl_alt_mem_fence,(void))
2893{
2894 ASMMemoryFence();
2895}
2896# endif
2897
2898# endif
2899
2900#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
2901
2902
2903IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
2904{
2905 if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
2906 {
2907 *pu16Dst &= X86_SEL_MASK_OFF_RPL;
2908 *pu16Dst |= u16Src & X86_SEL_RPL;
2909
2910 *pfEFlags |= X86_EFL_ZF;
2911 }
2912 else
2913 *pfEFlags &= ~X86_EFL_ZF;
2914}
2915
2916
2917/*********************************************************************************************************************************
2918* x87 FPU *
2919*********************************************************************************************************************************/
2920#if defined(IEM_WITHOUT_ASSEMBLY)
2921
2922IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
2923{
2924 RT_NOREF(pFpuState, pFpuRes, pr80Val);
2925 AssertReleaseFailed();
2926}
2927
2928
2929IEM_DECL_IMPL_DEF(void, iemAImpl_fabs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
2930{
2931 RT_NOREF(pFpuState, pFpuRes, pr80Val);
2932 AssertReleaseFailed();
2933}
2934
2935
2936IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
2937 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
2938{
2939 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
2940 AssertReleaseFailed();
2941}
2942
2943
2944IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
2945 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
2946{
2947 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
2948 AssertReleaseFailed();
2949}
2950
2951
2952IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
2953 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
2954{
2955 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
2956 AssertReleaseFailed();
2957}
2958
2959
2960IEM_DECL_IMPL_DEF(void, iemAImpl_fchs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
2961{
2962 RT_NOREF(pFpuState, pFpuRes, pr80Val);
2963 AssertReleaseFailed();
2964}
2965
2966
2967IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r32,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
2968 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
2969{
2970 RT_NOREF(pFpuState, pFSW, pr80Val1, pr32Val2);
2971 AssertReleaseFailed();
2972}
2973
2974
2975IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r64,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
2976 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
2977{
2978 RT_NOREF(pFpuState, pFSW, pr80Val1, pr64Val2);
2979 AssertReleaseFailed();
2980}
2981
2982
2983IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
2984 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
2985{
2986 RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
2987 AssertReleaseFailed();
2988}
2989
2990
2991IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fcomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
2992 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
2993{
2994 RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
2995 AssertReleaseFailed();
2996 return 0;
2997}
2998
2999
3000IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
3001{
3002 RT_NOREF(pFpuState, pFpuRes, pr80Val);
3003 AssertReleaseFailed();
3004}
3005
3006
3007IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3008 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
3009{
3010 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
3011 AssertReleaseFailed();
3012}
3013
3014
3015IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3016 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
3017{
3018 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
3019 AssertReleaseFailed();
3020}
3021
3022
3023IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3024 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
3025{
3026 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
3027 AssertReleaseFailed();
3028}
3029
3030
3031IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3032 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
3033{
3034 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
3035 AssertReleaseFailed();
3036}
3037
3038
3039IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3040 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
3041{
3042 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
3043 AssertReleaseFailed();
3044}
3045
3046
3047IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3048 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
3049{
3050 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
3051 AssertReleaseFailed();
3052}
3053
3054
3055IEM_DECL_IMPL_DEF(void, iemAImpl_fiadd_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3056 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
3057{
3058 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
3059 AssertReleaseFailed();
3060}
3061
3062
3063IEM_DECL_IMPL_DEF(void, iemAImpl_fiadd_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3064 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
3065{
3066 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
3067 AssertReleaseFailed();
3068}
3069
3070
3071IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i16,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
3072 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
3073{
3074 RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pi16Val2);
3075 AssertReleaseFailed();
3076}
3077
3078
3079IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i32,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
3080 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
3081{
3082 RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pi32Val2);
3083 AssertReleaseFailed();
3084}
3085
3086
3087IEM_DECL_IMPL_DEF(void, iemAImpl_fidiv_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3088 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
3089{
3090 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
3091 AssertReleaseFailed();
3092}
3093
3094
3095IEM_DECL_IMPL_DEF(void, iemAImpl_fidiv_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3096 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
3097{
3098 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
3099 AssertReleaseFailed();
3100}
3101
3102
3103IEM_DECL_IMPL_DEF(void, iemAImpl_fidivr_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3104 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
3105{
3106 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
3107 AssertReleaseFailed();
3108}
3109
3110
3111IEM_DECL_IMPL_DEF(void, iemAImpl_fidivr_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3112 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
3113{
3114 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
3115 AssertReleaseFailed();
3116}
3117
3118
3119IEM_DECL_IMPL_DEF(void, iemAImpl_fild_i16_to_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, int16_t const *pi16Val))
3120{
3121 RT_NOREF(pFpuState, pFpuRes, pi16Val);
3122 AssertReleaseFailed();
3123}
3124
3125
3126IEM_DECL_IMPL_DEF(void, iemAImpl_fild_i32_to_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, int32_t const *pi32Val))
3127{
3128 RT_NOREF(pFpuState, pFpuRes, pi32Val);
3129 AssertReleaseFailed();
3130}
3131
3132
3133IEM_DECL_IMPL_DEF(void, iemAImpl_fild_i64_to_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, int64_t const *pi64Val))
3134{
3135 RT_NOREF(pFpuState, pFpuRes, pi64Val);
3136 AssertReleaseFailed();
3137}
3138
3139
3140IEM_DECL_IMPL_DEF(void, iemAImpl_fimul_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3141 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
3142{
3143 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
3144 AssertReleaseFailed();
3145}
3146
3147
3148IEM_DECL_IMPL_DEF(void, iemAImpl_fimul_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
3149 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
3150{
3151 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
3152 AssertReleaseFailed();
3153}
3154
3155
3156IEM_DECL_IMPL_DEF(void, iemAImpl_fist_r80_to_i16,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3157 int16_t *pi16Val, PCRTFLOAT80U pr80Val))
3158{
3159 RT_NOREF(pFpuState, pu16FSW, pi16Val, pr80Val);
3160 AssertReleaseFailed();
3161}
3162
3163
3164IEM_DECL_IMPL_DEF(void, iemAImpl_fist_r80_to_i32,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3165 int32_t *pi32Val, PCRTFLOAT80U pr80Val))
3166{
3167 RT_NOREF(pFpuState, pu16FSW, pi32Val, pr80Val);
3168 AssertReleaseFailed();
3169}
3170
3171
3172IEM_DECL_IMPL_DEF(void, iemAImpl_fist_r80_to_i64,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3173 int64_t *pi64Val, PCRTFLOAT80U pr80Val))
3174{
3175 RT_NOREF(pFpuState, pu16FSW, pi64Val, pr80Val);
3176 AssertReleaseFailed();
3177}
3178
3179
IEM_DECL_IMPL_DEF(void, iemAImpl_fistt_r80_to_i16,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                   int16_t *pi16Val, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pu16FSW, pi16Val, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fistt_r80_to_i32,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                   int32_t *pi32Val, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pu16FSW, pi32Val, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fistt_r80_to_i64,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                   int64_t *pi64Val, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pu16FSW, pi64Val, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fisub_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fisub_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fisubr_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fisubr_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r32_to_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT32U pr32Val))
{
    RT_NOREF(pFpuState, pFpuRes, pr32Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r64_to_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT64U pr64Val))
{
    RT_NOREF(pFpuState, pFpuRes, pr64Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fld1,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    RT_NOREF(pFpuState, pFpuRes);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2e,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    RT_NOREF(pFpuState, pFpuRes);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2t,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    RT_NOREF(pFpuState, pFpuRes);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fldlg2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    RT_NOREF(pFpuState, pFpuRes);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fldln2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    RT_NOREF(pFpuState, pFpuRes);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fldpi,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    RT_NOREF(pFpuState, pFpuRes);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fldz,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    RT_NOREF(pFpuState, pFpuRes);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fprem_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fprem1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_frndint_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fscale_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsqrt_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r32,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                 PRTFLOAT32U pr32Dst, PCRTFLOAT80U pr80Src))
{
    RT_NOREF(pFpuState, pu16FSW, pr32Dst, pr80Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r64,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                 PRTFLOAT64U pr64Dst, PCRTFLOAT80U pr80Src))
{
    RT_NOREF(pFpuState, pu16FSW, pr64Dst, pr80Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                 PRTFLOAT80U pr80Dst, PCRTFLOAT80U pr80Src))
{
    RT_NOREF(pFpuState, pu16FSW, pr80Dst, pr80Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_d80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                 PRTPBCD80U pd80Dst, PCRTFLOAT80U pr80Src))
{
    RT_NOREF(pFpuState, pu16FSW, pd80Dst, pr80Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_ftst_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pu16Fsw, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fucom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}

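/** @note Unlike FUCOM above, FUCOMI reports its result via EFLAGS (ZF, PF and
 *        CF), which is presumably why this helper returns a uint32_t flags
 *        value instead of relying on the FSW alone. */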
IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fucomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
                                                        PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pr80Val2);
    AssertReleaseFailed();
    return 0;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fxam_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pu16Fsw, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fxtract_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                     PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}

#endif /* IEM_WITHOUT_ASSEMBLY */


/*********************************************************************************************************************************
*   MMX, SSE & AVX                                                                                                               *
*********************************************************************************************************************************/

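/** MOVSLDUP (SSE3): Duplicates the even dwords of the source into dword
 *  pairs, i.e. dst[0] = dst[1] = src[0] and dst[2] = dst[3] = src[2]. */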
IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
{
    RT_NOREF(pFpuState);
    puDst->au32[0] = puSrc->au32[0];
    puDst->au32[1] = puSrc->au32[0];
    puDst->au32[2] = puSrc->au32[2];
    puDst->au32[3] = puSrc->au32[2];
}

#ifdef IEM_WITH_VEX

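/** VMOVSLDUP (256-bit): The same even-dword duplication applied independently
 *  to the low (XMM) and high (YmmHi) 128-bit lanes; register source form
 *  first, memory source form below. */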
IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
{
    pXState->x87.aXMM[iYRegDst].au32[0]       = pXState->x87.aXMM[iYRegSrc].au32[0];
    pXState->x87.aXMM[iYRegDst].au32[1]       = pXState->x87.aXMM[iYRegSrc].au32[0];
    pXState->x87.aXMM[iYRegDst].au32[2]       = pXState->x87.aXMM[iYRegSrc].au32[2];
    pXState->x87.aXMM[iYRegDst].au32[3]       = pXState->x87.aXMM[iYRegSrc].au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
}


IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
{
    pXState->x87.aXMM[iYRegDst].au32[0]       = pSrc->au32[0];
    pXState->x87.aXMM[iYRegDst].au32[1]       = pSrc->au32[0];
    pXState->x87.aXMM[iYRegDst].au32[2]       = pSrc->au32[2];
    pXState->x87.aXMM[iYRegDst].au32[3]       = pSrc->au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
}

#endif /* IEM_WITH_VEX */

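/** MOVSHDUP (SSE3): Duplicates the odd dwords of the source into dword pairs,
 *  i.e. dst[0] = dst[1] = src[1] and dst[2] = dst[3] = src[3]. */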
IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
{
    RT_NOREF(pFpuState);
    puDst->au32[0] = puSrc->au32[1];
    puDst->au32[1] = puSrc->au32[1];
    puDst->au32[2] = puSrc->au32[3];
    puDst->au32[3] = puSrc->au32[3];
}

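/** MOVDDUP (SSE3): Duplicates the low qword of the source into both qwords of
 *  the destination. */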
IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
{
    RT_NOREF(pFpuState);
    puDst->au64[0] = uSrc;
    puDst->au64[1] = uSrc;
}

#ifdef IEM_WITH_VEX

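/** VMOVDDUP (256-bit): Duplicates the even (low) qword of each 128-bit lane
 *  into both qwords of that lane. */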
IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
{
    pXState->x87.aXMM[iYRegDst].au64[0]       = pXState->x87.aXMM[iYRegSrc].au64[0];
    pXState->x87.aXMM[iYRegDst].au64[1]       = pXState->x87.aXMM[iYRegSrc].au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
}


IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
{
    pXState->x87.aXMM[iYRegDst].au64[0]       = pSrc->au64[0];
    pXState->x87.aXMM[iYRegDst].au64[1]       = pSrc->au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
}

#endif /* IEM_WITH_VEX */

#ifdef IEM_WITHOUT_ASSEMBLY

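/*
 * The integer MMX/SSE helpers below are not yet implemented in the portable
 * C build and trap hard if ever reached.  For reference, a portable PCMPEQB
 * could look roughly like this sketch (per-byte compare, all-ones bytes on
 * equality, all-zero bytes otherwise):
 *
 *     uint64_t const uSrc1   = *pu64Dst;
 *     uint64_t const uSrc2   = *pu64Src;
 *     uint64_t       uResult = 0;
 *     for (unsigned iByte = 0; iByte < 8; iByte++)
 *         if ((uint8_t)(uSrc1 >> (iByte * 8)) == (uint8_t)(uSrc2 >> (iByte * 8)))
 *             uResult |= (uint64_t)0xff << (iByte * 8);
 *     *pu64Dst = uResult;
 */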
IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}

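/** PMOVMSKB: Gathers the most significant bit of each source byte into the
 *  low bits of the destination general purpose register. */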
IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u128,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu128Src);
    AssertReleaseFailed();
}

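/** @note For the PSHUFxx helpers below, bEvil is the instruction's imm8 order
 *        operand: each 2-bit field selects which source word (PSHUFW, PSHUFHW,
 *        PSHUFLW) or dword (PSHUFD) is placed in the corresponding destination
 *        element. */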
IEM_DECL_IMPL_DEF(void, iemAImpl_pshufw,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufhw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshuflw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufd,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}

/* PUNPCKHxxx */
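/* These interleave the high halves of the destination and source operands,
   e.g. PUNPCKHBW alternates bytes from the upper halves of both. */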

IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}

/* PUNPCKLxxx */
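/* These interleave the low halves of the destination and source operands;
   note the narrower source pointer types, as only the low half of the source
   is read. */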

IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}

#endif /* IEM_WITHOUT_ASSEMBLY */