source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp@94447

Last change on this file since 94447 was 94447, checked in by vboxsync, 3 years ago

VMM/IEM: C implementation of fabs, fchs, ftst, and fxam. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 183.0 KB
/* $Id: IEMAllAImplC.cpp 94447 2022-04-01 18:54:16Z vboxsync $ */
/** @file
 * IEM - Instruction Implementation in Assembly, portable C variant.
 */

/*
 * Copyright (C) 2011-2022 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include "IEMInternal.h"
#include <VBox/vmm/vmcc.h>
#include <iprt/errcore.h>
#include <iprt/x86.h>
#include <iprt/uint128.h>


/*********************************************************************************************************************************
*   Defined Constants And Macros                                                                                                 *
*********************************************************************************************************************************/
/** @def IEM_WITHOUT_ASSEMBLY
 * Enables all the code in this file.
 */
#if !defined(IEM_WITHOUT_ASSEMBLY)
# if defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
#  define IEM_WITHOUT_ASSEMBLY
# endif
#endif
/* IEM_WITH_ASSEMBLY trumps IEM_WITHOUT_ASSEMBLY for tstIEMAImplAsm purposes. */
#ifdef IEM_WITH_ASSEMBLY
# undef IEM_WITHOUT_ASSEMBLY
#endif

/**
 * Calculates the signed flag value given a result and its bit width.
 *
 * The signed flag (SF) is a duplication of the most significant bit in the
 * result.
 *
 * @returns X86_EFL_SF or 0.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )

/**
 * Calculates the zero flag value given a result.
 *
 * The zero flag (ZF) indicates whether the result is zero or not.
 *
 * @returns X86_EFL_ZF or 0.
 * @param   a_uResult       Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )

/**
 * Extracts the OF flag from an OF calculation result.
 *
 * These are typically used by concatenating with a bit count.  The problem is
 * that 8-bit values need shifting in the opposite direction from the others.
 */
#define X86_EFL_GET_OF_8(a_uValue)  (((uint32_t)(a_uValue) << (X86_EFL_OF_BIT - 8 + 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_16(a_uValue) ((uint32_t)((a_uValue) >> (16 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_32(a_uValue) ((uint32_t)((a_uValue) >> (32 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_64(a_uValue) ((uint32_t)((a_uValue) >> (64 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)

/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after an arithmetic op.
 *
 * @returns Status bits.
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_uDst          The original destination value (for AF calc).
 * @param   a_uSrc          The source value (for AF calc).
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_CfExpr        Bool expression for the carry flag (CF).
 * @param   a_uSrcOf        The a_uSrc value to use for overflow calculation.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_uSrcOf) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        \
        /* Overflow during ADDition happens when both inputs have the same sign \
           bit value and the result has a different sign bit value. \
           \
           Since subtraction can be rewritten as addition (2 - 1 == 2 + -1), it \
           follows that for SUBtraction the sign bit must differ between the \
           two inputs and the result's sign bit must differ from that of the \
           first input.  Note! Must xor with the sign bit to convert, not do \
           (0 - a_uSrc). \
           \
           See also: http://teaching.idallen.com/dat2343/10f/notes/040_overflow.txt */ \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth( ( ((uint ## a_cBitsWidth ## _t)~((a_uDst) ^ (a_uSrcOf))) \
                                                     & RT_BIT_64(a_cBitsWidth - 1)) \
                                                   & ((a_uResult) ^ (a_uDst)) ); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
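
/*
 * A quick worked example of the macro above (a sketch, not used by the code):
 * 8-bit ADD 0x7f + 0x01 = 0x80.  Both inputs have a clear sign bit while the
 * result has it set, so OF is raised; there is no unsigned wrap-around, so CF
 * stays clear; bit 4 changes across the low nibble boundary, so AF is set.
 */
#if 0 /* illustration only, hypothetical helper */
static void iemIllustrateAddFlags(void)
{
    uint8_t  uDst    = 0x7f;
    uint32_t fEFlags = 0;
    iemAImpl_add_u8(&uDst, 0x01, &fEFlags);
    /* uDst == 0x80; OF, SF and AF are set; CF, ZF and PF are clear. */
}
#endif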

/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
 *
 * CF and OF are defined to be 0 by logical operations.  AF on the other hand
 * is undefined.  We do not set AF, as that seems to make the most sense (which
 * probably makes it the most wrong in real life).
 *
 * @returns Status bits.
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_fExtra        Additional bits to set.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= (a_fExtra); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
/**
 * Parity calculation table.
 *
 * This is also used by iemAllAImpl.asm.
 *
 * The generator code:
 * @code
 * #include <stdio.h>
 *
 * int main()
 * {
 *     unsigned b;
 *     for (b = 0; b < 256; b++)
 *     {
 *         int cOnes = ( b       & 1)
 *                   + ((b >> 1) & 1)
 *                   + ((b >> 2) & 1)
 *                   + ((b >> 3) & 1)
 *                   + ((b >> 4) & 1)
 *                   + ((b >> 5) & 1)
 *                   + ((b >> 6) & 1)
 *                   + ((b >> 7) & 1);
 *         printf("    /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
 *                b,
 *                (b >> 7) & 1,
 *                (b >> 6) & 1,
 *                (b >> 5) & 1,
 *                (b >> 4) & 1,
 *                (b >> 3) & 1,
 *                (b >> 2) & 1,
 *                (b >> 1) & 1,
 *                b & 1,
 *                cOnes & 1 ? "0" : "X86_EFL_PF");
 *     }
 *     return 0;
 * }
 * @endcode
 */
uint8_t const g_afParity[256] =
{
    /* 0x00 = 00000000b */ X86_EFL_PF,
    /* 0x01 = 00000001b */ 0,
    /* 0x02 = 00000010b */ 0,
    /* 0x03 = 00000011b */ X86_EFL_PF,
    /* 0x04 = 00000100b */ 0,
    /* 0x05 = 00000101b */ X86_EFL_PF,
    /* 0x06 = 00000110b */ X86_EFL_PF,
    /* 0x07 = 00000111b */ 0,
    /* 0x08 = 00001000b */ 0,
    /* 0x09 = 00001001b */ X86_EFL_PF,
    /* 0x0a = 00001010b */ X86_EFL_PF,
    /* 0x0b = 00001011b */ 0,
    /* 0x0c = 00001100b */ X86_EFL_PF,
    /* 0x0d = 00001101b */ 0,
    /* 0x0e = 00001110b */ 0,
    /* 0x0f = 00001111b */ X86_EFL_PF,
    /* 0x10 = 00010000b */ 0,
    /* 0x11 = 00010001b */ X86_EFL_PF,
    /* 0x12 = 00010010b */ X86_EFL_PF,
    /* 0x13 = 00010011b */ 0,
    /* 0x14 = 00010100b */ X86_EFL_PF,
    /* 0x15 = 00010101b */ 0,
    /* 0x16 = 00010110b */ 0,
    /* 0x17 = 00010111b */ X86_EFL_PF,
    /* 0x18 = 00011000b */ X86_EFL_PF,
    /* 0x19 = 00011001b */ 0,
    /* 0x1a = 00011010b */ 0,
    /* 0x1b = 00011011b */ X86_EFL_PF,
    /* 0x1c = 00011100b */ 0,
    /* 0x1d = 00011101b */ X86_EFL_PF,
    /* 0x1e = 00011110b */ X86_EFL_PF,
    /* 0x1f = 00011111b */ 0,
    /* 0x20 = 00100000b */ 0,
    /* 0x21 = 00100001b */ X86_EFL_PF,
    /* 0x22 = 00100010b */ X86_EFL_PF,
    /* 0x23 = 00100011b */ 0,
    /* 0x24 = 00100100b */ X86_EFL_PF,
    /* 0x25 = 00100101b */ 0,
    /* 0x26 = 00100110b */ 0,
    /* 0x27 = 00100111b */ X86_EFL_PF,
    /* 0x28 = 00101000b */ X86_EFL_PF,
    /* 0x29 = 00101001b */ 0,
    /* 0x2a = 00101010b */ 0,
    /* 0x2b = 00101011b */ X86_EFL_PF,
    /* 0x2c = 00101100b */ 0,
    /* 0x2d = 00101101b */ X86_EFL_PF,
    /* 0x2e = 00101110b */ X86_EFL_PF,
    /* 0x2f = 00101111b */ 0,
    /* 0x30 = 00110000b */ X86_EFL_PF,
    /* 0x31 = 00110001b */ 0,
    /* 0x32 = 00110010b */ 0,
    /* 0x33 = 00110011b */ X86_EFL_PF,
    /* 0x34 = 00110100b */ 0,
    /* 0x35 = 00110101b */ X86_EFL_PF,
    /* 0x36 = 00110110b */ X86_EFL_PF,
    /* 0x37 = 00110111b */ 0,
    /* 0x38 = 00111000b */ 0,
    /* 0x39 = 00111001b */ X86_EFL_PF,
    /* 0x3a = 00111010b */ X86_EFL_PF,
    /* 0x3b = 00111011b */ 0,
    /* 0x3c = 00111100b */ X86_EFL_PF,
    /* 0x3d = 00111101b */ 0,
    /* 0x3e = 00111110b */ 0,
    /* 0x3f = 00111111b */ X86_EFL_PF,
    /* 0x40 = 01000000b */ 0,
    /* 0x41 = 01000001b */ X86_EFL_PF,
    /* 0x42 = 01000010b */ X86_EFL_PF,
    /* 0x43 = 01000011b */ 0,
    /* 0x44 = 01000100b */ X86_EFL_PF,
    /* 0x45 = 01000101b */ 0,
    /* 0x46 = 01000110b */ 0,
    /* 0x47 = 01000111b */ X86_EFL_PF,
    /* 0x48 = 01001000b */ X86_EFL_PF,
    /* 0x49 = 01001001b */ 0,
    /* 0x4a = 01001010b */ 0,
    /* 0x4b = 01001011b */ X86_EFL_PF,
    /* 0x4c = 01001100b */ 0,
    /* 0x4d = 01001101b */ X86_EFL_PF,
    /* 0x4e = 01001110b */ X86_EFL_PF,
    /* 0x4f = 01001111b */ 0,
    /* 0x50 = 01010000b */ X86_EFL_PF,
    /* 0x51 = 01010001b */ 0,
    /* 0x52 = 01010010b */ 0,
    /* 0x53 = 01010011b */ X86_EFL_PF,
    /* 0x54 = 01010100b */ 0,
    /* 0x55 = 01010101b */ X86_EFL_PF,
    /* 0x56 = 01010110b */ X86_EFL_PF,
    /* 0x57 = 01010111b */ 0,
    /* 0x58 = 01011000b */ 0,
    /* 0x59 = 01011001b */ X86_EFL_PF,
    /* 0x5a = 01011010b */ X86_EFL_PF,
    /* 0x5b = 01011011b */ 0,
    /* 0x5c = 01011100b */ X86_EFL_PF,
    /* 0x5d = 01011101b */ 0,
    /* 0x5e = 01011110b */ 0,
    /* 0x5f = 01011111b */ X86_EFL_PF,
    /* 0x60 = 01100000b */ X86_EFL_PF,
    /* 0x61 = 01100001b */ 0,
    /* 0x62 = 01100010b */ 0,
    /* 0x63 = 01100011b */ X86_EFL_PF,
    /* 0x64 = 01100100b */ 0,
    /* 0x65 = 01100101b */ X86_EFL_PF,
    /* 0x66 = 01100110b */ X86_EFL_PF,
    /* 0x67 = 01100111b */ 0,
    /* 0x68 = 01101000b */ 0,
    /* 0x69 = 01101001b */ X86_EFL_PF,
    /* 0x6a = 01101010b */ X86_EFL_PF,
    /* 0x6b = 01101011b */ 0,
    /* 0x6c = 01101100b */ X86_EFL_PF,
    /* 0x6d = 01101101b */ 0,
    /* 0x6e = 01101110b */ 0,
    /* 0x6f = 01101111b */ X86_EFL_PF,
    /* 0x70 = 01110000b */ 0,
    /* 0x71 = 01110001b */ X86_EFL_PF,
    /* 0x72 = 01110010b */ X86_EFL_PF,
    /* 0x73 = 01110011b */ 0,
    /* 0x74 = 01110100b */ X86_EFL_PF,
    /* 0x75 = 01110101b */ 0,
    /* 0x76 = 01110110b */ 0,
    /* 0x77 = 01110111b */ X86_EFL_PF,
    /* 0x78 = 01111000b */ X86_EFL_PF,
    /* 0x79 = 01111001b */ 0,
    /* 0x7a = 01111010b */ 0,
    /* 0x7b = 01111011b */ X86_EFL_PF,
    /* 0x7c = 01111100b */ 0,
    /* 0x7d = 01111101b */ X86_EFL_PF,
    /* 0x7e = 01111110b */ X86_EFL_PF,
    /* 0x7f = 01111111b */ 0,
    /* 0x80 = 10000000b */ 0,
    /* 0x81 = 10000001b */ X86_EFL_PF,
    /* 0x82 = 10000010b */ X86_EFL_PF,
    /* 0x83 = 10000011b */ 0,
    /* 0x84 = 10000100b */ X86_EFL_PF,
    /* 0x85 = 10000101b */ 0,
    /* 0x86 = 10000110b */ 0,
    /* 0x87 = 10000111b */ X86_EFL_PF,
    /* 0x88 = 10001000b */ X86_EFL_PF,
    /* 0x89 = 10001001b */ 0,
    /* 0x8a = 10001010b */ 0,
    /* 0x8b = 10001011b */ X86_EFL_PF,
    /* 0x8c = 10001100b */ 0,
    /* 0x8d = 10001101b */ X86_EFL_PF,
    /* 0x8e = 10001110b */ X86_EFL_PF,
    /* 0x8f = 10001111b */ 0,
    /* 0x90 = 10010000b */ X86_EFL_PF,
    /* 0x91 = 10010001b */ 0,
    /* 0x92 = 10010010b */ 0,
    /* 0x93 = 10010011b */ X86_EFL_PF,
    /* 0x94 = 10010100b */ 0,
    /* 0x95 = 10010101b */ X86_EFL_PF,
    /* 0x96 = 10010110b */ X86_EFL_PF,
    /* 0x97 = 10010111b */ 0,
    /* 0x98 = 10011000b */ 0,
    /* 0x99 = 10011001b */ X86_EFL_PF,
    /* 0x9a = 10011010b */ X86_EFL_PF,
    /* 0x9b = 10011011b */ 0,
    /* 0x9c = 10011100b */ X86_EFL_PF,
    /* 0x9d = 10011101b */ 0,
    /* 0x9e = 10011110b */ 0,
    /* 0x9f = 10011111b */ X86_EFL_PF,
    /* 0xa0 = 10100000b */ X86_EFL_PF,
    /* 0xa1 = 10100001b */ 0,
    /* 0xa2 = 10100010b */ 0,
    /* 0xa3 = 10100011b */ X86_EFL_PF,
    /* 0xa4 = 10100100b */ 0,
    /* 0xa5 = 10100101b */ X86_EFL_PF,
    /* 0xa6 = 10100110b */ X86_EFL_PF,
    /* 0xa7 = 10100111b */ 0,
    /* 0xa8 = 10101000b */ 0,
    /* 0xa9 = 10101001b */ X86_EFL_PF,
    /* 0xaa = 10101010b */ X86_EFL_PF,
    /* 0xab = 10101011b */ 0,
    /* 0xac = 10101100b */ X86_EFL_PF,
    /* 0xad = 10101101b */ 0,
    /* 0xae = 10101110b */ 0,
    /* 0xaf = 10101111b */ X86_EFL_PF,
    /* 0xb0 = 10110000b */ 0,
    /* 0xb1 = 10110001b */ X86_EFL_PF,
    /* 0xb2 = 10110010b */ X86_EFL_PF,
    /* 0xb3 = 10110011b */ 0,
    /* 0xb4 = 10110100b */ X86_EFL_PF,
    /* 0xb5 = 10110101b */ 0,
    /* 0xb6 = 10110110b */ 0,
    /* 0xb7 = 10110111b */ X86_EFL_PF,
    /* 0xb8 = 10111000b */ X86_EFL_PF,
    /* 0xb9 = 10111001b */ 0,
    /* 0xba = 10111010b */ 0,
    /* 0xbb = 10111011b */ X86_EFL_PF,
    /* 0xbc = 10111100b */ 0,
    /* 0xbd = 10111101b */ X86_EFL_PF,
    /* 0xbe = 10111110b */ X86_EFL_PF,
    /* 0xbf = 10111111b */ 0,
    /* 0xc0 = 11000000b */ X86_EFL_PF,
    /* 0xc1 = 11000001b */ 0,
    /* 0xc2 = 11000010b */ 0,
    /* 0xc3 = 11000011b */ X86_EFL_PF,
    /* 0xc4 = 11000100b */ 0,
    /* 0xc5 = 11000101b */ X86_EFL_PF,
    /* 0xc6 = 11000110b */ X86_EFL_PF,
    /* 0xc7 = 11000111b */ 0,
    /* 0xc8 = 11001000b */ 0,
    /* 0xc9 = 11001001b */ X86_EFL_PF,
    /* 0xca = 11001010b */ X86_EFL_PF,
    /* 0xcb = 11001011b */ 0,
    /* 0xcc = 11001100b */ X86_EFL_PF,
    /* 0xcd = 11001101b */ 0,
    /* 0xce = 11001110b */ 0,
    /* 0xcf = 11001111b */ X86_EFL_PF,
    /* 0xd0 = 11010000b */ 0,
    /* 0xd1 = 11010001b */ X86_EFL_PF,
    /* 0xd2 = 11010010b */ X86_EFL_PF,
    /* 0xd3 = 11010011b */ 0,
    /* 0xd4 = 11010100b */ X86_EFL_PF,
    /* 0xd5 = 11010101b */ 0,
    /* 0xd6 = 11010110b */ 0,
    /* 0xd7 = 11010111b */ X86_EFL_PF,
    /* 0xd8 = 11011000b */ X86_EFL_PF,
    /* 0xd9 = 11011001b */ 0,
    /* 0xda = 11011010b */ 0,
    /* 0xdb = 11011011b */ X86_EFL_PF,
    /* 0xdc = 11011100b */ 0,
    /* 0xdd = 11011101b */ X86_EFL_PF,
    /* 0xde = 11011110b */ X86_EFL_PF,
    /* 0xdf = 11011111b */ 0,
    /* 0xe0 = 11100000b */ 0,
    /* 0xe1 = 11100001b */ X86_EFL_PF,
    /* 0xe2 = 11100010b */ X86_EFL_PF,
    /* 0xe3 = 11100011b */ 0,
    /* 0xe4 = 11100100b */ X86_EFL_PF,
    /* 0xe5 = 11100101b */ 0,
    /* 0xe6 = 11100110b */ 0,
    /* 0xe7 = 11100111b */ X86_EFL_PF,
    /* 0xe8 = 11101000b */ X86_EFL_PF,
    /* 0xe9 = 11101001b */ 0,
    /* 0xea = 11101010b */ 0,
    /* 0xeb = 11101011b */ X86_EFL_PF,
    /* 0xec = 11101100b */ 0,
    /* 0xed = 11101101b */ X86_EFL_PF,
    /* 0xee = 11101110b */ X86_EFL_PF,
    /* 0xef = 11101111b */ 0,
    /* 0xf0 = 11110000b */ X86_EFL_PF,
    /* 0xf1 = 11110001b */ 0,
    /* 0xf2 = 11110010b */ 0,
    /* 0xf3 = 11110011b */ X86_EFL_PF,
    /* 0xf4 = 11110100b */ 0,
    /* 0xf5 = 11110101b */ X86_EFL_PF,
    /* 0xf6 = 11110110b */ X86_EFL_PF,
    /* 0xf7 = 11110111b */ 0,
    /* 0xf8 = 11111000b */ 0,
    /* 0xf9 = 11111001b */ X86_EFL_PF,
    /* 0xfa = 11111010b */ X86_EFL_PF,
    /* 0xfb = 11111011b */ 0,
    /* 0xfc = 11111100b */ X86_EFL_PF,
    /* 0xfd = 11111101b */ 0,
    /* 0xfe = 11111110b */ 0,
    /* 0xff = 11111111b */ X86_EFL_PF,
};


/*
 * There are a few 64-bit on 32-bit things we'd rather do in C.  Actually, doing
 * it all in C is probably safer for now; we can optimize what's necessary later.
 */
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)


/*********************************************************************************************************************************
*   Binary Operations                                                                                                            *
*********************************************************************************************************************************/

/*
 * ADD
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, uSrc);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, uSrc);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, uSrc);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, uSrc);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * ADC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, uSrc);
    }
}
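
/*
 * A short sketch of why ADC uses '<=' where ADD uses '<' (illustration only):
 * with a carry-in of 1 the sum wraps not only when it ends up smaller than
 * uDst but also when it merely equals it, which happens exactly for
 * uSrc == ~0.
 */
#if 0 /* illustration only, hypothetical helper */
static void iemIllustrateAdcCarry(void)
{
    uint64_t uDst    = 5;
    uint32_t fEFlags = X86_EFL_CF;                 /* carry-in set */
    iemAImpl_adc_u64(&uDst, UINT64_MAX, &fEFlags); /* 5 + 0xff..ff + 1 wraps back to 5 */
    /* uResult == uDst, so CF is correctly set again. */
}
#endif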

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, uSrc);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, uSrc);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, uSrc);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SUB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst < uSrc, uSrc ^ RT_BIT_64(63));
}
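
/*
 * A small sketch of the 'uSrc ^ RT_BIT_64(63)' trick above (illustration
 * only): flipping the source's sign bit lets the ADD overflow rule double as
 * the SUB one, and unlike negating (0 - uSrc) it has no corner case for the
 * minimum signed value.
 */
#if 0 /* illustration only, hypothetical helper */
static void iemIllustrateSubOverflow(void)
{
    uint8_t  uDst    = 0x80;                /* -128 */
    uint32_t fEFlags = 0;
    iemAImpl_sub_u8(&uDst, 0x01, &fEFlags); /* -128 - 1 overflows to +127 */
    /* uDst == 0x7f; OF is set, CF is clear (no unsigned borrow). */
}
#endif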

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst < uSrc, uSrc ^ RT_BIT_32(31));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst < uSrc, uSrc ^ (uint16_t)0x8000);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst < uSrc, uSrc ^ (uint8_t)0x80);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SBB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst <= uSrc, uSrc ^ RT_BIT_64(63));
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst <= uSrc, uSrc ^ RT_BIT_32(31));
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst <= uSrc, uSrc ^ (uint16_t)0x8000);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst <= uSrc, uSrc ^ (uint8_t)0x80);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * OR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * XOR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * AND
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * CMP
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDstTmp = *puDst;
    iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDstTmp = *puDst;
    iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDstTmp = *puDst;
    iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDstTmp = *puDst;
    iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * TEST
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * LOCK prefixed variants of the above
 */

/** Locked binary operand operation (width-generic). */
# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    do { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp    = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, uSrc, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)


#define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint ## a_cBitsWidth ## _t uSrc, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
    }

EMIT_LOCKED_BIN_OP(add, 64)
EMIT_LOCKED_BIN_OP(adc, 64)
EMIT_LOCKED_BIN_OP(sub, 64)
EMIT_LOCKED_BIN_OP(sbb, 64)
EMIT_LOCKED_BIN_OP(or, 64)
EMIT_LOCKED_BIN_OP(xor, 64)
EMIT_LOCKED_BIN_OP(and, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(add, 32)
EMIT_LOCKED_BIN_OP(adc, 32)
EMIT_LOCKED_BIN_OP(sub, 32)
EMIT_LOCKED_BIN_OP(sbb, 32)
EMIT_LOCKED_BIN_OP(or, 32)
EMIT_LOCKED_BIN_OP(xor, 32)
EMIT_LOCKED_BIN_OP(and, 32)

EMIT_LOCKED_BIN_OP(add, 16)
EMIT_LOCKED_BIN_OP(adc, 16)
EMIT_LOCKED_BIN_OP(sub, 16)
EMIT_LOCKED_BIN_OP(sbb, 16)
EMIT_LOCKED_BIN_OP(or, 16)
EMIT_LOCKED_BIN_OP(xor, 16)
EMIT_LOCKED_BIN_OP(and, 16)

EMIT_LOCKED_BIN_OP(add, 8)
EMIT_LOCKED_BIN_OP(adc, 8)
EMIT_LOCKED_BIN_OP(sub, 8)
EMIT_LOCKED_BIN_OP(sbb, 8)
EMIT_LOCKED_BIN_OP(or, 8)
EMIT_LOCKED_BIN_OP(xor, 8)
EMIT_LOCKED_BIN_OP(and, 8)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
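
/*
 * What the expansions above provide, sketched for the 64-bit ADD variant
 * (illustration only): the compare-and-exchange retry loop makes the helper
 * appear atomic without a global lock, recomputing result and flags against a
 * fresh uOld until the store succeeds.
 */
#if 0 /* illustration only */
static void iemIllustrateLockedAdd(uint64_t *puShared)
{
    uint32_t fEFlags = 0;
    iemAImpl_add_u64_locked(puShared, 42, &fEFlags); /* atomic *puShared += 42 */
}
#endif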


/*
 * Bit operations (same signature as above).
 */

/*
 * BT
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 64);
    uint64_t uDst = *puDst;
    if (uDst & RT_BIT_64(uSrc))
        *pfEFlags |= X86_EFL_CF;
    else
        *pfEFlags &= ~X86_EFL_CF;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 32);
    uint32_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        *pfEFlags |= X86_EFL_CF;
    else
        *pfEFlags &= ~X86_EFL_CF;
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 16);
    uint16_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        *pfEFlags |= X86_EFL_CF;
    else
        *pfEFlags &= ~X86_EFL_CF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
        *pfEFlags &= ~X86_EFL_CF;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
        *pfEFlags &= ~X86_EFL_CF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
        *pfEFlags &= ~X86_EFL_CF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTS
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst = *puDst;
    if (uDst & fMask)
        *pfEFlags |= X86_EFL_CF;
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst = *puDst;
    if (uDst & fMask)
        *pfEFlags |= X86_EFL_CF;
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst = *puDst;
    if (uDst & fMask)
        *pfEFlags |= X86_EFL_CF;
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


EMIT_LOCKED_BIN_OP(btc, 64)
EMIT_LOCKED_BIN_OP(btr, 64)
EMIT_LOCKED_BIN_OP(bts, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(btc, 32)
EMIT_LOCKED_BIN_OP(btr, 32)
EMIT_LOCKED_BIN_OP(bts, 32)

EMIT_LOCKED_BIN_OP(btc, 16)
EMIT_LOCKED_BIN_OP(btr, 16)
EMIT_LOCKED_BIN_OP(bts, 16)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * Helpers for BSR and BSF.
 *
 * Note! "undefined" flags: OF, SF, AF, PF, CF.
 *       Intel behavior modelled on 10980XE, AMD on 3990X.  Other
 *       microarchitectures may produce different results (see
 *       https://www.sandpile.org/x86/flags.htm), but we restrict ourselves to
 *       emulating these recent ones.
 */
#define SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, a_iBit) do { \
        unsigned iBit = (a_iBit); \
        uint32_t fEfl = *pfEFlags & ~(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF); \
        if (iBit) \
        { \
            *puDst = --iBit; \
            fEfl  |= g_afParity[iBit]; \
        } \
        else \
            fEfl |= X86_EFL_ZF | X86_EFL_PF; \
        *pfEFlags = fEfl; \
    } while (0)
#define SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, a_iBit) do { \
        unsigned const iBit = (a_iBit); \
        if (iBit) \
        { \
            *puDst = iBit - 1; \
            *pfEFlags &= ~X86_EFL_ZF; \
        } \
        else \
            *pfEFlags |= X86_EFL_ZF; \
    } while (0)
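
/*
 * The practical difference between the two helpers above, sketched for a zero
 * source where only ZF is architecturally interesting (illustration only):
 * the Intel flavour clears the other status flags and computes PF, while the
 * AMD flavour touches nothing but ZF.
 */
#if 0 /* illustration only, hypothetical helper */
static void iemIllustrateBsfZeroInput(void)
{
    uint64_t uDst   = 42;           /* left untouched for a zero source */
    uint32_t fIntel = X86_EFL_CF;
    uint32_t fAmd   = X86_EFL_CF;
    iemAImpl_bsf_u64_intel(&uDst, 0, &fIntel); /* ZF and PF set, CF cleared */
    iemAImpl_bsf_u64_amd(&uDst, 0, &fAmd);     /* ZF set, CF left alone */
}
#endif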


/*
 * BSF - first (least significant) bit set
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * BSR - last (most significant) bit set
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XCHG
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_locked,(uint64_t *puMem, uint64_t *puReg))
{
#if ARCH_BITS >= 64
    *puReg = ASMAtomicXchgU64(puMem, *puReg);
#else
    uint64_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
#endif
}
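
/*
 * On 32-bit hosts there is no native 64-bit atomic exchange, so the #else
 * branch above loops on a 64-bit compare-and-exchange: each failed attempt
 * refreshes uOldMem with the value currently in memory, and once the swap
 * succeeds the consistent old value is handed to the register.
 */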

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_locked,(uint32_t *puMem, uint32_t *puReg))
{
    *puReg = ASMAtomicXchgU32(puMem, *puReg);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_locked,(uint16_t *puMem, uint16_t *puReg))
{
    *puReg = ASMAtomicXchgU16(puMem, *puReg);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_locked,(uint8_t *puMem, uint8_t *puReg))
{
    *puReg = ASMAtomicXchgU8(puMem, *puReg);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/* Unlocked variants for fDisregardLock mode: */

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_unlocked,(uint64_t *puMem, uint64_t *puReg))
{
    uint64_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_unlocked,(uint32_t *puMem, uint32_t *puReg))
{
    uint32_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_unlocked,(uint16_t *puMem, uint16_t *puReg))
{
    uint16_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_unlocked,(uint8_t *puMem, uint8_t *puReg))
{
    uint8_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XADD and LOCK XADD.
 */
#define EMIT_XADD(a_cBitsWidth, a_Type) \
IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
{ \
    a_Type uDst    = *puDst; \
    a_Type uResult = uDst; \
    iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, pfEFlags); \
    *puDst = uResult; \
    *puReg = uDst; \
} \
\
IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth ## _locked,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
{ \
    a_Type   uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
    a_Type   uResult; \
    uint32_t fEflTmp; \
    do \
    { \
        uResult = uOld; \
        fEflTmp = *pfEFlags; \
        iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, &fEflTmp); \
    } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uResult, uOld, &uOld)); \
    *puReg    = uOld; \
    *pfEFlags = fEflTmp; \
}
EMIT_XADD(64, uint64_t)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_XADD(32, uint32_t)
EMIT_XADD(16, uint16_t)
EMIT_XADD(8, uint8_t)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
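
/*
 * Net effect of the locked XADD above, sketched for the 32-bit variant
 * (illustration only): the destination is atomically incremented and the
 * register operand receives the pre-increment value, i.e. a fetch-and-add.
 */
#if 0 /* illustration only, hypothetical helper */
static uint32_t iemIllustrateFetchAdd(uint32_t *puCounter)
{
    uint32_t uAddend = 1;
    uint32_t fEFlags = 0;
    iemAImpl_xadd_u32_locked(puCounter, &uAddend, &fEFlags);
    return uAddend; /* the counter value from before the increment */
}
#endif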

#endif

/*
 * CMPXCHG, CMPXCHG8B, CMPXCHG16B
 *
 * Note! We don't have non-locking/atomic cmpxchg primitives, so all cmpxchg
 *       instructions are emulated as locked.
 */
#if defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8_locked, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
{
    uint8_t uOld = *puAl;
    if (ASMAtomicCmpXchgExU8(pu8Dst, uSrcReg, uOld, puAl))
        Assert(*puAl == uOld);
    iemAImpl_cmp_u8(&uOld, *puAl, pEFlags);
}
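
/*
 * Note how ZF falls out of the final CMP here: on success *puAl still holds
 * the expected value, on failure ASMAtomicCmpXchgExU8 stored the actual
 * memory value into *puAl, so comparing the original accumulator against
 * *puAl sets ZF exactly when the exchange happened (and the remaining flags
 * as CMP defines them).
 */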


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16_locked,(uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
{
    uint16_t uOld = *puAx;
    if (ASMAtomicCmpXchgExU16(pu16Dst, uSrcReg, uOld, puAx))
        Assert(*puAx == uOld);
    iemAImpl_cmp_u16(&uOld, *puAx, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32_locked,(uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
{
    uint32_t uOld = *puEax;
    if (ASMAtomicCmpXchgExU32(pu32Dst, uSrcReg, uOld, puEax))
        Assert(*puEax == uOld);
    iemAImpl_cmp_u32(&uOld, *puEax, pEFlags);
}


# if ARCH_BITS == 32
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
# else
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
# endif
{
# if ARCH_BITS == 32
    uint64_t const uSrcReg = *puSrcReg;
# endif
    uint64_t uOld = *puRax;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uSrcReg, uOld, puRax))
        Assert(*puRax == uOld);
    iemAImpl_cmp_u64(&uOld, *puRax, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b_locked,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
                                                   uint32_t *pEFlags))
{
    uint64_t const uNew = pu64EbxEcx->u;
    uint64_t const uOld = pu64EaxEdx->u;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uNew, uOld, &pu64EaxEdx->u))
    {
        Assert(pu64EaxEdx->u == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}


# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
                                                    uint32_t *pEFlags))
{
#  ifdef VBOX_STRICT
    RTUINT128U const uOld = *pu128RaxRdx;
#  endif
#  if defined(RT_ARCH_AMD64)
    if (ASMAtomicCmpXchgU128v2(&pu128Dst->u, pu128RbxRcx->s.Hi, pu128RbxRcx->s.Lo, pu128RaxRdx->s.Hi, pu128RaxRdx->s.Lo,
                               &pu128RaxRdx->u))
#  else
    if (ASMAtomicCmpXchgU128(&pu128Dst->u, pu128RbxRcx->u, pu128RaxRdx->u, &pu128RaxRdx->u))
#  endif
    {
        Assert(pu128RaxRdx->s.Lo == uOld.s.Lo && pu128RaxRdx->s.Hi == uOld.s.Hi);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}
# endif

#endif /* defined(IEM_WITHOUT_ASSEMBLY) */

#if !defined(RT_ARCH_ARM64) /** @todo may need this for unaligned accesses... */
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
                                                      PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
{
    RTUINT128U u128Tmp = *pu128Dst;
    if (   u128Tmp.s.Lo == pu128RaxRdx->s.Lo
        && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
    {
        *pu128Dst = *pu128RbxRcx;
        *pEFlags |= X86_EFL_ZF;
    }
    else
    {
        *pu128RaxRdx = u128Tmp;
        *pEFlags &= ~X86_EFL_ZF;
    }
}
#endif /* !RT_ARCH_ARM64 */

#if defined(IEM_WITHOUT_ASSEMBLY)

/* Unlocked versions mapped to the locked ones: */

IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u8_locked(pu8Dst, puAl, uSrcReg, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16, (uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u16_locked(pu16Dst, puAx, uSrcReg, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32, (uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u32_locked(pu32Dst, puEax, uSrcReg, pEFlags);
}


# if ARCH_BITS == 32
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, puSrcReg, pEFlags);
}
# else
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, uSrcReg, pEFlags);
}
# endif


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg8b_locked(pu64Dst, pu64EaxEdx, pu64EbxEcx, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
                                             uint32_t *pEFlags))
{
    iemAImpl_cmpxchg16b_locked(pu128Dst, pu128RaxRdx, pu128RbxRcx, pEFlags);
}

#endif /* defined(IEM_WITHOUT_ASSEMBLY) */

#if (!defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)) \
 && !defined(DOXYGEN_RUNNING) /* Doxygen has some grokking issues here and ends up mixing up input. Not worth tracking down now. */

/*
 * MUL, IMUL, DIV and IDIV helpers.
 *
 * - The U64 versions must use 128-bit intermediates, so we need to abstract the
 *   division step so we can select between using C operators and
 *   RTUInt128DivRem/RTUInt128MulU64ByU64.
 *
 * - The U8 versions return their output in AL + AH instead of xDX + xAX, and
 *   IDIV/DIV take all their input from AX too.  This means we have to abstract
 *   some input loads and the result storing.
 */

DECLINLINE(void) RTUInt128DivRemByU64(PRTUINT128U pQuotient, PRTUINT128U pRemainder, PCRTUINT128U pDividend, uint64_t u64Divisor)
{
# ifdef __GNUC__ /* GCC may be really annoying in this function. */
    pQuotient->s.Lo = 0;
    pQuotient->s.Hi = 0;
# endif
    RTUINT128U Divisor;
    Divisor.s.Lo = u64Divisor;
    Divisor.s.Hi = 0;
    RTUInt128DivRem(pQuotient, pRemainder, pDividend, &Divisor);
}

# define DIV_LOAD(a_Dividend) \
    a_Dividend.s.Lo = *puA, a_Dividend.s.Hi = *puD
# define DIV_LOAD_U8(a_Dividend) \
    a_Dividend.u = *puAX

# define DIV_STORE(a_Quotient, a_uRemainder)    *puA = (a_Quotient), *puD = (a_uRemainder)
# define DIV_STORE_U8(a_Quotient, a_uRemainder) *puAX = (uint8_t)(a_Quotient) | ((uint16_t)(a_uRemainder) << 8)
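
/*
 * A small worked example of the U8 packing above: the 8-bit DIV/IDIV forms
 * take the whole dividend from AX and return the quotient in AL and the
 * remainder in AH, which is why the store folds both into the single 16-bit
 * *puAX.  E.g. 100 / 7 gives quotient 14 and remainder 2, stored as
 * *puAX == 0x020e.
 */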

# define MUL_LOAD_F1()                      *puA
# define MUL_LOAD_F1_U8()                   ((uint8_t)*puAX)

# define MUL_STORE(a_Result)                *puA = (a_Result).s.Lo, *puD = (a_Result).s.Hi
# define MUL_STORE_U8(a_Result)             *puAX = a_Result.u

# define MULDIV_NEG(a_Value, a_cBitsWidth2x) \
    (a_Value).u = UINT ## a_cBitsWidth2x ## _C(0) - (a_Value).u
# define MULDIV_NEG_U128(a_Value, a_cBitsWidth2x) \
    RTUInt128AssignNeg(&(a_Value))

# define MULDIV_MUL(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
    (a_Result).u = (uint ## a_cBitsWidth2x ## _t)(a_Factor1) * (a_Factor2)
# define MULDIV_MUL_U128(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
    RTUInt128MulU64ByU64(&(a_Result), a_Factor1, a_Factor2);

# define MULDIV_MODDIV(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
    a_Quotient.u = (a_Dividend).u / (a_uDivisor), \
    a_Remainder.u = (a_Dividend).u % (a_uDivisor)
# define MULDIV_MODDIV_U128(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
    RTUInt128DivRemByU64(&a_Quotient, &a_Remainder, &a_Dividend, a_uDivisor)


/*
 * MUL
 */
# define EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_mul_u,a_cBitsWidth,a_Suffix), a_Args) \
{ \
    RTUINT ## a_cBitsWidth2x ## U Result; \
    a_fnMul(Result, a_fnLoadF1(), uFactor, a_cBitsWidth2x); \
    a_fnStore(Result); \
    \
    /* Calc EFLAGS: */ \
    uint32_t fEfl = *pfEFlags; \
    if (a_fIntelFlags) \
    { /* Intel: 6700K and 10980XE behavior */ \
        fEfl &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF); \
        if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
            fEfl |= X86_EFL_SF; \
        fEfl |= g_afParity[Result.s.Lo & 0xff]; \
        if (Result.s.Hi != 0) \
            fEfl |= X86_EFL_CF | X86_EFL_OF; \
    } \
    else \
    { /* AMD: 3990X */ \
        if (Result.s.Hi != 0) \
            fEfl |= X86_EFL_CF | X86_EFL_OF; \
        else \
            fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
    } \
    *pfEFlags = fEfl; \
    return 0; \
} \

# define EMIT_MUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul) \
    EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, RT_NOTHING, 1) \
    EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _intel,     1) \
    EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _amd,       0) \

# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
EMIT_MUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
         MUL_LOAD_F1, MUL_STORE, MULDIV_MUL_U128)
#  if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_MUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
         MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
EMIT_MUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
         MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
EMIT_MUL(8, 16, (uint16_t *puAX, uint8_t uFactor, uint32_t *pfEFlags), (puAX, uFactor, pfEFlags),
         MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_MUL)
#  endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
# endif /* !DOXYGEN_RUNNING */
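
/*
 * A sketch of the flag split encoded above (illustration only): both vendors
 * set CF and OF iff the high half of the product is non-zero; only the Intel
 * flavour additionally recomputes SF/PF from the low half and clears AF/ZF.
 */
#if 0 /* illustration only, hypothetical helper */
static void iemIllustrateMulFlags(void)
{
    uint16_t uAX     = 0x80;  /* AL = 0x80 */
    uint32_t fEFlags = 0;
    iemAImpl_mul_u8(&uAX, 2, &fEFlags);
    /* uAX == 0x0100: the high byte (AH) is non-zero, so CF and OF are set. */
}
#endif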


/*
 * IMUL
 *
 * The SF, ZF, AF and PF flags are "undefined".  AMD (3990X) leaves these
 * flags as-is, whereas Intel Skylake (6700K and 10980XE (Cascade Lake))
 * always clears AF and ZF and calculates SF and PF as per the lower half of
 * the result.
 */
# define EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, \
                         a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_imul_u,a_cBitsWidth,a_Suffix),a_Args) \
{ \
    RTUINT ## a_cBitsWidth2x ## U Result; \
    uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF | X86_EFL_OF); \
    \
    uint ## a_cBitsWidth ## _t const uFactor1 = a_fnLoadF1(); \
    if (!(uFactor1 & RT_BIT_64(a_cBitsWidth - 1))) \
    { \
        if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
        { \
            a_fnMul(Result, uFactor1, uFactor2, a_cBitsWidth2x); \
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
                fEfl |= X86_EFL_CF | X86_EFL_OF; \
        } \
        else \
        { \
            uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
            a_fnMul(Result, uFactor1, uPositiveFactor2, a_cBitsWidth2x); \
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
                fEfl |= X86_EFL_CF | X86_EFL_OF; \
            a_fnNeg(Result, a_cBitsWidth2x); \
        } \
    } \
    else \
    { \
        if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
        { \
            uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
            a_fnMul(Result, uPositiveFactor1, uFactor2, a_cBitsWidth2x); \
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
                fEfl |= X86_EFL_CF | X86_EFL_OF; \
            a_fnNeg(Result, a_cBitsWidth2x); \
        } \
        else \
        { \
            uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
            uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
            a_fnMul(Result, uPositiveFactor1, uPositiveFactor2, a_cBitsWidth2x); \
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
                fEfl |= X86_EFL_CF | X86_EFL_OF; \
        } \
    } \
    a_fnStore(Result); \
    \
    if (a_fIntelFlags) \
    { \
        fEfl &= ~(X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_PF); \
        if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
            fEfl |= X86_EFL_SF; \
        fEfl |= g_afParity[Result.s.Lo & 0xff]; \
    } \
    *pfEFlags = fEfl; \
    return 0; \
}
# define EMIT_IMUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul) \
    EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, RT_NOTHING, 1) \
    EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _intel,     1) \
    EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _amd,       0)

# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
EMIT_IMUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
          MUL_LOAD_F1, MUL_STORE, MULDIV_NEG_U128, MULDIV_MUL_U128)
#  if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_IMUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
          MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
EMIT_IMUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
          MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
EMIT_IMUL(8, 16, (uint16_t *puAX, uint8_t uFactor2, uint32_t *pfEFlags), (puAX, uFactor2, pfEFlags),
          MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_NEG, MULDIV_MUL)
#  endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
# endif /* !DOXYGEN_RUNNING */
1762
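/*
 * Worked example of the sign decomposition above (illustrative only):
 * an 8-bit IMUL of AL=0xfd (-3) by 0x05 multiplies the magnitudes,
 * 3 * 5 = 15, finds 15 below RT_BIT_64(7) so CF/OF stay clear, and then
 * negates: AX = 0xfff1 (-15). The asymmetric '>' vs '>=' range checks
 * reflect that the most negative value (magnitude 2^(width-1)) is
 * representable while its positive counterpart is not.
 */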
1763
1764/*
1765 * IMUL with two operands are mapped onto the three operand variant, ignoring
1766 * the high part of the product.
1767 */
1768# define EMIT_IMUL_TWO(a_cBits, a_uType) \
1769IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1770{ \
1771 a_uType uIgn; \
1772 iemAImpl_imul_u ## a_cBits(puDst, &uIgn, uSrc, pfEFlags); \
1773} \
1774\
1775IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _intel,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1776{ \
1777 a_uType uIgn; \
1778 iemAImpl_imul_u ## a_cBits ## _intel(puDst, &uIgn, uSrc, pfEFlags); \
1779} \
1780\
1781IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _amd,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1782{ \
1783 a_uType uIgn; \
1784 iemAImpl_imul_u ## a_cBits ## _amd(puDst, &uIgn, uSrc, pfEFlags); \
1785}
1786
1787EMIT_IMUL_TWO(64, uint64_t)
1788# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1789EMIT_IMUL_TWO(32, uint32_t)
1790EMIT_IMUL_TWO(16, uint16_t)
1791# endif
1792
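/*
 * Example (illustrative only): 'imul eax, ecx' with EAX=ECX=0x10000 goes
 * through iemAImpl_imul_two_u32; the product 2^32 has its high 32 bits
 * discarded (EAX becomes 0), but the three-operand worker still reports
 * the truncation by setting CF and OF.
 */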
1793
1794/*
1795 * DIV
1796 */
1797# define EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, \
1798 a_Suffix, a_fIntelFlags) \
1799IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_div_u,a_cBitsWidth,a_Suffix),a_Args) \
1800{ \
1801 RTUINT ## a_cBitsWidth2x ## U Dividend; \
1802 a_fnLoad(Dividend); \
1803 if ( uDivisor != 0 \
1804 && Dividend.s.Hi < uDivisor) \
1805 { \
1806 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
1807 a_fnDivRem(Quotient, Remainder, Dividend, uDivisor); \
1808 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
1809 \
1810 /* Calc EFLAGS: Intel 6700K and 10980XE leaves them alone. AMD 3990X sets AF and clears PF, ZF and SF. */ \
1811 if (!a_fIntelFlags) \
1812 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1813 return 0; \
1814 } \
1815 /* #DE */ \
1816 return -1; \
1817}
1818# define EMIT_DIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem) \
1819 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, RT_NOTHING, 1) \
1820 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _intel, 1) \
1821 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _amd, 0)
1822
1823# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
1824EMIT_DIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1825 DIV_LOAD, DIV_STORE, MULDIV_MODDIV_U128)
1826# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1827EMIT_DIV(32,64, (uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1828 DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1829EMIT_DIV(16,32, (uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1830 DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1831EMIT_DIV(8,16, (uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
1832 DIV_LOAD_U8, DIV_STORE_U8, MULDIV_MODDIV)
1833# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1834# endif /* !DOXYGEN_RUNNING */
1835
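/*
 * A minimal usage sketch (not compiled in): the 'Dividend.s.Hi < uDivisor'
 * check above is exactly the condition for the quotient to fit in the
 * destination register.
 */
#if 0
 uint16_t uAX = 0x0100; /* 256 */
 uint32_t fEFlags = 0;
 int rc = iemAImpl_div_u8(&uAX, 4 /* uDivisor */, &fEFlags);
 /* rc == 0; uAX == 0x0040: quotient AL=0x40, remainder AH=0x00. */
 uAX = 0x0400; /* 1024 */
 rc = iemAImpl_div_u8(&uAX, 4, &fEFlags);
 /* rc == -1 (#DE): the quotient 0x100 does not fit in AL. */
#endif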
1836
1837/*
1838 * IDIV
1839 *
1840 * EFLAGS are ignored and left as-is by Intel 6700K and 10980XE. AMD 3990X will
1841 * set AF and clear PF, ZF and SF just like it does for DIV.
1842 *
1843 */
1844# define EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, \
1845 a_Suffix, a_fIntelFlags) \
1846IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_idiv_u,a_cBitsWidth,a_Suffix),a_Args) \
1847{ \
1848 /* Note! Skylake leaves all flags alone. */ \
1849 \
1850 /** @todo overflow checks */ \
1851 if (uDivisor != 0) \
1852 { \
1853 /* \
1854 * Convert to unsigned division. \
1855 */ \
1856 RTUINT ## a_cBitsWidth2x ## U Dividend; \
1857 a_fnLoad(Dividend); \
1858 bool const fSignedDividend = RT_BOOL(Dividend.s.Hi & RT_BIT_64(a_cBitsWidth - 1)); \
1859 if (fSignedDividend) \
1860 a_fnNeg(Dividend, a_cBitsWidth2x); \
1861 \
1862 uint ## a_cBitsWidth ## _t uDivisorPositive; \
1863 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
1864 uDivisorPositive = uDivisor; \
1865 else \
1866 uDivisorPositive = UINT ## a_cBitsWidth ## _C(0) - uDivisor; \
1867 \
1868 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
1869 a_fnDivRem(Quotient, Remainder, Dividend, uDivisorPositive); \
1870 \
1871 /* \
1872 * Setup the result, checking for overflows. \
1873 */ \
1874 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
1875 { \
1876 if (!fSignedDividend) \
1877 { \
1878 /* Positive divisor, positive dividend => result positive. */ \
1879 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
1880 { \
1881 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
1882 if (!a_fIntelFlags) \
1883 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1884 return 0; \
1885 } \
1886 } \
1887 else \
1888 { \
1889 /* Positive divisor, negative dividend => result negative. */ \
1890 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
1891 { \
1892 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
1893 if (!a_fIntelFlags) \
1894 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1895 return 0; \
1896 } \
1897 } \
1898 } \
1899 else \
1900 { \
1901 if (!fSignedDividend) \
1902 { \
1903 /* Negative divisor, positive dividend => negative quotient, positive remainder. */ \
1904 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
1905 { \
1906 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, Remainder.s.Lo); \
1907 if (!a_fIntelFlags) \
1908 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1909 return 0; \
1910 } \
1911 } \
1912 else \
1913 { \
1914 /* Negative divisor, negative dividend => positive quotient, negative remainder. */ \
1915 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
1916 { \
1917 a_fnStore(Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
1918 if (!a_fIntelFlags) \
1919 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1920 return 0; \
1921 } \
1922 } \
1923 } \
1924 } \
1925 /* #DE */ \
1926 return -1; \
1927}
1928# define EMIT_IDIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem) \
1929 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, RT_NOTHING, 1) \
1930 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _intel, 1) \
1931 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _amd, 0)
1932
1933# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
1934EMIT_IDIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1935 DIV_LOAD, DIV_STORE, MULDIV_NEG_U128, MULDIV_MODDIV_U128)
1936# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1937EMIT_IDIV(32,64,(uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1938 DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
1939EMIT_IDIV(16,32,(uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1940 DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
1941EMIT_IDIV(8,16,(uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
1942 DIV_LOAD_U8, DIV_STORE_U8, MULDIV_NEG, MULDIV_MODDIV)
1943# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1944# endif /* !DOXYGEN_RUNNING */
1945
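/*
 * Worked example (illustrative only): idiv_u8 with AX=0xfff1 (-15) and
 * divisor 4 converts to 15 / 4 = 3 rem 3 and then negates both per the
 * "positive divisor, negative dividend" case: AL=0xfd (-3), AH=0xfd (-3),
 * i.e. the remainder takes the sign of the dividend. AX=0xff80 (-128)
 * divided by 0xff (-1) fails the 'Quotient.s.Lo <= INT8_MAX' style check
 * (+128 is not representable) and raises #DE.
 */
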
1946#endif /* (!defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)) && !defined(DOXYGEN_RUNNING) */
1947
1948
1949/*********************************************************************************************************************************
1950* Unary operations. *
1951*********************************************************************************************************************************/
1952#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
1953
1954/** @def IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC
1955 * Updates the status bits (PF, AF, ZF, SF, and OF) for an INC or DEC instruction.
1956 *
1957 * CF is NOT modified for hysterical raisins (allegedly for carrying and
1958 * borrowing in arithmetic loops on the Intel 8008).
1959 *
1960 * @returns Status bits.
1961 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
1962 * @param a_uResult Unsigned result value.
1963 * @param a_uDst The original destination value (for AF calc).
1964 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
1965 * @param a_OfMethod 0 for INC-style, 1 for DEC-style.
1966 */
1967#define IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth, a_OfMethod) \
1968 do { \
1969 uint32_t fEflTmp = *(a_pfEFlags); \
1970 fEflTmp &= ~X86_EFL_STATUS_BITS | X86_EFL_CF; \
1971 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
1972 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
1973 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
1974 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
1975 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(a_OfMethod == 0 ? (((a_uDst) ^ RT_BIT_64(a_cBitsWidth - 1)) & (a_uResult)) \
1976 : ((a_uDst) & ((a_uResult) ^ RT_BIT_64(a_cBitsWidth - 1))) ); \
1977 *(a_pfEFlags) = fEflTmp; \
1978 } while (0)
1979
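/*
 * Worked example (illustrative only): for INC 0x0f -> 0x10 the AF
 * calculation above gives (0x10 ^ 0x0f) & X86_EFL_AF = 0x10, i.e. AF is
 * set exactly when bit 3 carries into bit 4; INC 0x7f -> 0x80 yields
 * ((0x7f ^ 0x80) & 0x80) != 0, setting OF for the one case where the
 * sign flips from positive to negative.
 */
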
1980/*
1981 * INC
1982 */
1983
1984IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
1985{
1986 uint64_t uDst = *puDst;
1987 uint64_t uResult = uDst + 1;
1988 *puDst = uResult;
1989 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 0 /*INC*/);
1990}
1991
1992# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1993
1994IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u32,(uint32_t *puDst, uint32_t *pfEFlags))
1995{
1996 uint32_t uDst = *puDst;
1997 uint32_t uResult = uDst + 1;
1998 *puDst = uResult;
1999 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 0 /*INC*/);
2000}
2001
2002
2003IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2004{
2005 uint16_t uDst = *puDst;
2006 uint16_t uResult = uDst + 1;
2007 *puDst = uResult;
2008 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 0 /*INC*/);
2009}
2010
2011IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2012{
2013 uint8_t uDst = *puDst;
2014 uint8_t uResult = uDst + 1;
2015 *puDst = uResult;
2016 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 0 /*INC*/);
2017}
2018
2019# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2020
2021
2022/*
2023 * DEC
2024 */
2025
2026IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2027{
2028 uint64_t uDst = *puDst;
2029 uint64_t uResult = uDst - 1;
2030 *puDst = uResult;
2031 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 1 /*DEC*/);
2032}
2033
2034# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2035
2036IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2037{
2038 uint32_t uDst = *puDst;
2039 uint32_t uResult = uDst - 1;
2040 *puDst = uResult;
2041 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 1 /*DEC*/);
2042}
2043
2044
2045IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2046{
2047 uint16_t uDst = *puDst;
2048 uint16_t uResult = uDst - 1;
2049 *puDst = uResult;
2050 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 1 /*DEC*/);
2051}
2052
2053
2054IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2055{
2056 uint8_t uDst = *puDst;
2057 uint8_t uResult = uDst - 1;
2058 *puDst = uResult;
2059 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 1 /*DEC*/);
2060}
2061
2062# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2063
2064
2065/*
2066 * NOT
2067 */
2068
2069IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2070{
2071 uint64_t uDst = *puDst;
2072 uint64_t uResult = ~uDst;
2073 *puDst = uResult;
2074 /* EFLAGS are not modified. */
2075 RT_NOREF_PV(pfEFlags);
2076}
2077
2078# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2079
2080IEM_DECL_IMPL_DEF(void, iemAImpl_not_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2081{
2082 uint32_t uDst = *puDst;
2083 uint32_t uResult = ~uDst;
2084 *puDst = uResult;
2085 /* EFLAGS are not modified. */
2086 RT_NOREF_PV(pfEFlags);
2087}
2088
2089IEM_DECL_IMPL_DEF(void, iemAImpl_not_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2090{
2091 uint16_t uDst = *puDst;
2092 uint16_t uResult = ~uDst;
2093 *puDst = uResult;
2094 /* EFLAGS are not modified. */
2095 RT_NOREF_PV(pfEFlags);
2096}
2097
2098IEM_DECL_IMPL_DEF(void, iemAImpl_not_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2099{
2100 uint8_t uDst = *puDst;
2101 uint8_t uResult = ~uDst;
2102 *puDst = uResult;
2103 /* EFLAGS are not modified. */
2104 RT_NOREF_PV(pfEFlags);
2105}
2106
2107# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2108
2109
2110/*
2111 * NEG
2112 */
2113
2114/**
2115 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for a NEG instruction.
2116 *
2117 * @returns Status bits.
2118 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2119 * @param a_uResult Unsigned result value.
2120 * @param a_uDst The original destination value (for AF calc).
2121 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2122 */
2123#define IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth) \
2124 do { \
2125 uint32_t fEflTmp = *(a_pfEFlags); \
2126 fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; \
2127 fEflTmp |= ((a_uDst) != 0) << X86_EFL_CF_BIT; \
2128 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2129 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2130 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2131 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2132 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_uDst) & (a_uResult)); \
2133 *(a_pfEFlags) = fEflTmp; \
2134 } while (0)
2135
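/*
 * Worked example (illustrative only): NEG of zero leaves zero with CF=0
 * and ZF=1, while 8-bit NEG of 0x80 (-128) produces 0x80 again with CF=1
 * and OF set, since (uDst & uResult) has the sign bit set, the classic
 * two's complement overflow case.
 */
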
2136IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2137{
2138 uint64_t uDst = *puDst;
2139 uint64_t uResult = (uint64_t)0 - uDst;
2140 *puDst = uResult;
2141 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 64);
2142}
2143
2144# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2145
2146IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2147{
2148 uint32_t uDst = *puDst;
2149 uint32_t uResult = (uint32_t)0 - uDst;
2150 *puDst = uResult;
2151 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 32);
2152}
2153
2154
2155IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2156{
2157 uint16_t uDst = *puDst;
2158 uint16_t uResult = (uint16_t)0 - uDst;
2159 *puDst = uResult;
2160 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 16);
2161}
2162
2163
2164IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2165{
2166 uint8_t uDst = *puDst;
2167 uint8_t uResult = (uint8_t)0 - uDst;
2168 *puDst = uResult;
2169 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 8);
2170}
2171
2172# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2173
2174/*
2175 * Locked variants.
2176 */
2177
2178/** Emit a function for doing a locked unary operand operation. */
2179# define EMIT_LOCKED_UNARY_OP(a_Mnemonic, a_cBitsWidth) \
2180 IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
2181 uint32_t *pfEFlags)) \
2182 { \
2183 uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
2184 uint ## a_cBitsWidth ## _t uTmp; \
2185 uint32_t fEflTmp; \
2186 do \
2187 { \
2188 uTmp = uOld; \
2189 fEflTmp = *pfEFlags; \
2190 iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, &fEflTmp); \
2191 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
2192 *pfEFlags = fEflTmp; \
2193 }
2194
2195EMIT_LOCKED_UNARY_OP(inc, 64)
2196EMIT_LOCKED_UNARY_OP(dec, 64)
2197EMIT_LOCKED_UNARY_OP(not, 64)
2198EMIT_LOCKED_UNARY_OP(neg, 64)
2199# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2200EMIT_LOCKED_UNARY_OP(inc, 32)
2201EMIT_LOCKED_UNARY_OP(dec, 32)
2202EMIT_LOCKED_UNARY_OP(not, 32)
2203EMIT_LOCKED_UNARY_OP(neg, 32)
2204
2205EMIT_LOCKED_UNARY_OP(inc, 16)
2206EMIT_LOCKED_UNARY_OP(dec, 16)
2207EMIT_LOCKED_UNARY_OP(not, 16)
2208EMIT_LOCKED_UNARY_OP(neg, 16)
2209
2210EMIT_LOCKED_UNARY_OP(inc, 8)
2211EMIT_LOCKED_UNARY_OP(dec, 8)
2212EMIT_LOCKED_UNARY_OP(not, 8)
2213EMIT_LOCKED_UNARY_OP(neg, 8)
2214# endif
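
/*
 * A minimal usage sketch (not compiled in): the locked variants simply
 * retry the plain worker in a compare-and-exchange loop, so a concurrent
 * writer never gets its update lost.
 */
#if 0
 uint32_t uShared = 41;
 uint32_t fEFlags = 0;
 iemAImpl_inc_u32_locked(&uShared, &fEFlags); /* atomically 41 -> 42 */
#endif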
2215
2216#endif /* !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) */
2217
2218
2219/*********************************************************************************************************************************
2220* Shifting and Rotating *
2221*********************************************************************************************************************************/
2222
2223/*
2224 * ROL
2225 */
2226#define EMIT_ROL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
2227IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rol_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2228{ \
2229 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2230 if (cShift) \
2231 { \
2232 if (a_cBitsWidth < 32) \
2233 cShift &= a_cBitsWidth - 1; \
2234 a_uType const uDst = *puDst; \
2235 a_uType const uResult = a_fnHlp(uDst, cShift); \
2236 *puDst = uResult; \
2237 \
2238 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2239 it the same way as for 1-bit shifts. */ \
2240 AssertCompile(X86_EFL_CF_BIT == 0); \
2241 uint32_t fEfl = *pfEFlags; \
2242 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2243 uint32_t const fCarry = (uResult & X86_EFL_CF); \
2244 fEfl |= fCarry; \
2245 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2246 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2247 else /* Intel 10980XE: According to the first sub-shift: */ \
2248 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2249 *pfEFlags = fEfl; \
2250 } \
2251}
2252
2253#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2254EMIT_ROL(64, uint64_t, RT_NOTHING, 1, ASMRotateLeftU64)
2255#endif
2256EMIT_ROL(64, uint64_t, _intel, 1, ASMRotateLeftU64)
2257EMIT_ROL(64, uint64_t, _amd, 0, ASMRotateLeftU64)
2258
2259#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2260EMIT_ROL(32, uint32_t, RT_NOTHING, 1, ASMRotateLeftU32)
2261#endif
2262EMIT_ROL(32, uint32_t, _intel, 1, ASMRotateLeftU32)
2263EMIT_ROL(32, uint32_t, _amd, 0, ASMRotateLeftU32)
2264
2265DECL_FORCE_INLINE(uint16_t) iemAImpl_rol_u16_hlp(uint16_t uValue, uint8_t cShift)
2266{
2267 return (uValue << cShift) | (uValue >> (16 - cShift));
2268}
2269#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2270EMIT_ROL(16, uint16_t, RT_NOTHING, 1, iemAImpl_rol_u16_hlp)
2271#endif
2272EMIT_ROL(16, uint16_t, _intel, 1, iemAImpl_rol_u16_hlp)
2273EMIT_ROL(16, uint16_t, _amd, 0, iemAImpl_rol_u16_hlp)
2274
2275DECL_FORCE_INLINE(uint8_t) iemAImpl_rol_u8_hlp(uint8_t uValue, uint8_t cShift)
2276{
2277 return (uValue << cShift) | (uValue >> (8 - cShift));
2278}
2279#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2280EMIT_ROL(8, uint8_t, RT_NOTHING, 1, iemAImpl_rol_u8_hlp)
2281#endif
2282EMIT_ROL(8, uint8_t, _intel, 1, iemAImpl_rol_u8_hlp)
2283EMIT_ROL(8, uint8_t, _amd, 0, iemAImpl_rol_u8_hlp)
2284
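/*
 * Worked example (illustrative only): rol_u8(0x81, 1) gives 0x03 with
 * CF=1 (the wrapped-around bit); OF=1 here on both vendors, since the
 * new MSB (0) differs from the new CF (1).
 */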
2285
2286/*
2287 * ROR
2288 */
2289#define EMIT_ROR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
2290IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_ror_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2291{ \
2292 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2293 if (cShift) \
2294 { \
2295 if (a_cBitsWidth < 32) \
2296 cShift &= a_cBitsWidth - 1; \
2297 a_uType const uDst = *puDst; \
2298 a_uType const uResult = a_fnHlp(uDst, cShift); \
2299 *puDst = uResult; \
2300 \
2301 /* Calc EFLAGS: */ \
2302 AssertCompile(X86_EFL_CF_BIT == 0); \
2303 uint32_t fEfl = *pfEFlags; \
2304 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2305 uint32_t const fCarry = (uResult >> ((a_cBitsWidth) - 1)) & X86_EFL_CF; \
2306 fEfl |= fCarry; \
2307 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2308 fEfl |= (((uResult >> ((a_cBitsWidth) - 2)) ^ fCarry) & 1) << X86_EFL_OF_BIT; \
2309 else /* Intel 10980XE: According to the first sub-shift: */ \
2310 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << (a_cBitsWidth - 1))); \
2311 *pfEFlags = fEfl; \
2312 } \
2313}
2314
2315#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2316EMIT_ROR(64, uint64_t, RT_NOTHING, 1, ASMRotateRightU64)
2317#endif
2318EMIT_ROR(64, uint64_t, _intel, 1, ASMRotateRightU64)
2319EMIT_ROR(64, uint64_t, _amd, 0, ASMRotateRightU64)
2320
2321#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2322EMIT_ROR(32, uint32_t, RT_NOTHING, 1, ASMRotateRightU32)
2323#endif
2324EMIT_ROR(32, uint32_t, _intel, 1, ASMRotateRightU32)
2325EMIT_ROR(32, uint32_t, _amd, 0, ASMRotateRightU32)
2326
2327DECL_FORCE_INLINE(uint16_t) iemAImpl_ror_u16_hlp(uint16_t uValue, uint8_t cShift)
2328{
2329 return (uValue >> cShift) | (uValue << (16 - cShift));
2330}
2331#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2332EMIT_ROR(16, uint16_t, RT_NOTHING, 1, iemAImpl_ror_u16_hlp)
2333#endif
2334EMIT_ROR(16, uint16_t, _intel, 1, iemAImpl_ror_u16_hlp)
2335EMIT_ROR(16, uint16_t, _amd, 0, iemAImpl_ror_u16_hlp)
2336
2337DECL_FORCE_INLINE(uint8_t) iemAImpl_ror_u8_hlp(uint8_t uValue, uint8_t cShift)
2338{
2339 return (uValue >> cShift) | (uValue << (8 - cShift));
2340}
2341#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2342EMIT_ROR(8, uint8_t, RT_NOTHING, 1, iemAImpl_ror_u8_hlp)
2343#endif
2344EMIT_ROR(8, uint8_t, _intel, 1, iemAImpl_ror_u8_hlp)
2345EMIT_ROR(8, uint8_t, _amd, 0, iemAImpl_ror_u8_hlp)
2346
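/*
 * Worked example (illustrative only): ror_u8(0x01, 1) gives 0x80 with
 * CF=1 (the bit rotated into the MSB) and OF=1, as the two most
 * significant result bits (binary 10) differ.
 */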
2347
2348/*
2349 * RCL
2350 */
2351#define EMIT_RCL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2352IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2353{ \
2354 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2355 if (a_cBitsWidth < 32 && a_fIntelFlags) \
2356 cShift %= a_cBitsWidth + 1; \
2357 if (cShift) \
2358 { \
2359 if (a_cBitsWidth < 32 && !a_fIntelFlags) \
2360 cShift %= a_cBitsWidth + 1; \
2361 a_uType const uDst = *puDst; \
2362 a_uType uResult = uDst << cShift; \
2363 if (cShift > 1) \
2364 uResult |= uDst >> (a_cBitsWidth + 1 - cShift); \
2365 \
2366 AssertCompile(X86_EFL_CF_BIT == 0); \
2367 uint32_t fEfl = *pfEFlags; \
2368 uint32_t fInCarry = fEfl & X86_EFL_CF; \
2369 uResult |= (a_uType)fInCarry << (cShift - 1); \
2370 \
2371 *puDst = uResult; \
2372 \
2373 /* Calc EFLAGS. */ \
2374 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2375 uint32_t const fOutCarry = a_cBitsWidth >= 32 || a_fIntelFlags || cShift \
2376 ? (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF : fInCarry; \
2377 fEfl |= fOutCarry; \
2378 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2379 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fOutCarry) << X86_EFL_OF_BIT; \
2380 else /* Intel 10980XE: According to the first sub-shift: */ \
2381 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2382 *pfEFlags = fEfl; \
2383 } \
2384}
2385
2386#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2387EMIT_RCL(64, uint64_t, RT_NOTHING, 1)
2388#endif
2389EMIT_RCL(64, uint64_t, _intel, 1)
2390EMIT_RCL(64, uint64_t, _amd, 0)
2391
2392#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2393EMIT_RCL(32, uint32_t, RT_NOTHING, 1)
2394#endif
2395EMIT_RCL(32, uint32_t, _intel, 1)
2396EMIT_RCL(32, uint32_t, _amd, 0)
2397
2398#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2399EMIT_RCL(16, uint16_t, RT_NOTHING, 1)
2400#endif
2401EMIT_RCL(16, uint16_t, _intel, 1)
2402EMIT_RCL(16, uint16_t, _amd, 0)
2403
2404#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2405EMIT_RCL(8, uint8_t, RT_NOTHING, 1)
2406#endif
2407EMIT_RCL(8, uint8_t, _intel, 1)
2408EMIT_RCL(8, uint8_t, _amd, 0)
2409
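/*
 * Worked example (illustrative only): RCL rotates CF and the operand as
 * one (width+1)-bit value, so rcl_u8(0x80, 1) with CF=1 on input yields
 * 0x01 with CF=1: the old MSB moves into CF while the old CF becomes the
 * new bit 0.
 */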
2410
2411/*
2412 * RCR
2413 */
2414#define EMIT_RCR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2415IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2416{ \
2417 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2418 if (a_cBitsWidth < 32 && a_fIntelFlags) \
2419 cShift %= a_cBitsWidth + 1; \
2420 if (cShift) \
2421 { \
2422 if (a_cBitsWidth < 32 && !a_fIntelFlags) \
2423 cShift %= a_cBitsWidth + 1; \
2424 a_uType const uDst = *puDst; \
2425 a_uType uResult = uDst >> cShift; \
2426 if (cShift > 1) \
2427 uResult |= uDst << (a_cBitsWidth + 1 - cShift); \
2428 \
2429 AssertCompile(X86_EFL_CF_BIT == 0); \
2430 uint32_t fEfl = *pfEFlags; \
2431 uint32_t fInCarry = fEfl & X86_EFL_CF; \
2432 uResult |= (a_uType)fInCarry << (a_cBitsWidth - cShift); \
2433 *puDst = uResult; \
2434 \
2435 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2436 it the same way as for 1-bit shifts. */ \
2437 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2438 uint32_t const fOutCarry = a_cBitsWidth >= 32 || a_fIntelFlags || cShift \
2439 ? (uDst >> (cShift - 1)) & X86_EFL_CF : fInCarry; \
2440 fEfl |= fOutCarry; \
2441 if (!a_fIntelFlags) /* AMD 3990X: XOR of the two most significant bits of the result: */ \
2442 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uResult ^ (uResult << 1)); \
2443 else /* Intel 10980XE: same as AMD, but only for the first sub-shift: */ \
2444 fEfl |= (fInCarry ^ (uint32_t)(uDst >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
2445 *pfEFlags = fEfl; \
2446 } \
2447}
2448
2449#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2450EMIT_RCR(64, uint64_t, RT_NOTHING, 1)
2451#endif
2452EMIT_RCR(64, uint64_t, _intel, 1)
2453EMIT_RCR(64, uint64_t, _amd, 0)
2454
2455#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2456EMIT_RCR(32, uint32_t, RT_NOTHING, 1)
2457#endif
2458EMIT_RCR(32, uint32_t, _intel, 1)
2459EMIT_RCR(32, uint32_t, _amd, 0)
2460
2461#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2462EMIT_RCR(16, uint16_t, RT_NOTHING, 1)
2463#endif
2464EMIT_RCR(16, uint16_t, _intel, 1)
2465EMIT_RCR(16, uint16_t, _amd, 0)
2466
2467#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2468EMIT_RCR(8, uint8_t, RT_NOTHING, 1)
2469#endif
2470EMIT_RCR(8, uint8_t, _intel, 1)
2471EMIT_RCR(8, uint8_t, _amd, 0)
2472
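/*
 * Worked example (illustrative only): rcr_u8(0x01, 1) with CF=1 on input
 * yields 0x80 with CF=1, the mirror image of the RCL case: the old CF
 * becomes the new MSB while the old bit 0 moves into CF.
 */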
2473
2474/*
2475 * SHL
2476 */
2477#define EMIT_SHL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2478IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2479{ \
2480 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2481 if (cShift) \
2482 { \
2483 a_uType const uDst = *puDst; \
2484 a_uType uResult = uDst << cShift; \
2485 *puDst = uResult; \
2486 \
2487 /* Calc EFLAGS. */ \
2488 AssertCompile(X86_EFL_CF_BIT == 0); \
2489 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2490 uint32_t fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2491 fEfl |= fCarry; \
2492 if (!a_fIntelFlags) \
2493 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; /* AMD 3990X: Last shift result. */ \
2494 else \
2495 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); /* Intel 10980XE: First shift result. */ \
2496 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2497 fEfl |= X86_EFL_CALC_ZF(uResult); \
2498 fEfl |= g_afParity[uResult & 0xff]; \
2499 if (!a_fIntelFlags) \
2500 fEfl |= X86_EFL_AF; /* AMD 3990X sets it unconditionally, Intel 10980XE does the opposite */ \
2501 *pfEFlags = fEfl; \
2502 } \
2503}
2504
2505#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2506EMIT_SHL(64, uint64_t, RT_NOTHING, 1)
2507#endif
2508EMIT_SHL(64, uint64_t, _intel, 1)
2509EMIT_SHL(64, uint64_t, _amd, 0)
2510
2511#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2512EMIT_SHL(32, uint32_t, RT_NOTHING, 1)
2513#endif
2514EMIT_SHL(32, uint32_t, _intel, 1)
2515EMIT_SHL(32, uint32_t, _amd, 0)
2516
2517#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2518EMIT_SHL(16, uint16_t, RT_NOTHING, 1)
2519#endif
2520EMIT_SHL(16, uint16_t, _intel, 1)
2521EMIT_SHL(16, uint16_t, _amd, 0)
2522
2523#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2524EMIT_SHL(8, uint8_t, RT_NOTHING, 1)
2525#endif
2526EMIT_SHL(8, uint8_t, _intel, 1)
2527EMIT_SHL(8, uint8_t, _amd, 0)
2528
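/*
 * Worked example (illustrative only): shl_u8(0xc0, 1) gives 0x80 with
 * CF=1 (the last bit shifted out) and OF=0, since for a 1-bit shift OF
 * amounts to CF XOR the new sign bit (1 ^ 1).
 */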
2529
2530/*
2531 * SHR
2532 */
2533#define EMIT_SHR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2534IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2535{ \
2536 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2537 if (cShift) \
2538 { \
2539 a_uType const uDst = *puDst; \
2540 a_uType uResult = uDst >> cShift; \
2541 *puDst = uResult; \
2542 \
2543 /* Calc EFLAGS. */ \
2544 AssertCompile(X86_EFL_CF_BIT == 0); \
2545 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2546 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2547 if (a_fIntelFlags || cShift == 1) /* AMD 3990X does what Intel documents; Intel 10980XE does this for all shift counts. */ \
2548 fEfl |= (uDst >> (a_cBitsWidth - 1)) << X86_EFL_OF_BIT; \
2549 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2550 fEfl |= X86_EFL_CALC_ZF(uResult); \
2551 fEfl |= g_afParity[uResult & 0xff]; \
2552 if (!a_fIntelFlags) \
2553 fEfl |= X86_EFL_AF; /* AMD 3990X sets it unconditionally, Intel 10980XE does the opposite */ \
2554 *pfEFlags = fEfl; \
2555 } \
2556}
2557
2558#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2559EMIT_SHR(64, uint64_t, RT_NOTHING, 1)
2560#endif
2561EMIT_SHR(64, uint64_t, _intel, 1)
2562EMIT_SHR(64, uint64_t, _amd, 0)
2563
2564#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2565EMIT_SHR(32, uint32_t, RT_NOTHING, 1)
2566#endif
2567EMIT_SHR(32, uint32_t, _intel, 1)
2568EMIT_SHR(32, uint32_t, _amd, 0)
2569
2570#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2571EMIT_SHR(16, uint16_t, RT_NOTHING, 1)
2572#endif
2573EMIT_SHR(16, uint16_t, _intel, 1)
2574EMIT_SHR(16, uint16_t, _amd, 0)
2575
2576#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2577EMIT_SHR(8, uint8_t, RT_NOTHING, 1)
2578#endif
2579EMIT_SHR(8, uint8_t, _intel, 1)
2580EMIT_SHR(8, uint8_t, _amd, 0)
2581
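/*
 * Worked example (illustrative only): shr_u8(0x81, 1) gives 0x40 with
 * CF=1 (bit 0 shifted out) and OF=1, the original sign bit, matching
 * what Intel documents for 1-bit shifts.
 */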
2582
2583/*
2584 * SAR
2585 */
2586#define EMIT_SAR(a_cBitsWidth, a_uType, a_iType, a_Suffix, a_fIntelFlags) \
2587IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_sar_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2588{ \
2589 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2590 if (cShift) \
2591 { \
2592 a_iType const iDst = (a_iType)*puDst; \
2593 a_uType uResult = iDst >> cShift; \
2594 *puDst = uResult; \
2595 \
2596 /* Calc EFLAGS. \
2597 Note! The OF flag is always zero because the result never differs from the input. */ \
2598 AssertCompile(X86_EFL_CF_BIT == 0); \
2599 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2600 fEfl |= (iDst >> (cShift - 1)) & X86_EFL_CF; \
2601 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2602 fEfl |= X86_EFL_CALC_ZF(uResult); \
2603 fEfl |= g_afParity[uResult & 0xff]; \
2604 if (!a_fIntelFlags) \
2605 fEfl |= X86_EFL_AF; /* AMD 3990X sets it unconditionally, Intel 10980XE does the opposite */ \
2606 *pfEFlags = fEfl; \
2607 } \
2608}
2609
2610#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2611EMIT_SAR(64, uint64_t, int64_t, RT_NOTHING, 1)
2612#endif
2613EMIT_SAR(64, uint64_t, int64_t, _intel, 1)
2614EMIT_SAR(64, uint64_t, int64_t, _amd, 0)
2615
2616#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2617EMIT_SAR(32, uint32_t, int32_t, RT_NOTHING, 1)
2618#endif
2619EMIT_SAR(32, uint32_t, int32_t, _intel, 1)
2620EMIT_SAR(32, uint32_t, int32_t, _amd, 0)
2621
2622#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2623EMIT_SAR(16, uint16_t, int16_t, RT_NOTHING, 1)
2624#endif
2625EMIT_SAR(16, uint16_t, int16_t, _intel, 1)
2626EMIT_SAR(16, uint16_t, int16_t, _amd, 0)
2627
2628#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2629EMIT_SAR(8, uint8_t, int8_t, RT_NOTHING, 1)
2630#endif
2631EMIT_SAR(8, uint8_t, int8_t, _intel, 1)
2632EMIT_SAR(8, uint8_t, int8_t, _amd, 0)
2633
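/*
 * Worked example (illustrative only): sar_u8(0x81, 1) sign-extends,
 * (int8_t)-127 >> 1 = -64, i.e. 0xc0, with CF=1 (bit 0 shifted out),
 * SF=1, and OF always clear.
 */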
2634
2635/*
2636 * SHLD
2637 *
2638 * - CF is the last bit shifted out of puDst.
2639 * - AF is always cleared by Intel 10980XE.
2640 * - AF is always set by AMD 3990X.
2641 * - OF is set according to the first shift on Intel 10980XE, it seems.
2642 * - OF is set according to the last sub-shift on AMD 3990X.
2643 * - ZF, SF and PF are calculated according to the result by both vendors.
2644 *
2645 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
2646 * pick either the source register or the destination register for input bits
2647 * when going beyond 16. According to https://www.sandpile.org/x86/flags.htm
2648 * Intel has changed behaviour here several times. We implement what current
2649 * Skylake-based CPUs do for now; we can extend this later as needed.
2650 */
2651#define EMIT_SHLD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2652IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shld_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, \
2653 uint32_t *pfEFlags)) \
2654{ \
2655 cShift &= a_cBitsWidth - 1; \
2656 if (cShift) \
2657 { \
2658 a_uType const uDst = *puDst; \
2659 a_uType uResult = uDst << cShift; \
2660 uResult |= uSrc >> (a_cBitsWidth - cShift); \
2661 *puDst = uResult; \
2662 \
2663 /* CALC EFLAGS: */ \
2664 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2665 if (a_fIntelFlags) \
2666 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2667 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2668 else \
2669 { /* AMD 3990X: Set according to last shift. AF always set. */ \
2670 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uDst << (cShift - 1)) ^ uResult); \
2671 fEfl |= X86_EFL_AF; \
2672 } \
2673 AssertCompile(X86_EFL_CF_BIT == 0); \
2674 fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; /* CF = last bit shifted out */ \
2675 fEfl |= g_afParity[uResult & 0xff]; \
2676 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2677 fEfl |= X86_EFL_CALC_ZF(uResult); \
2678 *pfEFlags = fEfl; \
2679 } \
2680}
2681
2682#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2683EMIT_SHLD(64, uint64_t, RT_NOTHING, 1)
2684#endif
2685EMIT_SHLD(64, uint64_t, _intel, 1)
2686EMIT_SHLD(64, uint64_t, _amd, 0)
2687
2688#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2689EMIT_SHLD(32, uint32_t, RT_NOTHING, 1)
2690#endif
2691EMIT_SHLD(32, uint32_t, _intel, 1)
2692EMIT_SHLD(32, uint32_t, _amd, 0)
2693
2694#define EMIT_SHLD_16(a_Suffix, a_fIntelFlags) \
2695IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shld_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2696{ \
2697 cShift &= 31; \
2698 if (cShift) \
2699 { \
2700 uint16_t const uDst = *puDst; \
2701 uint64_t const uTmp = a_fIntelFlags \
2702 ? ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uDst \
2703 : ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uSrc; \
2704 uint16_t const uResult = (uint16_t)((uTmp << cShift) >> 32); \
2705 *puDst = uResult; \
2706 \
2707 /* CALC EFLAGS: */ \
2708 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2709 AssertCompile(X86_EFL_CF_BIT == 0); \
2710 if (a_fIntelFlags) \
2711 { \
2712 fEfl |= (uTmp >> (48 - cShift)) & X86_EFL_CF; /* CF = last bit shifted out of the combined operand */ \
2713 /* Intel 6700K & 10980XE: OF is set according to the first shift. AF always cleared. */ \
2714 fEfl |= X86_EFL_GET_OF_16(uDst ^ (uDst << 1)); \
2715 } \
2716 else \
2717 { \
2718 /* AMD 3990X: OF is set according to last shift, with some weirdness. AF always set. CF = last bit shifted out of uDst. */ \
2719 if (cShift < 16) \
2720 { \
2721 fEfl |= (uDst >> (16 - cShift)) & X86_EFL_CF; \
2722 fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ uResult); \
2723 } \
2724 else \
2725 { \
2726 if (cShift == 16) \
2727 fEfl |= uDst & X86_EFL_CF; \
2728 fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ 0); \
2729 } \
2730 fEfl |= X86_EFL_AF; \
2731 } \
2732 fEfl |= g_afParity[uResult & 0xff]; \
2733 fEfl |= X86_EFL_CALC_SF(uResult, 16); \
2734 fEfl |= X86_EFL_CALC_ZF(uResult); \
2735 *pfEFlags = fEfl; \
2736 } \
2737}
2738
2739#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2740EMIT_SHLD_16(RT_NOTHING, 1)
2741#endif
2742EMIT_SHLD_16(_intel, 1)
2743EMIT_SHLD_16(_amd, 0)
2744
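/*
 * Worked example (illustrative only): shld_u32 with uDst=0xf0000000,
 * uSrc=0xa0000000 and cShift=4 yields 0x0000000a (the top four source
 * bits slide in from the right), with CF=1 picking up the last
 * destination bit (bit 28) shifted out.
 */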
2745
2746/*
2747 * SHRD
2748 *
2749 * EFLAGS behaviour seems to be the same as with SHLD:
2750 * - CF is the last bit shifted out of puDst.
2751 * - AF is always cleared by Intel 10980XE.
2752 * - AF is always set by AMD 3990X.
2753 * - OF is set according to the first shift on Intel 10980XE, it seems.
2754 * - OF is set according to the last sub-shift on AMD 3990X.
2755 * - ZF, SF and PF are calculated according to the result by both vendors.
2756 *
2757 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
2758 * pick either the source register or the destination register for input bits
2759 * when going beyond 16. According to https://www.sandpile.org/x86/flags.htm
2760 * Intel has changed behaviour here several times. We implement what current
2761 * Skylake-based CPUs do for now; we can extend this later as needed.
2762 */
2763#define EMIT_SHRD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2764IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shrd_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2765{ \
2766 cShift &= a_cBitsWidth - 1; \
2767 if (cShift) \
2768 { \
2769 a_uType const uDst = *puDst; \
2770 a_uType uResult = uDst >> cShift; \
2771 uResult |= uSrc << (a_cBitsWidth - cShift); \
2772 *puDst = uResult; \
2773 \
2774 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2775 AssertCompile(X86_EFL_CF_BIT == 0); \
2776 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2777 if (a_fIntelFlags) \
2778 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2779 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uSrc << (a_cBitsWidth - 1))); \
2780 else \
2781 { /* AMD 3990X: Set according to last shift. AF always set. */ \
2782 if (cShift > 1) /* Set according to last shift. */ \
2783 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uSrc << (a_cBitsWidth - cShift + 1)) ^ uResult); \
2784 else \
2785 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ uResult); \
2786 fEfl |= X86_EFL_AF; \
2787 } \
2788 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2789 fEfl |= X86_EFL_CALC_ZF(uResult); \
2790 fEfl |= g_afParity[uResult & 0xff]; \
2791 *pfEFlags = fEfl; \
2792 } \
2793}
2794
2795#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2796EMIT_SHRD(64, uint64_t, RT_NOTHING, 1)
2797#endif
2798EMIT_SHRD(64, uint64_t, _intel, 1)
2799EMIT_SHRD(64, uint64_t, _amd, 0)
2800
2801#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2802EMIT_SHRD(32, uint32_t, RT_NOTHING, 1)
2803#endif
2804EMIT_SHRD(32, uint32_t, _intel, 1)
2805EMIT_SHRD(32, uint32_t, _amd, 0)
2806
2807#define EMIT_SHRD_16(a_Suffix, a_fIntelFlags) \
2808IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shrd_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2809{ \
2810 cShift &= 31; \
2811 if (cShift) \
2812 { \
2813 uint16_t const uDst = *puDst; \
2814 uint64_t const uTmp = a_fIntelFlags \
2815 ? uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uDst << 32) \
2816 : uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uSrc << 32); \
2817 uint16_t const uResult = (uint16_t)(uTmp >> cShift); \
2818 *puDst = uResult; \
2819 \
2820 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2821 AssertCompile(X86_EFL_CF_BIT == 0); \
2822 if (a_fIntelFlags) \
2823 { \
2824 /* Intel 10980XE: CF is the last bit shifted out of the combined uTmp operand. */ \
2825 fEfl |= (uTmp >> (cShift - 1)) & X86_EFL_CF; \
2826 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2827 fEfl |= X86_EFL_GET_OF_16(uDst ^ (uSrc << 15)); \
2828 } \
2829 else \
2830 { \
2831 /* AMD 3990X: CF seems to be the last bit shifted out of uDst, not the combined uSrc:uSrc:uDst operand. */ \
2832 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2833 /* AMD 3990X: Set according to last shift. AF always set. */ \
2834 if (cShift > 1) /* Set according to last shift. */ \
2835 fEfl |= X86_EFL_GET_OF_16((uint16_t)(uTmp >> (cShift - 1)) ^ uResult); \
2836 else \
2837 fEfl |= X86_EFL_GET_OF_16(uDst ^ uResult); \
2838 fEfl |= X86_EFL_AF; \
2839 } \
2840 fEfl |= X86_EFL_CALC_SF(uResult, 16); \
2841 fEfl |= X86_EFL_CALC_ZF(uResult); \
2842 fEfl |= g_afParity[uResult & 0xff]; \
2843 *pfEFlags = fEfl; \
2844 } \
2845}
2846
2847#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2848EMIT_SHRD_16(RT_NOTHING, 1)
2849#endif
2850EMIT_SHRD_16(_intel, 1)
2851EMIT_SHRD_16(_amd, 0)
2852
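/*
 * Worked example (illustrative only): shrd_u32 with uDst=0x0000000f,
 * uSrc=0x00000005 and cShift=4 yields 0x50000000 (the low four source
 * bits slide in from the left), with CF=1 picking up the last
 * destination bit (bit 3) shifted out.
 */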
2853
2854#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2855
2856# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2857/*
2858 * BSWAP
2859 */
2860
2861IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u64,(uint64_t *puDst))
2862{
2863 *puDst = ASMByteSwapU64(*puDst);
2864}
2865
2866
2867IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u32,(uint32_t *puDst))
2868{
2869 *puDst = ASMByteSwapU32(*puDst);
2870}
2871
2872
2873/* Note! Undocumented, hence the 32-bit argument. */
2874IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u16,(uint32_t *puDst))
2875{
2876#if 0
2877 *(uint16_t *)puDst = ASMByteSwapU16(*(uint16_t *)puDst);
2878#else
2879 /* This is the behaviour of the AMD 3990X (64-bit mode): */
2880 *(uint16_t *)puDst = 0;
2881#endif
2882}
2883
2884# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2885
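/*
 * A minimal usage sketch (not compiled in): the 32-bit and 64-bit forms
 * reverse the byte order, while the undocumented 16-bit form zeroes the
 * low word as observed on the AMD 3990X above.
 */
#if 0
 uint32_t u32 = UINT32_C(0x12345678);
 iemAImpl_bswap_u32(&u32); /* u32 == 0x78563412 */
#endif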
2886
2887
2888# if defined(IEM_WITHOUT_ASSEMBLY)
2889
2890/*
2891 * LFENCE, SFENCE & MFENCE.
2892 */
2893
2894IEM_DECL_IMPL_DEF(void, iemAImpl_lfence,(void))
2895{
2896 ASMReadFence();
2897}
2898
2899
2900IEM_DECL_IMPL_DEF(void, iemAImpl_sfence,(void))
2901{
2902 ASMWriteFence();
2903}
2904
2905
2906IEM_DECL_IMPL_DEF(void, iemAImpl_mfence,(void))
2907{
2908 ASMMemoryFence();
2909}
2910
2911
2912# ifndef RT_ARCH_ARM64
2913IEM_DECL_IMPL_DEF(void, iemAImpl_alt_mem_fence,(void))
2914{
2915 ASMMemoryFence();
2916}
2917# endif
2918
2919# endif
2920
2921#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
2922
2923
2924IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
2925{
2926 if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
2927 {
2928 *pu16Dst &= X86_SEL_MASK_OFF_RPL;
2929 *pu16Dst |= u16Src & X86_SEL_RPL;
2930
2931 *pfEFlags |= X86_EFL_ZF;
2932 }
2933 else
2934 *pfEFlags &= ~X86_EFL_ZF;
2935}
2936
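/*
 * Worked example (illustrative only): ARPL with a destination selector of
 * RPL 0 and a source selector of RPL 3 raises the destination RPL to 3
 * and sets ZF; when the destination RPL is already >= the source RPL,
 * nothing is adjusted and ZF is cleared.
 */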
2937
2938#if defined(IEM_WITHOUT_ASSEMBLY)
2939
2940/*********************************************************************************************************************************
2941* x87 FPU Loads *
2942*********************************************************************************************************************************/
2943
2944IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT32U pr32Val))
2945{
2946 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
2947 if (RTFLOAT32U_IS_NORMAL(pr32Val))
2948 {
2949 pFpuRes->r80Result.sj64.fSign = pr32Val->s.fSign;
2950 pFpuRes->r80Result.sj64.fInteger = 1;
2951 pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
2952 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
2953 pFpuRes->r80Result.sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
2954 Assert(RTFLOAT80U_IS_NORMAL(&pFpuRes->r80Result));
2955 }
2956 else if (RTFLOAT32U_IS_ZERO(pr32Val))
2957 {
2958 pFpuRes->r80Result.s.fSign = pr32Val->s.fSign;
2959 pFpuRes->r80Result.s.uExponent = 0;
2960 pFpuRes->r80Result.s.uMantissa = 0;
2961 Assert(RTFLOAT80U_IS_ZERO(&pFpuRes->r80Result));
2962 }
2963 else if (RTFLOAT32U_IS_SUBNORMAL(pr32Val))
2964 {
2965 /* Subnormal values get normalized. */
2966 pFpuRes->r80Result.sj64.fSign = pr32Val->s.fSign;
2967 pFpuRes->r80Result.sj64.fInteger = 1;
2968 unsigned const cExtraShift = RTFLOAT32U_FRACTION_BITS - ASMBitLastSetU32(pr32Val->s.uFraction);
2969 pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
2970 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS + cExtraShift + 1);
2971 pFpuRes->r80Result.sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
2972 pFpuRes->FSW |= X86_FSW_DE;
2973 if (!(pFpuState->FCW & X86_FCW_DM))
2974 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B; /* The value is still pushed. */
2975 }
2976 else if (RTFLOAT32U_IS_INF(pr32Val))
2977 {
2978 pFpuRes->r80Result.s.fSign = pr32Val->s.fSign;
2979 pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_MAX;
2980 pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
2981 Assert(RTFLOAT80U_IS_INF(&pFpuRes->r80Result));
2982 }
2983 else
2984 {
2985 /* Signalling and quiet NaNs both turn into quiet ones when loaded (weird). */
2986 Assert(RTFLOAT32U_IS_NAN(pr32Val));
2987 pFpuRes->r80Result.sj64.fSign = pr32Val->s.fSign;
2988 pFpuRes->r80Result.sj64.uExponent = RTFLOAT80U_EXP_MAX;
2989 pFpuRes->r80Result.sj64.fInteger = 1;
2990 pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
2991 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
2992 if (RTFLOAT32U_IS_SIGNALLING_NAN(pr32Val))
2993 {
2994 pFpuRes->r80Result.sj64.uFraction |= RT_BIT_64(62); /* make quiet */
2995 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
2996 pFpuRes->FSW |= X86_FSW_IE;
2997
2998 if (!(pFpuState->FCW & X86_FCW_IM))
2999 {
3000 /* The value is not pushed. */
3001 pFpuRes->FSW &= ~X86_FSW_TOP_MASK;
3002 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B;
3003 pFpuRes->r80Result.au64[0] = 0;
3004 pFpuRes->r80Result.au16[4] = 0;
3005 }
3006 }
3007 else
3008 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
3009 }
3010}
3011
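/*
 * Worked example (illustrative only): loading 1.0f (0x3f800000) takes the
 * normal path above; the exponent re-biases as 127 - 127 + 16383 = 0x3fff
 * and the zero fraction gains the explicit integer bit, giving the 80-bit
 * encoding 0x3fff:8000000000000000.
 */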
3012
3013IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT64U pr64Val))
3014{
3015 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3016 if (RTFLOAT64U_IS_NORMAL(pr64Val))
3017 {
3018 pFpuRes->r80Result.sj64.fSign = pr64Val->s.fSign;
3019 pFpuRes->r80Result.sj64.fInteger = 1;
3020 pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
3021 pFpuRes->r80Result.sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
3022 Assert(RTFLOAT80U_IS_NORMAL(&pFpuRes->r80Result));
3023 }
3024 else if (RTFLOAT64U_IS_ZERO(pr64Val))
3025 {
3026 pFpuRes->r80Result.s.fSign = pr64Val->s.fSign;
3027 pFpuRes->r80Result.s.uExponent = 0;
3028 pFpuRes->r80Result.s.uMantissa = 0;
3029 Assert(RTFLOAT80U_IS_ZERO(&pFpuRes->r80Result));
3030 }
3031 else if (RTFLOAT64U_IS_SUBNORMAL(pr64Val))
3032 {
3033 /* Subnormal values get normalized. */
3034 pFpuRes->r80Result.sj64.fSign = pr64Val->s.fSign;
3035 pFpuRes->r80Result.sj64.fInteger = 1;
3036 unsigned const cExtraShift = RTFLOAT64U_FRACTION_BITS - ASMBitLastSetU64(pr64Val->s64.uFraction);
3037 pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction
3038 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS + cExtraShift + 1);
3039 pFpuRes->r80Result.sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
3040 pFpuRes->FSW |= X86_FSW_DE;
3041 if (!(pFpuState->FCW & X86_FCW_DM))
3042 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B; /* The value is still pushed. */
3043 }
3044 else if (RTFLOAT64U_IS_INF(pr64Val))
3045 {
3046 pFpuRes->r80Result.s.fSign = pr64Val->s.fSign;
3047 pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_MAX;
3048 pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
3049 Assert(RTFLOAT80U_IS_INF(&pFpuRes->r80Result));
3050 }
3051 else
3052 {
3053 /* Signalling and quiet NaNs both turn into quiet ones when loaded (weird). */
3054 Assert(RTFLOAT64U_IS_NAN(pr64Val));
3055 pFpuRes->r80Result.sj64.fSign = pr64Val->s.fSign;
3056 pFpuRes->r80Result.sj64.uExponent = RTFLOAT80U_EXP_MAX;
3057 pFpuRes->r80Result.sj64.fInteger = 1;
3058 pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
3059 if (RTFLOAT64U_IS_SIGNALLING_NAN(pr64Val))
3060 {
3061 pFpuRes->r80Result.sj64.uFraction |= RT_BIT_64(62); /* make quiet */
3062 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
3063 pFpuRes->FSW |= X86_FSW_IE;
3064
3065 if (!(pFpuState->FCW & X86_FCW_IM))
3066 {
3067 /* The value is not pushed. */
3068 pFpuRes->FSW &= ~X86_FSW_TOP_MASK;
3069 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B;
3070 pFpuRes->r80Result.au64[0] = 0;
3071 pFpuRes->r80Result.au16[4] = 0;
3072 }
3073 }
3074 else
3075 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
3076 }
3077}
3078
3079
3080IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
3081{
3082 pFpuRes->r80Result.au64[0] = pr80Val->au64[0];
3083 pFpuRes->r80Result.au16[4] = pr80Val->au16[4];
3084 /* Raises no exceptions. */
3085 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3086}
3087
3088
3089IEM_DECL_IMPL_DEF(void, iemAImpl_fld1,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3090{
3091 pFpuRes->r80Result.sj64.fSign = 0;
3092 pFpuRes->r80Result.sj64.uExponent = 0 + 16383;
3093 pFpuRes->r80Result.sj64.fInteger = 1;
3094 pFpuRes->r80Result.sj64.uFraction = 0;
3095
3096 /*
3097 * FPU status word:
3098 * - TOP is irrelevant, but we must match the x86 assembly version.
3099 * - C1 is always cleared as we don't have any stack overflows.
3100 * - C0, C2, and C3 are undefined and Intel 10980XE does not touch them.
3101 */
3102 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3103}
3104
3105
3106IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2e,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3107{
3108 pFpuRes->r80Result.sj64.fSign = 0;
3109 pFpuRes->r80Result.sj64.uExponent = 0 + 16383;
3110 pFpuRes->r80Result.sj64.fInteger = 1;
3111 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3112 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3113 ? UINT64_C(0x38aa3b295c17f0bc) : UINT64_C(0x38aa3b295c17f0bb);
3114 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3115}
3116
3117
3118IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2t,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3119{
3120 pFpuRes->r80Result.sj64.fSign = 0;
3121 pFpuRes->r80Result.sj64.uExponent = 1 + 16383;
3122 pFpuRes->r80Result.sj64.fInteger = 1;
3123 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) != X86_FCW_RC_UP
3124 ? UINT64_C(0x549a784bcd1b8afe) : UINT64_C(0x549a784bcd1b8aff);
3125 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3126}
3127
3128
3129IEM_DECL_IMPL_DEF(void, iemAImpl_fldlg2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3130{
3131 pFpuRes->r80Result.sj64.fSign = 0;
3132 pFpuRes->r80Result.sj64.uExponent = -2 + 16383;
3133 pFpuRes->r80Result.sj64.fInteger = 1;
3134 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3135 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3136 ? UINT64_C(0x1a209a84fbcff799) : UINT64_C(0x1a209a84fbcff798);
3137 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3138}
3139
3140
3141IEM_DECL_IMPL_DEF(void, iemAImpl_fldln2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3142{
3143 pFpuRes->r80Result.sj64.fSign = 0;
3144 pFpuRes->r80Result.sj64.uExponent = -1 + 16383;
3145 pFpuRes->r80Result.sj64.fInteger = 1;
3146 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3147 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3148 ? UINT64_C(0x317217f7d1cf79ac) : UINT64_C(0x317217f7d1cf79ab);
3149 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3150}
3151
3152
3153IEM_DECL_IMPL_DEF(void, iemAImpl_fldpi,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3154{
3155 pFpuRes->r80Result.sj64.fSign = 0;
3156 pFpuRes->r80Result.sj64.uExponent = 1 + 16383;
3157 pFpuRes->r80Result.sj64.fInteger = 1;
3158 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3159 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3160 ? UINT64_C(0x490fdaa22168c235) : UINT64_C(0x490fdaa22168c234);
3161 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3162}
3163
3164
3165IEM_DECL_IMPL_DEF(void, iemAImpl_fldz,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3166{
3167 pFpuRes->r80Result.s.fSign = 0;
3168 pFpuRes->r80Result.s.uExponent = 0;
3169 pFpuRes->r80Result.s.uMantissa = 0;
3170 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3171}
3172
3173#define EMIT_FILD(a_cBits) \
3174IEM_DECL_IMPL_DEF(void, iemAImpl_fild_r80_from_i ## a_cBits,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, \
3175 int ## a_cBits ## _t const *piVal)) \
3176{ \
3177 int ## a_cBits ## _t iVal = *piVal; \
3178 if (iVal == 0) \
3179 { \
3180 pFpuRes->r80Result.s.fSign = 0; \
3181 pFpuRes->r80Result.s.uExponent = 0; \
3182 pFpuRes->r80Result.s.uMantissa = 0; \
3183 } \
3184 else \
3185 { \
3186 if (iVal > 0) \
3187 pFpuRes->r80Result.s.fSign = 0; \
3188 else \
3189 { \
3190 pFpuRes->r80Result.s.fSign = 1; \
3191 iVal = -iVal; \
3192 } \
3193 unsigned const cBits = ASMBitLastSetU ## a_cBits((uint ## a_cBits ## _t)iVal); \
3194 pFpuRes->r80Result.s.uExponent = cBits - 1 + RTFLOAT80U_EXP_BIAS; \
3195 pFpuRes->r80Result.s.uMantissa = (uint64_t)iVal << (RTFLOAT80U_FRACTION_BITS + 1 - cBits); \
3196 } \
3197 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */ \
3198}
3199EMIT_FILD(16)
3200EMIT_FILD(32)
3201EMIT_FILD(64)
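
/*
 * Informal usage sketch for the fild helpers above (hypothetical test code,
 * not part of the build): integer loads are always exact, e.g. -6 becomes
 * -1.10b * 2^2 with no rounding and no exceptions.
 */
#if 0
static void tstFildExample(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    IEMFPURESULT  Res;
    int16_t const iVal = -6;
    iemAImpl_fild_r80_from_i16(&State, &Res, &iVal);
    /* ASMBitLastSetU16(6) returns 3, so the three significant bits land at
       the top of the mantissa and the exponent is 3 - 1 + 16383. */
    Assert(Res.r80Result.s.fSign == 1);
    Assert(Res.r80Result.s.uExponent == 2 + 16383);
    Assert(Res.r80Result.s.uMantissa == UINT64_C(0xc000000000000000));
}
#endif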
3202
3203
3204IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_d80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTPBCD80U pd80Val))
3205{
3206 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3207 if ( pd80Val->s.abPairs[0] == 0
3208 && pd80Val->s.abPairs[1] == 0
3209 && pd80Val->s.abPairs[2] == 0
3210 && pd80Val->s.abPairs[3] == 0
3211 && pd80Val->s.abPairs[4] == 0
3212 && pd80Val->s.abPairs[5] == 0
3213 && pd80Val->s.abPairs[6] == 0
3214 && pd80Val->s.abPairs[7] == 0
3215 && pd80Val->s.abPairs[8] == 0)
3216 {
3217 pFpuRes->r80Result.s.fSign = pd80Val->s.fSign;
3218 pFpuRes->r80Result.s.uExponent = 0;
3219 pFpuRes->r80Result.s.uMantissa = 0;
3220 }
3221 else
3222 {
3223 pFpuRes->r80Result.s.fSign = pd80Val->s.fSign;
3224
3225 size_t cPairs = RT_ELEMENTS(pd80Val->s.abPairs);
3226 while (cPairs > 0 && pd80Val->s.abPairs[cPairs - 1] == 0)
3227 cPairs--;
3228
3229 uint64_t uVal = 0;
3230 uint64_t uFactor = 1;
3231 for (size_t iPair = 0; iPair < cPairs; iPair++, uFactor *= 100)
3232 uVal += RTPBCD80U_LO_DIGIT(pd80Val->s.abPairs[iPair]) * uFactor
3233 + RTPBCD80U_HI_DIGIT(pd80Val->s.abPairs[iPair]) * uFactor * 10;
3234
3235 unsigned const cBits = ASMBitLastSetU64(uVal);
3236 pFpuRes->r80Result.s.uExponent = cBits - 1 + RTFLOAT80U_EXP_BIAS;
3237 pFpuRes->r80Result.s.uMantissa = uVal << (RTFLOAT80U_FRACTION_BITS + 1 - cBits);
3238 }
3239}
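
/*
 * Worked example for the packed BCD decode above (informal): with
 * abPairs[] = { 0x45, 0x23, 0x01, 0, ... } the loop accumulates
 * 5*1 + 4*10 + 3*100 + 2*1000 + 1*10000 = 12345, which is then normalized
 * exactly like the fild case (14 significant bits, so the exponent ends up
 * as 13 + RTFLOAT80U_EXP_BIAS).
 */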
3240
3241
3242/*********************************************************************************************************************************
3243* x87 FPU Stores *
3244*********************************************************************************************************************************/
3245
3246/**
3247 * Helper for storing a deconstructed and normal R80 value as a 32-bit one.
3248 *
3249 * This uses the rounding rules indicated by fFcw and returns updated fFsw.
3250 *
3251 * @returns Updated FPU status word value.
3252 * @param fSignIn Incoming sign indicator.
3253 * @param uMantissaIn Incoming mantissa (dot between bit 63 and 62).
3254 * @param iExponentIn Unbiased exponent.
3255 * @param fFcw The FPU control word.
3256 * @param fFsw Prepped FPU status word, i.e. exceptions and C1 clear.
3257 * @param pr32Dst Where to return the output value, if one should be
3258 * returned.
3259 *
3260 * @note Tailored as a helper for iemAImpl_fst_r80_to_r32 right now.
3261 * @note Exact same logic as iemAImpl_StoreNormalR80AsR64.
3262 */
3263static uint16_t iemAImpl_StoreNormalR80AsR32(bool fSignIn, uint64_t uMantissaIn, int32_t iExponentIn,
3264 uint16_t fFcw, uint16_t fFsw, PRTFLOAT32U pr32Dst)
3265{
3266 uint64_t const fRoundingOffMask = RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS) - 1; /* 0xffffffffff */
3267 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3268 ? RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS - 1) /* 0x8000000000 */
3269 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
3270 ? fRoundingOffMask
3271 : 0;
3272 uint64_t fRoundedOff = uMantissaIn & fRoundingOffMask;
3273
3274 /*
3275 * Deal with potential overflows/underflows first, optimizing for none.
3276 * 0 and MAX are used for special values; MAX-1 may be rounded up to MAX.
3277 */
3278 int32_t iExponentOut = (int32_t)iExponentIn + RTFLOAT32U_EXP_BIAS;
3279 if ((uint32_t)iExponentOut - 1 < (uint32_t)(RTFLOAT32U_EXP_MAX - 3))
3280 { /* likely? */ }
3281 /*
3282 * Underflow if the exponent is zero or negative. When possible this is
3283 * mapped to a subnormal number, with some additional trickery of course.
3284 */
3285 else if (iExponentOut <= 0)
3286 {
3287 bool const fIsTiny = iExponentOut < 0
3288 || UINT64_MAX - uMantissaIn > uRoundingAdd;
3289 if (!(fFcw & X86_FCW_UM) && fIsTiny)
3290 /* Note! 754-1985 sec 7.4 has something about bias adjust of 192 here, not in 2008 & 2019. Perhaps only 8087 & 287? */
3291 return fFsw | X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3292
3293 if (iExponentOut <= 0)
3294 {
3295 uMantissaIn = iExponentOut <= -63
3296 ? uMantissaIn != 0
3297 : (uMantissaIn >> (-iExponentOut + 1)) | ((uMantissaIn & (RT_BIT_64(-iExponentOut + 1) - 1)) != 0);
3298 fRoundedOff = uMantissaIn & fRoundingOffMask;
3299 if (fRoundedOff && fIsTiny)
3300 fFsw |= X86_FSW_UE;
3301 iExponentOut = 0;
3302 }
3303 }
3304 /*
3305 * Overflow if at or above max exponent value or if we will reach max
3306 * when rounding. Will return +/-infinity or +/-max value depending on
3307 * whether we're rounding or not.
3308 */
3309 else if ( iExponentOut >= RTFLOAT32U_EXP_MAX
3310 || ( iExponentOut == RTFLOAT32U_EXP_MAX - 1
3311 && UINT64_MAX - uMantissaIn <= uRoundingAdd))
3312 {
3313 fFsw |= X86_FSW_OE;
3314 if (!(fFcw & X86_FCW_OM))
3315 return fFsw | X86_FSW_ES | X86_FSW_B;
3316 fFsw |= X86_FSW_PE;
3317 if (uRoundingAdd)
3318 fFsw |= X86_FSW_C1;
3319 if (!(fFcw & X86_FCW_PM))
3320 fFsw |= X86_FSW_ES | X86_FSW_B;
3321
3322 pr32Dst->s.fSign = fSignIn;
3323 if (uRoundingAdd)
3324 { /* Infinity */
3325 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3326 pr32Dst->s.uFraction = 0;
3327 }
3328 else
3329 { /* Max */
3330 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX - 1;
3331 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1;
3332 }
3333 return fFsw;
3334 }
3335
3336 /*
3337 * Normal or subnormal number.
3338 */
3339 /* Do rounding - in round-to-nearest mode we only truncate when exactly midway and the kept mantissa is even. */
3340 uint64_t uMantissaOut = uMantissaIn;
3341 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
3342 || (uMantissaIn & RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS))
3343 || fRoundedOff != uRoundingAdd)
3344 {
3345 uMantissaOut = uMantissaIn + uRoundingAdd;
3346 if (uMantissaOut >= uMantissaIn)
3347 { /* likely */ }
3348 else
3349 {
3350 uMantissaOut >>= 1; /* (We don't need to add bit 63 here (the integer bit), as it will be chopped off below.) */
3351 iExponentOut++;
3352 Assert(iExponentOut < RTFLOAT32U_EXP_MAX); /* checked above */
3353 fFsw |= X86_FSW_C1;
3354 }
3355 }
3356 else
3357 uMantissaOut = uMantissaIn;
3358
3359 /* Truncate the mantissa and set the return value. */
3360 uMantissaOut >>= RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS;
3361
3362 pr32Dst->s.uFraction = (uint32_t)uMantissaOut; /* Note! Still has the integer bit set if normal; the 23-bit bitfield chops it off. */
3363 pr32Dst->s.uExponent = iExponentOut;
3364 pr32Dst->s.fSign = fSignIn;
3365
3366 /* Set status flags related to rounding. */
3367 if (fRoundedOff)
3368 {
3369 fFsw |= X86_FSW_PE;
3370 if (uMantissaOut > (uMantissaIn >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS)))
3371 fFsw |= X86_FSW_C1;
3372 if (!(fFcw & X86_FCW_PM))
3373 fFsw |= X86_FSW_ES | X86_FSW_B;
3374 }
3375
3376 return fFsw;
3377}
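
/*
 * Worked example for the helper above (informal): storing 1.75 + 2^-30, i.e.
 * uMantissaIn = 0xe000000200000000 and iExponentIn = 0, with round-to-nearest:
 * the bits shifted out (fRoundedOff = 0x200000000) are below the midpoint
 * (uRoundingAdd = 0x8000000000), so the mantissa truncates to 0xe00000 and the
 * result is exactly 1.75 in r32 with PE set, C1 clear (we rounded down) and
 * ES/B added only if PM is unmasked.
 */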
3378
3379
3380/**
3381 * @note Exact same logic as iemAImpl_fst_r80_to_r64.
3382 */
3383IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r32,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3384 PRTFLOAT32U pr32Dst, PCRTFLOAT80U pr80Src))
3385{
3386 uint16_t const fFcw = pFpuState->FCW;
3387 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3388 if (RTFLOAT80U_IS_NORMAL(pr80Src))
3389 fFsw = iemAImpl_StoreNormalR80AsR32(pr80Src->s.fSign, pr80Src->s.uMantissa,
3390 (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS, fFcw, fFsw, pr32Dst);
3391 else if (RTFLOAT80U_IS_ZERO(pr80Src))
3392 {
3393 pr32Dst->s.fSign = pr80Src->s.fSign;
3394 pr32Dst->s.uExponent = 0;
3395 pr32Dst->s.uFraction = 0;
3396 Assert(RTFLOAT32U_IS_ZERO(pr32Dst));
3397 }
3398 else if (RTFLOAT80U_IS_INF(pr80Src))
3399 {
3400 pr32Dst->s.fSign = pr80Src->s.fSign;
3401 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3402 pr32Dst->s.uFraction = 0;
3403 Assert(RTFLOAT32U_IS_INF(pr32Dst));
3404 }
3405 else if (RTFLOAT80U_IS_INDEFINITE(pr80Src))
3406 {
3407 /* Mapped to +/-QNaN */
3408 pr32Dst->s.fSign = pr80Src->s.fSign;
3409 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3410 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3411 }
3412 else if (RTFLOAT80U_IS_PSEUDO_INF(pr80Src) || RTFLOAT80U_IS_UNNORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_NAN(pr80Src))
3413 {
3414 /* Pseudo-Inf / Pseudo-Nan / Unnormal -> QNaN (during load, probably) */
3415 if (fFcw & X86_FCW_IM)
3416 {
3417 pr32Dst->s.fSign = 1;
3418 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3419 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3420 fFsw |= X86_FSW_IE;
3421 }
3422 else
3423 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3424 }
3425 else if (RTFLOAT80U_IS_NAN(pr80Src))
3426 {
3427 /* IM applies to signalled NaN input only. Everything is converted to quiet NaN. */
3428 if ((fFcw & X86_FCW_IM) || !RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3429 {
3430 pr32Dst->s.fSign = pr80Src->s.fSign;
3431 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3432 pr32Dst->s.uFraction = (uint32_t)(pr80Src->sj64.uFraction >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS));
3433 pr32Dst->s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3434 if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3435 fFsw |= X86_FSW_IE;
3436 }
3437 else
3438 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3439 }
3440 else
3441 {
3442 /* Denormal values cause both an underflow and a precision exception. */
3443 Assert(RTFLOAT80U_IS_DENORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src));
3444 if (fFcw & X86_FCW_UM)
3445 {
3446 pr32Dst->s.fSign = pr80Src->s.fSign;
3447 pr32Dst->s.uExponent = 0;
3448 if ((fFcw & X86_FCW_RC_MASK) == (!pr80Src->s.fSign ? X86_FCW_RC_UP : X86_FCW_RC_DOWN))
3449 {
3450 pr32Dst->s.uFraction = 1;
3451 fFsw |= X86_FSW_UE | X86_FSW_PE | X86_FSW_C1;
3452 if (!(fFcw & X86_FCW_PM))
3453 fFsw |= X86_FSW_ES | X86_FSW_B;
3454 }
3455 else
3456 {
3457 pr32Dst->s.uFraction = 0;
3458 fFsw |= X86_FSW_UE | X86_FSW_PE;
3459 if (!(fFcw & X86_FCW_PM))
3460 fFsw |= X86_FSW_ES | X86_FSW_B;
3461 }
3462 }
3463 else
3464 fFsw |= X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3465 }
3466 *pu16FSW = fFsw;
3467}
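
/*
 * Informal note on the NaN path above: with IE masked (FCW.IM set) an 80-bit
 * signalling NaN is quietened on store - the top fraction bit is forced to
 * one - and IE is raised; with IE unmasked nothing is written and ES/B are
 * set in addition to IE.
 */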
3468
3469
3470/**
3471 * Helper for storing a deconstructed and normal R80 value as a 64-bit one.
3472 *
3473 * This uses the rounding rules indicated by fFcw and returns updated fFsw.
3474 *
3475 * @returns Updated FPU status word value.
3476 * @param fSignIn Incoming sign indicator.
3477 * @param uMantissaIn Incoming mantissa (dot between bit 63 and 62).
3478 * @param iExponentIn Unbiased exponent.
3479 * @param fFcw The FPU control word.
3480 * @param fFsw Prepped FPU status word, i.e. exceptions and C1 clear.
3481 * @param pr64Dst Where to return the output value, if one should be
3482 * returned.
3483 *
3484 * @note Tailored as a helper for iemAImpl_fst_r80_to_r64 right now.
3485 * @note Exact same logic as iemAImpl_StoreNormalR80AsR32.
3486 */
3487static uint16_t iemAImpl_StoreNormalR80AsR64(bool fSignIn, uint64_t uMantissaIn, int32_t iExponentIn,
3488 uint16_t fFcw, uint16_t fFsw, PRTFLOAT64U pr64Dst)
3489{
3490 uint64_t const fRoundingOffMask = RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS) - 1; /* 0x7ff */
3491 uint32_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3492 ? RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS - 1) /* 0x400 */
3493 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
3494 ? fRoundingOffMask
3495 : 0;
3496 uint32_t fRoundedOff = uMantissaIn & fRoundingOffMask;
3497
3498 /*
3499 * Deal with potential overflows/underflows first, optimizing for none.
3500 * 0 and MAX are used for special values; MAX-1 may be rounded up to MAX.
3501 */
3502 int32_t iExponentOut = (int32_t)iExponentIn + RTFLOAT64U_EXP_BIAS;
3503 if ((uint32_t)iExponentOut - 1 < (uint32_t)(RTFLOAT64U_EXP_MAX - 3))
3504 { /* likely? */ }
3505 /*
3506 * Underflow if the exponent is zero or negative. When possible this is
3507 * mapped to a subnormal number, with some additional trickery of course.
3508 */
3509 else if (iExponentOut <= 0)
3510 {
3511 bool const fIsTiny = iExponentOut < 0
3512 || UINT64_MAX - uMantissaIn > uRoundingAdd;
3513 if (!(fFcw & X86_FCW_UM) && fIsTiny)
3514 /* Note! 754-1985 sec 7.4 has something about bias adjust of 1536 here, not in 2008 & 2019. Perhaps only 8087 & 287? */
3515 return fFsw | X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3516
3517 if (iExponentOut <= 0)
3518 {
3519 uMantissaIn = iExponentOut <= -63
3520 ? uMantissaIn != 0
3521 : (uMantissaIn >> (-iExponentOut + 1)) | ((uMantissaIn & (RT_BIT_64(-iExponentOut + 1) - 1)) != 0);
3522 fRoundedOff = uMantissaIn & fRoundingOffMask;
3523 if (fRoundedOff && fIsTiny)
3524 fFsw |= X86_FSW_UE;
3525 iExponentOut = 0;
3526 }
3527 }
3528 /*
3529 * Overflow if at or above max exponent value or if we will reach max
3530 * when rounding. Will return +/-infinity or +/-max value depending on
3531 * whether we're rounding or not.
3532 */
3533 else if ( iExponentOut >= RTFLOAT64U_EXP_MAX
3534 || ( iExponentOut == RTFLOAT64U_EXP_MAX - 1
3535 && UINT64_MAX - uMantissaIn <= uRoundingAdd))
3536 {
3537 fFsw |= X86_FSW_OE;
3538 if (!(fFcw & X86_FCW_OM))
3539 return fFsw | X86_FSW_ES | X86_FSW_B;
3540 fFsw |= X86_FSW_PE;
3541 if (uRoundingAdd)
3542 fFsw |= X86_FSW_C1;
3543 if (!(fFcw & X86_FCW_PM))
3544 fFsw |= X86_FSW_ES | X86_FSW_B;
3545
3546 pr64Dst->s64.fSign = fSignIn;
3547 if (uRoundingAdd)
3548 { /* Infinity */
3549 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3550 pr64Dst->s64.uFraction = 0;
3551 }
3552 else
3553 { /* Max */
3554 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX - 1;
3555 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1;
3556 }
3557 return fFsw;
3558 }
3559
3560 /*
3561 * Normal or subnormal number.
3562 */
3563 /* Do rounding - in round-to-nearest mode we only truncate when exactly midway and the kept mantissa is even. */
3564 uint64_t uMantissaOut = uMantissaIn;
3565 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
3566 || (uMantissaIn & RT_BIT_32(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS))
3567 || fRoundedOff != uRoundingAdd)
3568 {
3569 uMantissaOut = uMantissaIn + uRoundingAdd;
3570 if (uMantissaOut >= uMantissaIn)
3571 { /* likely */ }
3572 else
3573 {
3574 uMantissaOut >>= 1; /* (We don't need to add bit 63 here (the integer bit), as it will be chopped off below.) */
3575 iExponentOut++;
3576 Assert(iExponentOut < RTFLOAT64U_EXP_MAX); /* checked above */
3577 fFsw |= X86_FSW_C1;
3578 }
3579 }
3580 else
3581 uMantissaOut = uMantissaIn;
3582
3583 /* Truncate the mantissa and set the return value. */
3584 uMantissaOut >>= RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS;
3585
3586 pr64Dst->s64.uFraction = uMantissaOut; /* Note! Still has the integer bit set if normal; the 52-bit bitfield chops it off. */
3587 pr64Dst->s64.uExponent = iExponentOut;
3588 pr64Dst->s64.fSign = fSignIn;
3589
3590 /* Set status flags related to rounding. */
3591 if (fRoundedOff)
3592 {
3593 fFsw |= X86_FSW_PE;
3594 if (uMantissaOut > (uMantissaIn >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS)))
3595 fFsw |= X86_FSW_C1;
3596 if (!(fFcw & X86_FCW_PM))
3597 fFsw |= X86_FSW_ES | X86_FSW_B;
3598 }
3599
3600 return fFsw;
3601}
3602
3603
3604/**
3605 * @note Exact same logic as iemAImpl_fst_r80_to_r32.
3606 */
3607IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r64,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3608 PRTFLOAT64U pr64Dst, PCRTFLOAT80U pr80Src))
3609{
3610 uint16_t const fFcw = pFpuState->FCW;
3611 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3612 if (RTFLOAT80U_IS_NORMAL(pr80Src))
3613 fFsw = iemAImpl_StoreNormalR80AsR64(pr80Src->s.fSign, pr80Src->s.uMantissa,
3614 (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS, fFcw, fFsw, pr64Dst);
3615 else if (RTFLOAT80U_IS_ZERO(pr80Src))
3616 {
3617 pr64Dst->s64.fSign = pr80Src->s.fSign;
3618 pr64Dst->s64.uExponent = 0;
3619 pr64Dst->s64.uFraction = 0;
3620 Assert(RTFLOAT64U_IS_ZERO(pr64Dst));
3621 }
3622 else if (RTFLOAT80U_IS_INF(pr80Src))
3623 {
3624 pr64Dst->s64.fSign = pr80Src->s.fSign;
3625 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3626 pr64Dst->s64.uFraction = 0;
3627 Assert(RTFLOAT64U_IS_INF(pr64Dst));
3628 }
3629 else if (RTFLOAT80U_IS_INDEFINITE(pr80Src))
3630 {
3631 /* Mapped to +/-QNaN */
3632 pr64Dst->s64.fSign = pr80Src->s.fSign;
3633 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3634 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3635 }
3636 else if (RTFLOAT80U_IS_PSEUDO_INF(pr80Src) || RTFLOAT80U_IS_UNNORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_NAN(pr80Src))
3637 {
3638 /* Pseudo-Inf / Pseudo-Nan / Unnormal -> QNaN (during load, probably) */
3639 if (fFcw & X86_FCW_IM)
3640 {
3641 pr64Dst->s64.fSign = 1;
3642 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3643 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3644 fFsw |= X86_FSW_IE;
3645 }
3646 else
3647 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3648 }
3649 else if (RTFLOAT80U_IS_NAN(pr80Src))
3650 {
3651 /* IM applies to signalled NaN input only. Everything is converted to quiet NaN. */
3652 if ((fFcw & X86_FCW_IM) || !RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3653 {
3654 pr64Dst->s64.fSign = pr80Src->s.fSign;
3655 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3656 pr64Dst->s64.uFraction = pr80Src->sj64.uFraction >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
3657 pr64Dst->s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3658 if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3659 fFsw |= X86_FSW_IE;
3660 }
3661 else
3662 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3663 }
3664 else
3665 {
3666 /* Denormal values cause both an underflow and a precision exception. */
3667 Assert(RTFLOAT80U_IS_DENORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src));
3668 if (fFcw & X86_FCW_UM)
3669 {
3670 pr64Dst->s64.fSign = pr80Src->s.fSign;
3671 pr64Dst->s64.uExponent = 0;
3672 if ((fFcw & X86_FCW_RC_MASK) == (!pr80Src->s.fSign ? X86_FCW_RC_UP : X86_FCW_RC_DOWN))
3673 {
3674 pr64Dst->s64.uFraction = 1;
3675 fFsw |= X86_FSW_UE | X86_FSW_PE | X86_FSW_C1;
3676 if (!(fFcw & X86_FCW_PM))
3677 fFsw |= X86_FSW_ES | X86_FSW_B;
3678 }
3679 else
3680 {
3681 pr64Dst->s64.uFraction = 0;
3682 fFsw |= X86_FSW_UE | X86_FSW_PE;
3683 if (!(fFcw & X86_FCW_PM))
3684 fFsw |= X86_FSW_ES | X86_FSW_B;
3685 }
3686 }
3687 else
3688 fFsw |= X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3689 }
3690 *pu16FSW = fFsw;
3691}
3692
3693
3694IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3695 PRTFLOAT80U pr80Dst, PCRTFLOAT80U pr80Src))
3696{
3697 /*
3698 * FPU status word:
3699 * - TOP is irrelevant, but we must match x86 assembly version (0).
3700 * - C1 is always cleared as we don't have any stack overflows.
3701 * - C0, C2, and C3 are undefined and Intel 10980XE does not touch them.
3702 */
3703 *pu16FSW = pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3); /* see iemAImpl_fld1 */
3704 *pr80Dst = *pr80Src;
3705}
3706
3707
3708/*
3709 *
3710 * Mantissa:
3711 * 63 56 48 40 32 24 16 8 0
3712 * v v v v v v v v v
3713 * 1[.]111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000
3714 * \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \
3715 * Exp: 0 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60
3716 *
3717 * int64_t has the same width, only bit 63 is the sign bit. So, the max we can map over
3718 * are bits 1 thru 63, dropping off bit 0, with an exponent of 62. The number of bits we
3719 * drop off from the mantissa increases with decreasing exponent, till an exponent of 0
3720 * where we'll drop off all but bit 63.
3721 */
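
/*
 * Worked example (informal): fist of +5.5 with round-to-nearest has
 * uMantissa = 0xb000000000000000 and iExponent = 2, so cShiftOff = 61 and
 * fRoundedOff == uRoundingAdd == 2^60, i.e. exactly midway.  The rounded
 * result 5 + 1 = 6 is already even and is kept (round-half-to-even), with
 * C1 and PE ending up set in the FSW.
 */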
3722#define EMIT_FIST(a_cBits, a_iType, a_iTypeMin, a_iTypeIndefinite) \
3723IEM_DECL_IMPL_DEF(void, iemAImpl_fist_r80_to_i ## a_cBits,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, \
3724 a_iType *piDst, PCRTFLOAT80U pr80Val)) \
3725{ \
3726 uint16_t const fFcw = pFpuState->FCW; \
3727 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); \
3728 bool const fSignIn = pr80Val->s.fSign; \
3729 \
3730 /* \
3731 * Deal with normal numbers first. \
3732 */ \
3733 if (RTFLOAT80U_IS_NORMAL(pr80Val)) \
3734 { \
3735 uint64_t uMantissa = pr80Val->s.uMantissa; \
3736 int32_t iExponent = (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS; \
3737 \
3738 if ((uint32_t)iExponent <= a_cBits - 2) \
3739 { \
3740 unsigned const cShiftOff = 63 - iExponent; \
3741 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1; \
3742 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST \
3743 ? RT_BIT_64(cShiftOff - 1) \
3744 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP) \
3745 ? fRoundingOffMask \
3746 : 0; \
3747 uint64_t fRoundedOff = uMantissa & fRoundingOffMask; \
3748 \
3749 uMantissa >>= cShiftOff; \
3750 uint64_t const uRounding = (fRoundedOff + uRoundingAdd) >> cShiftOff; \
3751 uMantissa += uRounding; \
3752 if (!(uMantissa & RT_BIT_64(a_cBits - 1))) \
3753 { \
3754 if (fRoundedOff) \
3755 { \
3756 if ((uMantissa & 1) && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST && fRoundedOff == uRoundingAdd) \
3757 uMantissa &= ~(uint64_t)1; /* round to even number if equal distance between up/down. */ \
3758 else if (uRounding) \
3759 fFsw |= X86_FSW_C1; \
3760 fFsw |= X86_FSW_PE; \
3761 if (!(fFcw & X86_FCW_PM)) \
3762 fFsw |= X86_FSW_ES | X86_FSW_B; \
3763 } \
3764 \
3765 if (!fSignIn) \
3766 *piDst = (a_iType)uMantissa; \
3767 else \
3768 *piDst = -(a_iType)uMantissa; \
3769 } \
3770 else \
3771 { \
3772 /* overflowed after rounding. */ \
3773 AssertMsg(iExponent == a_cBits - 2 && uMantissa == RT_BIT_64(a_cBits - 1), \
3774 ("e=%d m=%#RX64 (org %#RX64) s=%d; shift=%d ro=%#RX64 rm=%#RX64 ra=%#RX64\n", iExponent, uMantissa, \
3775 pr80Val->s.uMantissa, fSignIn, cShiftOff, fRoundedOff, fRoundingOffMask, uRoundingAdd)); \
3776 \
3777 /* Special case for the integer minimum value. */ \
3778 if (fSignIn) \
3779 { \
3780 *piDst = a_iTypeMin; \
3781 fFsw |= X86_FSW_PE | X86_FSW_C1; \
3782 if (!(fFcw & X86_FCW_PM)) \
3783 fFsw |= X86_FSW_ES | X86_FSW_B; \
3784 } \
3785 else \
3786 { \
3787 fFsw |= X86_FSW_IE; \
3788 if (fFcw & X86_FCW_IM) \
3789 *piDst = a_iTypeMin; \
3790 else \
3791 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
3792 } \
3793 } \
3794 } \
3795 /* \
3796 * Tiny sub-zero numbers. \
3797 */ \
3798 else if (iExponent < 0) \
3799 { \
3800 if (!fSignIn) \
3801 { \
3802 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP \
3803 || (iExponent == -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST)) \
3804 { \
3805 *piDst = 1; \
3806 fFsw |= X86_FSW_C1; \
3807 } \
3808 else \
3809 *piDst = 0; \
3810 } \
3811 else \
3812 { \
3813 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP \
3814 || (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_ZERO \
3815 || (iExponent < -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST)) \
3816 *piDst = 0; \
3817 else \
3818 { \
3819 *piDst = -1; \
3820 fFsw |= X86_FSW_C1; \
3821 } \
3822 } \
3823 fFsw |= X86_FSW_PE; \
3824 if (!(fFcw & X86_FCW_PM)) \
3825 fFsw |= X86_FSW_ES | X86_FSW_B; \
3826 } \
3827 /* \
3828 * Special MIN case. \
3829 */ \
3830 else if ( fSignIn && iExponent == a_cBits - 1 \
3831 && ( a_cBits < 64 && (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_DOWN \
3832 ? uMantissa < (RT_BIT_64(63) | RT_BIT_64(65 - a_cBits)) \
3833 : uMantissa == RT_BIT_64(63))) \
3834 { \
3835 *piDst = a_iTypeMin; \
3836 if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
3837 { \
3838 fFsw |= X86_FSW_PE; \
3839 if (!(fFcw & X86_FCW_PM)) \
3840 fFsw |= X86_FSW_ES | X86_FSW_B; \
3841 } \
3842 } \
3843 /* \
3844 * Too large/small number outside the target integer range. \
3845 */ \
3846 else \
3847 { \
3848 fFsw |= X86_FSW_IE; \
3849 if (fFcw & X86_FCW_IM) \
3850 *piDst = a_iTypeIndefinite; \
3851 else \
3852 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
3853 } \
3854 } \
3855 /* \
3856 * Map both +0 and -0 to integer zero (signless/+). \
3857 */ \
3858 else if (RTFLOAT80U_IS_ZERO(pr80Val)) \
3859 *piDst = 0; \
3860 /* \
3861 * Denormals are just really tiny sub-zero numbers that are either rounded \
3862 * to zero, 1 or -1 depending on sign and rounding control. \
3863 */ \
3864 else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) || RTFLOAT80U_IS_DENORMAL(pr80Val)) \
3865 { \
3866 if ((fFcw & X86_FCW_RC_MASK) != (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)) \
3867 *piDst = 0; \
3868 else \
3869 { \
3870 *piDst = fSignIn ? -1 : 1; \
3871 fFsw |= X86_FSW_C1; \
3872 } \
3873 fFsw |= X86_FSW_PE; \
3874 if (!(fFcw & X86_FCW_PM)) \
3875 fFsw |= X86_FSW_ES | X86_FSW_B; \
3876 } \
3877 /* \
3878 * All other special values are considered invalid arguments and result \
3879 * in an IE exception and indefinite value if masked. \
3880 */ \
3881 else \
3882 { \
3883 fFsw |= X86_FSW_IE; \
3884 if (fFcw & X86_FCW_IM) \
3885 *piDst = a_iTypeIndefinite; \
3886 else \
3887 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
3888 } \
3889 *pu16FSW = fFsw; \
3890}
3891EMIT_FIST(64, int64_t, INT64_MIN, X86_FPU_INT64_INDEFINITE)
3892EMIT_FIST(32, int32_t, INT32_MIN, X86_FPU_INT32_INDEFINITE)
3893EMIT_FIST(16, int16_t, INT16_MIN, X86_FPU_INT16_INDEFINITE)
3894
3895#endif /* IEM_WITHOUT_ASSEMBLY */
3896
3897
3898/*
3899 * The FISTT instruction was added with SSE3 and is a lot simpler than FIST.
3900 *
3901 * The 16-bit version is a bit peculiar, though, as it seems to be raising IE
3902 * as if it were the 32-bit version (i.e. starting with exp 31 instead of 15),
3903 * thus the @a a_cBitsIn.
3904 */
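
/*
 * Informal example: fisttp of +5.5 simply chops to +5 and sets PE (plus ES/B
 * if PM is unmasked); unlike FIST above there is no rounding add, no
 * round-to-even logic and no C1 reporting on this path.
 */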
3905#define EMIT_FISTT(a_cBits, a_cBitsIn, a_iType, a_iTypeMin, a_iTypeMax, a_iTypeIndefinite, a_Suffix, a_fIntelVersion) \
3906IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_fistt_r80_to_i,a_cBits,a_Suffix),(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, \
3907 a_iType *piDst, PCRTFLOAT80U pr80Val)) \
3908{ \
3909 uint16_t const fFcw = pFpuState->FCW; \
3910 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); \
3911 bool const fSignIn = pr80Val->s.fSign; \
3912 \
3913 /* \
3914 * Deal with normal numbers first. \
3915 */ \
3916 if (RTFLOAT80U_IS_NORMAL(pr80Val)) \
3917 { \
3918 uint64_t uMantissa = pr80Val->s.uMantissa; \
3919 int32_t iExponent = (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS; \
3920 \
3921 if ((uint32_t)iExponent <= a_cBitsIn - 2) \
3922 { \
3923 unsigned const cShiftOff = 63 - iExponent; \
3924 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1; \
3925 uint64_t const fRoundedOff = uMantissa & fRoundingOffMask; \
3926 uMantissa >>= cShiftOff; \
3927 /*Assert(!(uMantissa & RT_BIT_64(a_cBits - 1)));*/ \
3928 if (!fSignIn) \
3929 *piDst = (a_iType)uMantissa; \
3930 else \
3931 *piDst = -(a_iType)uMantissa; \
3932 \
3933 if (fRoundedOff) \
3934 { \
3935 fFsw |= X86_FSW_PE; \
3936 if (!(fFcw & X86_FCW_PM)) \
3937 fFsw |= X86_FSW_ES | X86_FSW_B; \
3938 } \
3939 } \
3940 /* \
3941 * Tiny sub-zero numbers. \
3942 */ \
3943 else if (iExponent < 0) \
3944 { \
3945 *piDst = 0; \
3946 fFsw |= X86_FSW_PE; \
3947 if (!(fFcw & X86_FCW_PM)) \
3948 fFsw |= X86_FSW_ES | X86_FSW_B; \
3949 } \
3950 /* \
3951 * Special MIN case. \
3952 */ \
3953 else if ( fSignIn && iExponent == a_cBits - 1 \
3954 && (a_cBits < 64 \
3955 ? uMantissa < (RT_BIT_64(63) | RT_BIT_64(65 - a_cBits)) \
3956 : uMantissa == RT_BIT_64(63)) ) \
3957 { \
3958 *piDst = a_iTypeMin; \
3959 if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
3960 { \
3961 fFsw |= X86_FSW_PE; \
3962 if (!(fFcw & X86_FCW_PM)) \
3963 fFsw |= X86_FSW_ES | X86_FSW_B; \
3964 } \
3965 } \
3966 /* \
3967 * Figure this weirdness. \
3968 */ \
3969 else if (a_cBits == 16 && fSignIn && iExponent == 31 && uMantissa < UINT64_C(0x8000100000000000) ) \
3970 { \
3971 *piDst = 0; \
3972 if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
3973 { \
3974 fFsw |= X86_FSW_PE; \
3975 if (!(fFcw & X86_FCW_PM)) \
3976 fFsw |= X86_FSW_ES | X86_FSW_B; \
3977 } \
3978 } \
3979 /* \
3980 * Too large/small number outside the target integer range. \
3981 */ \
3982 else \
3983 { \
3984 fFsw |= X86_FSW_IE; \
3985 if (fFcw & X86_FCW_IM) \
3986 *piDst = a_iTypeIndefinite; \
3987 else \
3988 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
3989 } \
3990 } \
3991 /* \
3992 * Map both +0 and -0 to integer zero (signless/+). \
3993 */ \
3994 else if (RTFLOAT80U_IS_ZERO(pr80Val)) \
3995 *piDst = 0; \
3996 /* \
3997 * Denormals are just really tiny sub-zero numbers that are truncated to zero. \
3998 */ \
3999 else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) || RTFLOAT80U_IS_DENORMAL(pr80Val)) \
4000 { \
4001 *piDst = 0; \
4002 fFsw |= X86_FSW_PE; \
4003 if (!(fFcw & X86_FCW_PM)) \
4004 fFsw |= X86_FSW_ES | X86_FSW_B; \
4005 } \
4006 /* \
4007 * All other special values are considered invalid arguments and result \
4008 * in an IE exception and indefinite value if masked. \
4009 */ \
4010 else \
4011 { \
4012 fFsw |= X86_FSW_IE; \
4013 if (fFcw & X86_FCW_IM) \
4014 *piDst = a_iTypeIndefinite; \
4015 else \
4016 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
4017 } \
4018 *pu16FSW = fFsw; \
4019}
4020#if defined(IEM_WITHOUT_ASSEMBLY)
4021EMIT_FISTT(64, 64, int64_t, INT64_MIN, INT64_MAX, X86_FPU_INT64_INDEFINITE, RT_NOTHING, 1)
4022EMIT_FISTT(32, 32, int32_t, INT32_MIN, INT32_MAX, X86_FPU_INT32_INDEFINITE, RT_NOTHING, 1)
4023EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, RT_NOTHING, 1)
4024#endif
4025EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, _intel, 1)
4026EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, _amd, 0)
4027
4028
4029#if defined(IEM_WITHOUT_ASSEMBLY)
4030
4031IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_d80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
4032 PRTPBCD80U pd80Dst, PCRTFLOAT80U pr80Src))
4033{
4034 /*static RTPBCD80U const s_ad80MaxMin[2] = { RTPBCD80U_INIT_MAX(), RTPBCD80U_INIT_MIN() };*/
4035 static RTPBCD80U const s_ad80Zeros[2] = { RTPBCD80U_INIT_ZERO(0), RTPBCD80U_INIT_ZERO(1) };
4036 static RTPBCD80U const s_ad80One[2] = { RTPBCD80U_INIT_C(0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,1),
4037 RTPBCD80U_INIT_C(1, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,1) };
4038 static RTPBCD80U const s_d80Indefinite = RTPBCD80U_INIT_INDEFINITE();
4039
4040 uint16_t const fFcw = pFpuState->FCW;
4041 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
4042 bool const fSignIn = pr80Src->s.fSign;
4043
4044 /*
4045 * Deal with normal numbers first.
4046 */
4047 if (RTFLOAT80U_IS_NORMAL(pr80Src))
4048 {
4049 uint64_t uMantissa = pr80Src->s.uMantissa;
4050 int32_t iExponent = (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS;
4051 if ( (uint32_t)iExponent <= 58
4052 || ((uint32_t)iExponent == 59 && uMantissa <= UINT64_C(0xde0b6b3a763fffff)) )
4053 {
4054 unsigned const cShiftOff = 63 - iExponent;
4055 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
4056 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
4057 ? RT_BIT_64(cShiftOff - 1)
4058 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
4059 ? fRoundingOffMask
4060 : 0;
4061 uint64_t fRoundedOff = uMantissa & fRoundingOffMask;
4062
4063 uMantissa >>= cShiftOff;
4064 uint64_t const uRounding = (fRoundedOff + uRoundingAdd) >> cShiftOff;
4065 uMantissa += uRounding;
4066 if (uMantissa <= (uint64_t)RTPBCD80U_MAX)
4067 {
4068 if (fRoundedOff)
4069 {
4070 if ((uMantissa & 1) && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST && fRoundedOff == uRoundingAdd)
4071 uMantissa &= ~(uint64_t)1; /* round to even number if equal distance between up/down. */
4072 else if (uRounding)
4073 fFsw |= X86_FSW_C1;
4074 fFsw |= X86_FSW_PE;
4075 if (!(fFcw & X86_FCW_PM))
4076 fFsw |= X86_FSW_ES | X86_FSW_B;
4077 }
4078
4079 pd80Dst->s.fSign = fSignIn;
4080 pd80Dst->s.uPad = 0;
4081 for (size_t iPair = 0; iPair < RT_ELEMENTS(pd80Dst->s.abPairs); iPair++)
4082 {
4083 unsigned const uDigits = uMantissa % 100;
4084 uMantissa /= 100;
4085 uint8_t const bLo = uDigits % 10;
4086 uint8_t const bHi = uDigits / 10;
4087 pd80Dst->s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(bHi, bLo);
4088 }
4089 }
4090 else
4091 {
4092 /* overflowed after rounding. */
4093 fFsw |= X86_FSW_IE;
4094 if (fFcw & X86_FCW_IM)
4095 *pd80Dst = s_d80Indefinite;
4096 else
4097 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
4098 }
4099 }
4100 /*
4101 * Tiny sub-zero numbers.
4102 */
4103 else if (iExponent < 0)
4104 {
4105 if (!fSignIn)
4106 {
4107 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP
4108 || (iExponent == -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST))
4109 {
4110 *pd80Dst = s_ad80One[fSignIn];
4111 fFsw |= X86_FSW_C1;
4112 }
4113 else
4114 *pd80Dst = s_ad80Zeros[fSignIn];
4115 }
4116 else
4117 {
4118 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP
4119 || (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_ZERO
4120 || (iExponent < -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST))
4121 *pd80Dst = s_ad80Zeros[fSignIn];
4122 else
4123 {
4124 *pd80Dst = s_ad80One[fSignIn];
4125 fFsw |= X86_FSW_C1;
4126 }
4127 }
4128 fFsw |= X86_FSW_PE;
4129 if (!(fFcw & X86_FCW_PM))
4130 fFsw |= X86_FSW_ES | X86_FSW_B;
4131 }
4132 /*
4133 * Too large/small number outside the target integer range.
4134 */
4135 else
4136 {
4137 fFsw |= X86_FSW_IE;
4138 if (fFcw & X86_FCW_IM)
4139 *pd80Dst = s_d80Indefinite;
4140 else
4141 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
4142 }
4143 }
4144 /*
4145 * Map both +0 and -0 to integer zero (signless/+).
4146 */
4147 else if (RTFLOAT80U_IS_ZERO(pr80Src))
4148 *pd80Dst = s_ad80Zeros[fSignIn];
4149 /*
4150 * Denormals are just really tiny sub-zero numbers that are either rounded
4151 * to zero, 1 or -1 depending on sign and rounding control.
4152 */
4153 else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src) || RTFLOAT80U_IS_DENORMAL(pr80Src))
4154 {
4155 if ((fFcw & X86_FCW_RC_MASK) != (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP))
4156 *pd80Dst = s_ad80Zeros[fSignIn];
4157 else
4158 {
4159 *pd80Dst = s_ad80One[fSignIn];
4160 fFsw |= X86_FSW_C1;
4161 }
4162 fFsw |= X86_FSW_PE;
4163 if (!(fFcw & X86_FCW_PM))
4164 fFsw |= X86_FSW_ES | X86_FSW_B;
4165 }
4166 /*
4167 * All other special values are considered invalid arguments and result
4168 * in an IE exception and indefinite value if masked.
4169 */
4170 else
4171 {
4172 fFsw |= X86_FSW_IE;
4173 if (fFcw & X86_FCW_IM)
4174 *pd80Dst = s_d80Indefinite;
4175 else
4176 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
4177 }
4178 *pu16FSW = fFsw;
4179}
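
/*
 * Informal usage sketch for the BCD store above (hypothetical test code, not
 * part of the build): +1234.0 packs two decimal digits per byte, least
 * significant pair first.
 */
#if 0
static void tstFstD80Example(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    RTFLOAT80U Val;
    Val.s.fSign     = 0;
    Val.s.uExponent = 10 + 16383;                   /* 1234 = 1.0011010010b * 2^10 */
    Val.s.uMantissa = UINT64_C(0x9a40000000000000); /* 1234 << 53 */
    RTPBCD80U d80Dst;
    uint16_t  fFsw;
    iemAImpl_fst_r80_to_d80(&State, &fFsw, &d80Dst, &Val);
    Assert(!d80Dst.s.fSign && d80Dst.s.abPairs[0] == 0x34 && d80Dst.s.abPairs[1] == 0x12);
}
#endif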
4180
4181
4182/*********************************************************************************************************************************
4183* x87 FPU Division Operations *
4184*********************************************************************************************************************************/
4185
4186IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4187 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
4188{
4189 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
4190 AssertReleaseFailed();
4191}
4192
4193
4194IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4195 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
4196{
4197 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
4198 AssertReleaseFailed();
4199}
4200
4201
4202IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4203 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4204{
4205 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4206 AssertReleaseFailed();
4207}
4208
4209
4210IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4211 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
4212{
4213 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
4214 AssertReleaseFailed();
4215}
4216
4217
4218IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4219 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
4220{
4221 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
4222 AssertReleaseFailed();
4223}
4224
4225
4226IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4227 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4228{
4229 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4230 AssertReleaseFailed();
4231}
4232
4233
4234IEM_DECL_IMPL_DEF(void, iemAImpl_fidiv_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4235 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
4236{
4237 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
4238 AssertReleaseFailed();
4239}
4240
4241
4242IEM_DECL_IMPL_DEF(void, iemAImpl_fidiv_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4243 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
4244{
4245 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
4246 AssertReleaseFailed();
4247}
4248
4249
4250IEM_DECL_IMPL_DEF(void, iemAImpl_fidivr_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4251 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
4252{
4253 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
4254 AssertReleaseFailed();
4255}
4256
4257
4258IEM_DECL_IMPL_DEF(void, iemAImpl_fidivr_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4259 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
4260{
4261 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
4262 AssertReleaseFailed();
4263}
4264
4265
4266IEM_DECL_IMPL_DEF(void, iemAImpl_fprem_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4267 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4268{
4269 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4270 AssertReleaseFailed();
4271}
4272
4273
4274IEM_DECL_IMPL_DEF(void, iemAImpl_fprem1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4275 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4276{
4277 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4278 AssertReleaseFailed();
4279}
4280
4281
4282/*********************************************************************************************************************************
4283* x87 FPU Multiplication Operations *
4284*********************************************************************************************************************************/
4285
4286IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4287 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
4288{
4289 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
4290 AssertReleaseFailed();
4291}
4292
4293
4294IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4295 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
4296{
4297 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
4298 AssertReleaseFailed();
4299}
4300
4301
4302IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4303 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4304{
4305 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4306 AssertReleaseFailed();
4307}
4308
4309
4310IEM_DECL_IMPL_DEF(void, iemAImpl_fimul_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4311 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
4312{
4313 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
4314 AssertReleaseFailed();
4315}
4316
4317
4318IEM_DECL_IMPL_DEF(void, iemAImpl_fimul_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4319 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
4320{
4321 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
4322 AssertReleaseFailed();
4323}
4324
4325
4326/*********************************************************************************************************************************
4327* x87 FPU Addition and Subtraction *
4328*********************************************************************************************************************************/
4329
4330IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4331 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
4332{
4333 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
4334 AssertReleaseFailed();
4335}
4336
4337
4338IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4339 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
4340{
4341 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
4342 AssertReleaseFailed();
4343}
4344
4345
4346IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4347 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4348{
4349 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4350 AssertReleaseFailed();
4351}
4352
4353
4354IEM_DECL_IMPL_DEF(void, iemAImpl_fiadd_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4355 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
4356{
4357 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
4358 AssertReleaseFailed();
4359}
4360
4361
4362IEM_DECL_IMPL_DEF(void, iemAImpl_fiadd_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4363 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
4364{
4365 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
4366 AssertReleaseFailed();
4367}
4368
4369
4370IEM_DECL_IMPL_DEF(void, iemAImpl_fisub_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4371 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
4372{
4373 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
4374 AssertReleaseFailed();
4375}
4376
4377
4378IEM_DECL_IMPL_DEF(void, iemAImpl_fisub_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4379 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
4380{
4381 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
4382 AssertReleaseFailed();
4383}
4384
4385
4386IEM_DECL_IMPL_DEF(void, iemAImpl_fisubr_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4387 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
4388{
4389 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
4390 AssertReleaseFailed();
4391}
4392
4393
4394IEM_DECL_IMPL_DEF(void, iemAImpl_fisubr_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4395 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
4396{
4397 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
4398 AssertReleaseFailed();
4399}
4400
4401
4402IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4403 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
4404{
4405 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
4406 AssertReleaseFailed();
4407}
4408
4409
4410IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4411 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
4412{
4413 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
4414 AssertReleaseFailed();
4415}
4416
4417
4418IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4419 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4420{
4421 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4422 AssertReleaseFailed();
4423}
4424
4425
4426IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4427 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
4428{
4429 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
4430 AssertReleaseFailed();
4431}
4432
4433
4434IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4435 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
4436{
4437 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
4438 AssertReleaseFailed();
4439}
4440
4441
4442IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4443 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4444{
4445 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4446 AssertReleaseFailed();
4447}
4448
4449
4450/*********************************************************************************************************************************
4451* x87 FPU Trigonometric Operations *
4452*********************************************************************************************************************************/
4453
4454
4455IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4456 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4457{
4458 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4459 AssertReleaseFailed();
4460}
4461
4462#endif /* IEM_WITHOUT_ASSEMBLY */
4463
4464IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4465 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4466{
4467 iemAImpl_fpatan_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4468}
4469
4470IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4471 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4472{
4473 iemAImpl_fpatan_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4474}
4475
4476
4477#if defined(IEM_WITHOUT_ASSEMBLY)
4478IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
4479{
4480 RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
4481 AssertReleaseFailed();
4482}
4483#endif /* IEM_WITHOUT_ASSEMBLY */
4484
4485IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
4486{
4487 iemAImpl_fptan_r80_r80(pFpuState, pFpuResTwo, pr80Val);
4488}
4489
4490IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
4491{
4492 iemAImpl_fptan_r80_r80(pFpuState, pFpuResTwo, pr80Val);
4493}
4494
4495
4496#ifdef IEM_WITHOUT_ASSEMBLY
4497IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4498{
4499 RT_NOREF(pFpuState, pFpuRes, pr80Val);
4500 AssertReleaseFailed();
4501}
4502#endif /* IEM_WITHOUT_ASSEMBLY */
4503
4504IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4505{
4506 iemAImpl_fsin_r80(pFpuState, pFpuRes, pr80Val);
4507}
4508
4509IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4510{
4511 iemAImpl_fsin_r80(pFpuState, pFpuRes, pr80Val);
4512}
4513
4514#ifdef IEM_WITHOUT_ASSEMBLY
4515IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
4516{
4517 RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
4518 AssertReleaseFailed();
4519}
4520#endif /* IEM_WITHOUT_ASSEMBLY */
4521
4522IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
4523{
4524 iemAImpl_fsincos_r80_r80(pFpuState, pFpuResTwo, pr80Val);
4525}
4526
4527IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
4528{
4529 iemAImpl_fsincos_r80_r80(pFpuState, pFpuResTwo, pr80Val);
4530}
4531
4532
4533#ifdef IEM_WITHOUT_ASSEMBLY
4534IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4535{
4536 RT_NOREF(pFpuState, pFpuRes, pr80Val);
4537 AssertReleaseFailed();
4538}
4539#endif /* IEM_WITHOUT_ASSEMBLY */
4540
4541IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4542{
4543 iemAImpl_fcos_r80(pFpuState, pFpuRes, pr80Val);
4544}
4545
4546IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4547{
4548 iemAImpl_fcos_r80(pFpuState, pFpuRes, pr80Val);
4549}
4550
4551#ifdef IEM_WITHOUT_ASSEMBLY
4552
4553
4554/*********************************************************************************************************************************
4555* x87 FPU Compare and Testing Operations *
4556*********************************************************************************************************************************/
4557
4558IEM_DECL_IMPL_DEF(void, iemAImpl_ftst_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
4559{
4560 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT);
4561
4562 if (RTFLOAT80U_IS_ZERO(pr80Val))
4563 fFsw |= X86_FSW_C3;
4564 else if (RTFLOAT80U_IS_NORMAL(pr80Val) || RTFLOAT80U_IS_INF(pr80Val))
4565 fFsw |= pr80Val->s.fSign ? X86_FSW_C0 : 0;
4566 else if (RTFLOAT80U_IS_DENORMAL(pr80Val) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val))
4567 {
4568 fFsw |= pr80Val->s.fSign ? X86_FSW_C0 | X86_FSW_DE : X86_FSW_DE;
4569 if (!(pFpuState->FCW & X86_FCW_DM))
4570 fFsw |= X86_FSW_ES | X86_FSW_B;
4571 }
4572 else
4573 {
4574 fFsw |= X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3 | X86_FSW_IE;
4575 if (!(pFpuState->FCW & X86_FCW_IM))
4576 fFsw |= X86_FSW_ES | X86_FSW_B;
4577 }
4578
4579 *pu16Fsw = fFsw;
4580}
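
/*
 * Informal summary of the FTST mapping above: zero (either sign) yields C3,
 * negative normals and infinities yield C0 while positive ones clear all
 * three, denormals compare the same way but also raise DE, and NaNs plus the
 * unsupported encodings yield C0|C2|C3 (unordered) together with IE.
 */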
4581
4582
4583IEM_DECL_IMPL_DEF(void, iemAImpl_fxam_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
4584{
4586 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT);
4587
4588 /* C1 = sign bit (always, even for empty registers according to Intel). */
4589 if (pr80Val->s.fSign)
4590 fFsw |= X86_FSW_C1;
4591
4592 /* Classify the value in C0, C2, C3. */
4593 if (!(pFpuState->FTW & RT_BIT_32(X86_FSW_TOP_GET(pFpuState->FSW))))
4594 fFsw |= X86_FSW_C0 | X86_FSW_C3; /* empty */
4595 else if (RTFLOAT80U_IS_NORMAL(pr80Val))
4596 fFsw |= X86_FSW_C2;
4597 else if (RTFLOAT80U_IS_ZERO(pr80Val))
4598 fFsw |= X86_FSW_C3;
4599 else if (RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(pr80Val))
4600 fFsw |= X86_FSW_C0;
4601 else if (RTFLOAT80U_IS_INF(pr80Val))
4602 fFsw |= X86_FSW_C0 | X86_FSW_C2;
4603 else if (RTFLOAT80U_IS_DENORMAL(pr80Val) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val))
4604 fFsw |= X86_FSW_C2 | X86_FSW_C3;
4605 /* whatever else: 0 */
4606
4607 *pu16Fsw = fFsw;
4608}
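
/*
 * Summary of the FXAM classification above (C1 = sign in all cases):
 *      C3 C2 C0
 *       0  0  0   unsupported (pseudo-NaN, pseudo-infinity, unnormal)
 *       0  0  1   NaN
 *       0  1  0   normal
 *       0  1  1   infinity
 *       1  0  0   zero
 *       1  0  1   empty register
 *       1  1  0   denormal (incl. pseudo-denormal)
 */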
4609
4610
4611IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r32,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
4612 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
4613{
4614 RT_NOREF(pFpuState, pFSW, pr80Val1, pr32Val2);
4615 AssertReleaseFailed();
4616}
4617
4618
4619IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r64,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
4620 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
4621{
4622 RT_NOREF(pFpuState, pFSW, pr80Val1, pr64Val2);
4623 AssertReleaseFailed();
4624}
4625
4626
4627IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
4628 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4629{
4630 RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
4631 AssertReleaseFailed();
4632}
4633
4634
4635IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fcomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
4636 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4637{
4638 RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
4639 AssertReleaseFailed();
4640 return 0;
4641}
4642
4643
4644IEM_DECL_IMPL_DEF(void, iemAImpl_fucom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
4645 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4646{
4647 RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
4648 AssertReleaseFailed();
4649}
4650
4651
4652IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fucomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
4653 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4654{
4655 RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pr80Val2);
4656 AssertReleaseFailed();
4657 return 0;
4658}
4659
4660
4661IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i16,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
4662 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
4663{
4664 RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pi16Val2);
4665 AssertReleaseFailed();
4666}
4667
4668
4669IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i32,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
4670 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
4671{
4672 RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pi32Val2);
4673 AssertReleaseFailed();
4674}
4675
4676
4677/*********************************************************************************************************************************
4678* x87 FPU Other Operations *
4679*********************************************************************************************************************************/
4680
4681
4682IEM_DECL_IMPL_DEF(void, iemAImpl_frndint_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4683{
4684 RT_NOREF(pFpuState, pFpuRes, pr80Val);
4685 AssertReleaseFailed();
4686}
4687
4688
4689IEM_DECL_IMPL_DEF(void, iemAImpl_fscale_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4690 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4691{
4692 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4693 AssertReleaseFailed();
4694}
4695
4696
4697IEM_DECL_IMPL_DEF(void, iemAImpl_fsqrt_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4698{
4699 RT_NOREF(pFpuState, pFpuRes, pr80Val);
4700 AssertReleaseFailed();
4701}
4702
4703
4704IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4705{
4706 RT_NOREF(pFpuState, pFpuRes, pr80Val);
4707 AssertReleaseFailed();
4708}
4709
4710#endif /* IEM_WITHOUT_ASSEMBLY */
4711
4712IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4713{
4714 iemAImpl_f2xm1_r80(pFpuState, pFpuRes, pr80Val);
4715}
4716
4717IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4718{
4719 iemAImpl_f2xm1_r80(pFpuState, pFpuRes, pr80Val);
4720}
4721
4722#ifdef IEM_WITHOUT_ASSEMBLY
4723
4724IEM_DECL_IMPL_DEF(void, iemAImpl_fabs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4725{
4726 pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
4727 pFpuRes->r80Result = *pr80Val;
4728 pFpuRes->r80Result.s.fSign = 0;
4729}
4730
4731
4732IEM_DECL_IMPL_DEF(void, iemAImpl_fchs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
4733{
4734 pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
4735 pFpuRes->r80Result = *pr80Val;
4736 pFpuRes->r80Result.s.fSign = !pr80Val->s.fSign;
4737}
4738
4739
4740IEM_DECL_IMPL_DEF(void, iemAImpl_fxtract_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
4741{
4742 RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
4743 AssertReleaseFailed();
4744}
4745
4746
4747IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4748 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4749{
4750 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4751 AssertReleaseFailed();
4752}
4753
4754#endif /* IEM_WITHOUT_ASSEMBLY */
4755
4756IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4757 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4758{
4759 iemAImpl_fyl2x_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4760}
4761
4762IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4763 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4764{
4765 iemAImpl_fyl2x_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
4766}

#if defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                     PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}

#endif /* IEM_WITHOUT_ASSEMBLY */

IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                           PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fyl2xp1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                         PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fyl2xp1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}
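

/*
 * Illustrative sketch only (hypothetical helper): FYL2XP1 computes
 * ST(1) * log2(ST(0) + 1).  It exists because the naive log2(x + 1) loses
 * precision for tiny x; log1pl() preserves that accuracy.
 */
static long double iemFyl2xp1Sketch(long double lrdSt0, long double lrdSt1)
{
    /* log2(1 + x) == log1p(x) / ln(2) */
    return lrdSt1 * (log1pl(lrdSt0) / 0.6931471805599453094L);
}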


/*********************************************************************************************************************************
*   MMX, SSE & AVX                                                                                                               *
*********************************************************************************************************************************/

/* MOVSLDUP: duplicates the even (low) single-precision elements of the source. */
IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
{
    RT_NOREF(pFpuState);
    puDst->au32[0] = puSrc->au32[0];
    puDst->au32[1] = puSrc->au32[0];
    puDst->au32[2] = puSrc->au32[2];
    puDst->au32[3] = puSrc->au32[2];
}

#ifdef IEM_WITH_VEX

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
{
    pXState->x87.aXMM[iYRegDst].au32[0] = pXState->x87.aXMM[iYRegSrc].au32[0];
    pXState->x87.aXMM[iYRegDst].au32[1] = pXState->x87.aXMM[iYRegSrc].au32[0];
    pXState->x87.aXMM[iYRegDst].au32[2] = pXState->x87.aXMM[iYRegSrc].au32[2];
    pXState->x87.aXMM[iYRegDst].au32[3] = pXState->x87.aXMM[iYRegSrc].au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
}


IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
{
    pXState->x87.aXMM[iYRegDst].au32[0] = pSrc->au32[0];
    pXState->x87.aXMM[iYRegDst].au32[1] = pSrc->au32[0];
    pXState->x87.aXMM[iYRegDst].au32[2] = pSrc->au32[2];
    pXState->x87.aXMM[iYRegDst].au32[3] = pSrc->au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
}

#endif /* IEM_WITH_VEX */


/* MOVSHDUP: duplicates the odd (high) single-precision elements of the source. */
IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
{
    RT_NOREF(pFpuState);
    puDst->au32[0] = puSrc->au32[1];
    puDst->au32[1] = puSrc->au32[1];
    puDst->au32[2] = puSrc->au32[3];
    puDst->au32[3] = puSrc->au32[3];
}


/* MOVDDUP: duplicates the low double-precision element of the source. */
IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
{
    RT_NOREF(pFpuState);
    puDst->au64[0] = uSrc;
    puDst->au64[1] = uSrc;
}

#ifdef IEM_WITH_VEX

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
{
    pXState->x87.aXMM[iYRegDst].au64[0] = pXState->x87.aXMM[iYRegSrc].au64[0];
    pXState->x87.aXMM[iYRegDst].au64[1] = pXState->x87.aXMM[iYRegSrc].au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
}

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
{
    pXState->x87.aXMM[iYRegDst].au64[0] = pSrc->au64[0];
    pXState->x87.aXMM[iYRegDst].au64[1] = pSrc->au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
}

#endif /* IEM_WITH_VEX */

#ifdef IEM_WITHOUT_ASSEMBLY

IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}
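

/*
 * Illustrative sketch only (hypothetical helper, not the VBox code): the
 * PCMPEQxx family compares element-wise and writes all-ones for equal
 * elements, all-zero otherwise.  Byte variant on the 64-bit MMX form:
 */
static void iemPcmpeqbU64Sketch(uint64_t *pu64Dst, uint64_t const *pu64Src)
{
    RTUINT64U uSrc1; uSrc1.u = *pu64Dst; /* the destination is also the first source */
    RTUINT64U uSrc2; uSrc2.u = *pu64Src;
    for (unsigned i = 0; i < RT_ELEMENTS(uSrc1.au8); i++)
        uSrc1.au8[i] = uSrc1.au8[i] == uSrc2.au8[i] ? 0xff : 0;
    *pu64Dst = uSrc1.u;
}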


IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}
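

/*
 * Illustrative sketch only (hypothetical helper): PXOR is a plain bitwise
 * exclusive-or of the two operands; the 128-bit form does the same on both
 * 64-bit halves.
 */
static void iemPxorU64Sketch(uint64_t *pu64Dst, uint64_t const *pu64Src)
{
    *pu64Dst ^= *pu64Src;
}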


IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u128,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu128Src);
    AssertReleaseFailed();
}
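

/*
 * Illustrative sketch only (hypothetical helper): PMOVMSKB gathers the most
 * significant bit of every source byte into a bit mask in the destination.
 */
static void iemPmovmskbU64Sketch(uint64_t *pu64Dst, uint64_t const *pu64Src)
{
    RTUINT64U uSrc; uSrc.u = *pu64Src;
    uint64_t  fMask = 0;
    for (unsigned i = 0; i < RT_ELEMENTS(uSrc.au8); i++)
        fMask |= (uint64_t)(uSrc.au8[i] >> 7) << i;
    *pu64Dst = fMask;
}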


/* Note: bEvil is the instruction's shuffle selector immediate (imm8). */
IEM_DECL_IMPL_DEF(void, iemAImpl_pshufw,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufhw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshuflw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufd,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}
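

/*
 * Illustrative sketch only (hypothetical helper): PSHUFW picks each of the
 * four destination words from the source word indexed by the corresponding
 * two-bit field of the immediate.
 */
static void iemPshufwSketch(uint64_t *pu64Dst, uint64_t const *pu64Src, uint8_t bEvil)
{
    RTUINT64U uSrc; uSrc.u = *pu64Src;
    RTUINT64U uDst;
    for (unsigned i = 0; i < RT_ELEMENTS(uDst.au16); i++)
        uDst.au16[i] = uSrc.au16[(bEvil >> (i * 2)) & 3];
    *pu64Dst = uDst.u;
}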

/* PUNPCKHxxx - interleave the high halves of the two sources. */

IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}
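

/*
 * Illustrative sketch only (hypothetical helper): the PUNPCKH* instructions
 * interleave the high halves of destination and source.  Byte/MMX variant:
 */
static void iemPunpckhbwU64Sketch(uint64_t *pu64Dst, uint64_t const *pu64Src)
{
    RTUINT64U uSrc1; uSrc1.u = *pu64Dst; /* first source == destination */
    RTUINT64U uSrc2; uSrc2.u = *pu64Src;
    RTUINT64U uDst;
    for (unsigned i = 0; i < 4; i++)
    {
        uDst.au8[i * 2]     = uSrc1.au8[i + 4];
        uDst.au8[i * 2 + 1] = uSrc2.au8[i + 4];
    }
    *pu64Dst = uDst.u;
}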

/* PUNPCKLxxx - interleave the low halves; note the half-width memory operands. */

IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}
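

/*
 * Illustrative sketch only (hypothetical helper): the PUNPCKL* instructions
 * interleave the low halves, which is why the second operand above is only
 * half the width of the destination.  Byte/MMX variant:
 */
static void iemPunpcklbwU64Sketch(uint64_t *pu64Dst, uint32_t const *pu32Src)
{
    RTUINT64U uSrc1; uSrc1.u = *pu64Dst; /* first source == destination */
    RTUINT32U uSrc2; uSrc2.u = *pu32Src;
    RTUINT64U uDst;
    for (unsigned i = 0; i < 4; i++)
    {
        uDst.au8[i * 2]     = uSrc1.au8[i];
        uDst.au8[i * 2 + 1] = uSrc2.au8[i];
    }
    *pu64Dst = uDst.u;
}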

#endif /* IEM_WITHOUT_ASSEMBLY */